In [11]:
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import KFold
from keras.models import Sequential
from keras.layers import Dense, LSTM, Bidirectional, Dropout
import numpy as np
from tensorflow import keras
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

In [12]:
# Đọc dữ liệu từ file CSV
df = pd.read_csv('dataC3.csv')

# Với cột cuối là nhãn
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Chuẩn hóa dữ liệu
scaler = StandardScaler()
X = scaler.fit_transform(X)

# One-hot encoding cho nhãn
encoder = OneHotEncoder(sparse_output=False)
y = encoder.fit_transform(y.reshape(-1, 1))

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
fold_scores = []

# Tạo thư mục để lưu các plot và mô hình
model_dir = 'models'
os.makedirs(model_dir, exist_ok=True)

# Danh sách để lưu tên file của các mô hình đã train
model_filenames = []

# Tạo DataFrame để lưu history của loss và accuracy
history_df = pd.DataFrame(columns=['fold', 'epoch', 'train_loss', 'test_loss', 'train_accuracy', 'test_accuracy'])

for fold_index, (train_index, test_index) in enumerate(kfold.split(X), 1):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Chia thành tập huấn luyện và tập kiểm tra
    time_steps = 10
    n_features = X.shape[1]

    # Chuẩn bị đầu vào và đầu ra cho mô hình Bi-LSTM
    def prepare_data(data, labels, time_steps):
        X = []
        y = []
        for i in range(len(data) - time_steps):
            X.append(data[i:i+time_steps])
            y.append(labels[i+time_steps])
        return np.array(X), np.array(y)
    X_train, y_train = prepare_data(X_train, y_train, time_steps)
    X_test, y_test = prepare_data(X_test, y_test, time_steps)

    # Xây dựng mô hình Bi-LSTM
    model = Sequential()
    model.add(Bidirectional(LSTM(units=16, activation='relu'), input_shape=(time_steps, n_features)))
    model.add(Dense(units=8, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(units=3, activation='softmax'))

    # Biên dịch mô hình
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Đặt callbacks để dừng sau 10 epochs
    early_stopping = EarlyStopping(monitor='val_accuracy', patience=10)

    # Huấn luyện mô hình và lưu history
    history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

    # Lưu mô hình
    model_filename = f"BiLSTM_Brainwave_{fold_index}_accuracy_{history.history['val_accuracy'][-1]*100:.2f}.h5"
    # model.save(os.path.join(model_dir, model_filename))
    model_filenames.append(model_filename)

    # Đánh giá mô hình trên tập kiểm tra
    _, accuracy = model.evaluate(X_test, y_test)
    fold_scores.append(accuracy)

    # Vẽ đồ thị loss và accuracy
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper right')
    plt.savefig(f"{model_dir}/{os.path.splitext(model_filename)[0]}_loss.png")
    plt.clf()

    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='lower right')
    plt.savefig(f"{model_dir}/{os.path.splitext(model_filename)[0]}_accuracy.png")
    plt.clf()

    # Lưu accuracy và loss của fold hiện tại vào DataFrame
    for epoch, (train_loss, test_loss, train_accuracy, test_accuracy) in enumerate(zip(
        history.history['loss'],
        history.history['val_loss'],
        history.history['accuracy'],
        history.history['val_accuracy']
    ), 1):
        history_df = pd.concat([history_df, pd.DataFrame({
            'fold': [fold_index],
            'epoch': [epoch],
            'train_loss': [train_loss],
            'test_loss': [test_loss],
            'train_accuracy': [train_accuracy],
            'test_accuracy': [test_accuracy]
        })], ignore_index=True)

    # Lưu thông tin của fold hiện tại vào file txt
    with open(f'{model_dir}/{os.path.splitext(model_filename)[0]}.txt', 'w') as file:
        file.write(f"Train Loss: {history.history['loss'][-1]}\n")
        file.write(f"Train Accuracy: {history.history['accuracy'][-1]}\n")
        file.write(f"Validation Loss: {history.history['val_loss'][-1]}\n")
        file.write(f"Validation Accuracy: {history.history['val_accuracy'][-1]}\n")

# In kết quả
for i, score in enumerate(fold_scores, 1):
    print(f"Fold {i}: Accuracy = {score}")

print("Average Accuracy:", np.mean(fold_scores))

# Lưu DataFrame vào file CSV
history_df.to_csv('history_bi_lstm.csv', index=False)


Epoch 1/100


  super().__init__(**kwargs)


[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.4217 - loss: 1.1125 - val_accuracy: 0.4888 - val_loss: 1.0653
Epoch 2/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6063 - loss: 0.8667 - val_accuracy: 0.5094 - val_loss: 1.2578
Epoch 3/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6998 - loss: 0.6764 - val_accuracy: 0.5103 - val_loss: 1.6800
Epoch 4/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7792 - loss: 0.5463 - val_accuracy: 0.5192 - val_loss: 2.0759
Epoch 5/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8155 - loss: 0.4783 - val_accuracy: 0.5468 - val_loss: 2.6171
Epoch 6/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8665 - loss: 0.3323 - val_accuracy: 0.5156 - val_loss: 3.6002
Epoch 7/100
[1m141/141[0m [32m━

  history_df = pd.concat([history_df, pd.DataFrame({


Epoch 1/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.4303 - loss: 1.0760 - val_accuracy: 0.5424 - val_loss: 0.9435
Epoch 2/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6817 - loss: 0.7395 - val_accuracy: 0.5361 - val_loss: 1.2942
Epoch 3/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7974 - loss: 0.5197 - val_accuracy: 0.5227 - val_loss: 1.7761
Epoch 4/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8314 - loss: 0.4110 - val_accuracy: 0.5326 - val_loss: 1.9055
Epoch 5/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9110 - loss: 0.2475 - val_accuracy: 0.5343 - val_loss: 2.2851
Epoch 6/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9217 - loss: 0.2127 - val_accuracy: 0.4951 - val_loss: 2.7614
Epoch 7/100
[1m141/14

<Figure size 640x480 with 0 Axes>