In [1]:
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import KFold
from keras.models import Sequential
from keras.layers import Dense, LSTM, Bidirectional, Dropout
import numpy as np
from tensorflow import keras
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
from tensorflow.keras import regularizers
from keras.optimizers import Adam

# Đọc dữ liệu từ file CSV
df = pd.read_csv('dataC3.csv')
# Với cột cuối là nhãn
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

In [2]:
# Chuẩn hóa dữ liệu
scaler = StandardScaler()
X = scaler.fit_transform(X)

# One-hot encoding cho nhãn
encoder = OneHotEncoder(sparse_output=False)
y = encoder.fit_transform(y.reshape(-1, 1))

# K-fold cross-validation
kfold = KFold(n_splits=5)
fold_scores = []

# Tạo thư mục để lưu các plot và mô hình
model_dir = 'models'
os.makedirs(model_dir, exist_ok=True)

# Danh sách để lưu tên file của các mô hình đã train
model_filenames = []

# Tạo DataFrame để lưu history của loss và accuracy
history_df = pd.DataFrame(columns=['fold', 'epoch', 'train_loss', 'test_loss', 'train_accuracy', 'test_accuracy'])


In [16]:
# # Chia thành tập huấn luyện và tập kiểm tra
# time_steps = 80
# n_features = X.shape[1]
# 
# # Chuẩn bị đầu vào và đầu ra cho mô hình Bi-LSTM
# def prepare_data(data, labels, time_steps):
#     X = []
#     y = []
#     for j in range(len(data)):
#         for i in range(0,data.shape[1] - time_steps,40):  # Duyệt theo các cột
#             X.append(data[j, i:i+time_steps])
#             y.append(labels[j])
#             # print(labels[j])
#     return np.array(X), np.array(y)
# X_,Y_ = prepare_data(X, y, time_steps)
# # print(y)
# X_,Y_,X_.shape,Y_.shape

(array([[ 0.22027051,  0.40022461,  0.16577296, ..., -0.12085386,
         -0.16462238, -0.10885405],
        [ 0.04403516, -0.05445208, -0.74253486, ...,  0.124518  ,
          0.62695137,  0.72130362],
        [ 0.07607795, -0.02197518, -0.16886677, ..., -0.02270511,
         -0.47155915, -0.36929568],
        ...,
        [-1.20563364, -1.23985918, -1.28433251, ...,  0.10815988,
          0.23924178,  0.24925318],
        [ 0.62080538,  0.36774771,  0.22951386, ...,  0.20630862,
          0.40078744,  0.4445844 ],
        [ 0.68489096,  0.61132451,  0.61195926, ..., -0.61159758,
         -0.73003221, -0.74368051]]),
 array([[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        ...,
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.]]),
 (6768, 655),
 (6768, 3))

In [23]:
for fold_index, (train_index, test_index) in enumerate(kfold.split(X), 1):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Chia thành tập huấn luyện và tập kiểm tra
    time_steps = 655

    # Chuẩn bị đầu vào và đầu ra cho mô hình Bi-LSTM
    def prepare_data(data, labels, time_steps):
        X = []
        y = []
        for j in range(len(data)):
            for i in range(data.shape[1] - time_steps):
                X.append(data[j, i:i+time_steps].reshape(1, time_steps))  # Reshape input data
                y.append(labels[j])
        return np.array(X), np.array(y)
    
    X_train, y_train = prepare_data(X_train, y_train, time_steps)
    X_test, y_test = prepare_data(X_test, y_test, time_steps)

    # Xây dựng mô hình Bi-LSTM
    model = Sequential()
    model.add(Bidirectional(LSTM(units=16, activation='relu', kernel_regularizer=regularizers.l2(0.01)), input_shape=(1, time_steps)))
    # model.add(Bidirectional(LSTM(units=16, activation='relu'), input_shape=(time_steps, n_features)))
    model.add(Dropout(0.1))
    # model.add(Bidirectional(LSTM(units=16, activation='relu', kernel_regularizer=regularizers.l2(0.01)), input_shape=(1, time_steps)))
    model.add(Dense(units=3, activation='softmax'))

    #learning rate
    lr = 0.0001
    optimizer = Adam(learning_rate=lr)

    # Biên dịch mô hình
    model.compile(optimizer=optimizer , loss='categorical_crossentropy', metrics=['accuracy'])

    # Đặt callbacks để dừng sau 10 epochs
    early_stopping = EarlyStopping(monitor='val_accuracy', patience=100)

    # Huấn luyện mô hình và lưu history
    history = model.fit(X_train, y_train, epochs=2000, batch_size=512, validation_data=(X_test, y_test), callbacks=[early_stopping])

    # Lưu mô hình
    model_filename = f"BiLSTM_Brainwave_{fold_index}_accuracy_{history.history['val_accuracy'][-1]*100:.2f}.h5"
    model.save(os.path.join(model_dir, model_filename))
    model_filenames.append(model_filename)

    # Đánh giá mô hình trên tập kiểm tra
    _, accuracy = model.evaluate(X_test, y_test)
    fold_scores.append(accuracy)

    # Vẽ đồ thị loss và accuracy
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper right')
    plt.savefig(f"{model_dir}/{os.path.splitext(model_filename)[0]}_loss.png")
    plt.clf()

    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='lower right')
    plt.savefig(f"{model_dir}/{os.path.splitext(model_filename)[0]}_accuracy.png")
    plt.clf()

    # Lưu accuracy và loss của fold hiện tại vào DataFrame
    for epoch, (train_loss, test_loss, train_accuracy, test_accuracy) in enumerate(zip(
        history.history['loss'],
        history.history['val_loss'],
        history.history['accuracy'],
        history.history['val_accuracy']
    ), 1):
        history_df = pd.concat([history_df, pd.DataFrame({
            'fold': [fold_index],
            'epoch': [epoch],
            'train_loss': [train_loss],
            'test_loss': [test_loss],
            'train_accuracy': [train_accuracy],
            'test_accuracy': [test_accuracy]
        })], ignore_index=True)

    # Lưu thông tin của fold hiện tại vào file txt
    with open(f'{model_dir}/{os.path.splitext(model_filename)[0]}.txt', 'w') as file:
        file.write(f"Train Loss: {history.history['loss'][-1]}\n")
        file.write(f"Train Accuracy: {history.history['accuracy'][-1]}\n")
        file.write(f"Validation Loss: {history.history['val_loss'][-1]}\n")
        file.write(f"Validation Accuracy: {history.history['val_accuracy'][-1]}\n")
    break
# In kết quả
for i, score in enumerate(fold_scores, 1):
    print(f"Fold {i}: Accuracy = {score}")

print("Average Accuracy:", np.mean(fold_scores))

# Lưu DataFrame vào file CSV
history_df.to_csv('history_bi_lstm.csv', index=False)

Epoch 1/2000


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - accuracy: 0.3203 - loss: 3.4581 - val_accuracy: 0.3575 - val_loss: 3.3825
Epoch 2/2000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.3725 - loss: 3.3734 - val_accuracy: 0.3390 - val_loss: 3.3408
Epoch 3/2000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4131 - loss: 3.2869 - val_accuracy: 0.3235 - val_loss: 3.2967
Epoch 4/2000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4352 - loss: 3.2209 - val_accuracy: 0.3102 - val_loss: 3.2547
Epoch 5/2000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4528 - loss: 3.1555 - val_accuracy: 0.2925 - val_loss: 3.2118
Epoch 6/2000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.4599 - loss: 3.0942 - val_accuracy: 0.2858 - val_loss: 3.1703
Epoch 7/2000
[1m11/11[0m [32m━━━━━━━━



[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0820 - loss: 1.8760     
Fold 1: Accuracy = 0.0010667979950085282
Fold 2: Accuracy = 0.10495650768280029
Fold 3: Accuracy = 0.0
Fold 4: Accuracy = 0.17577548325061798
Fold 5: Accuracy = 0.4025110900402069
Fold 6: Accuracy = 0.2370753288269043
Fold 7: Accuracy = 0.1277695745229721
Fold 8: Accuracy = 0.008493352681398392
Fold 9: Accuracy = 0.053175777196884155
Fold 10: Accuracy = 0.08862629532814026
Average Accuracy: 0.11994502075249329


<Figure size 640x480 with 0 Axes>