# In [None]:
import os
import numpy as np
import scipy.io
from sklearn.model_selection import train_test_split
from keras.layers import Input, TimeDistributed, Conv2D, Flatten, Dense, Dropout, BatchNormalization, Activation, Reshape, GRU, LeakyReLU, Reshape, Permute, ConvLSTM2D, UpSampling2D, ZeroPadding2D, LSTM, RepeatVector
from keras.models import Model
from keras.utils import to_categorical
import tensorflow as tf

# === Step 1: Define functions for data preprocessing ===
def preprocess_eeg_data(file_path):
    """
    Load one segmented EEG recording from a .mat file and label its segments.

    The 'EEG_Seg' array is read from the file, its axes are reversed, and the
    values are min-max scaled to [0, 1] using the global minimum and maximum
    of the whole file. Every segment gets the same class label, derived from
    the integer embedded in the file name ('Data<index>Segmented...').

    Args:
        file_path (str): Path to the .mat file. Its base name must contain
            'Data<index>Segmented', where <index> is an integer.

    Returns:
        np.ndarray: Scaled EEG data with axes reordered (2, 1, 0); assuming
            the file stores (62, 250, segments) this is (segments, 250, 62).
            If every value in the file is identical, an all-zero array is
            returned instead of dividing by zero.
        np.ndarray: 1-D array with one label (0-3) per segment.
    """
    mat_data = scipy.io.loadmat(file_path)
    # Assuming 'EEG_Seg' is stored as (62, 250, segments); move segments first.
    eeg_data = np.transpose(mat_data['EEG_Seg'], (2, 1, 0))

    # Min-max scale over the whole file. Compute the statistics once and
    # guard against a constant signal, which would otherwise produce NaNs.
    data_min = np.min(eeg_data)
    data_range = np.max(eeg_data) - data_min
    if data_range == 0:
        eeg_data = np.zeros(eeg_data.shape, dtype=float)
    else:
        eeg_data = (eeg_data - data_min) / data_range

    # Determine label based on file index (4-class labels)
    file_index = int(os.path.basename(file_path).split('Data')[1].split('Segmented')[0])
    if file_index <= 12:
        label = 0  # Class 0: Healthy - Group 1
    elif file_index <= 24:
        label = 1  # Class 1: Impaired - Group 1
    elif file_index <= 36:
        label = 2  # Class 2: Healthy - Group 2
    else:
        label = 3  # Class 3: Impaired - Group 2

    # Every epoch in a file shares the file-level label.
    labels = np.full((eeg_data.shape[0],), label)
    return eeg_data, labels

# === Step 2: Process all files and prepare dataset ===
def prepare_dataset(data_folder):
    """
    Preprocess every .mat file in a folder and stack the results.

    Files are visited in sorted name order so that the order of the returned
    samples is reproducible (os.listdir itself gives no ordering guarantee).
    Entries that do not end in '.mat' are ignored.

    Args:
        data_folder (str): Path to the folder containing .mat files.

    Returns:
        np.ndarray: EEG data from all files, stacked along the first axis.
        np.ndarray: Corresponding labels, concatenated in the same order.

    Raises:
        ValueError: If the folder contains no .mat files.
    """
    eeg_data_list = []
    labels_list = []

    for file_name in sorted(os.listdir(data_folder)):
        if not file_name.endswith('.mat'):
            continue
        file_path = os.path.join(data_folder, file_name)
        eeg_data, labels = preprocess_eeg_data(file_path)
        eeg_data_list.append(eeg_data)
        labels_list.append(labels)

    # Fail with a clear message instead of numpy's generic
    # "need at least one array to concatenate".
    if not eeg_data_list:
        raise ValueError(f"No .mat files found in {data_folder!r}")

    eeg_data_combined = np.vstack(eeg_data_list)
    labels_combined = np.concatenate(labels_list)
    return eeg_data_combined, labels_combined

# === Step 3: Hybrid LSTM Model ===
def hybrid_LSTM(depth=2, conv_size=16, dense_size=512, input_dim=(250, 62, 1), dropoutRate=0.2, num_classes=4):
    """
    Build a classifier made of a CNN encoder followed by an LSTM.

    The encoder is `depth` blocks of three Conv2D -> BatchNormalization ->
    LeakyReLU -> Dropout stages. The first stage of each block uses stride 2
    and twice the block's filter count; the other two use the block's filter
    count with stride 1. The filter count doubles after every block. The
    encoder output is flattened and fed to the LSTM as a sequence of length
    one, and a softmax Dense layer produces the class probabilities.

    Args:
        depth (int): number of CNN blocks
        conv_size (int): filter count of the first block
        dense_size (int): number of LSTM units (size of the latent vector)
        input_dim (tuple): shape of a single input sample
        dropoutRate (float): rate for the Dropout layers and the LSTM's
            recurrent dropout
        num_classes (int): number of output classes
    Return:
        keras model (not compiled)
    """
    model_input = Input(shape=input_dim, name='input')
    X = model_input
    temp_filter = conv_size

    # CNN Encoder
    for i in range(depth):
        for j in range(3):
            # Only the first stage of a block downsamples and widens.
            if j == 0:
                n_filters, step = 2 * temp_filter, (2, 2)
            else:
                n_filters, step = temp_filter, (1, 1)
            stage = (
                Conv2D(n_filters, (3, 3), padding='same', strides=step, name=f'encoder_{i}_{j}_conv2D'),
                BatchNormalization(name=f'encoder_{i}_{j}_BN'),
                LeakyReLU(alpha=0.1, name=f'encoder_{i}_{j}_relu'),
                Dropout(dropoutRate, name=f'encoder_{i}_{j}_drop'),
            )
            for layer in stage:
                X = layer(X)
        temp_filter *= 2

    # Collapse the feature maps into one vector and present it to the LSTM
    # as a single time step.
    flat = Flatten()(X)
    single_step = Reshape((1, -1))(flat)
    latent = LSTM(dense_size, recurrent_dropout=dropoutRate, return_sequences=False)(single_step)

    # Classification output
    output = Dense(num_classes, activation='softmax', name='classifier')(latent)
    return Model(inputs=model_input, outputs=output)

# === Step 4: Main Execution ===
if __name__ == "__main__":
    # Folder holding the segmented .mat recordings (replace with your path).
    data_folder = "./Data"

    # Load, normalise and label every recording in the folder.
    print("Preprocessing EEG data...")
    eeg_data, labels = prepare_dataset(data_folder)
    print(f"Data preprocessing complete! Total samples: {eeg_data.shape[0]}")

    # Conv2D expects a trailing channel axis.
    eeg_data = np.expand_dims(eeg_data, axis=-1)

    # One-hot encode the integer class labels.
    labels = to_categorical(labels, num_classes=4)

    # Hold out 20% of the samples; the fixed seed keeps the split repeatable.
    train_data, test_data, train_labels, test_labels = train_test_split(
        eeg_data,
        labels,
        test_size=0.2,
        random_state=42,
    )
    print(f"Training set size: {train_data.shape[0]}, Testing set size: {test_data.shape[0]}")

    # Build the network and attach optimizer, loss and metric.
    model = hybrid_LSTM(input_dim=(250, 62, 1), num_classes=4)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    # Fit the model; note the held-out split doubles as the validation data.
    print("Training the Hybrid LSTM model...")
    history = model.fit(
        train_data,
        train_labels,
        epochs=20,
        batch_size=32,
        validation_data=(test_data, test_labels),
    )

    # Report accuracy on the held-out split.
    test_loss, test_accuracy = model.evaluate(test_data, test_labels)
    print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
