In [None]:
from glob import glob
import os
import mne
import numpy as np
import pandas as pd
from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras

# Define data loading and preprocessing functions
def read_data(file_path):
    """Load one EDF recording and cut it into fixed-length epochs.

    The recording is loaded into memory, re-referenced with MNE's default
    ``set_eeg_reference()`` settings, band-pass filtered with ``l_freq=1`` and
    ``h_freq=45``, and then segmented into non-overlapping epochs of
    ``duration=25``.

    Args:
        file_path: Path to the ``.edf`` file to read.

    Returns:
        The epoch data as returned by ``Epochs.get_data()``, laid out as
        (trials, channels, length).
    """
    raw = mne.io.read_raw_edf(file_path, preload=True)
    # Both calls below modify the Raw object in place.
    raw.set_eeg_reference()
    raw.filter(l_freq=1, h_freq=45)
    segments = mne.make_fixed_length_epochs(raw, duration=25, overlap=0)
    return segments.get_data()  # trials, channels, length

# Sort the matches: glob returns files in arbitrary order, and the subject
# (group) ids assigned below depend on that order.
all_file_path = sorted(glob('sampledb/*.edf'))
if not all_file_path:
    raise FileNotFoundError("No .edf recordings found under 'sampledb/'")

# Classify recordings by file name. os.path.basename is used instead of
# splitting on '\\' so the lookup works with both Windows and POSIX separators.
healthy_file_path = [i for i in all_file_path if 'h' in os.path.basename(i)]
patient_file_path = [i for i in all_file_path if 's' in os.path.basename(i)]

# One array of epochs per subject, each shaped (trials, channels, length).
control_epochs_array = [read_data(subject) for subject in healthy_file_path]
patients_epochs_array = [read_data(subject) for subject in patient_file_path]

# One label per epoch: 0 for control recordings, 1 for patient recordings.
control_epochs_labels = [len(i) * [0] for i in control_epochs_array]
patients_epochs_labels = [len(i) * [1] for i in patients_epochs_array]

# Combine and preprocess the data
data_list = control_epochs_array + patients_epochs_array
label_list = control_epochs_labels + patients_epochs_labels
# Tag every epoch with the index of the recording it came from, so that later
# splits can keep all epochs of one subject together.
groups_list = [[i] * len(j) for i, j in enumerate(data_list)]

data_array = np.vstack(data_list)
label_array = np.hstack(label_list)
group_array = np.hstack(groups_list)
# (trials, channels, length) -> (trials, length, channels)
data_array = np.moveaxis(data_array, 1, 2)

# Define an ANN model
def ann_model(input_shape=(6250, 7)):
    """Build and compile a fully connected binary classifier.

    The network flattens each sample and passes it through two
    Dense -> BatchNormalization -> Dropout(0.5) stages (128 and 64 units)
    before a single sigmoid output unit.

    Args:
        input_shape: Shape of one sample, excluding the batch axis. Defaults
            to ``(6250, 7)``, the value that used to be hard-coded here.
            NOTE(review): this must equal ``data_array.shape[1:]``; confirm
            the default matches the recordings actually being loaded.

    Returns:
        A ``keras.Sequential`` model compiled with the Adam optimizer,
        binary cross-entropy loss and an accuracy metric.
    """
    model = keras.Sequential([
        keras.layers.Flatten(input_shape=input_shape),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(1, activation='sigmoid'),
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Split the data into train and test sets.
# The split is made over subjects (groups) rather than individual epochs:
# splitting epochs directly puts epochs of the same recording on both sides,
# which leaks subject-specific information into the test set.
subject_ids = np.unique(group_array)
# Every epoch of a recording carries the same label, so the first one suffices.
subject_labels = np.array([label_array[group_array == sid][0] for sid in subject_ids])
train_subjects, test_subjects = train_test_split(
    subject_ids, test_size=0.2, random_state=42, stratify=subject_labels)

train_mask = np.isin(group_array, train_subjects)
X_train, X_test = data_array[train_mask], data_array[~train_mask]
y_train, y_test = label_array[train_mask], label_array[~train_mask]
group_train, group_test = group_array[train_mask], group_array[~train_mask]


def _fit_scaler(samples):
    """Fit a StandardScaler on `samples`, one feature per entry of the last axis."""
    return StandardScaler().fit(samples.reshape(-1, samples.shape[-1]))


def _apply_scaler(fitted_scaler, samples):
    """Transform `samples` with `fitted_scaler` and restore the original shape."""
    flat = samples.reshape(-1, samples.shape[-1])
    return fitted_scaler.transform(flat).reshape(samples.shape)


# Train and evaluate the model using GroupKFold
gkf = GroupKFold(n_splits=5)
accuracy = []

for train_index, val_index in gkf.split(X_train, y_train, groups=group_train):
    train_features, train_labels = X_train[train_index], y_train[train_index]
    val_features, val_labels = X_train[val_index], y_train[val_index]

    # Fit the scaler on this fold's training part only, so the validation
    # subjects do not influence the normalization statistics.
    fold_scaler = _fit_scaler(train_features)
    train_features = _apply_scaler(fold_scaler, train_features)
    val_features = _apply_scaler(fold_scaler, val_features)

    fold_model = ann_model()
    fold_model.fit(train_features, train_labels, epochs=50, batch_size=10,
                   validation_data=(val_features, val_labels), verbose=0)
    val_preds = (fold_model.predict(val_features) > 0.5).astype(int)
    # predict() returns one column per output unit; flatten to match the labels.
    accuracy.append(accuracy_score(val_labels, val_preds.ravel()))

mean_accuracy = np.mean(accuracy)
print(f'Mean Validation Accuracy: {mean_accuracy}')

# Evaluate the model on the test set.
# A fresh model is fit on the whole training set; scoring the model left over
# from the last CV fold would evaluate a network that saw only part of it.
scaler = _fit_scaler(X_train)
X_train = _apply_scaler(scaler, X_train)
X_test = _apply_scaler(scaler, X_test)

model = ann_model()
model.fit(X_train, y_train, epochs=50, batch_size=10, verbose=0)

test_preds = model.predict(X_test)
test_preds = (test_preds > 0.5).astype(int)
test_accuracy = accuracy_score(y_test, test_preds.ravel())
print(f'Test Set Accuracy: {test_accuracy}')
