In [3]:
from glob import glob
import scipy.io
from scipy.io import loadmat
import numpy as np
import mne

In [4]:
ADHD_data_path='Dataset_proc/ADHD_full'
Control_data_path='Dataset_proc/Control_full'

In [5]:
def convertmat2mne(data):
    ch_names =  ['Fp1', 'Fp2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4', 'O1', 'O2','F7','F8','T7','T8','P7','P8','Fz','Cz','Pz']
    ch_types = ['eeg'] * 19
    sampling_freq=128
    info = mne.create_info(ch_names, ch_types=ch_types, sfreq=sampling_freq)
    #info.set_montage('standard_1020')
    data=mne.io.RawArray(data, info)
    data.set_eeg_reference()
    data.filter(l_freq=1,h_freq=35)
    epochs=mne.make_fixed_length_epochs(data,duration=3.5,overlap=0)
    return epochs.get_data()

In [6]:
%%capture
ADHD_subject=[]
for AD in glob(ADHD_data_path + '/*.mat'):
    try:
        mat_data = scipy.io.loadmat(AD, squeeze_me=True, struct_as_record=False)
        for key in mat_data.keys():
            if not key.startswith('__'):
                data = mat_data[key]
                data=np.transpose(data)
                data=convertmat2mne(data)
                ADHD_subject.append(data)
    except Exception as e:
        print(f"Error loading file {AD}: {e}")
        

In [7]:
%%capture
Control_subject=[]
for CO in glob(Control_data_path + '/*.mat'):
    try:
        mat_data = scipy.io.loadmat(CO, squeeze_me=True, struct_as_record=False)
        for key in mat_data.keys():
            if not key.startswith('__'):
                data = mat_data[key]
                data=np.transpose(data)
                data=convertmat2mne(data)
                Control_subject.append(data)
    except Exception as e:
        print(f"Error loading file {CO}: {e}")
        

In [8]:
len(ADHD_subject),len(Control_subject)

(61, 60)

In [9]:
ADHD_epochs_labels=[len(i)*[1] for i in ADHD_subject]
Control_epochs_labels=[len(i)*[0] for i in Control_subject]
print(len(ADHD_epochs_labels),len(Control_epochs_labels))

61 60


In [10]:
data_list=ADHD_subject+Control_subject
label_list=ADHD_epochs_labels+Control_epochs_labels

groups_list=[[i]*len(j) for i, j in enumerate(data_list)]
'''
Subject1:
-epoch0
-epoch1
-epoch2
Subject2:
-epoch0
-epoch1
-epoch2
_________
Train
Subject1:
-epoch0
                                            Testيبقى تبع ال Subjectتاني لنفس ال epoch و Tain معين يبقى تبع Subjectل epoch يحصل ان Split ممكن و انا بعمل
                         و دي حاجة انا مش عاوزها        
                                        
                                                
Test
Subject1:
-epoch1
_________
Train
Subject1:
-epoch0
-epoch1
-epoch2                               

                                                          Test او Train تبقى تبع  Subjectلنفس ال epochsبحيث ان كل ال   Groupsل  DATAف انا قسمت ال 
Test
Subject2:
-epoch0
-epoch1
-epoch2


'''
print(len(data_list),len(label_list),len(groups_list))

121 121 121


In [11]:
from sklearn.model_selection import GroupKFold,LeaveOneGroupOut
from sklearn.preprocessing import StandardScaler
gkf=GroupKFold()
from sklearn.base import TransformerMixin,BaseEstimator
from sklearn.preprocessing import StandardScaler
#https://stackoverflow.com/questions/50125844/how-to-standard-scale-a-3d-matrix
class StandardScaler3D(BaseEstimator,TransformerMixin):
    #batch, sequence, channels
    def __init__(self):
        self.scaler = StandardScaler()

    def fit(self,X,y=None):
        self.scaler.fit(X.reshape(-1, X.shape[2]))
        return self

    def transform(self,X):
        return self.scaler.transform(X.reshape( -1,X.shape[2])).reshape(X.shape)

In [12]:
import numpy as np
data_array=np.concatenate(data_list)
label_array=np.concatenate(label_list)
group_array=np.concatenate(groups_list)
data_array=np.moveaxis(data_array,1,2)

print(data_array.shape,label_array.shape,group_array.shape)

(4770, 448, 19) (4770,) (4770,)


In [13]:
accuracy=[]
#for train_index, val_index in gkf.split(data_array, label_array, groups=group_array):
    #train_features,train_labels=data_array[train_index],label_array[train_index]
    #val_features,val_labels=data_array[val_index],label_array[val_index]
    #scaler=StandardScaler3D()
    #train_features=scaler.fit_transform(train_features)
    #val_features=scaler.transform(val_features)
    #break
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True, random_state=42)
for fold, (train_index, val_index) in enumerate(kf.split(data_array, label_array, groups=group_array)):
    train_features, val_features = data_array[train_index], data_array[val_index]
    train_labels, val_labels = label_array[train_index], label_array[val_index]
    scaler=StandardScaler3D()
    train_features=scaler.fit_transform(train_features)
    val_features=scaler.transform(val_features)
    break
    '''
# Save X_train
np.save('train_features.npy', train_features)
# Save y_train
np.save('train_labels.npy', train_labels)
# Save X_test
np.save('val_features.npy', val_features)
# Save y_test
np.save('val_labels.npy', val_labels)    
'''

In [14]:
# In subsequent runs
train_features = np.load('train_features.npy')
train_labels = np.load('train_labels.npy')
val_features = np.load('val_features.npy')
val_labels = np.load('val_labels.npy')

In [15]:
train_labels.shape

(3816,)

In [16]:
from tensorflow.keras.layers import Input,Dense,concatenate,Flatten,GRU,Conv1D
from tensorflow.keras.models import Model
#resource:https://github.com/dll-ncai/eeg_pre-diagnostic_screening/blob/master/code/chrononet/chrono.py




In [27]:
from tensorflow.keras.layers import Conv1D,BatchNormalization,LeakyReLU,MaxPool1D,\
GlobalAveragePooling1D,Dense,Dropout,AveragePooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.backend import clear_session
def cnnmodel():
    clear_session()
    model = Sequential()
    
    # Block 1
    model.add(Conv1D(filters=64, kernel_size=3, strides=1, padding='same', activation='relu', input_shape=(448, 19)))
    model.add(BatchNormalization())
    model.add(Conv1D(filters=64, kernel_size=3, strides=1, padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPool1D(pool_size=2, strides=2))

    # Block 2
    model.add(Conv1D(filters=128, kernel_size=3, strides=1, padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(Conv1D(filters=128, kernel_size=3, strides=1, padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPool1D(pool_size=2, strides=2))



    model.add(GlobalAveragePooling1D())
    model.add(Dense(1, activation='sigmoid'))
    
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

model=cnnmodel()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 448, 64)           3712      
                                                                 
 batch_normalization (Batch  (None, 448, 64)           256       
 Normalization)                                                  
                                                                 
 conv1d_1 (Conv1D)           (None, 448, 64)           12352     
                                                                 
 batch_normalization_1 (Bat  (None, 448, 64)           256       
 chNormalization)                                                
                                                                 
 max_pooling1d (MaxPooling1  (None, 224, 64)           0         
 D)                                                              
                                                        

In [28]:
model=cnnmodel()
model.fit(train_features,train_labels,epochs=25,batch_size=40,validation_data=(val_features,val_labels))

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.src.callbacks.History at 0x21680ab7750>

In [29]:
model.evaluate(val_features,val_labels)



[0.12549765408039093, 0.9706498980522156]

In [30]:
predictions = model.predict(val_features)



In [31]:
from sklearn.metrics import accuracy_score

# Convert the list of predictions to a numpy array
predictions_array = np.squeeze(np.array(predictions))

# Round the predictions to get the predicted class (0 or 1)
rounded_predictions = np.round(predictions_array).astype(int)

# Calculate accuracy
accuracy = accuracy_score(val_labels, rounded_predictions)
print(f"Accuracy on val_labely: {accuracy * 100:.2f}%")

Accuracy on val_labely: 97.06%


In [32]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(val_labels, rounded_predictions)
print(cm)
accuracy_score(val_labels, rounded_predictions)

[[391  21]
 [  7 535]]


0.9706498951781971