# Time series classification 

## Imports

In [4]:
import os       # using operating system dependent functionality (folders)
import pandas as pd # data analysis and manipulation
import numpy as np    # numerical computing (manipulating and performing operations on arrays of data)
import copy     # Can Copy and Deepcopy files so original file is untouched.
import matplotlib.pyplot as plt
import mne

import sys
sys.path.insert(0, '../eegyolk') # path to helper functions
import helper_functions as hf # library useful for eeg and erp data cleaning
import epod_helper
import initialization_functions

from tensorflow.keras.layers import Conv1D,BatchNormalization,LeakyReLU,MaxPool1D,\
GlobalAveragePooling1D,Dense,Dropout,AveragePooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.backend import clear_session

from sklearn.model_selection import GroupKFold,LeaveOneGroupOut
from sklearn.preprocessing import StandardScaler

In [5]:
# Load the per-recording metadata table (',' is already pandas' default separator).
metadata = pd.read_csv('metadata.csv')

In [6]:
# Preview the first five rows to check which columns were loaded.
metadata.head(n=5)

Unnamed: 0,eeg_file,ParticipantID,test,sex,age_months,dyslexic_parent,Group_AccToParents,path_eeg,path_epoch,path_eventmarkers,epoch_file
0,105a,105,a,f,17,f,At risk,../../volume-ceph/ePodium_projectfolder/dataset,../../volume-ceph/nadine_storage/processed_epochs,../../volume-ceph/ePodium_projectfolder/events,105a_epo.fif
1,107a,107,a,f,16,m,At risk,../../volume-ceph/ePodium_projectfolder/dataset,../../volume-ceph/nadine_storage/processed_epochs,../../volume-ceph/ePodium_projectfolder/events,107a_epo.fif
2,106a,106,a,m,19,f,At risk,../../volume-ceph/ePodium_projectfolder/dataset,../../volume-ceph/nadine_storage/processed_epochs,../../volume-ceph/ePodium_projectfolder/events,106a_epo.fif
3,109a,109,a,m,21,m,At risk,../../volume-ceph/ePodium_projectfolder/dataset,../../volume-ceph/nadine_storage/processed_epochs,../../volume-ceph/ePodium_projectfolder/events,109a_epo.fif
4,110a,110,a,m,17,m,At risk,../../volume-ceph/ePodium_projectfolder/dataset,../../volume-ceph/nadine_storage/processed_epochs,../../volume-ceph/ePodium_projectfolder/events,110a_epo.fif


In [7]:
# Encode the group as a binary label: 1 for 'At risk', 0 for anything else.
# `isin(['At risk', 1])` also matches the already-encoded value 1, so running
# this cell a second time keeps the labels intact. The previous
# `== 'At risk'` comparison turned every label into 0 on a re-run.
metadata['Group_AccToParents'] = (
    metadata['Group_AccToParents'].isin(['At risk', 1]).astype(int)
)

# Get input data

In [8]:
# Split the metadata rows by the binary group label (0 -> control, 1 -> at risk).
group_label = metadata['Group_AccToParents']
control_files = metadata[group_label.eq(0)]
atrisk_files = metadata[group_label.eq(1)]

In [9]:
def read_filtered_data(metadata, to_array=False, verbose=False):
    """Read the epoch file referenced by every row of ``metadata``.

    Parameters
    ----------
    metadata : pandas.DataFrame
        One row per recording. The columns ``path_epoch`` (directory) and
        ``epoch_file`` (file name) are joined to locate each file.
    to_array : bool, default False
        If truthy, each entry is replaced by the result of calling
        ``get_data()`` on the object returned by ``mne.read_epochs``.
    verbose : bool, default False
        Forwarded unchanged to ``mne.read_epochs``. It does not affect the
        "Checking out file" line, which is printed for every row.

    Returns
    -------
    list
        One entry per row, in row order; empty when ``metadata`` has no rows.
    """
    epochs = []
    # Only the row contents are needed, not the row index.
    for _, file in metadata.iterrows():
        print(f"Checking out file: {file['epoch_file']}")
        path = os.path.join(file['path_epoch'], file['epoch_file'])
        epoch = mne.read_epochs(path, preload=False, verbose=verbose)
        if to_array:
            epoch = epoch.get_data()
        epochs.append(epoch)
    return epochs

In [10]:
# Load the control-group recordings, passing to_array=True.
# NOTE(review): this calls initialization_functions.read_filtered_data, not the
# read_filtered_data defined earlier in this notebook; confirm the two agree.
control_epochs = initialization_functions.read_filtered_data(control_files, to_array=True)

Checking out file: 117a_epo.fif
Loading data for 2435 events and 2049 original time points ...
Checking out file: 118a_epo.fif
Loading data for 2418 events and 2049 original time points ...
Checking out file: 119a_epo.fif
Loading data for 2325 events and 2049 original time points ...
Checking out file: 124a_epo.fif
Loading data for 2402 events and 2049 original time points ...
Checking out file: 127a_epo.fif
Loading data for 2367 events and 2049 original time points ...
Checking out file: 126a_epo.fif
Loading data for 2333 events and 2049 original time points ...
Checking out file: 131a_epo.fif
Loading data for 1628 events and 2049 original time points ...
Checking out file: 135a_epo.fif
Loading data for 2440 events and 2049 original time points ...
Checking out file: 133a_epo.fif
Loading data for 1628 events and 2049 original time points ...
Checking out file: 138a_epo.fif
Loading data for 1585 events and 2049 original time points ...
Checking out file: 139a_epo.fif
Loading data for 2

In [None]:
# Load the at-risk-group recordings, passing to_array=True.
# NOTE(review): this calls initialization_functions.read_filtered_data, not the
# read_filtered_data defined earlier in this notebook; confirm the two agree.
atrisk_epochs = initialization_functions.read_filtered_data(atrisk_files, to_array=True)

Checking out file: 105a_epo.fif
Loading data for 2425 events and 2049 original time points ...
Checking out file: 107a_epo.fif
Loading data for 2421 events and 2049 original time points ...
Checking out file: 106a_epo.fif
Loading data for 901 events and 2049 original time points ...
Checking out file: 109a_epo.fif
Loading data for 2385 events and 2049 original time points ...
Checking out file: 110a_epo.fif
Loading data for 2334 events and 2049 original time points ...
Checking out file: 112a_epo.fif
Loading data for 2395 events and 2049 original time points ...
Checking out file: 111a_epo.fif
Loading data for 2406 events and 2049 original time points ...
Checking out file: 114a_epo.fif
Loading data for 2114 events and 2049 original time points ...
Checking out file: 115a_epo.fif
Loading data for 2439 events and 2049 original time points ...
Checking out file: 116a_epo.fif
Loading data for 2158 events and 2049 original time points ...
Checking out file: 123a_epo.fif
Loading data for 18

In [1]:
# Group label of the first three rows of each metadata subset.
# NOTE(review): the recorded output of this cell is a NameError, and both
# names are reassigned by the next cell; this looks like a leftover.
control_labels = control_files['Group_AccToParents'].iloc[:3].tolist()
atrisk_labels = atrisk_files['Group_AccToParents'].iloc[:3].tolist()

NameError: name 'control_files' is not defined

In [None]:
# One label per epoch: for every recording, a list of 0s (control) or 1s
# (at risk) that is as long as that recording's entry in the epoch list.
control_labels = [[0] * len(recording) for recording in control_epochs]
atrisk_labels = [[1] * len(recording) for recording in atrisk_epochs]

In [None]:
# Controls first, then at-risk; label_list is built in the same order so the
# entries of both lists stay aligned.
# Assumes both operands are Python lists, so `+` concatenates. TODO confirm
# the return type of the helper that produced the epoch lists.
data_list = control_epochs+atrisk_epochs
label_list = control_labels+atrisk_labels

In [None]:
# Group ids for cross-validation: every epoch gets the position of its
# recording in data_list, so all epochs of one recording share a group id.
groups_list = [[rec_idx] * len(rec_epochs) for rec_idx, rec_epochs in enumerate(data_list)]

In [None]:
# Join all recordings along the first axis, then swap axes 1 and 2 so the
# layout matches the shape comment on the print below.
data_array = np.swapaxes(np.concatenate(data_list, axis=0), 1, 2)
# Flatten the per-recording label / group lists into one entry per epoch.
label_array = np.hstack(label_list)
group_array = np.hstack(groups_list)

print(data_array.shape,label_array.shape,group_array.shape) #number of segments, length, channels

In [None]:
#def input_ts_prep(epoch, standard_events, deviant_events): 
#    print('checkpoint')
#    std_evoked = epoch[standard_events].average() 
#    dev_evoked = epoch[deviant_events].average()
#
#    # calculate the mismatch response between standard and deviant evoked
#    evoked_diff = mne.combine_evoked([std_evoked, dev_evoked], weights=[1, -1])#.get_data() # mismatch for all channels per participant
#        
#  
#    return evoked_diff

In [None]:
# Event labels selected for the standard and deviant conditions; the trailing
# comments list the candidate labels for each.
# NOTE(review): these names are only referenced by the commented-out
# input_ts_prep helper and by the cells under "Garbage".
standard_events = ['GiepM_S'] # standards: 'GiepM_S','GiepS_S','GopM_S','GopS_S'
deviant_events = ['GiepM_D'] # deviants: 'GiepM_D','GiepS_D','GopM_D','GopS_D'

In [None]:
def cnnmodel(input_shape=(2049, 32), filters=5, kernel_size=3, dropout_rate=0.5):
    """Build and compile the 1-D convolutional binary classifier.

    The defaults reproduce the previously hard-coded architecture, so
    ``cnnmodel()`` builds the same network as before. The parameters allow
    the same layer stack to be built for other input sizes, e.g.
    ``cnnmodel(input_shape=data_array.shape[1:])``.

    Parameters
    ----------
    input_shape : tuple of int, default (2049, 32)
        Shape of one sample as passed to the first ``Conv1D`` layer
        (length, channels).
    filters : int, default 5
        Number of filters used by each of the five ``Conv1D`` layers.
    kernel_size : int, default 3
        Kernel size used by each of the five ``Conv1D`` layers.
    dropout_rate : float, default 0.5
        Rate used by both ``Dropout`` layers.

    Returns
    -------
    Sequential
        Model compiled with the 'adam' optimizer, binary cross-entropy loss
        and the 'accuracy' metric. The last layer is a single sigmoid unit.
    """
    # Called before building so every call starts from a cleared Keras session.
    clear_session()
    model = Sequential()
    # Only the first convolution is followed by batch normalisation.
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, strides=1, input_shape=input_shape))  # 1
    model.add(BatchNormalization())
    model.add(LeakyReLU())
    model.add(MaxPool1D(pool_size=2, strides=2))  # 2
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, strides=1))  # 3
    model.add(LeakyReLU())
    model.add(MaxPool1D(pool_size=2, strides=2))  # 4
    model.add(Dropout(dropout_rate))
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, strides=1))  # 5
    model.add(LeakyReLU())
    model.add(AveragePooling1D(pool_size=2, strides=2))  # 6
    model.add(Dropout(dropout_rate))
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, strides=1))  # 7
    model.add(LeakyReLU())
    model.add(AveragePooling1D(pool_size=2, strides=2))  # 8
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, strides=1))  # 9
    model.add(LeakyReLU())
    model.add(GlobalAveragePooling1D())  # 10
    model.add(Dense(1, activation='sigmoid'))  # 11

    model.compile('adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Build one model instance to print its layer summary. The cross-validation
# loop further down builds its own fresh model for every fold.
model=cnnmodel()
model.summary()

In [None]:
# 4-fold splitter that keeps each group on one side of every split; the
# groups are supplied where split() is called in the training loop.
gkf=GroupKFold(n_splits=4)

In [None]:
# Grouped cross-validation: train a fresh model on every fold and collect its
# validation accuracy.
accuracy = []
for train_index, val_index in gkf.split(data_array, label_array, groups=group_array):
    train_features, val_features = data_array[train_index], data_array[val_index]
    train_labels, val_labels = label_array[train_index], label_array[val_index]

    # Standardise over the last axis: flatten to 2-D, scale, restore the shape.
    # The scaler is fitted on the training fold only and reused for validation.
    train_shape, val_shape = train_features.shape, val_features.shape
    scaler = StandardScaler()
    train_flat = scaler.fit_transform(train_features.reshape(-1, train_shape[-1]))
    val_flat = scaler.transform(val_features.reshape(-1, val_shape[-1]))
    train_features = train_flat.reshape(train_shape)
    val_features = val_flat.reshape(val_shape)

    model = cnnmodel()
    model.fit(
        train_features,
        train_labels,
        epochs=50,
        batch_size=32,
        validation_data=(val_features, val_labels),
    )
    # Index 1 of the evaluate() result is appended as the fold's accuracy.
    fold_scores = model.evaluate(val_features, val_labels)
    accuracy.append(fold_scores[1])

In [None]:
# Mean of the per-fold validation accuracies collected above.
acc = np.asarray(accuracy).mean()
acc

# Garbage (scratch cells, not part of the analysis above)

In [None]:
# NOTE(review): `epochs` is not assigned in the visible notebook and
# `input_ts_prep` only exists as commented-out code, so this cell cannot run
# as it stands.
tot_epoch = []
for epoch in epochs:
    arr_epoch = input_ts_prep(epoch, standard_events, deviant_events)
    tot_epoch.append(arr_epoch)

In [None]:
# Feature list for the train/test split below (the list built by the loop over `epochs`).
X = tot_epoch

In [None]:
# Scratch: data of the entry at index 6 of `epochs`.
# NOTE(review): `epochs` is not assigned in the visible notebook, and `test`
# is not used by any later visible cell.
test = epochs[6].get_data()

In [None]:
# Hold out 40% of the samples, with a fixed random_state.
# NOTE(review): train_test_split is not imported and `y` is not assigned in the
# visible notebook, so this cell cannot run as it stands.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

In [None]:
# Inspect the shape of the training features.
X_train.shape #no of epochs, channels, length of signal

In [None]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models

In [None]:
# Convert the four outputs of the split to NumPy arrays.
X_train, y_train, X_test, y_test = map(np.array, (X_train, y_train, X_test, y_test))