# CNN for many subjects

$$ x = y^2 + 2$$

In [1]:
%pylab
%matplotlib inline

import glob
import os
import mne
CORPORA_PATH = "~/corpora/sets"

file_path = os.path.expanduser(CORPORA_PATH)
files = glob.glob(os.path.join(file_path, "*.set"))

def normalize_subject(X):
    mean = X.mean(axis=(0, 2)).reshape(-1, 1)
    std = X.std(axis=(0, 2)).reshape(-1, 1)
    return (X - mean) / std

def load_data(filename, normalize=True):
    data_mne = mne.io.read_raw_eeglab(filename, preload=True, event_id={"0": 1, "1": 2})
    data_mne.filter(0, 20)
    events = mne.find_events(data_mne)
    epochs = mne.Epochs(
        data_mne, events,
        baseline=(None, 0), tmin=-0.1, tmax=0.7)

    epochs.load_data()
    
    ch_names = epochs.ch_names
    
    X = epochs.get_data()[:, :-1]
    y = (events[:, 2] == 2).astype('float')

    if len(events) != len(epochs):
        raise ValueError("Epochs events mismatch")
    if normalize: 
        X = normalize_subject(X)
    
    
    return X, y 


Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


Targets appear as 2 in the third column


We remove last channel as well

# CNN with more data

In [2]:
%%capture 

filenames = files


X = None
y = None
print(filenames)
for filename in filenames:
    try:
        X_subject, y_subject = load_data(filename)

        if X is None:
            X, y = X_subject, y_subject
        else:
            X = np.vstack((X, X_subject))
            y = np.vstack((y.reshape(-1,1), y_subject.reshape(-1,1)))
    except ValueError as e:
        print(e)

In [3]:
X.shape

(287640, 14, 104)

In [4]:
X_t = X[:, :, :, np.newaxis]

X_t.shape

(287640, 14, 104, 1)

In [5]:
 sum(y) / len(y)

0.16666666666666666

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test= train_test_split(X_t, y, test_size=0.1, stratify=y)

X_train.shape, y_train.shape

((258876, 14, 104, 1), (258876, 1))

In [13]:
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Flatten, Dense, Dropout

model = Sequential()

n_kernels = 10
model.add(Conv2D(n_kernels, (14, 1), padding='same', 
                activation='relu', input_shape=(14, 104, 1)))
model.add(Conv2D(5*n_kernels, (1, 13), padding='same',
                activation='relu'))
model.add(Flatten())
model.add(Dropout(0.35))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.50))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', # using the cross-entropy loss function
              optimizer='rmsprop', 
              metrics=['accuracy']) # reporting the accuracy


In [14]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
checkpointer = ModelCheckpoint(filepath='model.2.h5', verbose=1, save_best_only=True)
early_stopping = EarlyStopping(monitor='val_loss', patience=3)


model.fit(
    X_train, y_train, epochs=50, 
    batch_size=256, class_weight={0:1, 1:6}, validation_split=0.1,
    callbacks=[checkpointer, early_stopping]
)

Train on 232988 samples, validate on 25888 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50


<keras.callbacks.History at 0x7feb1a6dbf98>

In [17]:
from keras.models import load_model

model = load_model("model.2.h5")

y_pred = model.predict_classes(X_test)
y_prob = model.predict(X_test)


from sklearn.metrics import precision_score, recall_score, roc_auc_score, accuracy_score

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_prob)
accuracy = accuracy_score(y_test, y_pred)

print("""
Accuracy   = {}
Precision  = {}
Recall     = {}
ROC AUC    = {}
""".format(accuracy, precision, recall, auc))


Accuracy   = 0.43391044361006814
Precision  = 0.19811866099111883
Recall     = 0.7863996662494785
ROC AUC    = 0.6359560579217973



## Model with two layers

In [18]:
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Flatten, Dense, Dropout, MaxPool2D

model = Sequential()

n_kernels = 10
model.add(Conv2D(2*n_kernels, (14, 1), padding='same', 
                activation='relu', input_shape=(14, 104, 1)))
model.add(Conv2D(5*n_kernels, (1, 13), padding='same',
                activation='relu'))
model.add(MaxPool2D((1, 4)))
model.add(Dropout(0.35))
model.add(Conv2D(n_kernels, (14, 1), padding='same', 
                activation='relu', input_shape=(14, 104, 1)))
model.add(Conv2D(2*n_kernels, (1, 13), padding='same',
                activation='relu'))
model.add(MaxPool2D((1, 4)))

model.add(Flatten())
model.add(Dropout(0.35))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.35))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', # using the cross-entropy loss function
              optimizer='adam', 
              metrics=['accuracy']) # reporting the accuracy

In [19]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
checkpointer = ModelCheckpoint(filepath='model.2conv_with_maxpool.h5', verbose=1, save_best_only=True)
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

model.fit(
    X_train, y_train, epochs=30, 
    batch_size=256, class_weight={0:1, 1:6}, validation_split=0.1,
    callbacks=[checkpointer, early_stopping]
)

Train on 232988 samples, validate on 25888 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30


<keras.callbacks.History at 0x7feb1a75c518>

In [21]:
from keras.models import load_model

model = load_model("model.2conv_with_maxpool.h5")

y_pred = model.predict_classes(X_test)
y_prob = model.predict(X_test)


from sklearn.metrics import precision_score, recall_score, roc_auc_score, accuracy_score

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_prob)
accuracy = accuracy_score(y_test, y_pred)

print("""
Accuracy   = {}
Precision  = {}
Recall     = {}
ROC AUC    = {}
""".format(accuracy, precision, recall, auc))


Accuracy   = 0.5206160478375748
Precision  = 0.21790127328608166
Recall     = 0.7246558197747184
ROC AUC    = 0.6565931740221097



## Conv with two layers but one simple

In [28]:
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Flatten, Dense, Dropout, MaxPool2D

model = Sequential()

n_kernels = 10
model.add(Conv2D(n_kernels, (14, 14), padding='same', 
                activation='relu', input_shape=(14, 104, 1)))
model.add(MaxPool2D((1, 4)))

model.add(Flatten())
model.add(Dropout(0.35))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.50))

model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', # using the cross-entropy loss function
              optimizer='rmsprop', 
              metrics=['accuracy']) # reporting the accuracy

In [29]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
checkpointer = ModelCheckpoint(filepath='model.3conv_with_maxpool.h5', verbose=1, save_best_only=True)
early_stopping = EarlyStopping(monitor='val_loss', patience=2)

model.fit(
    X_train, y_train, epochs=10, 
    batch_size=256, class_weight={0:1, 1:6}, validation_split=0.1,
    callbacks=[checkpointer, early_stopping]
)

Train on 232988 samples, validate on 25888 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10


<keras.callbacks.History at 0x7feb19cfee80>

In [30]:
from keras.models import load_model


y_pred = model.predict_classes(X_test)
y_prob = model.predict(X_test)


from sklearn.metrics import precision_score, recall_score, roc_auc_score, accuracy_score

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_prob)
accuracy = accuracy_score(y_test, y_pred)

print("""
Accuracy   = {}
Precision  = {}
Recall     = {}
ROC AUC    = {}
""".format(accuracy, precision, recall, auc))


Accuracy   = 0.4183354192740926
Precision  = 0.1965838035687052
Recall     = 0.8066332916145181
ROC AUC    = 0.6368764216291085

