# CNN for many subjects


Primero:

Cargamos los datos y los normalizamos. Para esto, primero aplicamos un filtro pasa-bajo (de 0 a 20 Hz) y luego estandarizamos la señal para que siga una distribución $\mathcal{N}(0, 1)$.

In [1]:
%pylab
%matplotlib inline

import sys
if ".." not in sys.path:
    sys.path.append("..")
import glob
import os
import mne
from keras import backend as K
from p300.preprocessing import normalize_subject, load_data

# Report the GPUs visible to the Keras backend.
# NOTE(review): `_get_available_gpus` is a private helper of the Keras
# TensorFlow backend — confirm it exists in the installed Keras version.
print("GPU's disponibles = {}".format(K.tensorflow_backend._get_available_gpus()))

# Directory containing the `.set` files ("~" is expanded below).
CORPORA_PATH = "~/projects/corpora/P3Speller/P3Speller-old-y-datos/sets"

file_path = os.path.expanduser(CORPORA_PATH)
# glob.glob returns matches in arbitrary (filesystem-dependent) order, while
# later cells select files by position (files[:30], files[130], ...). Sorting
# makes that selection reproducible across machines and runs.
files = sorted(glob.glob(os.path.join(file_path, "*.set")))



Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


GPU's disponibles = ['/job:localhost/replica:0/task:0/device:GPU:0']


Targets appear as 2 in the third column


We also remove the last channel.

In [2]:
%%capture

# The %%capture cell magic above suppresses this cell's output.

# How many entries of `files` are used to train the shared model.
no_subjects_to_use = 30

# Positional selection: the first `no_subjects_to_use` paths in `files`.
training_files = files[:no_subjects_to_use]


# load_data comes from p300.preprocessing.
# presumably it returns the stacked epochs and their labels — TODO confirm.
X_train, y_train = load_data(training_files)


In [3]:
from sklearn.utils import class_weight
# Flatten the labels to one dimension before computing per-class weights.
y_t = y_train.reshape(-1)
classes = np.unique(y_t)

# `classes` and `y` are passed by keyword: recent scikit-learn releases make
# them keyword-only, and the keyword form also works on older releases.
weights = class_weight.compute_class_weight('balanced', classes=classes, y=y_t)

# Key every weight by the label it was computed for (instead of assuming the
# labels are exactly 0 and 1), using plain Python ints/floats.
class_weights = {int(label): float(weight) for label, weight in zip(classes, weights)}

print("Class weights: {}".format(class_weights))
print(X_train.shape)

Class weights: {0: 0.6, 1: 3.0}
(58500, 14, 104, 1)


In [4]:
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Flatten, Dense, Dropout

# Settings shared by the layers below.
activation = 'relu'
n_kernels = 12

# Two stacked convolutions — a (14, 1) kernel followed by a (1, 13) kernel —
# feeding a dropout-regularised dense head with one sigmoid output unit.
model = Sequential([
    Conv2D(n_kernels, (14, 1), padding='same',
           activation=activation, input_shape=(14, 104, 1)),
    Conv2D(5 * n_kernels, (1, 13), padding='same', activation=activation),
    Flatten(),
    Dropout(0.45),
    Dense(128, activation=activation),
    Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',  # cross-entropy loss for the binary target
    optimizer='rmsprop',
    metrics=['accuracy'],  # accuracy is reported during training
)


Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [5]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
# The checkpoint is written inside `models/`; create the directory up front so
# the first save cannot fail because it is missing.
os.makedirs('models', exist_ok=True)

# Keep only the best model seen so far on disk.
checkpointer = ModelCheckpoint(filepath='models/model_cnn_1.h5', verbose=1, save_best_only=True)
# Stop once the validation loss has not improved for 5 consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=5)

model.fit(
    X_train, y_train, epochs=40,
    batch_size=256, class_weight=class_weights, validation_split=0.10,
    callbacks=[checkpointer, early_stopping]
)

Train on 52650 samples, validate on 5850 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40


<keras.callbacks.History at 0x7f83fc39f7f0>

Let's freeze the first four layers (i.e. mark them as non-trainable).

In [6]:

# Mark layers 0-3 (the two convolutions, Flatten and Dropout) as non-trainable.
n_frozen = 4
for layer_index in range(n_frozen):
    model.layers[layer_index].trainable = False

# Compile again after changing the `trainable` flags.
model.compile(
    loss='binary_crossentropy',  # cross-entropy loss for the binary target
    optimizer='rmsprop',
    metrics=['accuracy'],  # accuracy is reported during training
)

# Display each layer together with its trainable status.
[(layer, "Trainable: {}".format(layer.trainable)) for layer in model.layers]

[(<keras.layers.convolutional.Conv2D at 0x7f83f9531a20>, 'Trainable: False'),
 (<keras.layers.convolutional.Conv2D at 0x7f83f95319b0>, 'Trainable: False'),
 (<keras.layers.core.Flatten at 0x7f83fc0f4b00>, 'Trainable: False'),
 (<keras.layers.core.Dropout at 0x7f83f95ab0f0>, 'Trainable: False'),
 (<keras.layers.core.Dense at 0x7f83f95b0d30>, 'Trainable: True'),
 (<keras.layers.core.Dense at 0x7f83f95b0f60>, 'Trainable: True')]

Now, the idea is to take each subject in turn and fine-tune only the last (still trainable) layers on that subject's data.

In [10]:
%%capture
from p300.preprocessing import normalize_subject, load_data, load_data_from_subject

# Pick a single recording by its position in `files`.
file = files[130]

# load_data takes a list of paths, so the single path is wrapped in a list.
X_sub, y_sub = load_data([file])

# Split in order: the first half is used for fine-tuning, the second half for
# evaluation.
length = len(X_sub)
limit = length // 2
X_sub_train, y_sub_train = X_sub[:limit], y_sub[:limit]
X_sub_test, y_sub_test = X_sub[limit:], y_sub[limit:]


In [12]:
# Sanity check: display the shapes of the two halves of the subject's data.
X_sub_train.shape, X_sub_test.shape

((900, 14, 104, 1), (900, 14, 104, 1))

In [15]:
# Continue training the partially frozen model on the subject's first half.
# NOTE(review): validation_split=0.01 leaves only 9 validation samples (see
# the training log), so the val_loss watched by early stopping is very noisy.
model.fit(
    X_sub_train,
    y_sub_train,
    epochs=10,
    batch_size=64,
    class_weight={0: 1, 1: 6},
    validation_split=0.01,
    callbacks=[early_stopping],
)

Train on 891 samples, validate on 9 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f8370668ac8>

In [17]:
from sklearn.metrics import precision_score, recall_score, roc_auc_score, accuracy_score, f1_score

def print_results(model, X_test, y_test):
    """Print accuracy, precision, recall, ROC AUC and F1 for a binary classifier.

    Parameters
    ----------
    model
        Object whose ``predict(X_test)`` returns the predicted probability of
        the positive class as a NumPy array.
    X_test
        Inputs to evaluate on.
    y_test
        Ground-truth binary labels for ``X_test``.

    Returns
    -------
    None
        The metrics are printed, not returned.
    """
    # Run a single forward pass and threshold it at 0.5. This is what
    # Sequential.predict_classes did for a single sigmoid output, without
    # predicting twice and without relying on a method that later Keras
    # releases removed.
    y_prob = model.predict(X_test)
    y_pred = (y_prob > 0.5).astype("int32")

    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    # ROC AUC is computed from the probabilities, not the thresholded labels.
    auc = roc_auc_score(y_test, y_prob)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    print("""
    Accuracy   = {}
    Precision  = {}
    Recall     = {}
    ROC AUC    = {}
    F1         = {}
    """.format(accuracy, precision, recall, auc, f1))
    
print_results(model, X_sub_test, y_sub_test)


    Accuracy   = 0.7488888888888889
    Precision  = 0.3
    Recall     = 0.38
    ROC AUC    = 0.6240711111111111
    F1         = 0.3352941176470588
    


In [None]:

def fix_layers(model, fixed_layers):
    """Freeze the first ``fixed_layers`` layers of ``model`` and recompile it.

    The model is modified in place; nothing is returned. Layers are only ever
    frozen here, never unfrozen.
    """
    for i in range(fixed_layers):
        model.layers[i].trainable = False

    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])


def fine_tune(model, fixed_layers, X=None, y=None):
    """Freeze the first ``fixed_layers`` layers and train the rest.

    Parameters
    ----------
    model
        Keras model to fine-tune (modified in place).
    fixed_layers : int
        Number of leading layers to freeze before training.
    X, y
        Training inputs and labels. When omitted, the notebook-level
        ``X_train`` / ``y_train`` are used, which is what this function always
        trained on before these arguments existed.

    Returns
    -------
    The same ``model`` object, after training.
    """
    if X is None:
        X = X_train
    if y is None:
        y = y_train

    fix_layers(model, fixed_layers)

    early_stopping = EarlyStopping(monitor='val_loss', patience=3)

    model.fit(
        X, y, epochs=10,
        batch_size=64, class_weight={0: 1, 1: 6}, validation_split=0.01,
        callbacks=[early_stopping]
    )

    return model


def _get_metrics(model, X_test, y_test):
    """Return accuracy, precision, recall, ROC AUC and F1 as a dict."""
    from sklearn.metrics import (accuracy_score, f1_score, precision_score,
                                 recall_score, roc_auc_score)

    # One forward pass; class labels are the probabilities thresholded at 0.5.
    y_prob = model.predict(X_test)
    y_pred = (y_prob > 0.5).astype("int32")

    return {
        "accuracy": accuracy_score(y_test, y_pred),
        "precision": precision_score(y_test, y_pred),
        "recall": recall_score(y_test, y_pred),
        "roc_auc": roc_auc_score(y_test, y_prob),
        "f1": f1_score(y_test, y_pred),
    }


def get_analysis(filename, fixed_layers=4, model_path='models/model_cnn_1.h5',
                 random_state=None):
    """Compare the pre-trained model with two fine-tuned variants on one file.

    The data in ``filename`` is split 90/10 (stratified). Metrics on the
    held-out 10% are collected for:

    * ``ft_0_*`` — the pre-trained model as loaded from ``model_path``;
    * ``ft_<fixed_layers>_*`` — fine-tuned with ``fixed_layers`` frozen layers;
    * ``ft_<fixed_layers + 1>_*`` — fine-tuned with one more frozen layer.

    With the default ``fixed_layers=4`` the keys are ``ft_0_*``, ``ft_4_*``
    and ``ft_5_*``.

    Parameters
    ----------
    filename : str
        Path of the recording to analyse.
    fixed_layers : int
        Number of frozen layers for the first fine-tuned variant.
    model_path : str
        Checkpoint of the pre-trained model; defaults to the file written by
        the ModelCheckpoint callback of the shared model.
    random_state
        Forwarded to ``train_test_split`` to make the split reproducible.

    Returns
    -------
    dict
        ``{"file": <basename>, "ft_<n>_<metric>": value, ...}``.
    """
    from keras.models import load_model
    from sklearn.model_selection import train_test_split

    # Load the requested file; load_data is called with a list of paths, as in
    # the rest of the notebook.
    X, y = load_data([filename])
    X_fit, X_test, y_fit, y_test = train_test_split(
        X, y, test_size=0.1, stratify=y, random_state=random_state)

    ret = {"file": os.path.basename(filename)}

    def _record(prefix, model):
        # Store every metric under a key prefixed with the variant's name.
        scores = _get_metrics(model, X_test, y_test)
        ret.update({"{}_{}".format(prefix, k): v for k, v in scores.items()})

    # Baseline: the pre-trained model without any subject-specific training.
    K.clear_session()
    _record("ft_0", load_model(model_path))

    # Each variant starts from a freshly loaded checkpoint: fix_layers never
    # unfreezes layers and training updates weights in place, so reusing one
    # model object would make the second variant depend on the first.
    for n_fixed in (fixed_layers, fixed_layers + 1):
        K.clear_session()
        tuned = fine_tune(load_model(model_path), n_fixed, X_fit, y_fit)
        _record("ft_{}".format(n_fixed), tuned)

    K.clear_session()
    return ret

get_analysis(files[100], 4)

In [6]:
from sklearn.metrics import precision_score, recall_score, roc_auc_score, accuracy_score, f1_score


def print_results(model, X_test=None, y_test=None):
    """Print accuracy, precision, recall, ROC AUC and F1 for a binary classifier.

    Parameters
    ----------
    model
        Object whose ``predict(X_test)`` returns the predicted probability of
        the positive class as a NumPy array.
    X_test, y_test
        Evaluation inputs and ground-truth labels. When omitted, the
        notebook-level ``X_test`` / ``y_test`` are used, so the one-argument
        call ``print_results(model)`` keeps working.

    Raises
    ------
    KeyError
        If an argument is omitted and no notebook-level variable of that name
        exists.
    """
    # Fall back to the notebook-level variables only for omitted arguments.
    if X_test is None:
        X_test = globals()["X_test"]
    if y_test is None:
        y_test = globals()["y_test"]

    # Run a single forward pass and threshold it at 0.5. This is what
    # Sequential.predict_classes did for a single sigmoid output, without
    # predicting twice and without relying on a method that later Keras
    # releases removed.
    y_prob = model.predict(X_test)
    y_pred = (y_prob > 0.5).astype("int32")

    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    # ROC AUC is computed from the probabilities, not the thresholded labels.
    auc = roc_auc_score(y_test, y_prob)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    print("""
    Accuracy   = {}
    Precision  = {}
    Recall     = {}
    ROC AUC    = {}
    F1         = {}
    """.format(accuracy, precision, recall, auc, f1))
    

print_results(model)


    Accuracy   = 0.5842592592592593
    Precision  = 0.22590384179938108
    Recall     = 0.6158436213991769
    ROC AUC    = 0.6375042930447594
    F1         = 0.3305538682423105
    


In [7]:
from keras.models import load_model

# Reload the checkpoint written by the ModelCheckpoint callback of the first
# training run (same path, saved with save_best_only=True).
model_2 = load_model('models/model_cnn_1.h5')

# Evaluate the reloaded checkpoint with the same report as the in-memory model.
print_results(model_2)


    Accuracy   = 0.6500342935528121
    Precision  = 0.24240963855421688
    Recall     = 0.5174897119341564
    ROC AUC    = 0.6364045919490593
    F1         = 0.33016081391532653
    


In [None]:
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Flatten, Dense, Dropout, MaxPool2D

# Number of filters in the first convolution; the second uses five times more.
n_kernels = 10

# Same layout as the first network plus a (1, 4) max-pooling step after the
# convolutions, and a wider (256-unit) dense layer.
model = Sequential([
    Conv2D(n_kernels, (14, 1), padding='same',
           activation='relu', input_shape=(14, 104, 1)),
    Conv2D(5 * n_kernels, (1, 13), padding='same', activation='relu'),
    MaxPool2D((1, 4)),
    Flatten(),
    Dropout(0.45),
    Dense(256, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',  # cross-entropy loss for the binary target
    optimizer='rmsprop',
    metrics=['accuracy'],  # accuracy is reported during training
)

In [25]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
# Keep only the best model seen so far on disk.
checkpointer = ModelCheckpoint(
    filepath='models/model_with_maxpool.h5',
    verbose=1,
    save_best_only=True,
)
# NOTE(review): this callback is created but not passed to fit() below, so
# training always runs the full 30 epochs — confirm this is intended.
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

model.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=256,
    class_weight={0: 1, 1: 6},
    validation_split=0.01,
    callbacks=[checkpointer],
)

Train on 87674 samples, validate on 886 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7ff1f8884048>

In [26]:
from keras.models import load_model


from sklearn.metrics import precision_score, recall_score, roc_auc_score, accuracy_score

# Run a single forward pass and threshold it at 0.5. This is what
# Sequential.predict_classes did for a single sigmoid output, without
# predicting twice and without relying on a method that later Keras releases
# removed.
y_prob = model.predict(X_test)
y_pred = (y_prob > 0.5).astype("int32")

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
# ROC AUC is computed from the probabilities, not the thresholded labels.
auc = roc_auc_score(y_test, y_prob)
accuracy = accuracy_score(y_test, y_pred)

print("""
Accuracy   = {}
Precision  = {}
Recall     = {}
ROC AUC    = {}
""".format(accuracy, precision, recall, auc))


Accuracy   = 0.6778472222222223
Precision  = 0.20906964656964658
Recall     = 0.33520833333333333
ROC AUC    = 0.5621011501736111



## Model with two layers

In [28]:
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Flatten, Dense, Dropout, MaxPool2D

model = Sequential()

# Base number of filters; each convolution uses a multiple of it.
n_kernels = 10

# First block: (14, 1) convolution, (1, 13) convolution, (1, 4) max-pooling.
# Only this first layer declares the input shape.
model.add(Conv2D(2*n_kernels, (14, 1), padding='same',
                activation='relu', input_shape=(14, 104, 1)))
model.add(Conv2D(5*n_kernels, (1, 13), padding='same',
                activation='relu'))
model.add(MaxPool2D((1, 4)))
model.add(Dropout(0.35))

# Second block, same layout with fewer filters. The original passed
# input_shape=(14, 104, 1) here as well; Sequential only uses it on the first
# layer, and it did not describe this layer's real input (the output of the
# block above), so it has been dropped.
model.add(Conv2D(n_kernels, (14, 1), padding='same',
                activation='relu'))
model.add(Conv2D(2*n_kernels, (1, 13), padding='same',
                activation='relu'))
model.add(MaxPool2D((1, 4)))

# Classification head: dropout, one hidden dense layer, one sigmoid unit.
model.add(Flatten())
model.add(Dropout(0.35))
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', # cross-entropy loss for the binary target
              optimizer='rmsprop',
              metrics=['accuracy']) # accuracy is reported during training

In [29]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
# Keep only the best model seen so far on disk.
# NOTE(review): unlike the other checkpoints, this path is not under `models/`.
checkpointer = ModelCheckpoint(
    filepath='model.2conv_with_maxpool.h5',
    verbose=1,
    save_best_only=True,
)
# NOTE(review): this callback is created but not passed to fit() below, so
# training always runs the full 30 epochs — confirm this is intended.
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

model.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=256,
    class_weight={0: 1, 1: 6},
    validation_split=0.01,
    callbacks=[checkpointer],
)

Train on 87674 samples, validate on 886 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7ff1f3efd5c0>

In [30]:
from keras.models import load_model
from sklearn.metrics import precision_score, recall_score, roc_auc_score, accuracy_score


# Run a single forward pass and threshold it at 0.5. This is what
# Sequential.predict_classes did for a single sigmoid output, without
# predicting twice and without relying on a method that later Keras releases
# removed.
y_prob = model.predict(X_test)
y_pred = (y_prob > 0.5).astype("int32")

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
# ROC AUC is computed from the probabilities, not the thresholded labels.
auc = roc_auc_score(y_test, y_prob)
accuracy = accuracy_score(y_test, y_pred)

print("""
Accuracy   = {}
Precision  = {}
Recall     = {}
ROC AUC    = {}
""".format(accuracy, precision, recall, auc))


Accuracy   = 0.4284375
Precision  = 0.18658280922431866
Recall     = 0.723125
ROC AUC    = 0.5781777647569444



## Conv with two layers but one simple

In [32]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
# NOTE(review): no model is defined under this heading; as written, this cell
# keeps training whatever `model` currently holds — confirm that a
# model-definition cell is not missing.
# NOTE(review): the checkpoint path is the same as in the previous training
# cell, so that file is overwritten.
checkpointer = ModelCheckpoint(
    filepath='model.2conv_with_maxpool.h5',
    verbose=1,
    save_best_only=True,
)
# Stop once the validation loss has not improved for 3 consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

model.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=256,
    class_weight={0: 1, 1: 6},
    validation_split=0.01,
    callbacks=[checkpointer, early_stopping],
)

Train on 87674 samples, validate on 886 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


<keras.callbacks.History at 0x7ff1f3f581d0>

In [33]:
from keras.models import load_model


from sklearn.metrics import precision_score, recall_score, roc_auc_score, accuracy_score

# Run a single forward pass and threshold it at 0.5. This is what
# Sequential.predict_classes did for a single sigmoid output, without
# predicting twice and without relying on a method that later Keras releases
# removed.
y_prob = model.predict(X_test)
y_pred = (y_prob > 0.5).astype("int32")

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
# ROC AUC is computed from the probabilities, not the thresholded labels.
auc = roc_auc_score(y_test, y_prob)
accuracy = accuracy_score(y_test, y_pred)

print("""
Accuracy   = {}
Precision  = {}
Recall     = {}
ROC AUC    = {}
""".format(accuracy, precision, recall, auc))


Accuracy   = 0.48006944444444444
Precision  = 0.19064704451471662
Recall     = 0.653125
ROC AUC    = 0.5707269444444445

