# CNN for many subjects


Primero:

Cargamos los datos y los normalizamos. Para esto, primero pasamos un filtro pasa-bajo de 0 a 20hz, luego lo normalizamos a $N(0, 1)$

In [1]:
%pylab
%matplotlib inline

import sys
if ".." not in sys.path:
    sys.path.append("..")
import glob
import os
import mne
from keras import backend as K
from p300.preprocessing import normalize_subject, load_data

print("GPU's disponibles = {}".format(K.tensorflow_backend._get_available_gpus()))

CORPORA_PATH = "~/projects/corpora/P3Speller/P3Speller-old-y-datos/sets"

file_path = os.path.expanduser(CORPORA_PATH)
files = glob.glob(os.path.join(file_path, "*.set"))



Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


GPU's disponibles = ['/job:localhost/replica:0/task:0/device:GPU:0']


Targets appear as 2 in the third column


We remove last channel as well

In [2]:
%%capture

# this line is to avoid output

no_subjects_to_use = 30

training_files = files[:no_subjects_to_use]


X_train, y_train = load_data(training_files)


In [3]:
from sklearn.utils import class_weight
y_t = y_train.reshape(-1)
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_t), y_t)

class_weights = dict(zip([0,1], class_weights))

print("Class weights: {}".format(class_weights))
print(X_train.shape)

Class weights: {0: 0.6, 1: 3.0}
(58500, 14, 104, 1)


In [4]:
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Flatten, Dense, Dropout

model = Sequential()
activation = 'relu'

n_kernels = 12
model.add(Conv2D(n_kernels, (14, 1), padding='same',
                activation=activation, input_shape=(14, 104, 1)))
model.add(Conv2D(5*n_kernels, (1, 13), padding='same',
                activation=activation))
model.add(Flatten())
model.add(Dropout(0.45))
model.add(Dense(128, activation=activation))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', # using the cross-entropy loss function
              optimizer='rmsprop', 
              metrics=['accuracy']) # reporting the accuracy


Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [5]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
checkpointer = ModelCheckpoint(filepath='models/model_cnn_1.h5', verbose=1, save_best_only=True)
early_stopping = EarlyStopping(monitor='val_loss', patience=5)

model.fit(
    X_train, y_train, epochs=40, 
    batch_size=256, class_weight=class_weights, validation_split=0.10,
    callbacks=[checkpointer, early_stopping]
)

Train on 52650 samples, validate on 5850 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40


<keras.callbacks.History at 0x7f1097dabe48>

Let's fix the first four layers.

In [6]:

def fix_layers(model, fixed_layers):
    for i in range(fixed_layers):
        model.layers[i].trainable = False
    
    model.compile(loss='binary_crossentropy',
              optimizer='rmsprop', 
              metrics=['accuracy'])
    
fix_layers(model, 4)    

[(l, "Trainable: {}".format(l.trainable)) for l in model.layers]

[(<keras.layers.convolutional.Conv2D at 0x7f1097da4978>, 'Trainable: False'),
 (<keras.layers.convolutional.Conv2D at 0x7f1097da4908>, 'Trainable: False'),
 (<keras.layers.core.Flatten at 0x7f1097da4be0>, 'Trainable: False'),
 (<keras.layers.core.Dropout at 0x7f1097dc2160>, 'Trainable: False'),
 (<keras.layers.core.Dense at 0x7f1097dce748>, 'Trainable: True'),
 (<keras.layers.core.Dense at 0x7f1097dced30>, 'Trainable: True')]

Now, the idea is to train each subject and fine tune the last layers.

In [17]:
%%capture output
from keras import backend as K
from keras.models import load_model
from sklearn.metrics import (
    precision_score, 
    recall_score, 
    roc_auc_score, 
    accuracy_score, 
    f1_score
)
from p300.preprocessing import normalize_subject, load_data, load_data_from_subject

file = files[130]

def get_fine_tune_results(model_path, file):
    K.clear_session()
    
    model = load_model(model_path)
    
    fix_layers(model, 4)

    X_sub, y_sub = load_data([file])

    length = X_sub.shape[0] 
    limit = int(length / 2)
    X_sub_train, X_sub_test = X_sub[:limit], X_sub[limit:]
    y_sub_train, y_sub_test = y_sub[:limit], y_sub[limit:]
    
    model.fit(
        X_sub_train, y_sub_train, epochs=20, 
        batch_size=64, class_weight=class_weights, validation_split=0.01,
        callbacks=[early_stopping]
    )
    
    y_pred = model.predict_classes(X_sub_test)
    y_prob = model.predict(X_sub_test)

    precision = precision_score(y_sub_test, y_pred)
    recall = recall_score(y_sub_test, y_pred)
    auc = roc_auc_score(y_sub_test, y_prob)
    accuracy = accuracy_score(y_sub_test, y_pred)
    f1 = f1_score(y_sub_test, y_pred)
    
    subject_name = file.split("/")[-1].split(".")[0].split("_")[-1]
    K.clear_session()
    return {
        "subject": subject_name,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1": f1
    }, model
    



results, _ = get_fine_tune_results('models/model_cnn_1.h5', file)



In [18]:
print(output)

results

Reading /home/jmperez/projects/corpora/P3Speller/P3Speller-old-y-datos/sets/PruebasMuseo_17436001.fdt
Reading 0 ... 63231  =      0.000 ...   493.992 secs...
Setting up low-pass filter at 20 Hz
h_trans_bandwidth chosen to be 5.0 Hz
Filter length of 169 samples (1.320 sec) selected
1800 events found
Events id: [1 2]
1800 matching events found
0 projection items activated
Loading data for 1800 events and 104 original time points ...
0 bad epochs dropped
Train on 891 samples, validate on 9 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20



{'Accuracy': 0.7244444444444444,
 'F1': 0.3333333333333333,
 'Precision': 0.27927927927927926,
 'Recall': 0.41333333333333333,
 'subject': '17436001'}