# Fine Tuning of the convolutional network

In [1]:
%pylab
%matplotlib inline

import glob
import os
import mne
# Directory holding the EEGLAB ``.set`` recordings; ``~`` is expanded below.
CORPORA_PATH = "~/corpora/sets"

file_path = os.path.expanduser(CORPORA_PATH)
# glob.glob returns paths in arbitrary, filesystem-dependent order. Sort them so
# that positional lookups into `files` pick the same recording on every run.
files = sorted(glob.glob(os.path.join(file_path, "*.set")))

def normalize_subject(X):
    """Standardize a 3-D array separately for each index along axis 1.

    The mean and standard deviation are computed jointly over axes 0 and 2,
    so every slice ``X[:, i, :]`` is centred and scaled with its own statistics.

    Parameters
    ----------
    X : numpy.ndarray
        Three-dimensional array. ``load_data`` passes the epochs array here
        (presumably ``(epochs, channels, samples)`` -- confirm against MNE).

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``X``. Slices that are constant (zero
        standard deviation) come back as all zeros instead of NaN/inf.
    """
    mean = X.mean(axis=(0, 2), keepdims=True)
    std = X.std(axis=(0, 2), keepdims=True)
    # A flat slice has std == 0; dividing by it would produce NaN/inf that
    # silently propagate into training. Use a unit scale there so the centred
    # values (all zeros) are returned.
    safe_std = np.where(std == 0, 1.0, std)
    return (X - mean) / safe_std

def load_data(filename, normalize=True):
    """Load one EEGLAB recording and return epoched data with binary labels.

    The raw recording is filtered with ``filter(0, 20)``, cut into epochs from
    -0.1 s to 0.7 s around every event (``baseline=(None, 0)``), and the last
    channel is dropped from the epoch data (presumably the stimulus channel --
    confirm against the recording montage).

    Parameters
    ----------
    filename : str
        Path of the ``.set`` file to read.
    normalize : bool, optional
        When true (the default) the epoch data is passed through
        ``normalize_subject``.

    Returns
    -------
    X : numpy.ndarray
        Epoch data with a trailing singleton axis appended.
    y : numpy.ndarray
        Float labels, 1.0 where the event code is 2 and 0.0 otherwise.

    Raises
    ------
    ValueError
        If the number of epochs differs from the number of events found.
    """
    raw = mne.io.read_raw_eeglab(filename, preload=True, event_id={"0": 1, "1": 2})
    raw.filter(0, 20)
    events = mne.find_events(raw)
    epochs = mne.Epochs(
        raw, events,
        baseline=(None, 0), tmin=-0.1, tmax=0.7)

    epochs.load_data()

    # The labels are taken from `events` while the data is taken from `epochs`.
    # If any epoch was dropped the two would be misaligned, so check before
    # building the arrays.
    if len(events) != len(epochs):
        raise ValueError("Epochs events mismatch")

    X = epochs.get_data()[:, :-1]
    y = (events[:, 2] == 2).astype('float')

    if normalize:
        X = normalize_subject(X)
    X = X[..., np.newaxis]

    return X, y


Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


In [2]:
from keras.models import load_model
# Channel labels of the recordings, in order.
channels = [
    'AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1',
    'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4',
    'STI 014',
]

# Pre-trained network that the following cells evaluate and fine-tune.
model = load_model("models/model.h5")

Using TensorFlow backend.
  return f(*args, **kwds)


In [3]:
# Load a single recording (position 143 in `files`) to evaluate and fine-tune on.
X, y = load_data(files[143])

Reading /home/jmperez/corpora/sets/PruebasMuseo_358001.fdt
Reading 0 ... 63231  =      0.000 ...   493.992 secs...
Setting up low-pass filter at 20 Hz
h_trans_bandwidth chosen to be 5.0 Hz
Filter length of 169 samples (1.320 sec) selected
1800 events found
Events id: [1 2]
1800 matching events found
0 projection items activated
Loading data for 1800 events and 104 original time points ...
0 bad epochs dropped


In [4]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the epochs for testing, stratified so both classes keep
# their proportions. The seed is fixed so that the split -- and every metric
# computed from it -- is the same on each run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=42)

In [5]:


from sklearn.metrics import precision_score, recall_score, roc_auc_score, accuracy_score

def get_metrics(model, X_test, y_test):
    """Score a classifier on a held-out set.

    Parameters
    ----------
    model
        Object whose ``predict(X_test)`` returns an array of scores. A single
        output column is assumed to be the positive-class probability (the
        notebook compiles the model with binary cross-entropy).
    X_test
        Inputs to score.
    y_test
        True binary labels for ``X_test``.

    Returns
    -------
    dict
        Keys ``"accuracy"``, ``"precision"``, ``"recall"`` and ``"roc_auc"``.
    """
    # A single forward pass provides both the scores for ROC AUC and the hard
    # labels for the other metrics.
    y_prob = model.predict(X_test)

    # `Sequential.predict_classes` no longer exists in recent Keras releases.
    # Its rule is reproduced here: threshold a single output column at 0.5,
    # otherwise take the arg-max over the last axis.
    if y_prob.shape[-1] > 1:
        y_pred = y_prob.argmax(axis=-1)
    else:
        y_pred = (y_prob > 0.5).astype("int32")

    return {
        "accuracy": accuracy_score(y_test, y_pred),
        "precision": precision_score(y_test, y_pred),
        "recall": recall_score(y_test, y_pred),
        "roc_auc": roc_auc_score(y_test, y_prob),
    }
    
    
# Baseline: score the pre-trained model on the held-out split before any fine-tuning.
get_metrics(model, X_test, y_test)

{'accuracy': 0.56111111111111112,
 'precision': 0.21176470588235294,
 'recall': 0.59999999999999998,
 'roc_auc': 0.57822222222222219}

In [6]:
# List the model's layers, in order, to choose which ones to freeze.
model.layers

[<keras.layers.convolutional.Conv2D at 0x7efe0a358c18>,
 <keras.layers.convolutional.Conv2D at 0x7efe0a358f28>,
 <keras.layers.core.Flatten at 0x7efe0a358ef0>,
 <keras.layers.core.Dropout at 0x7efe0a3204a8>,
 <keras.layers.core.Dense at 0x7efe0a317dd8>,
 <keras.layers.core.Dense at 0x7efe0a2c7fd0>]

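Let's freeze the first two convolutional layers. The cell below marks the first four layers as non-trainable, which also covers the `Flatten` and `Dropout` layers that follow the convolutions (these have no weights of their own).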

In [None]:

# Mark the leading four layers as non-trainable.
for frozen_layer in model.layers[:4]:
    frozen_layer.trainable = False

# Compile again after changing the `trainable` flags.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',  # cross-entropy loss
    metrics=['accuracy'],        # report accuracy
)
[(layer, "Trainable: {}".format(layer.trainable)) for layer in model.layers]

[(<keras.layers.convolutional.Conv2D at 0x7efe0a358c18>, 'Trainable: False'),
 (<keras.layers.convolutional.Conv2D at 0x7efe0a358f28>, 'Trainable: False'),
 (<keras.layers.core.Flatten at 0x7efe0a358ef0>, 'Trainable: False'),
 (<keras.layers.core.Dropout at 0x7efe0a3204a8>, 'Trainable: False'),
 (<keras.layers.core.Dense at 0x7efe0a317dd8>, 'Trainable: True'),
 (<keras.layers.core.Dense at 0x7efe0a2c7fd0>, 'Trainable: True')]

In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping

# Stop training when the validation loss stops improving (patience of 3 epochs).
early_stopping = EarlyStopping(patience=3, monitor='val_loss')

fit_options = dict(
    epochs=30,
    batch_size=64,
    class_weight={0: 1, 1: 6},  # class 1 weighs six times as much as class 0
    validation_split=0.01,      # fraction of the training data used for validation
    callbacks=[early_stopping],
)
model.fit(X_train, y_train, **fit_options)

Train on 1603 samples, validate on 17 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30


In [None]:
# Score the fine-tuned model on the same held-out split used for the baseline.
get_metrics(model, X_test, y_test)

In [None]:
from os.path import basename
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import backend as K

def fix_layers(model, fixed_layers):
    """Freeze the first ``fixed_layers`` layers of ``model`` and compile it.

    The model is modified in place and nothing is returned.

    Parameters
    ----------
    model
        Object exposing ``layers`` (indexable, each item with a ``trainable``
        attribute) and ``compile``.
    fixed_layers : int
        Number of leading layers to mark as non-trainable.
    """
    for position in range(fixed_layers):
        layer = model.layers[position]
        layer.trainable = False

    compile_options = {
        'loss': 'binary_crossentropy',
        'optimizer': 'rmsprop',
        'metrics': ['accuracy'],
    }
    model.compile(**compile_options)
    
def fine_tune(fixed_layers, X=None, y=None):
    """Reload the saved model, freeze its first layers and train the rest.

    Parameters
    ----------
    fixed_layers : int
        Number of leading layers to freeze (passed to ``fix_layers``).
    X, y : array-like, optional
        Training inputs and labels. When omitted, the notebook-level
        ``X_train`` / ``y_train`` are used.

    Returns
    -------
    The fine-tuned model.
    """
    if X is None:
        X = X_train
    if y is None:
        y = y_train

    # Start from the weights on disk so successive runs do not build on each other.
    model = load_model("models/model.h5")

    fix_layers(model, fixed_layers)

    early_stopping = EarlyStopping(monitor='val_loss', patience=3)

    model.fit(
        X, y, epochs=10,
        batch_size=64, class_weight={0:1, 1:6}, validation_split=0.01,
        callbacks=[early_stopping]
    )

    return model

def get_analysis(filename, fixed_layers=4):
    """Compare the saved model with two fine-tuned variants on one recording.

    The recording is loaded and split 90/10 (stratified). The saved model is
    scored on the held-out part, then it is fine-tuned with 4 and with 5
    frozen layers on the training part and scored again each time.

    Parameters
    ----------
    filename : str
        Path of the recording to analyse.
    fixed_layers : int, optional
        NOTE(review): accepted but not used -- the comparison always freezes
        4 and then 5 layers, which is what the result keys encode. Confirm
        whether this argument should drive the comparison.

    Returns
    -------
    dict
        ``"file"`` (base name of ``filename``) plus one entry per metric
        named ``"ft_0_<metric>"`` (no fine-tuning), ``"ft_4_<metric>"`` and
        ``"ft_5_<metric>"``.
    """
    K.clear_session()
    model = load_model("models/model.h5")

    # Load the recording named by `filename`, so each call analyses its own file.
    X, y = load_data(filename)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, stratify=y)

    ret = {"file" : basename(filename)}

    def record(tag, scored_model):
        # Store the metrics of `scored_model` under keys prefixed with "ft_<tag>_".
        for key, value in get_metrics(scored_model, X_test, y_test).items():
            ret["ft_{}_{}".format(tag, key)] = value

    record(0, model)

    for n_frozen in (4, 5):
        # Train on this recording's own training split. Falling back to the
        # notebook-level arrays could expose epochs from X_test during fitting.
        record(n_frozen, fine_tune(n_frozen, X_train, y_train))

    K.clear_session()
    return ret

# Run the comparison once, on the recording at position 100 in `files`.
get_analysis(files[100], 4)

In [None]:
# Run the comparison on the last ten recordings in `files`: one result dict each.
analysis = list(map(get_analysis, files[-10:]))
    

In [None]:
import pandas as pd

# One row per analysed recording, built from the result dicts in `analysis`.
df = pd.DataFrame(analysis)

In [None]:
# Display the comparison table.
df