In [1]:
import pandas as pd
import numpy as np
from numpy.fft import rfft
from sklearn.preprocessing import normalize
from numpy.linalg import norm, eig
from scipy.signal import cwt, morlet, ricker

In [2]:
# Read the training table from 'train.csv' (path relative to the working
# directory) into a DataFrame. Later cells in this notebook access its
# Stimulus_ID, PatientID and Stimulus_Type columns.
data = pd.read_csv('train.csv')

## Preprocessing

In [3]:
# Per-patient container filled below:
#   patients_data[p]['ans']  -> list of 0/1 labels, one per epoch
#   patients_data[p]['data'] -> array of shape (num_epoch, 1, channels, time)
patients_data = {p: {} for p in ['p1', 'p2', 'p3', 'p4']}

# Discard every row whose Stimulus_ID is -1.
data = data[data.Stimulus_ID != -1]

for p in ['p1', 'p2', 'p3', 'p4']:
    pat = data[data.PatientID == p]

    # Keep only the columns whose value in this patient's first row is not the
    # -999999 sentinel.  DataFrame.select (used here originally) was deprecated
    # in pandas 0.21 and removed in 1.0; a positional boolean column mask with
    # .loc performs the same selection on every pandas version.
    keep_column = (pat.iloc[0] != -999999).values
    pat = pat.loc[:, keep_column]

    # One label per Stimulus_ID, read from the first row of each group after
    # rows with Stimulus_Type == 101 have been ignored:
    # Stimulus_Type < 51 -> 0, otherwise -> 1.
    first_rows = pat[pat.Stimulus_Type != 101].groupby(['Stimulus_ID']).head(1)
    ans = first_rows.Stimulus_Type.apply(lambda x: 0 if x < 51 else 1)
    patients_data[p]['ans'] = list(ans)

    num_epoch = len(patients_data[p]['ans'])
    # Three columns are not signal channels: the first one and the last two
    # (they are sliced away by `.iloc[:, 1:-2]` below).
    channels = pat.shape[1] - 3
    time = 800  # number of leading samples kept from every epoch
    pat_data = patients_data[p]['data'] = np.zeros((num_epoch, 1, channels, time))

    # NOTE(review): this assumes Stimulus_ID takes the values 1..num_epoch in
    # the same order as the labels above, and that every epoch has at least
    # `time` rows (otherwise the assignment fails on a shape mismatch) -- confirm
    # against the data set.
    for epoch in range(1, num_epoch + 1):
        epoch_data = pat[pat.Stimulus_ID == epoch].iloc[:time, 1:-2]
        # Standardise each channel over the epoch, then transpose so the
        # stored slice is (channels, time).
        centred = epoch_data - np.mean(epoch_data, axis=0)
        pat_data[epoch - 1, 0] = (centred / np.std(epoch_data, axis=0)).T
        

## Training

In [None]:
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, Convolution1D, MaxPooling2D, AveragePooling2D
from keras.layers.core import Dense, Flatten, Dropout
from keras.optimizers import SGD, Adam, Adadelta, RMSprop, Adagrad
from keras.layers.noise import GaussianNoise
from keras.regularizers import l2
import numpy as np, keras, pickle

class SaveBestModel(keras.callbacks.Callback):
    """Callback that saves the model weights each time validation improves.

    After every training epoch the validation accuracy and loss are read from
    the Keras ``logs`` dict.  When the accuracy is strictly higher AND the
    loss strictly lower than the best values seen so far, the current weights
    are written with ``np.savez`` to the file named by ``patient`` (NumPy
    appends ``.npz``) under the key ``data``, and the stored bests are updated.

    Parameters
    ----------
    patient : str
        Output file name (without extension) for the saved weights.
    """

    def __init__(self, patient):
        # Let the base Callback set up whatever state it expects.
        super(SaveBestModel, self).__init__()
        self.max_acc = 0      # best validation accuracy seen so far
        self.min_loss = 100   # best (lowest) validation loss seen so far
        self.patient = patient

    def on_epoch_end(self, epoch, logs=None):
        """Save the weights if both validation metrics improved this epoch."""
        # Avoid a shared mutable default argument.
        logs = logs or {}
        # Older Keras reports 'val_acc'; newer releases use 'val_accuracy'.
        val_acc = logs.get('val_acc', logs.get('val_accuracy'))
        val_loss = logs.get('val_loss')
        if val_acc is None or val_loss is None:
            # No validation metrics available: nothing to compare against.
            return
        if val_acc > self.max_acc and val_loss < self.min_loss:
            weights = self.model.get_weights()
            # The weight arrays have different shapes.  Pack them into an
            # explicit 1-D object array instead of relying on implicit
            # ragged-array creation, which raises on recent NumPy releases.
            packed = np.empty(len(weights), dtype=object)
            for index, weight in enumerate(weights):
                packed[index] = weight
            np.savez(self.patient, data=packed)
            self.max_acc = val_acc
            self.min_loss = val_loss
            

# Fraction of each patient's epochs (taken from the front, in stored order)
# used for fitting; the remainder is passed as validation data.
PERCENTAGE = 0.8


def _build_network(input_shape, with_noise):
    """Assemble the uncompiled Sequential network for one patient.

    input_shape : per-sample shape, i.e. ``X.shape[1:]`` of the epoch tensor.
    with_noise  : when true, a GaussianNoise(1) layer is placed first.
    """
    net = Sequential()
    if with_noise:
        net.add(GaussianNoise(1, input_shape=input_shape))
    # First convolution: kernel height equals input_shape[1] (== X.shape[2]),
    # kernel width 1.
    net.add(Convolution2D(10, input_shape[1], 1, input_shape=input_shape,
                          activation='relu', init='normal'))
    net.add(Convolution2D(5, 1, 16, subsample=(1, 8), activation='relu', init='normal'))
    net.add(Flatten())
    net.add(Dense(30, activation='sigmoid', init='normal'))
    net.add(Dropout(0.5))
    net.add(Dense(1, activation='sigmoid', init='normal'))
    return net


for p in ['p1', 'p2', 'p3', 'p4']:
    X = patients_data[p]['data']
    y = np.array(patients_data[p]['ans'])

    # Sequential (unshuffled) split: leading part trains, trailing part validates.
    TRAIN_TEST_SPLIT = int(X.shape[0] * PERCENTAGE)
    X_train, Y_train = X[:TRAIN_TEST_SPLIT], y[:TRAIN_TEST_SPLIT]
    X_test, Y_test = X[TRAIN_TEST_SPLIT:], y[TRAIN_TEST_SPLIT:]

    # Every patient except 'p4' gets the input-noise layer.
    model = _build_network(X.shape[1:], with_noise=(p != 'p4'))
    model.compile(loss='mse', optimizer=Adagrad(), metrics=['accuracy'])

    # The callback writes the best weights seen during training to a file named after the patient.
    checkpoint = SaveBestModel(p)
    if p == 'p4':
        bsize = 8
    else:
        bsize = 4
    model.fit(X_train, Y_train, batch_size=bsize, nb_epoch=30,
              validation_data=(X_test, Y_test), callbacks=[checkpoint])

## Submission preparation

In [5]:
# Shell escape: runs submission.py with the python2.7 interpreter and the
# arguments `create sub`. The script is not part of this notebook;
# presumably it builds the submission from the saved weights -- confirm.
!python2.7 submission.py create sub      