In [None]:
import pandas as pd
import numpy as np
from numpy.fft import rfft
from sklearn.preprocessing import normalize
from numpy.linalg import norm, eig
from scipy.signal import cwt, morlet, ricker

In [None]:
# Load the training table from the relative path 'train.csv'.
data = pd.read_csv('train.csv')

## Preprocessing

In [None]:
# Build, for every patient id, two entries in `patients_data[p]`:
#   'ans'  -> list of 0/1 labels (one per Stimulus_ID group)
#   'data' -> float array of shape (num_epoch, 1, channels, right - left)
patients_data = {p: {} for p in ['p1', 'p2', 'p3', 'p4']}

# Discard rows whose Stimulus_ID is -1.
data = data[data.Stimulus_ID != -1]

# Row window taken from every epoch. The original assigned the same (0, 800)
# in three identical per-patient branches, so it is set once here.
left, right = 0, 800

for p in ['p1', 'p2', 'p3', 'p4']:
    pat = data[data.PatientID == p]
    # Keep only the columns whose first-row value is not the -999999 marker.
    # Boolean-mask selection replaces DataFrame.select, which was deprecated
    # in pandas 0.21 and removed in pandas 1.0.
    pat = pat.loc[:, pat.iloc[0] != -999999]

    # One label per Stimulus_ID (first row of each group), ignoring rows with
    # Stimulus_Type == 101: label 0 when Stimulus_Type < 51, otherwise 1.
    ans = pat[pat.Stimulus_Type != 101].groupby(['Stimulus_ID']).head(1).Stimulus_Type.apply(lambda x: 0 if x<51 else 1)
    patients_data[p]['ans'] = list(ans)

    num_epoch = len(patients_data[p]['ans'])
    # Three columns are excluded by the `1:-2` column slice used below.
    channels = pat.shape[1] - 3
    pat_data = patients_data[p]['data'] = np.zeros((num_epoch, 1, channels, right-left))

    # NOTE(review): assumes Stimulus_ID values run 1..num_epoch and that each
    # epoch has at least `right` rows -- confirm against the data.
    for epoch in range(1, num_epoch+1):
        epoch_data = pat[pat.Stimulus_ID == epoch].iloc[left:right, 1:-2]
        # Standardise each column over the window, then transpose so that
        # columns become rows of the stored epoch.
        pat_data[epoch-1, 0] = ((epoch_data - np.mean(epoch_data, axis=0))/np.std(epoch_data, axis=0)).T
        

## Single classifier for each patient

In [None]:
from keras.models import Sequential, Model
from keras.layers.convolutional import Convolution2D, Convolution1D, MaxPooling2D, AveragePooling2D
from keras.layers.core import Dense, Flatten, Dropout
from keras.layers import Input
from keras.optimizers import SGD, Adam, Adadelta, RMSprop, Adagrad
from keras.layers.noise import GaussianNoise
from keras.regularizers import l2, l1, activity_l1l2
from keras.callbacks import EarlyStopping
from sklearn.cross_validation import StratifiedKFold
from sklearn.metrics import accuracy_score
import numpy as np, keras, pickle
from scipy.stats import gmean
import json, theano

In [None]:
def create_model(patient, shape):
    """Build and compile the convolutional binary classifier.

    Parameters
    ----------
    patient
        Not used inside this function; kept so existing callers keep working.
    shape
        Tuple passed as `input_shape` of the first layer. `shape[1]` is also
        used as the row count of the first convolution kernel.

    Returns
    -------
    A compiled `Sequential` model (loss 'mse', Adagrad optimizer, accuracy metric).
    """
    conv_opts = dict(activation='tanh', init='normal')
    dense_opts = dict(activation='sigmoid', init='normal')

    net = Sequential()
    # 5 filters, kernel of shape[1] rows by 1 column.
    net.add(Convolution2D(5, shape[1], 1, input_shape=shape, **conv_opts))
    # 5 filters, kernel of 1 row by 10 columns, subsample (stride) of (1, 5).
    net.add(Convolution2D(5, 1, 10, subsample=(1, 5), **conv_opts))
    net.add(Flatten())
    net.add(Dense(10, **dense_opts))
    net.add(Dropout(0.5))
    net.add(Dense(1, **dense_opts))

    net.compile(loss='mse', optimizer=Adagrad(), metrics=['accuracy'])

    return net

def get_activations(model, layer, X_batch):
    """Return the output of `model.layers[layer]` evaluated on `X_batch`.

    A Theano function is compiled from the input of the model's first layer to
    the output of the requested layer (with `allow_input_downcast=True`) and
    then applied to `X_batch`.
    """
    layer_output_fn = theano.function(
        [model.layers[0].input],
        model.layers[layer].output,
        allow_input_downcast=True,
    )
    return layer_output_fn(X_batch)

In [None]:
# Train (or cross-validate) one classifier per listed patient on a single crop
# of the last axis of the preprocessed data.
np.random.seed(seed=42)

validate = False
FOLDS = 5

# Crop of the last data axis used by this classifier. Named so that the
# validation report prints the window actually trained on; the original printed
# `left`/`right` left over from the preprocessing cell (0, 800) instead.
win_left, win_right = 600, 700

nb_epoch = 100
bsize = 80

for p in ['p1']:
    X = patients_data[p]['data'][:, :, :, win_left:win_right]
    y = np.array(patients_data[p]['ans'])
    # NOTE(review): this zero array is stored but never filled in this cell;
    # the 200 x 10 size is hard-coded -- confirm it is still needed.
    features = patients_data[p]['features'] = np.zeros((200, 10))

    if validate:
        # Stratified K-fold cross-validation; a fresh model is trained per fold.
        skf = StratifiedKFold(y, n_folds=FOLDS, random_state=42, shuffle=True)
        acc = []
        for train_index, test_index in skf:
            model = create_model(p, X.shape[1:])
            model.fit(X[train_index], y[train_index], batch_size=bsize, nb_epoch=nb_epoch, verbose=0)

            ans = model.predict_classes(X[test_index], verbose=0, batch_size=test_index.shape[0])
            fold_acc = accuracy_score(y[test_index], ans)
            acc.append(fold_acc)
            print(fold_acc)
        print('left: ', win_left, 'right: ', win_right)
        print('mean: ', np.mean(acc))
        print('std: ', np.std(acc))
        print('--------------')

    else:
        # Fit on all epochs and save the weights to '<p>.npz' under key 'data'.
        model = create_model(p, X.shape[1:])
        model.fit(X, y, batch_size=bsize, nb_epoch=nb_epoch, verbose=0)
        np.savez(p, data=model.get_weights())

## Ensembles (crops by time)

In [None]:
# Ensemble over crops of the last data axis: for every patient one model is
# trained per window, and (in validation mode) the per-window probabilities are
# combined with a geometric mean before being turned into labels.
np.random.seed(seed=42)

validate = True
FOLDS = 5

# Per-patient list of [start, stop] crops; one model is trained per crop.
windows = {'p1': [[200, 800], [300, 800], [600, 700], [0, 800]],
           'p2': [[100, 800], [200, 700], [600, 700], [0, 800]],
           'p3': [[200, 700], [200, 800], [400, 700], [500, 800], [0, 800]],
           'p4': [[100, 800], [300, 700], [500, 800], [0, 800]]}

# Scratch storage for the models of the fold currently being evaluated.
models = {'p1': [], 'p2': [], 'p3': [], 'p4': []}

# NOTE(review): `calibrate` is called below but is not defined or imported
# anywhere in the visible notebook, so the validation branch raised NameError
# on a fresh kernel. The fallback is installed only when no `calibrate` exists
# already. Thresholding at 0.5 is an assumption -- confirm the intended rule.
if 'calibrate' not in globals():
    def calibrate(probs, threshold=0.5):
        """Fallback: map probabilities to 0/1 labels (1 where prob > threshold)."""
        return (np.asarray(probs) > threshold).astype(int)

nb_epoch = 100
bsize = 80

for p in ['p1', 'p2', 'p3', 'p4']:
    X, y = patients_data[p]['data'], np.array(patients_data[p]['ans'])

    if validate:
        skf = StratifiedKFold(y, n_folds=FOLDS, random_state=42, shuffle=True)
        acc = []
        for train_index, test_index in skf:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            # Train one model per window on the training part of the fold.
            for w in windows[p]:
                model = create_model(p, (X_train.shape[1], X_train.shape[2], w[1]-w[0]))
                models[p].append(model)
                models[p][-1].fit(X_train[:, :, :, w[0]:w[1]], y_train, batch_size=bsize,
                                  nb_epoch=nb_epoch, verbose=0)

            # Column i holds the test-set probabilities of the i-th window's model.
            probs = np.zeros((test_index.shape[0], len(windows[p])))
            for i, w in enumerate(windows[p]):
                probs[:, i] = models[p][i].predict_proba(X_test[:, :, :, w[0]:w[1]], verbose=0,
                                                         batch_size=test_index.shape[0]).flatten()

            # Empty the scratch list so the next fold's models start at index 0.
            models[p].clear()
            ans = calibrate(gmean(probs, axis=1))
            # Score once per fold (the original computed the same value twice).
            fold_acc = accuracy_score(y_test, ans)
            acc.append(fold_acc)
            print(fold_acc)
        print('mean: ', np.mean(acc))
        print('std: ', np.std(acc))
        print('--------------')

    else:
        # Fit one model per window on all epochs and save its weights to
        # '<p>_<i>.npz' under key 'data'. Weights are written only in this branch.
        for i, w in enumerate(windows[p]):
            model = create_model(p, (X.shape[1], X.shape[2], w[1]-w[0]))
            model.fit(X[:, :, :, w[0]:w[1]], y, batch_size=bsize,
                      nb_epoch=nb_epoch, verbose=0)
            np.savez(p+'_'+str(i), data=model.get_weights())
        

## Submission preparation

In [None]:
# Shell out to run submission.py under python2.7 with arguments `create sub_ensemble`.
# NOTE(review): presumably this consumes the .npz weight files written by the
# training cells, which are saved only when `validate` is False -- confirm.
!python2.7 submission.py create sub_ensemble