In [None]:
import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy import interp
from itertools import cycle
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import KFold
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Embedding
from sklearn.model_selection import cross_val_score
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D,Flatten

The ROC-curve plotting in this notebook follows the scikit-learn example at https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html

In [None]:
# Directory holding the processed feature tables and label files.
path = 'parkinsons_disease_mobile_tracker/processed_data/'

# Training split: feature table plus the raw `on_off` label column as a list.
X_train = pd.read_csv(path + 'X_train_final.csv')
y_train = list(pd.read_csv(path + 'y_train.csv')['on_off'])

# Test split, loaded the same way.
X_test = pd.read_csv(path + 'X_test_final.csv')
y_test = list(pd.read_csv(path + 'y_test.csv')['on_off'])

In [None]:
def _build_sequences(feature_frame, data_dir):
    """Join each measurement's raw time series with its per-measurement features.

    For every row of ``feature_frame`` the CSV named ``<measurement_id>.csv`` is
    read from ``data_dir``, its ``Timestamp`` column is dropped, and the row's
    feature values are repeated on every time step and appended as extra columns.

    Parameters
    ----------
    feature_frame : pandas.DataFrame
        Must contain a ``measurement_id`` column. The last two columns are
        excluded from the features (subject_id and measurement_id, per the
        original inline note -- TODO confirm the column order).
    data_dir : str
        Directory prefix (with trailing separator) of the per-measurement CSVs.

    Returns
    -------
    list of numpy.ndarray
        One ``(n_time_steps, n_signal_cols + n_features)`` array per row.
    """
    sequences = []
    for _, row in feature_frame.iterrows():
        # `iterrows` yields object-dtype rows when the frame mixes strings and
        # numbers; cast explicitly so the final tensors are numeric instead of
        # object arrays (which Keras/TensorFlow cannot convert).
        static = row.iloc[:-2].to_numpy(dtype=np.float64)
        time_steps = (
            pd.read_csv(data_dir + row['measurement_id'] + '.csv')
            .drop(['Timestamp'], axis=1)
            .to_numpy()
        )
        # Broadcast the per-measurement features onto every time step.
        tiled = np.tile(static, (len(time_steps), 1))
        sequences.append(np.concatenate((time_steps, tiled), axis=1))
    return sequences


path = 'cis_training_data/'
X_train_list = _build_sequences(X_train, path)
X_test_list = _build_sequences(X_test, path)

In [None]:
def _pad_to_length(example, length):
    """Zero-pad a 2-D array along axis 0 so that it has ``length`` rows.

    The padding width is taken from ``example.shape[1]`` rather than a
    hard-coded column count, so the cell keeps working if the number of
    signal/feature columns changes. Arrays that already have at least
    ``length`` rows are returned unchanged.
    """
    missing = length - example.shape[0]
    if missing <= 0:
        return example
    padding = np.zeros((missing, example.shape[1]))
    return np.concatenate((example, padding))


# Longest sequence across both splits; every example is padded to this length
# so the splits can be stacked into dense 3-D arrays.
max_train_shape = max(example.shape[0] for example in X_train_list)
max_test_shape = max(example.shape[0] for example in X_test_list)
max_shape = max(max_train_shape, max_test_shape)

X_train_list = np.array([_pad_to_length(example, max_shape) for example in X_train_list])
X_test_list = np.array([_pad_to_length(example, max_shape) for example in X_test_list])

In [None]:
# Collapse the five-level `on_off` rating into a binary target:
# 0 stays 0, every rating from 1 to 4 becomes 1.
mapping_dict = {0:0,1:1,2:1,3:1,4:1}

# Labels are materialised as NumPy arrays so that they match the ndarray inputs;
# newer Keras versions refuse to mix ndarray features with plain-list targets.
y_train_list = np.array([mapping_dict[i] for i in y_train])
y_test_list = np.array([mapping_dict[i] for i in y_test])

The ROC callback below is adapted from this Stack Overflow answer: https://stackoverflow.com/questions/41032551/how-to-compute-receiving-operating-characteristic-roc-and-auc-in-keras

In [None]:
from sklearn.metrics import roc_auc_score
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
from keras.callbacks import Callback

class RocCallback(Callback):
    """Keras callback that reports accuracy and ROC-AUC and plots ROC curves each epoch.

    Parameters
    ----------
    training_data : tuple
        ``(x, y)`` pair scored for the "train" numbers and curve.
    validation_data : tuple
        ``(x, y)`` pair scored for the "val" numbers and curve.

    Notes
    -----
    * Scores are obtained with ``self.model.predict``; the result is flattened,
      which assumes the model has a single output unit -- TODO confirm if the
      callback is reused with other architectures.
    * Only ``on_epoch_end`` is overridden; all other hooks are inherited from
      the ``Callback`` base class.
    """

    def __init__(self,training_data,validation_data):
        super().__init__()
        self.x = training_data[0]
        self.y = training_data[1]
        self.x_val = validation_data[0]
        self.y_val = validation_data[1]

    @staticmethod
    def _accuracy(y_true, y_score, threshold=0.5):
        """Return the fraction of samples whose thresholded score equals the label."""
        labels = np.asarray(y_true).ravel()
        predicted = (np.asarray(y_score).ravel() > threshold).astype(int)
        return float(np.mean(predicted == labels))

    def on_epoch_end(self, epoch, logs=None):
        """Score both data sets, print the metrics, and show the ROC plot."""
        print('----------- predicting -----------')
        y_pred_train = np.asarray(self.model.predict(self.x)).ravel()
        roc_train = roc_auc_score(self.y, y_pred_train)
        y_pred_val = np.asarray(self.model.predict(self.x_val)).ravel()
        roc_val = roc_auc_score(self.y_val, y_pred_val)
        # Compare the thresholded prediction with the label; the previous
        # expression `pred > 0.5 - y` was parsed as `pred > (0.5 - y)` and did
        # not measure accuracy.
        print('Custom Accuracy Train:', self._accuracy(self.y, y_pred_train))
        print('Custom Accuracy Val:', self._accuracy(self.y_val, y_pred_val))
        print('\rroc-auc_train: %s - roc-auc_val: %s' % (str(round(roc_train,4)),str(round(roc_val,4))),end=100*' '+'\n')

        print('----------- plotting -----------')
        # Use the same scores as `roc_auc_score` above so that the plotted
        # curves agree with the AUC values shown in the legend.
        train_fpr, train_tpr, _ = metrics.roc_curve(self.y, y_pred_train, pos_label=1)
        val_fpr, val_tpr, _ = metrics.roc_curve(self.y_val, y_pred_val, pos_label=1)

        plt.title('Receiver Operating Characteristic')
        plt.plot(train_fpr, train_tpr, 'tan', label = 'train AUC = %0.2f' % roc_train)
        plt.plot(val_fpr, val_tpr, color='navy', label = 'val AUC = %0.2f' % roc_val)
        plt.legend(loc = 'lower right')
        # Chance-level diagonal for reference.
        plt.plot([0, 1], [0, 1],'r--')
        plt.xlim([0, 1])
        plt.ylim([0, 1])
        plt.ylabel('True Positive Rate')
        plt.xlabel('False Positive Rate')
        plt.show()

# Single callback instance shared by every model fit in this notebook:
# it scores the training split and the held-out split after each epoch.
train_pair = (X_train_list, y_train_list)
held_out_pair = (X_test_list, y_test_list)
roc = RocCallback(training_data=train_pair, validation_data=held_out_pair)

In [None]:
def create_model1():
    """Build and compile model 1: two Conv1D + MaxPooling1D stages and a sigmoid output.

    The input shape is ``(max_shape, 24)``, where ``max_shape`` is the
    module-level padded sequence length. The network is compiled with binary
    cross-entropy, the Adam optimizer, and the ``binary_accuracy`` metric.
    """
    net = Sequential([
        Conv1D(100, kernel_size=100, activation='relu', input_shape=(max_shape,24)),
        MaxPooling1D(10),
        Conv1D(50, kernel_size=20, activation='relu'),
        MaxPooling1D(5),
        Flatten(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])
    net.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['binary_accuracy'],
    )
    return net

# Train model 1 for 10 epochs; the shared ROC callback reports after each epoch.
model1 = create_model1()
model1.fit(
    X_train_list,
    y_train_list,
    batch_size=30,
    epochs=10,
    verbose=1,
    validation_data=(X_test_list, y_test_list),
    callbacks=[roc],
)

In [None]:
def create_model2():
    """Build and compile model 2: two stacked Conv1D pairs and a sigmoid output.

    Layers, in order: Conv1D(100, 50) x 2, MaxPooling1D(3), Conv1D(50, 10) x 2,
    Flatten, Dropout(0.5), Dense(1, sigmoid). The input shape is
    ``(max_shape, 24)``, where ``max_shape`` is the module-level padded
    sequence length. Compiled with binary cross-entropy, Adam, and ``accuracy``.
    """
    model = Sequential()
    model.add(Conv1D(100, kernel_size=50, activation='relu', input_shape=(max_shape,24)))
    # Only the first layer declares the input shape; the second convolution
    # receives the first one's output, so the stale `input_shape` was dropped.
    model.add(Conv1D(100, 50, activation='relu'))
    model.add(MaxPooling1D(3))
    model.add(Conv1D(50, 10, activation='relu'))
    model.add(Conv1D(50, 10, activation='relu'))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
# Train model 2, then keep its predicted probabilities for the test split.
model2 = create_model2()
model2.fit(X_train_list, y_train_list, epochs=10, batch_size=30, verbose=1,
           validation_data=(X_test_list, y_test_list), callbacks=[roc])
# `predict` returns the sigmoid output directly; `predict_proba` was a thin
# wrapper around it and no longer exists in recent Keras releases.
probs2 = model2.predict(X_test_list)

In [None]:
def create_model3():
    """Build and compile model 3: a single Conv1D layer and a sigmoid output.

    Layers, in order: Conv1D(100, 50), Flatten, Dropout(0.5), Dense(1, sigmoid).
    The input shape is ``(max_shape, 24)``, where ``max_shape`` is the
    module-level padded sequence length. Compiled with binary cross-entropy,
    Adam, and the ``binary_accuracy`` and AUC metrics.
    """
    model = Sequential()
    model.add(Conv1D(100, kernel_size=50, activation='relu', input_shape=(max_shape,24)))
    model.add(Flatten())
    model.add(Dropout(0.5))
    # A one-unit softmax normalises over a single value and therefore always
    # outputs 1.0; sigmoid is the correct activation for a binary target.
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['binary_accuracy',keras.metrics.AUC()])
    return model
# Train model 3, then keep its predicted probabilities for the test split.
model3 = create_model3()
model3.fit(X_train_list, y_train_list, epochs=10, batch_size=30, verbose=1,
           validation_data=(X_test_list, y_test_list), callbacks=[roc])
# `predict` returns the model output directly; `predict_proba` was a thin
# wrapper around it and no longer exists in recent Keras releases.
probs3 = model3.predict(X_test_list)

In [None]:
def create_model4():
    """Build and compile model 4: Conv1D, MaxPooling1D, Conv1D, and a sigmoid output.

    The input shape is ``(max_shape, 24)``, where ``max_shape`` is the
    module-level padded sequence length. No dropout is used. Compiled with
    binary cross-entropy, Adam, and the ``binary_accuracy`` and AUC metrics.
    """
    stack = (
        Conv1D(100, kernel_size=50, activation='relu', input_shape=(max_shape,24)),
        MaxPooling1D(3),
        Conv1D(50, 10, activation='relu'),
        Flatten(),
        Dense(1, activation='sigmoid'),
    )
    net = Sequential()
    for layer in stack:
        net.add(layer)

    net.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['binary_accuracy', keras.metrics.AUC()],
    )
    return net
# Train model 4, then keep its predicted probabilities for the test split.
model4 = create_model4()
model4.fit(X_train_list, y_train_list, epochs=10, batch_size=30, verbose=1,
           validation_data=(X_test_list, y_test_list), callbacks=[roc])
# `predict` returns the sigmoid output directly; `predict_proba` was a thin
# wrapper around it and no longer exists in recent Keras releases.
probs4 = model4.predict(X_test_list)

In [None]:
def create_model5():
    """Build and compile model 5: two Conv1D layers, MaxPooling1D, and a sigmoid output.

    The input shape is ``(max_shape, 24)``, where ``max_shape`` is the
    module-level padded sequence length. No dropout is used. Compiled with
    binary cross-entropy, Adam, and the ``binary_accuracy`` and AUC metrics.
    """
    classifier = Sequential([
        Conv1D(100, kernel_size=50, activation='relu', input_shape=(max_shape,24)),
        Conv1D(50, 10, activation='relu'),
        MaxPooling1D(3),
        Flatten(),
        Dense(1, activation='sigmoid'),
    ])
    tracked = ['binary_accuracy', keras.metrics.AUC()]
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=tracked)
    return classifier
# Train model 5, then keep its predicted probabilities for the test split.
model5 = create_model5()
model5.fit(X_train_list, y_train_list, epochs=10, batch_size=30, verbose=1,
           validation_data=(X_test_list, y_test_list), callbacks=[roc])
# `predict` returns the sigmoid output directly; `predict_proba` was a thin
# wrapper around it and no longer exists in recent Keras releases.
probs5 = model5.predict(X_test_list)