In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from datetime import datetime
import os
import random
from keras.preprocessing import sequence
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import Adam
from keras.models import load_model
from keras.callbacks import ModelCheckpoint
from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from keras.layers import Bidirectional
from keras.layers import Flatten
from keras.layers import TimeDistributed
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import ConvLSTM2D
from keras import backend as K

Using TensorFlow backend.


In [2]:
ConditionGroupFileNames = os.listdir('data/condition')
ControlGroupFileNames = os.listdir('data/control')

In [3]:
X = []
y = []

In [4]:
for fileName in ConditionGroupFileNames:
    df = pd.read_csv('data/condition/'+str(fileName))
    dates = df['date'].unique()
    activityLevelsPerDay = []
    for date in dates:
        if len(df[df['date']==date]) == 1440:
            temp = pd.DataFrame(df[df['date']==date]).drop(columns=['timestamp','date'])
            activityLevelsPerDay.append(temp)
    for dailyActivityLevel in activityLevelsPerDay:
        activityVector = np.array(dailyActivityLevel["activity"])
        if len(activityVector) == 1440:
            X.append(activityVector)
            y.append(1)

In [5]:
for fileName in ControlGroupFileNames:
    df = pd.read_csv('data/control/'+str(fileName))
    dates = df['date'].unique()
    activityLevelsPerDay = []
    for date in dates:
        if len(df[df['date']==date]) == 1440:
            temp = pd.DataFrame(df[df['date']==date]).drop(columns=['timestamp','date'])
            activityLevelsPerDay.append(temp)
    for dailyActivityLevel in activityLevelsPerDay:
        activityVector = np.array(dailyActivityLevel["activity"])
        if len(activityVector) == 1440:
            X.append(activityVector)
            y.append(0)

In [6]:
combinedDict = list(zip(X, y))
random.shuffle(combinedDict)
X[:], y[:] = zip(*combinedDict)

In [7]:
X = np.array(X)
y = np.array(y)

In [8]:
X = np.reshape(X, (X.shape[0], 1, X.shape[1]))

In [9]:
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [10]:
seed = 7
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
accuracy_scores = []
prec_scores = []
rec_scores = []
f1_scores = []

In [11]:
for train, test in kfold.split(X, y):
    model = Sequential()
    model.add(LSTM(64, input_shape=(1, 1440), return_sequences=True))
    model.add(LSTM(64, return_sequences=True))
    model.add(LSTM(64))
    model.add(Dense(1, activation='sigmoid'))
    
    adam = Adam(lr=0.001)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy', recall_m, precision_m, f1_m])
    
    model.fit(X[train], y[train], epochs=10, batch_size=128, verbose=0)
    scores = model.evaluate(X[test], y[test], verbose=0)
    
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    print("%s: %.2f%%" % (model.metrics_names[2], scores[2]))
    print("%s: %.2f%%" % (model.metrics_names[3], scores[3]))
    print("%s: %.2f%%" % (model.metrics_names[4], scores[4]))
    print("\n")
    accuracy_scores.append(scores[1] * 100)
    prec_scores.append(scores[2])
    rec_scores.append(scores[3])
    f1_scores.append(scores[4])
    
print("%.2f%% (+/- %.2f%%)" % (np.mean(accuracy_scores), np.std(accuracy_scores)))
print("%.2f%% (+/- %.2f%%)" % (np.mean(prec_scores), np.std(prec_scores)))
print("%.2f%% (+/- %.2f%%)" % (np.mean(rec_scores), np.std(rec_scores)))
print("%.2f%% (+/- %.2f%%)" % (np.mean(f1_scores), np.std(f1_scores)))

accuracy: 65.05%
recall_m: 0.16%
precision_m: 0.42%
f1_m: 0.23%


accuracy: 65.05%
recall_m: 0.00%
precision_m: 0.00%
f1_m: 0.00%


accuracy: 65.05%
recall_m: 0.00%
precision_m: 0.00%
f1_m: 0.00%


accuracy: 65.05%
recall_m: 0.36%
precision_m: 0.54%
f1_m: 0.40%


accuracy: 65.05%
recall_m: 0.00%
precision_m: 0.00%
f1_m: 0.00%


accuracy: 65.05%
recall_m: 0.00%
precision_m: 0.00%
f1_m: 0.00%


accuracy: 63.11%
recall_m: 0.13%
precision_m: 0.35%
f1_m: 0.19%


accuracy: 64.08%
recall_m: 0.12%
precision_m: 0.29%
f1_m: 0.16%


accuracy: 65.05%
recall_m: 0.00%
precision_m: 0.00%
f1_m: 0.00%


accuracy: 65.69%
recall_m: 0.00%
precision_m: 0.00%
f1_m: 0.00%


64.82% (+/- 0.68%)
0.08% (+/- 0.11%)
0.16% (+/- 0.20%)
0.10% (+/- 0.13%)


In [12]:
accuracy_scores = []
prec_scores = []
rec_scores = []
f1_scores = []

In [13]:
for train, test in kfold.split(X, y):
    model = Sequential()
    model.add(Bidirectional(LSTM(128, input_shape=(1, 1440))))
    model.add(Dense(1, activation='sigmoid'))
    
    adam = Adam(lr=0.001)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy', recall_m, precision_m, f1_m])
    
    model.fit(X[train], y[train], epochs=10, batch_size=128, verbose=0)
    scores = model.evaluate(X[test], y[test], verbose=0)
    
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    print("%s: %.2f%%" % (model.metrics_names[2], scores[2]))
    print("%s: %.2f%%" % (model.metrics_names[3], scores[3]))
    print("%s: %.2f%%" % (model.metrics_names[4], scores[4]))
    print("\n")
    accuracy_scores.append(scores[1] * 100)
    prec_scores.append(scores[2])
    rec_scores.append(scores[3])
    f1_scores.append(scores[4])
    
print("%.2f%% (+/- %.2f%%)" % (np.mean(accuracy_scores), np.std(accuracy_scores)))
print("%.2f%% (+/- %.2f%%)" % (np.mean(prec_scores), np.std(prec_scores)))
print("%.2f%% (+/- %.2f%%)" % (np.mean(rec_scores), np.std(rec_scores)))
print("%.2f%% (+/- %.2f%%)" % (np.mean(f1_scores), np.std(f1_scores)))

accuracy: 68.93%
recall_m: 0.26%
precision_m: 0.79%
f1_m: 0.35%


accuracy: 64.08%
recall_m: 0.10%
precision_m: 0.33%
f1_m: 0.15%


accuracy: 62.14%
recall_m: 0.06%
precision_m: 0.21%
f1_m: 0.10%


accuracy: 65.05%
recall_m: 0.05%
precision_m: 0.37%
f1_m: 0.09%


accuracy: 67.96%
recall_m: 0.06%
precision_m: 0.50%
f1_m: 0.10%


accuracy: 64.08%
recall_m: 0.02%
precision_m: 0.12%
f1_m: 0.03%


accuracy: 66.99%
recall_m: 0.07%
precision_m: 0.62%
f1_m: 0.12%


accuracy: 63.11%
recall_m: 0.00%
precision_m: 0.00%
f1_m: 0.00%


accuracy: 66.02%
recall_m: 0.07%
precision_m: 0.58%
f1_m: 0.12%


accuracy: 66.67%
recall_m: 0.15%
precision_m: 0.50%
f1_m: 0.21%


65.50% (+/- 2.07%)
0.08% (+/- 0.07%)
0.40% (+/- 0.23%)
0.13% (+/- 0.09%)


In [14]:
accuracy_scores = []
prec_scores = []
rec_scores = []
f1_scores = []

In [15]:
for train, test in kfold.split(X, y):
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(1, 1440), data_format='channels_first'))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    
    adam = Adam(lr=0.001)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy', recall_m, precision_m, f1_m])
    
    model.fit(X[train], y[train], epochs=10, batch_size=128, verbose=0)
    scores = model.evaluate(X[test], y[test], verbose=0)
    
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    print("%s: %.2f%%" % (model.metrics_names[2], scores[2]))
    print("%s: %.2f%%" % (model.metrics_names[3], scores[3]))
    print("%s: %.2f%%" % (model.metrics_names[4], scores[4]))
    print("\n")
    accuracy_scores.append(scores[1] * 100)
    prec_scores.append(scores[2])
    rec_scores.append(scores[3])
    f1_scores.append(scores[4])
    
print("%.2f%% (+/- %.2f%%)" % (np.mean(accuracy_scores), np.std(accuracy_scores)))
print("%.2f%% (+/- %.2f%%)" % (np.mean(prec_scores), np.std(prec_scores)))
print("%.2f%% (+/- %.2f%%)" % (np.mean(rec_scores), np.std(rec_scores)))
print("%.2f%% (+/- %.2f%%)" % (np.mean(f1_scores), np.std(f1_scores)))

accuracy: 72.82%
recall_m: 0.57%
precision_m: 0.66%
f1_m: 0.60%


accuracy: 66.02%
recall_m: 0.55%
precision_m: 0.62%
f1_m: 0.58%


accuracy: 61.17%
recall_m: 0.20%
precision_m: 0.31%
f1_m: 0.24%


accuracy: 69.90%
recall_m: 0.39%
precision_m: 0.46%
f1_m: 0.42%


accuracy: 67.96%
recall_m: 0.57%
precision_m: 0.57%
f1_m: 0.55%


accuracy: 66.99%
recall_m: 0.56%
precision_m: 0.56%
f1_m: 0.54%


accuracy: 62.14%
recall_m: 0.27%
precision_m: 0.36%
f1_m: 0.30%


accuracy: 61.17%
recall_m: 0.32%
precision_m: 0.36%
f1_m: 0.34%


accuracy: 67.96%
recall_m: 0.44%
precision_m: 0.64%
f1_m: 0.52%


accuracy: 67.65%
recall_m: 0.28%
precision_m: 0.42%
f1_m: 0.33%


66.38% (+/- 3.65%)
0.42% (+/- 0.14%)
0.50% (+/- 0.12%)
0.44% (+/- 0.13%)


In [16]:
accuracy_scores = []
prec_scores = []
rec_scores = []
f1_scores = []

In [17]:
for train, test in kfold.split(X, y):
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(1, 1440), data_format='channels_first'))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(LSTM(64, return_sequences=True))
    model.add(LSTM(64))
    model.add(Dense(1, activation='sigmoid'))
    
    adam = Adam(lr=0.001)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy', recall_m, precision_m, f1_m])
    
    model.fit(X[train], y[train], epochs=10, batch_size=128, verbose=0)
    scores = model.evaluate(X[test], y[test], verbose=0)
    
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    print("%s: %.2f%%" % (model.metrics_names[2], scores[2]))
    print("%s: %.2f%%" % (model.metrics_names[3], scores[3]))
    print("%s: %.2f%%" % (model.metrics_names[4], scores[4]))
    print("\n")
    accuracy_scores.append(scores[1] * 100)
    prec_scores.append(scores[2])
    rec_scores.append(scores[3])
    f1_scores.append(scores[4])
    
print("%.2f%% (+/- %.2f%%)" % (np.mean(accuracy_scores), np.std(accuracy_scores)))
print("%.2f%% (+/- %.2f%%)" % (np.mean(prec_scores), np.std(prec_scores)))
print("%.2f%% (+/- %.2f%%)" % (np.mean(rec_scores), np.std(rec_scores)))
print("%.2f%% (+/- %.2f%%)" % (np.mean(f1_scores), np.std(f1_scores)))

accuracy: 65.05%
recall_m: 0.32%
precision_m: 0.47%
f1_m: 0.37%


accuracy: 69.90%
recall_m: 0.44%
precision_m: 0.41%
f1_m: 0.42%


accuracy: 69.90%
recall_m: 0.20%
precision_m: 0.55%
f1_m: 0.29%


accuracy: 62.14%
recall_m: 0.14%
precision_m: 0.40%
f1_m: 0.20%


accuracy: 69.90%
recall_m: 0.73%
precision_m: 0.59%
f1_m: 0.64%


accuracy: 60.19%
recall_m: 0.68%
precision_m: 0.46%
f1_m: 0.54%


accuracy: 70.87%
recall_m: 0.70%
precision_m: 0.57%
f1_m: 0.61%


accuracy: 63.11%
recall_m: 0.08%
precision_m: 0.46%
f1_m: 0.13%


accuracy: 63.11%
recall_m: 0.55%
precision_m: 0.51%
f1_m: 0.53%


accuracy: 68.63%
recall_m: 0.18%
precision_m: 0.50%
f1_m: 0.27%


66.28% (+/- 3.77%)
0.40% (+/- 0.24%)
0.49% (+/- 0.06%)
0.40% (+/- 0.17%)
