In [18]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [62]:
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Flatten
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, roc_auc_score, \
        f1_score, precision_score, recall_score
import matplotlib.pyplot as plt

# Prepare Data

In [40]:
# Per-participant PHQ labels, keyed by participant id.
df = pd.read_csv(
    "gaze_labels.csv",
    index_col="Participant_ID",
)

In [41]:
# Peek at the first rows to sanity-check the label columns.
df.head()

Unnamed: 0_level_0,PHQ_Score,PHQ_Binary
Participant_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
300,2,0
301,3,0
302,4,0
303,0,0
304,6,0


In [42]:
# Pre-computed gaze feature tensor, one entry per participant along axis 0.
# NOTE(review): assumed to be ordered exactly like the rows of `df` - confirm.
X = np.load("gaze_features.npy")

# Binary PHQ label for each participant as a standalone NumPy array.
y = df["PHQ_Binary"].to_numpy(copy=True)

In [43]:
# Presumably (participants, time steps, features per step) - axes 1 and 2 are
# what the model later receives as its input shape.
X.shape

(181, 12447, 14)

In [44]:
# Fixed seed so the train/test partition is reproducible.
RANDOM_STATE = 42

# Hold out 10% of the participants for testing. The split is stratified on the
# label: the classes are imbalanced and the test set is tiny, so an
# unstratified draw could leave it with a badly skewed (or single-class) mix,
# which would make the AUC computed later unreliable or undefined.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=RANDOM_STATE,
    stratify=y,
)

In [45]:
def balance_data(X, y):
    """Undersample label-0 samples so they do not outnumber the positives.

    Every sample whose label is non-zero is kept. Samples labelled ``0`` are
    kept, in their original order, until ``sum(y)`` of them have been kept
    (for 0/1 labels this is the number of positives); any later zeros are
    dropped. The relative order of the surviving samples is preserved.

    Because only the *earliest* zeros survive, the tail of the returned arrays
    can consist solely of positives. Shuffle the result before applying any
    order-sensitive split to it.

    Parameters
    ----------
    X : array-like
        Samples, indexed along the first axis.
    y : array-like
        One label per sample; expected to be 0/1.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        The retained samples and their labels. Trailing dimensions and dtype
        of ``X`` are preserved even when nothing is retained.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not contain the same number of samples.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if X.shape[0] != y.shape[0]:
        raise ValueError(
            "X and y must have the same number of samples, got %d and %d"
            % (X.shape[0], y.shape[0])
        )

    is_negative = y == 0
    dep_count = y.sum()

    # cumsum gives each negative its 1-based rank among the negatives, so
    # "rank <= dep_count" selects exactly the first `dep_count` of them.
    keep = ~is_negative | (np.cumsum(is_negative) <= dep_count)
    return X[keep], y[keep]

In [46]:
# Undersample the training split only; the held-out test split is left with
# its original class distribution.
X_train, y_train = balance_data(X_train, y_train)

In [47]:
# Check how many training samples remain after balancing.
X_train.shape

(102, 12447, 14)

# Evaluation Metrics

In [48]:
def evaluate_on_training_set(y_test, y_pred, y_score=None):
    """Print binary-classification metrics and draw a ROC curve.

    Despite its name, the function is not tied to the training set: it simply
    compares whatever labels it receives with whatever predictions it receives.

    Parameters
    ----------
    y_test : array-like
        Ground-truth binary labels.
    y_pred : array-like
        Predicted hard labels. Used for the classification report and the
        confusion matrix.
    y_score : array-like, optional
        Continuous scores for the positive class (e.g. sigmoid outputs). When
        given, the AUC and the ROC curve are computed from these. When omitted,
        ``y_pred`` is used instead, which reduces the ROC curve to a single
        operating point.

    Returns
    -------
    None
        Results are printed and plotted on the current matplotlib figure.
    """
    ranking = y_pred if y_score is None else y_score

    # Compute the AUC once and reuse it for both the printout and the legend.
    auc = roc_auc_score(y_test, ranking)
    print("AUC is: ", auc)

    # Precision / recall / F1 per class.
    print(classification_report(y_test, y_pred))

    print("Confusion Matrix: \n", confusion_matrix(y_test, y_pred))

    # Points of the ROC curve; the thresholds themselves are not needed.
    fpr, tpr, _ = roc_curve(y_test, ranking)

    plt.plot(fpr, tpr, label='ROC curve (area = %0.3f)' % auc)
    plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
    plt.ylim([0.0, 1.0])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic')
    # Without this call the curve's label (and the AUC in it) is never drawn.
    plt.legend(loc='lower right')

# Model

In [101]:
# Per-sample shape fed to the network: everything after the sample axis,
# i.e. (sequence length, features per step).
input_shape = X_train.shape[1:3]
n_input, n_features = input_shape

In [102]:
# Two stacked LSTM layers followed by a small dense head with a sigmoid output
# for binary classification.
model = Sequential([
    # Only the first layer declares the input shape. It returns the full
    # sequence so the next LSTM receives one vector per time step.
    LSTM(100, input_shape=input_shape, return_sequences=True),
    # Shape is inferred from the previous layer; only the last state is emitted.
    LSTM(50),
    Dense(5, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [103]:
# Keep only the weights with the lowest validation loss seen so far on disk.
checkpoint_filepath = 'model.h5'
checkpoint = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

# Abort training once validation loss has gone 5 epochs without improving.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
)

In [None]:
# `validation_split` carves the validation set from the *last* 10% of the
# arrays exactly as passed in, before Keras shuffles anything. `balance_data`
# keeps every positive but only the earliest negatives, so the tail of the
# balanced arrays holds positives only. Permute the samples first so that
# val_loss (which drives early stopping and checkpointing) is measured on a
# slice drawn from both classes.
shuffle_idx = np.random.RandomState(RANDOM_STATE).permutation(len(y_train))

model.fit(
    X_train[shuffle_idx],
    y_train[shuffle_idx],
    batch_size=16,
    epochs=30,
    validation_split=0.1,
    callbacks=[es, checkpoint],
)

Train on 91 samples, validate on 11 samples
Epoch 1/30


In [None]:
# Rebind `model` to the checkpoint saved during training (the epoch with the
# lowest val_loss) instead of the weights left in memory when training stopped.
model = load_model(checkpoint_filepath)

In [None]:
# Sigmoid outputs for the held-out set, turned into hard 0/1 labels at a
# 0.5 cut-off and flattened to one label per sample.
predictions = model.predict(X_test)
y_pred = (predictions > 0.5).astype(int).ravel()

# Show predicted and true labels together for a quick visual comparison.
print(y_pred)
print(y_test, '\n')

# NOTE: every metric below, including the AUC, is computed from the hard labels.
evaluate_on_training_set(y_test, y_pred)