In [1]:
# Template Notebook for Engagement Classification Model

# ToDo/Models to run here: https://docs.google.com/document/d/18EJpFesMEl9Q7C1AZzDeq6uy7c8tDMvEJ_j58-MmiBI/edit?usp=sharing
# Use p8_data_processed.csv here: https://drive.google.com/drive/folders/19aJUAlkTMz7PcZE1q4hogFkjVtwYGcMT
# Upload code to help-seeking github: https://github.com/interaction-lab/help_seeking
# Record model results here: https://docs.google.com/spreadsheets/d/16ye54fSSEuAuDL_j56UIeDB-rIIrxq_kPbtyPRQOrVI/edit?usp=sharing

In [2]:
# Imports

import pandas as pd
import numpy as np
from pandas import *
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import random

In [8]:
# Load Data
# Reading the CSV will probably take some time.
# Adjust the file path below to wherever the file lives on your computer.

file8 = 'p8_data_FS4new.csv'
data8 = pd.read_csv(filepath_or_buffer=file8)

In [10]:
# Preview the first rows of the loaded frame to sanity-check the load
data8.head()

Unnamed: 0,of_confidence,of_success,of_gaze_0_x,of_gaze_0_y,of_gaze_0_z,of_gaze_1_x,of_gaze_1_y,of_gaze_1_z,of_gaze_angle_x,of_gaze_angle_y,...,ts_attempt,skill_NC,skill_OS,skill_EM,diff_1,diff_2,diff_3,diff_4,diff_5,no_game
0,0.0,0.0,0.49709,0.500431,0.519632,0.51422,0.502452,0.531622,0.588569,0.514524,...,0.0,0,0,0,0,0,0,0,0,1
1,0.0,0.0,0.49709,0.500431,0.519632,0.51422,0.502452,0.531622,0.588569,0.514524,...,0.0,0,0,0,0,0,0,0,0,1
2,0.0,0.0,0.49709,0.500431,0.519632,0.51422,0.502452,0.531622,0.588569,0.514524,...,0.0,0,0,0,0,0,0,0,0,1
3,0.0,0.0,0.49709,0.500431,0.519632,0.51422,0.502452,0.531622,0.588569,0.514524,...,0.0,0,0,0,0,0,0,0,0,1
4,0.0,0.0,0.49709,0.500431,0.519632,0.51422,0.502452,0.531622,0.588569,0.514524,...,0.0,0,0,0,0,0,0,0,0,1


In [None]:
# Label Analysis
# Class balance of the `engagement` label in the loaded frame (data8).

# Count each label once and reuse the result below
label_counts = data8['engagement'].value_counts()
print(label_counts)
print()

# Fraction of all frames carrying each label
total_frames = label_counts.sum()
print(1, label_counts[1] / total_frames)
print(0, label_counts[0] / total_frames)

Note: in every feature set, keep `timestamp` and `session_num` until right before running the model; they are needed to sort the frames and to build the per-session windows below.

### Step 2: Create Windows for RNN (for chosen Feature Set)

In [11]:
# CHOOSE FEATURE SET, Window Size

# train on sessions <= session_threshold, test on sessions > session_threshold
session_threshold = 8

# window_size = # of frames to feed into an RNN at a time
window_size = 50

# feature set the windows are built from
FS = data8

In [12]:
# Sort Data (Temporal Model)
# Order frames by session first, then chronologically within each session.

sort_keys = ['session_num', 'timestamp']
FS = FS.sort_values(by=sort_keys)

In [13]:
# Create Windows, mapped by session
# Each session is cut into consecutive, non-overlapping windows of `window_size`
# frames; a trailing remainder shorter than `window_size` is thrown away.
#
# Windows are sliced by *position* inside each session. FS keeps its original
# index labels after sort_values, so combining iterrows() labels with positional
# slicing (FS[a:b]) would pick the wrong rows whenever sorting reordered FS.

windows = []    # every complete window (all sessions), 'engagement' column included
X_train2 = []   # features of windows from sessions <= session_threshold
y_train2 = []   # per-frame engagement labels matching X_train2
X_test2 = []    # features of windows from sessions > session_threshold
y_test2 = []    # per-frame engagement labels matching X_test2

# sort=False: visit sessions in the order they already appear in FS
for curr_session, session_frames in FS.groupby('session_num', sort=False):
    # session_num / timestamp were only needed for ordering and grouping
    session_frames = session_frames.drop(columns=['session_num', 'timestamp'])

    # last position at which a full window still fits in this session
    last_start = len(session_frames) - window_size

    for window_start in range(0, last_start + 1, window_size):
        to_add = session_frames.iloc[window_start:window_start + window_size]
        windows.append(to_add)

        if curr_session <= session_threshold:
            y_train2.append(to_add['engagement'])
            X_train2.append(to_add.drop(columns=['engagement']))
        else:
            y_test2.append(to_add['engagement'])
            X_test2.append(to_add.drop(columns=['engagement']))

In [26]:
# Shuffle Windows

random.shuffle(windows)

y1 = []
X1 = []

for i in windows:
    y1.append(i['engagement'])
    X1.append(i.drop(columns=['engagement']))

X_train1, X_test1, y_train1, y_test1 = train_test_split(X1, y1, test_size=0.3, random_state=42)

### Step 3: Implement Your Model!

In [15]:
# For chosen feature set, run on both Train-Test Splits
from keras.models import Sequential
from keras.layers import Embedding, Input
from keras.layers.merge import Concatenate
from keras.layers.core import Dense, Activation, Flatten
from keras.layers import Dropout, concatenate
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import Bidirectional
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from keras.optimizers import RMSprop
from keras import metrics
from keras.models import Model

Using TensorFlow backend.


In [29]:
# A cell only displays its last expression, so show both lengths together;
# the number of feature windows and label windows should match.
len(X_train1), len(y_train1)

3635

In [30]:
model = Sequential()

# Each training sample is one window: window_size frames, each with num_features values
num_features = X_train1[0].shape[1]

# Single recurrent layer, so only its final output is returned
# (use return_sequences=True when stacking multiple LSTM layers).
# input_shape replaces the deprecated input_length / input_dim arguments.
model.add(LSTM(64, input_shape=(window_size, num_features), return_sequences=False))
model.add(Dropout(.2))

# Output layer: one sigmoid unit per frame in the window, matching the
# window_size engagement labels stored for each window
model.add(Dense(window_size, activation='sigmoid'))

# summary() prints the table itself and returns None, so it is not wrapped in print()
model.summary()

# Define Loss and Optimizer
LOSS = 'binary_crossentropy' #classifying between 0 and 1
OPTIMIZER = 'rmsprop' #RMSprop typically works better for RNNs per Keras

model.compile(loss=LOSS, optimizer=OPTIMIZER, metrics=[metrics.binary_accuracy])



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 64)                24320     
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 50)                3250      
Total params: 27,570
Trainable params: 27,570
Non-trainable params: 0
_________________________________________________________________
None


In [32]:
# Stack the per-window frames / label series into numpy arrays for Keras
X_train1, y_train1 = np.array(X_train1), np.array(y_train1)

In [33]:
# Training hyper-parameters
BATCH_SIZE = 64
EPOCHS = 10

# Hold out the last 20% of the training windows for validation
model.fit(
    x=X_train1,
    y=y_train1,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=.2,
)


Train on 2908 samples, validate on 727 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x10deed518>