# A notebook for comparing the performance of various classifier implementations
#### Note: This notebook takes a very long time to run as TensorFlow was having a lot of trouble integrating with the Great Lakes GPU setup. If you want to play with a specific portion it is recommended you either manually run the cells of interest or extract the relevant portions to another notebook.
##### Additional Note: Due to an issue with Great Lakes connections the cell run orders may appear to be slightly off. This is the result of the connection terminating but the notebook continuing to run.

## Imports and reading in / generating the data for classifiers

In [1]:
# set the random state for reproducibility
random_state = 42

In [2]:
# general purpose imports
import os
import pickle
import json
from path import Path

# data manipulation imports
import pandas as pd
import numpy as np

# sklearn utility imports
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.utils import resample
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import SCORERS
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
# sklearn model imports
from sklearn.svm import SVC

# get rid of tensorflow debug output
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# tensorflow imports
import tensorflow as tf
import keras

# get rid of some other annoying warning outputs
import warnings
warnings.filterwarnings('ignore')

### Set the base path variable:

In [3]:
# set the base path for where you have stored the data generated previously (you must complete this step or things won't run correctly)
base_path = Path("/nfs/turbo/seas-nhcarter/human_wildlife_interactions/classifier_video_data/")
# set the path to where the cloned repo resides
repo_path = Path("/nfs/turbo/seas-nhcarter/human_wildlife_interactions/repo")

In [4]:
# read in existing video data matrices
v_train_path = Path(base_path / "train_mat.csv")
v_test_path = Path(base_path / "test_mat.csv")
v_val_path = Path(base_path / "val_mat.csv")
v_train_df = pd.read_csv(v_train_path) 
v_test_df = pd.read_csv(v_test_path)
v_val_df = pd.read_csv(v_val_path)

In [5]:
X_train_v = v_train_df.iloc[:,:-1]
y_train_v = v_train_df.iloc[:,-1]
X_val_v = v_val_df.iloc[:,:-1]
y_val_v = v_val_df.iloc[:,-1]
X_test_v = v_test_df.iloc[:,:-1]
y_test_v = v_test_df.iloc[:,-1]

In [6]:
# sanity check the read in data
print("Train data shape:      {}; data y value shape:       {}".format(X_train_v.shape, y_train_v.shape))
print("Validation data shape: {};  validation y value shape: {}".format(X_val_v.shape, y_val_v.shape))
print("Test data shape:       {};  test y value shape:       {}".format(X_test_v.shape, y_test_v.shape))

Train data shape:      (2794, 1153); data y value shape:       (2794,)
Validation data shape: (494, 1153);  validation y value shape: (494,)
Test data shape:       (581, 1153);  test y value shape:       (581,)


In [7]:
# read in the frame pickle files (3.8 gigs, give it a minute)
frame_path = Path(base_path / "frame_features_dict.pkl")
with open(frame_path, 'rb') as file:
    frames_raw = pickle.load(file, encoding = 'utf-8')

In [8]:
# generate training data from frames (we don't store this data as this is the only notebook that needs it and it generates pretty quickly)
with open(repo_path / 'human_wildlife_interactions/data/processed/hunting_dict.json') as file:
    cluster_results = json.load(file)
with open(base_path /  'train_ids.pkl', 'rb') as file:
    train_ids = pickle.load(file, encoding = 'utf-8')
with open(base_path / 'val_ids.pkl', 'rb') as file:
    val_ids = pickle.load(file, encoding = 'utf-8')
with open(base_path / 'test_ids.pkl', 'rb') as file:
    test_ids = pickle.load(file, encoding = 'utf-8')
    
    
def frame_matrix_generator(num_frames = -1):    
    audio_train_frames = []
    rgb_train_frames = []
    y_train_frames = []

    audio_val_frames = []
    rgb_val_frames = []
    y_val_frames = []

    audio_test_frames = []
    rgb_test_frames = []
    y_test_frames = []
    

    for video_id in cluster_results.keys():
        data = frames_raw[video_id]
        y_label = cluster_results[video_id]
        if video_id in train_ids:
            y_train_frames.append(y_label)
            if num_frames != -1:
                audio_train_frames.append(np.array(data['audio_lst'][0:num_frames]))
                rgb_train_frames.append(np.array(data['rgb_lst'][0:num_frames]))
            else:
                audio_train_frames.append(np.array(data['audio_lst']))
                rgb_train_frames.append(np.array(data['rgb_lst']))
        if video_id in val_ids:
            y_val_frames.append(y_label)
            if num_frames != -1:
                audio_val_frames.append(np.array(data['audio_lst'][0:num_frames]))
                rgb_val_frames.append(np.array(data['rgb_lst'][0:num_frames]))
            else:
                audio_val_frames.append(np.array(data['audio_lst']))
                rgb_val_frames.append(np.array(data['rgb_lst']))
        if video_id in test_ids:
            y_test_frames.append(y_label)
            if num_frames != -1:
                audio_test_frames.append(np.array(data['audio_lst'][0:num_frames]))
                rgb_test_frames.append(np.array(data['rgb_lst'][0:num_frames]))
            else:
                audio_test_frames.append(np.array(data['audio_lst']))
                rgb_test_frames.append(np.array(data['rgb_lst']))
    return np.array(audio_train_frames), np.array(rgb_train_frames), np.array(audio_val_frames), np.array(rgb_val_frames), np.array(audio_test_frames), np.array(rgb_test_frames), np.array(y_train_frames), np.array(y_val_frames), np.array(y_test_frames)
            

audio_train_frames, rgb_train_frames, audio_val_frames, rgb_val_frames, audio_test_frames, rgb_test_frames, y_train_frames, y_val_frames, y_test_frames = frame_matrix_generator(100)

## SVM Classifier

In [51]:
# best result from GridSearchCV (see videoSVM.ipynb for more information)
svc_clf = SVC(C=0.01, class_weight={0: 0.1, 1: 0.9}, kernel='linear', random_state=random_state, probability=True).fit(X_train_v, y_train_v)
svc_preds = svc_clf.predict(X_test_v)

# results from svc_clf
precision, recall, fscore, support = precision_recall_fscore_support(y_test_v, svc_preds)
svc_score = svc_clf.score(X_test_v, y_test_v)
roc_auc = roc_auc_score(y_test_v, svc_preds)

print("Classifier: SVC")
print("Class 0 Precision:  {}  |  Class 1 Precision: {}".format(precision[0], precision[1]))
print("Class 0 Recall:     {}  |  Class 1 Recall:    {}".format(recall[0], recall[1]))
print("Class 0 FScore:     {}  |  Class 1 FScore:    {}".format(fscore[0], fscore[1]))
print("Class 0 Support:    {}  |  Class 1 Support:   {}".format(support[0], support[1]))
print("SVC Accuracy Score: {}".format(svc_score))
print("SVC ROC/AUC Score:  {}".format(roc_auc))
print(confusion_matrix(y_test_v,svc_preds))

Classifier: SVC
Class 0 Precision:  0.9722814498933902  |  Class 1 Precision: 0.32142857142857145
Class 0 Recall:     0.8571428571428571  |  Class 1 Recall:    0.7346938775510204
Class 0 FScore:     0.911088911088911  |  Class 1 FScore:    0.4472049689440994
Class 0 Support:    532  |  Class 1 Support:   49
SVC Accuracy Score: 0.846815834767642
SVC ROC/AUC Score:  0.7959183673469388
[[456  76]
 [ 13  36]]


## CNN Implementation - Video Data
#### Network Design

In [10]:
# simple matmul CNN approach with class weights based off of Supriya Gadi Patil's CNN implementation
# https://github.com/supriya-gdptl/kaggle-youtube8m
l2_reg = .00000001
# define inputs
input_1 = keras.Input(shape=(1024,))
input_2 = keras.Input(shape=(128,))

# reduce using fully connected layer
videoNN = keras.layers.Dense(32, activation=tf.nn.leaky_relu,kernel_regularizer=keras.regularizers.l2(l2_reg))(input_1)
audioNN = keras.layers.Dense(32, activation=tf.nn.leaky_relu,kernel_regularizer=keras.regularizers.l2(l2_reg))(input_2)

# adjust shape to make everything (32x1) instead of (32,)
video_dim = tf.expand_dims(videoNN, -1)
audio_dim = tf.expand_dims(audioNN, -1)

# transpose audio to enable matmul operation
audio_dim = tf.transpose(audio_dim, perm=[0,2,1])

# matmul to produce 32x32 result
matrix = tf.matmul(video_dim, audio_dim)

# need another empty dimension for CNN to work
matrix = tf.expand_dims(matrix, -1)

# 2 layer CNN with single pooling layer
convolution_1 = keras.layers.Conv2D(filters=8, kernel_size=[3,3])(matrix)
average_pool = keras.layers.AveragePooling2D(pool_size=2, strides=2)(convolution_1)
convolution_2 = keras.layers.Conv2D(filters=4, kernel_size=[3,3])(average_pool)

# flatten output layer
flattening = keras.layers.Flatten()(convolution_2)

# output layer
output = keras.layers.Dense(1, activation=tf.nn.sigmoid, kernel_regularizer=keras.regularizers.l2(l2_reg))(flattening)

# build the graph
cnn_model = keras.Model(inputs=[input_1,input_2], outputs=[output])
cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])

# grab the initial weights so we can reset while changing parameters
initial_weights = cnn_model.get_weights()

In [11]:
# need to split the data up to match the inputs specified above
train_video = X_train_v.iloc[:, :1024]
train_audio = X_train_v.iloc[:,1024:-1]
test_video = X_test_v.iloc[:, :1024]
test_audio = X_test_v.iloc[:, 1024:-1]
val_video = X_val_v.iloc[:, :1024]
val_audio = X_val_v.iloc[:, 1024:-1]

#### Testing Class Weights

In [12]:
# testing different weights
weight_vals = [.01,.02,.03,.04,.05,.06,.07,.08,.09,.1]
for val in weight_vals:
    # reset the model back to starting weights between each run to avoid inadvertently testing weighted epochs instead
    cnn_model.set_weights(initial_weights)
    w_0 = val
    w_1 = 1 - w_0
    print("Class 0 weight: {}, Class 1 weight{}".format(w_0, w_1))
    cnn_model = keras.Model(inputs=[input_1,input_2], outputs=[output])
    cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])
    cnn_model.fit(x=[train_video, train_audio], y=y_train_v, class_weight={0: w_0, 1: w_1},validation_data=([val_video, val_audio], y_val_v))

Class 0 weight: 0.01, Class 1 weight0.99
Class 0 weight: 0.02, Class 1 weight0.98
Class 0 weight: 0.03, Class 1 weight0.97
Class 0 weight: 0.04, Class 1 weight0.96
Class 0 weight: 0.05, Class 1 weight0.95
Class 0 weight: 0.06, Class 1 weight0.94
Class 0 weight: 0.07, Class 1 weight0.9299999999999999
Class 0 weight: 0.08, Class 1 weight0.92
Class 0 weight: 0.09, Class 1 weight0.91
Class 0 weight: 0.1, Class 1 weight0.9


#### Run the best model above and generate some metrics
##### You will need to do some analysis on the runs above and manually adjust the class_weight parameter below. This is due to a combination of random initialization behaviors between notebook runs (TensorFlow will change the values everytime the kernel restarts) and a judgement call between training and validation performance.

In [13]:
# in this instance the best balance seems to have been achieved at .1 / .9 so the weights are adjusted to reflect
cnn_model.set_weights(initial_weights)
cnn_model = keras.Model(inputs=[input_1,input_2], outputs=[output])
cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])
cnn_model.fit(x=[train_video, train_audio], y=y_train_v, class_weight={0: .1, 1: .9},validation_data=([val_video, val_audio], y_val_v))



<keras.callbacks.History at 0x14a2b5dde310>

In [14]:
cnn_model.evaluate([test_video, test_audio], y_test_v)



[0.5850654244422913, 0.6878932118415833]

In [15]:
cnn_raw_preds = cnn_model.predict([test_video, test_audio])
cnn_preds = []
for probs in cnn_raw_preds:
    cnn_preds.append(np.argmax(probs))
precision, recall, fscore, support = precision_recall_fscore_support(y_test_v, cnn_preds)
accuracy = accuracy_score(y_test_v, cnn_preds)
cnn_loss, cnn_auc = cnn_model.evaluate([test_video, test_audio], y_test_v)
print("Classifier: CNN")
print("Class 0 Precision:  {}  |  Class 1 Precision: {}".format(precision[0], precision[1]))
print("Class 0 Recall:     {}  |  Class 1 Recall:    {}".format(recall[0], recall[1]))
print("Class 0 FScore:     {}  |  Class 1 FScore:    {}".format(fscore[0], fscore[1]))
print("Class 0 Support:    {}  |  Class 1 Support:   {}".format(support[0], support[1]))
print("CNN Accuracy Score: {}".format(accuracy))
print("CNN ROC/AUC Score:  {}".format(cnn_auc))
print(confusion_matrix(y_test_v,cnn_preds))

Classifier: CNN
Class 0 Precision:  0.9156626506024096  |  Class 1 Precision: 0.0
Class 0 Recall:     1.0  |  Class 1 Recall:    0.0
Class 0 FScore:     0.9559748427672956  |  Class 1 FScore:    0.0
Class 0 Support:    532  |  Class 1 Support:   49
CNN Accuracy Score: 0.9156626506024096
CNN ROC/AUC Score:  0.6878932118415833
[[532   0]
 [ 49   0]]


#### Testing class weights with early stopping

In [16]:
# same as previous model but with early stopping
weight_vals = [.01,.02,.03,.04,.05,.06,.07,.08,.09,.1]
callback = keras.callbacks.EarlyStopping(monitor='loss', patience=3)
keras.backend.clear_session()
for val in weight_vals:
    cnn_model.set_weights(initial_weights)
    w_0 = val
    w_1 = 1 - w_0
    print(w_0, w_1)
    cnn_model = keras.Model(inputs=[input_1,input_2], outputs=[output])
    cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])
    cnn_model.fit(x=[train_video, train_audio], y=y_train_v, class_weight={0: w_0, 1: w_1},validation_data=([val_video, val_audio], y_val_v), callbacks=[callback])

0.01 0.99
0.02 0.98
0.03 0.97
0.04 0.96
0.05 0.95
0.06 0.94
0.07 0.9299999999999999
0.08 0.92
0.09 0.91
0.1 0.9


#### Run the best model above and generate some metrics
##### You will need to do some analysis on the runs above and manually adjust the class_weight parameter below. This is due to a combination of random initialization behaviors between notebook runs (TensorFlow will change the values everytime the kernel restarts) and a judgement call between training and validation performance

In [17]:
# in this case it seems the best performance was .08 / .92, weights have been adjusted accordingly
cnn_model.set_weights(initial_weights)
cnn_model = keras.Model(inputs=[input_1,input_2], outputs=[output])
cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])
cnn_model.fit(x=[train_video, train_audio], y=y_train_v, class_weight={0: .08, 1: .92},validation_data=([val_video, val_audio], y_val_v), callbacks=[callback])



<keras.callbacks.History at 0x14a2b02ef700>

In [18]:
cnn_raw_preds = cnn_model.predict([test_video, test_audio])
cnn_preds = []
for probs in cnn_raw_preds:
    cnn_preds.append(np.argmax(probs))
precision, recall, fscore, support = precision_recall_fscore_support(y_test_v, cnn_preds)
accuracy = accuracy_score(y_test_v, cnn_preds)
cnn_loss, cnn_auc = cnn_model.evaluate([test_video, test_audio], y_test_v)
print("Classifier: CNN")
print("Class 0 Precision:  {}  |  Class 1 Precision: {}".format(precision[0], precision[1]))
print("Class 0 Recall:     {}  |  Class 1 Recall:    {}".format(recall[0], recall[1]))
print("Class 0 FScore:     {}  |  Class 1 FScore:    {}".format(fscore[0], fscore[1]))
print("Class 0 Support:    {}  |  Class 1 Support:   {}".format(support[0], support[1]))
print("CNN Accuracy Score: {}".format(accuracy))
print("CNN ROC/AUC Score:  {}".format(cnn_auc))
print(confusion_matrix(y_test_v,cnn_preds))

Classifier: CNN
Class 0 Precision:  0.9156626506024096  |  Class 1 Precision: 0.0
Class 0 Recall:     1.0  |  Class 1 Recall:    0.0
Class 0 FScore:     0.9559748427672956  |  Class 1 FScore:    0.0
Class 0 Support:    532  |  Class 1 Support:   49
CNN Accuracy Score: 0.9156626506024096
CNN ROC/AUC Score:  0.6577221155166626
[[532   0]
 [ 49   0]]


#### Testing epochs

In [19]:
# epoch experiments
epoch_list = [10,11,12,13,14,15] # the best AUC values in our testing came from this range, feel free to modify as desired
for epoch in epoch_list:
    cnn_model.set_weights(initial_weights)
    cnn_model = keras.Model(inputs=[input_1,input_2], outputs=[output])
    cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])
    cnn_model.fit(x=[train_video, train_audio], y=y_train_v, class_weight={0:.1, 1:.9},validation_data=([val_video, val_audio], y_val_v), callbacks=[callback],epochs=epoch)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/11
Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13
Epoch 1/14
Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


#### Run the best model above and generate some metrics
##### You will need to do some analysis on the runs above and manually adjust the class_weight parameter below. This is due to a combination of random initialization behaviors between notebook runs (TensorFlow will change the values everytime the kernel restarts) and a judgement call between training and validation performance.

In [20]:
# in this run it seems as though 11 epochs had the best results (the previous run occurred at 13), so the epochs have been set to 11
cnn_model.set_weights(initial_weights)

cnn_model = keras.Model(inputs=[input_1,input_2], outputs=[output])
cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])
cnn_model.fit(x=[train_video, train_audio], y=y_train_v, class_weight={0:.1, 1:.9},validation_data=([val_video, val_audio], y_val_v), callbacks=[callback],epochs=11)

Epoch 1/11
Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11


<keras.callbacks.History at 0x14a2c88ceaf0>

In [21]:
cnn_raw_preds = cnn_model.predict([test_video, test_audio])
cnn_preds = []
for probs in cnn_raw_preds:
    cnn_preds.append(np.argmax(probs))
precision, recall, fscore, support = precision_recall_fscore_support(y_test_v, cnn_preds)
accuracy = accuracy_score(y_test_v, cnn_preds)
cnn_loss, cnn_auc = cnn_model.evaluate([test_video, test_audio], y_test_v)
print("Classifier: CNN")
print("Class 0 Precision:  {}  |  Class 1 Precision: {}".format(precision[0], precision[1]))
print("Class 0 Recall:     {}  |  Class 1 Recall:    {}".format(recall[0], recall[1]))
print("Class 0 FScore:     {}  |  Class 1 FScore:    {}".format(fscore[0], fscore[1]))
print("Class 0 Support:    {}  |  Class 1 Support:   {}".format(support[0], support[1]))
print("CNN Accuracy Score: {}".format(accuracy))
print("CNN ROC/AUC Score:  {}".format(cnn_auc))
print(confusion_matrix(y_test_v,cnn_preds))

Classifier: CNN
Class 0 Precision:  0.9156626506024096  |  Class 1 Precision: 0.0
Class 0 Recall:     1.0  |  Class 1 Recall:    0.0
Class 0 FScore:     0.9559748427672956  |  Class 1 FScore:    0.0
Class 0 Support:    532  |  Class 1 Support:   49
CNN Accuracy Score: 0.9156626506024096
CNN ROC/AUC Score:  0.8291007876396179
[[532   0]
 [ 49   0]]


## Fully Connected Neural Network - Video Data
#### Network Design

In [22]:
# model design based off of Roberto Chavez's paper
# https://github.com/rchavezj/Label_YT_Videos

# audio input with fully connected layers
audio_input = keras.Input(shape=(128,))
audio_fc1 = keras.layers.Dense(512, activation='relu')(audio_input)
audio_fc2 = keras.layers.Dense(1024, activation='relu')(audio_fc1)
audio_fc3 = keras.layers.Dense(4096, activation='relu')(audio_fc2)
audio_fc4 = keras.layers.Dense(8192, activation='relu')(audio_fc3)
audio_fc5 = keras.layers.Dense(4096, activation='relu')(audio_fc4)
# video input with fully connected layers
video_input = keras.Input(shape=(1024,))
video_fc1 = keras.layers.Dense(512, activation='relu')(video_input)
video_fc2 = keras.layers.Dense(1024, activation='relu')(video_fc1)
video_fc3 = keras.layers.Dense(4096, activation='relu')(video_fc2)
video_fc4 = keras.layers.Dense(8192, activation='relu')(video_fc3)
video_fc5 = keras.layers.Dense(4096, activation='relu')(video_fc4)
# merge data and pass to fully connected layer
nn_merge = keras.layers.concatenate([audio_fc5, video_fc5])
nn_fc = keras.layers.Dense(4096, activation='relu')(nn_merge) 
# output layer
nn_output = keras.layers.Dense(1, activation=tf.nn.sigmoid,name='nn_output')(nn_fc)
# compile model
nn_model = keras.Model(inputs=[audio_input, video_input],outputs=[nn_output])
nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])
# grab the initial weights to reset the model between parameter changes
nn_weights = nn_model.get_weights()

### Testing Class Weights (Long Run Time Warning)

In [23]:
for val in weight_vals:
    w_0 = val
    w_1 = 1 - w_0
    print(w_0, w_1)
    nn_model.set_weights(nn_weights)
    nn_model = keras.Model(inputs=[audio_input, video_input],outputs=[nn_output])
    nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])
    nn_model.fit(x=[train_audio, train_video], y=y_train_v, class_weight={0: w_0, 1: w_1},validation_data=([val_audio,val_video], y_val_v))

0.01 0.99
0.02 0.98
0.03 0.97
0.04 0.96
0.05 0.95
0.06 0.94
0.07 0.9299999999999999
0.08 0.92
0.09 0.91
0.1 0.9


#### Run the best model above and generate some metrics
##### You will need to do some analysis on the runs above and manually adjust the class_weight parameter below. This is due to a combination of random initialization behaviors between notebook runs (TensorFlow will change the values everytime the kernel restarts) and a judgement call between training and validation performance.

In [None]:
# this one is kind of tricky to assess the "best" but we have chosen to use .06 / .94 based on pretty bad train AUC all around and better validation AUC
# unclear why this cell shows as not having been run despite having output
tf.keras.backend.clear_session()
nn_model.set_weights(nn_weights)
nn_model = keras.Model(inputs=[audio_input, video_input],outputs=[nn_output])
nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])
nn_model.fit(x=[train_audio, train_video], y=y_train_v, class_weight={0: .06, 1: 0.94},validation_data=([val_audio,val_video], y_val_v))



<keras.callbacks.History at 0x14a2c6674910>

In [25]:
nn_raw_preds = nn_model.predict([test_audio, test_video])
nn_preds = []
for probs in nn_raw_preds:
    nn_preds.append(np.argmax(probs))
precision, recall, fscore, support = precision_recall_fscore_support(y_test_v, nn_preds)
accuracy = accuracy_score(y_test_v, nn_preds)
nn_loss, nn_auc = nn_model.evaluate([test_audio,test_video], y_test_v)
print("Classifier: NN")
print("Class 0 Precision:  {}  |  Class 1 Precision: {}".format(precision[0], precision[1]))
print("Class 0 Recall:     {}  |  Class 1 Recall:    {}".format(recall[0], recall[1]))
print("Class 0 FScore:     {}  |  Class 1 FScore:    {}".format(fscore[0], fscore[1]))
print("Class 0 Support:    {}  |  Class 1 Support:   {}".format(support[0], support[1]))
print("NN Accuracy Score: {}".format(accuracy))
print("NN ROC/AUC Score:  {}".format(nn_auc))
print(confusion_matrix(y_test_v,nn_preds))

Classifier: NN
Class 0 Precision:  0.9156626506024096  |  Class 1 Precision: 0.0
Class 0 Recall:     1.0  |  Class 1 Recall:    0.0
Class 0 FScore:     0.9559748427672956  |  Class 1 FScore:    0.0
Class 0 Support:    532  |  Class 1 Support:   49
NN Accuracy Score: 0.9156626506024096
NN ROC/AUC Score:  0.6059153079986572
[[532   0]
 [ 49   0]]


### Testing Class Weights with Early Stopping (Long Run Time Warning)

In [26]:
# early stopping implementation
for val in weight_vals:
    w_0 = val
    w_1 = 1 - w_0
    print(w_0, w_1)
    nn_model.set_weights(nn_weights)
    nn_model = keras.Model(inputs=[audio_input, video_input],outputs=[nn_output])
    nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])
    nn_model.fit(x=[train_audio, train_video], y=y_train_v, class_weight={0: w_0, 1: w_1},validation_data=([val_audio,val_video], y_val_v), callbacks=[callback])

0.01 0.99
0.02 0.98
0.03 0.97
0.04 0.96
0.05 0.95
0.06 0.94
0.07 0.9299999999999999
0.08 0.92
0.09 0.91
0.1 0.9


#### Run the best model above and generate some metrics
##### You will need to do some analysis on the runs above and manually adjust the class_weight parameter below. This is due to a combination of random initialization behaviors between notebook runs (TensorFlow will change the values everytime the kernel restarts) and a judgement call between training and validation performance.

In [27]:
# best performance was .1 / .9
nn_model.set_weights(nn_weights)
nn_model.fit(x=[train_audio, train_video], y=y_train_v, class_weight={0: .1, 1: .9},validation_data=([val_audio,val_video], y_val_v), callbacks=[callback])



<keras.callbacks.History at 0x14a2c54aa7c0>

In [28]:
nn_raw_preds = nn_model.predict([test_audio, test_video])
nn_preds = []
for probs in nn_raw_preds:
    nn_preds.append(np.argmax(probs))
precision, recall, fscore, support = precision_recall_fscore_support(y_test_v, nn_preds)
accuracy = accuracy_score(y_test_v, nn_preds)
nn_loss, nn_auc = nn_model.evaluate([test_audio,test_video], y_test_v)
print("Classifier: NN")
print("Class 0 Precision:  {}  |  Class 1 Precision: {}".format(precision[0], precision[1]))
print("Class 0 Recall:     {}  |  Class 1 Recall:    {}".format(recall[0], recall[1]))
print("Class 0 FScore:     {}  |  Class 1 FScore:    {}".format(fscore[0], fscore[1]))
print("Class 0 Support:    {}  |  Class 1 Support:   {}".format(support[0], support[1]))
print("NN Accuracy Score: {}".format(accuracy))
print("NN ROC/AUC Score:  {}".format(nn_auc))
print(confusion_matrix(y_test_v,nn_preds))

Classifier: NN
Class 0 Precision:  0.9156626506024096  |  Class 1 Precision: 0.0
Class 0 Recall:     1.0  |  Class 1 Recall:    0.0
Class 0 FScore:     0.9559748427672956  |  Class 1 FScore:    0.0
Class 0 Support:    532  |  Class 1 Support:   49
NN Accuracy Score: 0.9156626506024096
NN ROC/AUC Score:  0.5102232694625854
[[532   0]
 [ 49   0]]


### Testing Epochs (Very Long Run Time Warning)

In [30]:
# you can change class weights based on previous results
epoch_list = [2,3,4]
for epoch in epoch_list:
    nn_model.set_weights(nn_weights)
    nn_model = keras.Model(inputs=[audio_input, video_input],outputs=[nn_output])
    nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])
    nn_model.fit(x=[train_audio, train_video], y=y_train_v, class_weight={0:.1, 1:.9},validation_data=([val_audio,val_video], y_val_v), 
                 callbacks=[callback],epochs=epoch)

Epoch 1/2
Epoch 2/2
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


#### Run the best model above and generate some metrics
##### You will need to do some analysis on the runs above and manually adjust the class_weight parameter below. This is due to a combination of random initialization behaviors between notebook runs (TensorFlow will change the values everytime the kernel restarts) and a judgement call between training and validation performance.

In [31]:
# adjust the epochs
nn_model.set_weights(nn_weights)
nn_model.fit(x=[train_audio, train_video], y=y_train_v, class_weight={0: .06, 1: .94},validation_data=([val_audio,val_video], y_val_v), 
             callbacks=[callback], epochs=4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x14a2bff1f730>

In [34]:
nn_raw_preds = nn_model.predict([test_audio, test_video])
nn_preds = []
for probs in nn_raw_preds:
    nn_preds.append(np.argmax(probs))
precision, recall, fscore, support = precision_recall_fscore_support(y_test_v, nn_preds)
accuracy = accuracy_score(y_test_v, nn_preds)
nn_loss, nn_auc = nn_model.evaluate([test_audio,test_video], y_test_v)
print("Classifier: NN")
print("Class 0 Precision:  {}  |  Class 1 Precision: {}".format(precision[0], precision[1]))
print("Class 0 Recall:     {}  |  Class 1 Recall:    {}".format(recall[0], recall[1]))
print("Class 0 FScore:     {}  |  Class 1 FScore:    {}".format(fscore[0], fscore[1]))
print("Class 0 Support:    {}  |  Class 1 Support:   {}".format(support[0], support[1]))
print("NN Accuracy Score: {}".format(accuracy))
print("NN ROC/AUC Score:  {}".format(nn_auc))
print(confusion_matrix(y_test_v,nn_preds))

Classifier: NN
Class 0 Precision:  0.9156626506024096  |  Class 1 Precision: 0.0
Class 0 Recall:     1.0  |  Class 1 Recall:    0.0
Class 0 FScore:     0.9559748427672956  |  Class 1 FScore:    0.0
Class 0 Support:    532  |  Class 1 Support:   49
NN Accuracy Score: 0.9156626506024096
NN ROC/AUC Score:  0.5619149804115295
[[532   0]
 [ 49   0]]


### "Two Stream" LSTM - Frame Data
#### Network Design

In [35]:
# from Roberto Chavez's paper
# https://github.com/rchavezj/Label_YT_Videos

stream_x1 = keras.Input(shape=(100,128))
stream_x2 = keras.Input(shape=(100,1024))

stream_fc_1_x1 = keras.layers.Dense(512, activation='relu')(stream_x1) 
stream_fc_1_x2 = keras.layers.Dense(512, activation='relu')(stream_x2) 

# LSTM
stream_lstm_1_x1 = keras.layers.LSTM(128, return_sequences=True, go_backwards=False)(stream_fc_1_x1)
stream_lstm_1_x2 = keras.layers.LSTM(1024, return_sequences=True, go_backwards=False)(stream_fc_1_x2)

# LSTM
stream_lstm_2_x1 = keras.layers.LSTM(128, return_sequences=True, go_backwards=True)(stream_lstm_1_x1)
stream_lstm_2_x2 = keras.layers.LSTM(1024, return_sequences=True, go_backwards=True)(stream_lstm_1_x2)

stream_dropout_1_x1 = keras.layers.Dropout(rate=0.5)(stream_lstm_2_x1)
stream_dropout_1_x2 = keras.layers.Dropout(rate=0.5)(stream_lstm_2_x2)

stream_fc_2_x1 = keras.layers.Dense(1, activation='relu')(stream_dropout_1_x1) 
stream_fc_2_x2 = keras.layers.Dense(1, activation='relu')(stream_dropout_1_x2) 

stream_fc_3_x1 = keras.layers.Dense(16, activation='relu')(stream_dropout_1_x1) 
stream_fc_3_x2 = keras.layers.Dense(16, activation='relu')(stream_dropout_1_x2)

stream_pool_1_x1 = keras.layers.GlobalMaxPooling1D()(stream_fc_3_x1)
stream_pool_1_x2 = keras.layers.GlobalMaxPooling1D()(stream_fc_3_x2)

stream_merge_1 = keras.layers.concatenate([stream_pool_1_x1, stream_pool_1_x2])

stream_fc_2 = keras.layers.Dense(8192, activation='relu')(stream_merge_1) 

stream_fc_3 = keras.layers.Dense(4096, activation='relu')(stream_fc_2) 

stream_output = keras.layers.Dense(1, activation='sigmoid')(stream_fc_3)

# # Complete Model Diagram
stream_lstm_model = keras.Model(inputs=[stream_x1, stream_x2],outputs=[stream_output])
stream_lstm_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()] )

stream_weights = stream_lstm_model.get_weights()

### Testing Class Weights (Long Run Time Warning)

In [36]:
for val in weight_vals:
    w_0 = val
    w_1 = 1 - w_0
    print(w_0, w_1)
    stream_lstm_model.set_weights(stream_weights)
    stream_lstm_model.fit(x=[audio_train_frames, rgb_train_frames], y=y_train_v,class_weight={0:w_0,1:w_1}, 
                          validation_data=([audio_val_frames, rgb_val_frames], y_val_v))

0.01 0.99
0.02 0.98
0.03 0.97
0.04 0.96
0.05 0.95
0.06 0.94
0.07 0.9299999999999999
0.08 0.92
0.09 0.91
0.1 0.9


#### Run the best model above and generate some metrics
##### You will need to do some analysis on the runs above and manually adjust the class_weight parameter below. This is due to a combination of random initialization behaviors between notebook runs (TensorFlow will change the values everytime the kernel restarts) and a judgement call between training and validation performance.

In [39]:
# in the previous run the AUC scores did a worse job than normal, so we went with the only value that produced a val AUC that wasn't .50
stream_lstm_model.set_weights(stream_weights)

stream_lstm_model.fit(x=[audio_train_frames, rgb_train_frames], y=y_train_v, class_weight={0:.01,1:.99}, validation_data=([audio_val_frames, rgb_val_frames], y_val_v))



<keras.callbacks.History at 0x14a2c18cb820>

In [40]:
lstm_raw_preds = stream_lstm_model.predict([audio_test_frames, rgb_test_frames])
lstm_preds = []
for probs in lstm_raw_preds:
    lstm_preds.append(np.argmax(probs))
precision, recall, fscore, support = precision_recall_fscore_support(y_test_v, lstm_preds)
accuracy = accuracy_score(y_test_v, lstm_preds)
lstm_loss, lstm_auc = stream_lstm_model.evaluate([audio_test_frames, rgb_test_frames], y_test_v)
print("Classifier: LSTM")
print("Class 0 Precision:  {}  |  Class 1 Precision: {}".format(precision[0], precision[1]))
print("Class 0 Recall:     {}  |  Class 1 Recall:    {}".format(recall[0], recall[1]))
print("Class 0 FScore:     {}  |  Class 1 FScore:    {}".format(fscore[0], fscore[1]))
print("Class 0 Support:    {}  |  Class 1 Support:   {}".format(support[0], support[1]))
print("LSTM Accuracy Score: {}".format(accuracy))
print("LSTM ROC/AUC Score:  {}".format(lstm_auc))
print(confusion_matrix(y_test_v,lstm_preds))

Classifier: LSTM
Class 0 Precision:  0.9156626506024096  |  Class 1 Precision: 0.0
Class 0 Recall:     1.0  |  Class 1 Recall:    0.0
Class 0 FScore:     0.9559748427672956  |  Class 1 FScore:    0.0
Class 0 Support:    532  |  Class 1 Support:   49
LSTM Accuracy Score: 0.9156626506024096
LSTM ROC/AUC Score:  0.5
[[532   0]
 [ 49   0]]


### Testing Class Weights with Early Stopping (Long Run Time Warning)

In [41]:
for val in weight_vals:
    w_0 = val
    w_1 = 1 - w_0
    print(w_0, w_1)
    stream_lstm_model.set_weights(stream_weights)
    stream_lstm_model.fit(x=[audio_train_frames, rgb_train_frames], y=y_train_v,class_weight={0:w_0,1:w_1}, 
                          validation_data=([audio_val_frames, rgb_val_frames], y_val_v), callbacks=[callback])

0.01 0.99
0.02 0.98
0.03 0.97
0.04 0.96
0.05 0.95
0.06 0.94
0.07 0.9299999999999999
0.08 0.92
0.09 0.91
0.1 0.9


#### Run the best model above and generate some metrics
##### You will need to do some analysis on the runs above and manually adjust the class_weight parameter below. This is due to a combination of random initialization behaviors between notebook runs (TensorFlow will change the values everytime the kernel restarts) and a judgement call between training and validation performance.

In [45]:
# in this case we are going with the highest training AUC score since all but 1 validationg scores are the same
stream_lstm_model.set_weights(stream_weights)

stream_lstm_model.fit(x=[audio_train_frames, rgb_train_frames], y=y_train_v, class_weight={0:.07,1:.93}, 
                      validation_data=([audio_val_frames, rgb_val_frames], y_val_v), callbacks=[callback])



<keras.callbacks.History at 0x14a2b57afdc0>

In [46]:
lstm_raw_preds = stream_lstm_model.predict([audio_test_frames, rgb_test_frames])
lstm_preds = []
for probs in lstm_raw_preds:
    lstm_preds.append(np.argmax(probs))
precision, recall, fscore, support = precision_recall_fscore_support(y_test_v, lstm_preds)
accuracy = accuracy_score(y_test_v, lstm_preds)
lstm_loss, lstm_auc = stream_lstm_model.evaluate([audio_test_frames, rgb_test_frames], y_test_v)
print("Classifier: LSTM")
print("Class 0 Precision:  {}  |  Class 1 Precision: {}".format(precision[0], precision[1]))
print("Class 0 Recall:     {}  |  Class 1 Recall:    {}".format(recall[0], recall[1]))
print("Class 0 FScore:     {}  |  Class 1 FScore:    {}".format(fscore[0], fscore[1]))
print("Class 0 Support:    {}  |  Class 1 Support:   {}".format(support[0], support[1]))
print("LSTM Accuracy Score: {}".format(accuracy))
print("LSTM ROC/AUC Score:  {}".format(lstm_auc))
print(confusion_matrix(y_test_v,lstm_preds))

Classifier: LSTM
Class 0 Precision:  0.9156626506024096  |  Class 1 Precision: 0.0
Class 0 Recall:     1.0  |  Class 1 Recall:    0.0
Class 0 FScore:     0.9559748427672956  |  Class 1 FScore:    0.0
Class 0 Support:    532  |  Class 1 Support:   49
LSTM Accuracy Score: 0.9156626506024096
LSTM ROC/AUC Score:  0.5
[[532   0]
 [ 49   0]]


### Testing Epochs (VERY Long Run Time Warning)

In [42]:
epoch_list = [2,3]
for epoch in epoch_list:
    stream_lstm_model.set_weights(stream_weights)
    stream_lstm_0model.fit(x=[audio_train_frames, rgb_train_frames], y=y_train_v,class_weight={0:w_0,1:w_1}, 
                          validation_data=([audio_val_frames, rgb_val_frames], y_val_v), callbacks=[callback], epochs=epoch)

Epoch 1/2
Epoch 2/2
Epoch 1/3
Epoch 2/3
Epoch 3/3


#### Run the best model above and generate some metrics
##### In this case the performance never seems to improve, so rather than retraining we will just print out the metrics from the last iteration

In [43]:
lstm_raw_preds = stream_lstm_model.predict([audio_test_frames, rgb_test_frames])
lstm_preds = []
for probs in lstm_raw_preds:
    lstm_preds.append(np.argmax(probs))
precision, recall, fscore, support = precision_recall_fscore_support(y_test_v, lstm_preds)
accuracy = accuracy_score(y_test_v, lstm_preds)
lstm_loss, lstm_auc = stream_lstm_model.evaluate([audio_test_frames, rgb_test_frames], y_test_v)
print("Classifier: LSTM")
print("Class 0 Precision:  {}  |  Class 1 Precision: {}".format(precision[0], precision[1]))
print("Class 0 Recall:     {}  |  Class 1 Recall:    {}".format(recall[0], recall[1]))
print("Class 0 FScore:     {}  |  Class 1 FScore:    {}".format(fscore[0], fscore[1]))
print("Class 0 Support:    {}  |  Class 1 Support:   {}".format(support[0], support[1]))
print("LSTM Accuracy Score: {}".format(accuracy))
print("LSTM ROC/AUC Score:  {}".format(lstm_auc))
print(confusion_matrix(y_test_v,lstm_preds))

Classifier: LSTM
Class 0 Precision:  0.9156626506024096  |  Class 1 Precision: 0.0
Class 0 Recall:     1.0  |  Class 1 Recall:    0.0
Class 0 FScore:     0.9559748427672956  |  Class 1 FScore:    0.0
Class 0 Support:    532  |  Class 1 Support:   49
LSTM Accuracy Score: 0.9156626506024096
LSTM ROC/AUC Score:  0.5
[[532   0]
 [ 49   0]]
