This notebook serves to test the Fully Connected architecture and see how resolving class imbalance through resampling fares.
This approach is inspired by the multimodal video input fully connected neural network approach found here: https://github.com/rchavezj/Label_YT_Videos/blob/master/code/Algorithms.ipynb

In [3]:
import tensorflow as tf
import numpy as np
import pickle
import keras
import json
import pandas as pd
import keras
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import roc_auc_score
from sklearn.utils import resample
from path import Path

You will need to set the base path here:

In [4]:
# set the base path
base_path = Path('/nfs/turbo/seas-nhcarter/human_wildlife_interactions/classifier_video_data')

In [5]:
# read in the matrices
train_path = Path(base_path / "train_mat.csv")
test_path = Path(base_path / "test_mat.csv")
val_path = Path(base_path / "val_mat.csv")
train_df = pd.read_csv(train_path) 
test_df = pd.read_csv(test_path)
val_df = pd.read_csv(val_path)

In [7]:
# model design based off of Roberto Chavez's paper
# https://github.com/rchavezj/Label_YT_Videos

# audio input with fully connected layers
audio_input = keras.Input(shape=(128,))
audio_fc1 = keras.layers.Dense(512, activation='relu')(audio_input)
audio_fc2 = keras.layers.Dense(1024, activation='relu')(audio_fc1)
audio_fc3 = keras.layers.Dense(4096, activation='relu')(audio_fc2)
audio_fc4 = keras.layers.Dense(8192, activation='relu')(audio_fc3)
audio_fc5 = keras.layers.Dense(4096, activation='relu')(audio_fc4)
# video input with fully connected layers
video_input = keras.Input(shape=(1024,))
video_fc1 = keras.layers.Dense(512, activation='relu')(video_input)
video_fc2 = keras.layers.Dense(1024, activation='relu')(video_fc1)
video_fc3 = keras.layers.Dense(4096, activation='relu')(video_fc2)
video_fc4 = keras.layers.Dense(8192, activation='relu')(video_fc3)
video_fc5 = keras.layers.Dense(4096, activation='relu')(video_fc4)
# merge data and pass to fully connected layer
nn_merge = keras.layers.concatenate([audio_fc5, video_fc5])
nn_fc = keras.layers.Dense(4096, activation='relu')(nn_merge) 
# output layer
nn_output = keras.layers.Dense(1, activation=tf.nn.sigmoid,name='nn_output')(nn_fc)
# compile model
nn_model = keras.Model(inputs=[audio_input, video_input],outputs=[nn_output])
nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])
# grab the initial weights to reset the model between parameter changes
nn_weights = nn_model.get_weights()

In [8]:
# prep the data
X_train_v = train_df.iloc[:,:-1]
y_train_v = train_df.iloc[:,-1]
X_val_v = val_df.iloc[:,:-1]
y_val_v = val_df.iloc[:,-1]
X_test_v = test_df.iloc[:,:-1]
y_test_v = test_df.iloc[:,-1]
train_video = X_train_v.iloc[:, :1024]
train_audio = X_train_v.iloc[:,1024:-1]
test_video = X_test_v.iloc[:, :1024]
test_audio = X_test_v.iloc[:, 1024:-1]
val_video = X_val_v.iloc[:, :1024]
val_audio = X_val_v.iloc[:, 1024:-1]

In [9]:
# fit the model
nn_model.fit(x=[train_audio, train_video], y=np.array(y_train_v).reshape((-1,1)),class_weight={0:.1, 1:.9},
             validation_data=([val_audio,val_video], np.array(y_val_v).reshape((-1,1))))



<keras.callbacks.History at 0x152021799460>

In [10]:
nn_model.evaluate(x=[test_audio, test_video], y=np.array(y_test_v))



[0.6391013860702515, 0.5847590565681458]

In [12]:
raw_preds = nn_model.predict([test_audio, test_video])



In [13]:
preds = []
for pair in raw_preds:
    preds.append(np.argmax(pair))

In [15]:
pred_count = 0
correct_count = 0
for idx, val in enumerate(preds):
    if val == 1:
        pred_count += 1
    if y_test_v[idx] == 1:
        correct_count += 1
pred_count, correct_count

(0, 49)

In [17]:
precision_recall_fscore_support(y_test_v, preds)

  _warn_prf(average, modifier, msg_start, len(result))


(array([0.91566265, 0.        ]),
 array([1., 0.]),
 array([0.95597484, 0.        ]),
 array([532,  49]))

In [18]:
roc_auc_score(y_test_v, preds)

0.5

In [19]:
train_df = train_df.rename(columns= {'1152':"y"})

hunting_df = train_df.loc[train_df.y == 1.0]
non_hunting_df = train_df.loc[train_df.y != 1.0]

hunting_upsampled = resample(hunting_df, n_samples = len(non_hunting_df))

new_train_df = pd.concat([non_hunting_df, hunting_upsampled])
# shuffle the dataframe
new_train_df = new_train_df.sample(frac=1)

In [21]:
balanced_nn = keras.Model(inputs=[audio_input, video_input],outputs=[nn_output])
balanced_nn.set_weights(nn_weights)
balanced_nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])

In [22]:
X_train_video = new_train_df.iloc[:,:1024]
X_train_audio = new_train_df.iloc[:, 1025:-1]
X_train_y = new_train_df.y

In [23]:
balanced_nn.fit(x=[train_audio, train_video], y=np.array(y_train_v),validation_data=([val_audio,val_video], np.array(y_val_v)))



<keras.callbacks.History at 0x15201f964640>

In [25]:
raw_preds2 = nn_model.predict([test_audio, test_video])
preds2 = []
for pair in raw_preds2:
    preds2.append(np.argmax(pair))
    
pred_count2 = 0
correct_count2 = 0
for idx, val in enumerate(preds2):
    if val == 1:
        pred_count2 += 1
    if y_test_v[idx] == 1:
        correct_count2 += 1
pred_count2, correct_count2



(0, 49)

In [26]:
precision_recall_fscore_support(y_test_v, preds2)

  _warn_prf(average, modifier, msg_start, len(result))


(array([0.91566265, 0.        ]),
 array([1., 0.]),
 array([0.95597484, 0.        ]),
 array([532,  49]))

In [27]:
roc_auc_score(y_test_v, preds2)

0.5