In [1]:
# https://github.com/supriya-gdptl/kaggle-youtube8m/blob/master/video_level_models.py

This notebook serves to test the CNN architecture and see how resolving class imbalance through resampling fares.

### Imports / Data

In [2]:
# imports
import tensorflow as tf
import os
import pickle
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import json
from path import Path
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import roc_auc_score
from sklearn.utils import resample
import keras

2023-04-17 13:03:19.366867: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-17 13:03:19.408123: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-17 13:03:19.414009: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


You will need to set the base path here

In [3]:
# set the base path
base_path = Path('/nfs/turbo/seas-nhcarter/human_wildlife_interactions/classifier_video_data')

In [4]:
# read in the matrices
train_path = Path(base_path / "train_mat.csv")
test_path = Path(base_path / "test_mat.csv")
val_path = Path(base_path / "val_mat.csv")
train_df = pd.read_csv(train_path) 
test_df = pd.read_csv(test_path)
val_df = pd.read_csv(val_path)

### Network Design

In [5]:
# simple matmul CNN approach with class weights based off of Supriya Gadi Patil's CNN implementation
# https://github.com/supriya-gdptl/kaggle-youtube8m
l2_reg = .00000001
# define inputs
input_1 = keras.Input(shape=(1024,))
input_2 = keras.Input(shape=(128,))

# reduce using fully connected layer
videoNN = keras.layers.Dense(32, activation=tf.nn.leaky_relu,kernel_regularizer=keras.regularizers.l2(l2_reg))(input_1)
audioNN = keras.layers.Dense(32, activation=tf.nn.leaky_relu,kernel_regularizer=keras.regularizers.l2(l2_reg))(input_2)

# adjust shape to make everything (32x1) instead of (32,)
video_dim = tf.expand_dims(videoNN, -1)
audio_dim = tf.expand_dims(audioNN, -1)

# transpose audio to enable matmul operation
audio_dim = tf.transpose(audio_dim, perm=[0,2,1])

# matmul to produce 32x32 result
matrix = tf.matmul(video_dim, audio_dim)

# need another empty dimension for CNN to work
matrix = tf.expand_dims(matrix, -1)

# 2 layer CNN with single pooling layer
convolution_1 = keras.layers.Conv2D(filters=8, kernel_size=[3,3])(matrix)
average_pool = keras.layers.AveragePooling2D(pool_size=2, strides=2)(convolution_1)
convolution_2 = keras.layers.Conv2D(filters=4, kernel_size=[3,3])(average_pool)

# flatten output layer
flattening = keras.layers.Flatten()(convolution_2)

# output layer
output = keras.layers.Dense(1, activation=tf.nn.sigmoid, kernel_regularizer=keras.regularizers.l2(l2_reg))(flattening)

# build the graph
cnn_model = keras.Model(inputs=[input_1,input_2], outputs=[output])
cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])
initial_weights = cnn_model.get_weights()

### Fit the model - normal data

In [6]:
X_train_v = train_df.iloc[:,:-1]
y_train_v = train_df.iloc[:,-1]
X_val_v = val_df.iloc[:,:-1]
y_val_v = val_df.iloc[:,-1]
X_test_v = test_df.iloc[:,:-1]
y_test_v = test_df.iloc[:,-1]

In [7]:
train_video = X_train_v.iloc[:, :1024]
train_audio = X_train_v.iloc[:,1024:-1]
test_video = X_test_v.iloc[:, :1024]
test_audio = X_test_v.iloc[:, 1024:-1]
val_video = X_val_v.iloc[:, :1024]
val_audio = X_val_v.iloc[:, 1024:-1]

In [8]:
cnn_model.fit(x=[train_video, train_audio], y=y_train_v, class_weight={0: .1, 1: .9},validation_data=([val_video, val_audio], y_val_v))



<keras.callbacks.History at 0x14b7f47c3c40>

In [9]:
cnn_model.evaluate(x=[test_video, test_audio], y=np.array(y_test_v))



[0.7552503943443298, 0.5898419618606567]

In [10]:
preds = cnn_model.predict(x=[test_video, test_audio])



In [11]:
# turn the probabilities into actual predictions
preds_translated = []
for pred in preds:
    preds_translated.append(np.argmax(pred))

In [12]:
# get the score information
precision_recall_fscore_support(y_test_v, preds_translated)

  _warn_prf(average, modifier, msg_start, len(result))


(array([0.91566265, 0.        ]),
 array([1., 0.]),
 array([0.95597484, 0.        ]),
 array([532,  49]))

In [13]:
# get the roc / auc score
roc_auc_score(y_test_v, preds_translated)

0.5

### "Balance" the dataset

In [14]:
# resample the imbalanced class
train_df = train_df.rename(columns= {'1152':"y"})
hunting_df = train_df.loc[train_df.y == 1.0]
non_hunting_df = train_df.loc[train_df.y != 1.0]
hunting_upsampled = resample(hunting_df, n_samples = len(non_hunting_df))
hunting_upsampled.shape

(2559, 1154)

In [15]:
# combine the dataframes
new_train_df = pd.concat([non_hunting_df, hunting_upsampled])
# shuffle the dataframe
new_train_df = new_train_df.sample(frac=1)

In [16]:
# train the new model
balanced_cnn = tf.keras.Model(inputs=[input_1,input_2], outputs=[output])
balanced_cnn.set_weights(initial_weights)
balanced_cnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.AUC()])

In [17]:
X_train_video = new_train_df.iloc[:,:1024]
X_train_audio = new_train_df.iloc[:, 1025:-1]
X_train_y = new_train_df.y
balanced_cnn.fit(x=[X_train_video, X_train_audio], y=X_train_y ,class_weight={0: 0.1, 1: 0.9}, validation_data=([val_video, val_audio], np.array(y_val_v)))



<keras.callbacks.History at 0x14b7dd4a0250>

In [18]:
cnn_model.evaluate(x=[test_video, test_audio], y=np.array(y_test_v).reshape((-1,1)))



[1.242297887802124, 0.5673431158065796]

In [19]:
preds = cnn_model.predict(x=[test_video, test_audio])
# turn the probabilities into actual predictions
preds_translated = []
for pred in preds:
    preds_translated.append(np.argmax(pred))
# get the score information
precision_recall_fscore_support(y_test_v, preds_translated)



  _warn_prf(average, modifier, msg_start, len(result))


(array([0.91566265, 0.        ]),
 array([1., 0.]),
 array([0.95597484, 0.        ]),
 array([532,  49]))

In [20]:
# get the roc / auc score
roc_auc_score(y_test_v, preds_translated)

0.5