# CNN Video Sentiment Analysis
This notebook will walk through the process of training a CNN to analyse frames of videos, and then take the aggregate over frames in a video.

In [1]:
import matplotlib
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import SGD
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils import paths
import random
import matplotlib.pyplot as plt
import numpy as np
import argparse
import pickle
import cv2
import os
import utils.data
import utils.model

In [2]:
import wandb
from wandb.keras import WandbCallback
# Log in to Weights & Biases up front; train() below reports each run to it
# through wandb.init() and the WandbCallback Keras callback.
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcharlieisalright[0m (use `wandb login --relogin` to force relogin)


True

In [3]:
# Dataset name handed to utils.data.load_img_dataset below.
# Options noted so far: ravdess, ravdess-faces; this run uses fer+.
dataset = "fer+"

# One of RN18-FER+, RN18-MS, or RN50
# Passed to utils.model.get_model to pick the architecture, and also decides
# the tensor layout: the two RN18 variants are fed channels-first data.
EXPERIMENT = "RN18-FER+"

We use transfer learning on top of a ResNet CNN, training on frames extracted from our videos to obtain the task-specific model. In this case, we are training using data at 720p.

In [4]:
# The two ResNet-18 experiments are fed channels-first tensors; any other
# experiment uses channels-last. Both the loader and the generators need to
# agree on this, so derive the layout once.
channels_first = EXPERIMENT in ['RN18-FER+', 'RN18-MS']
data_format = 'channels_first' if channels_first else 'channels_last'

trainX, valX, testX, trainY, valY, testY, lb = utils.data.load_img_dataset(dataset, channels_first)

# Randomly perturb the training images so results are more generalizable.
if channels_first:
    # RN18 experiments: horizontal flips only.
    train_augmentation = ImageDataGenerator(
        horizontal_flip=True,
        fill_mode="nearest",
        data_format=data_format,
    )
else:
    # Other experiments: rotations, zooms, shifts, shears and flips.
    train_augmentation = ImageDataGenerator(
        rotation_range=30,
        zoom_range=0.15,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        horizontal_flip=True,
        fill_mode="nearest",
        data_format=data_format,
    )

# Validation images go through a generator with no random transforms.
val_augmentation = ImageDataGenerator(data_format=data_format)

# Per-channel mean attached to both generators (presumably the ImageNet RGB
# means -- TODO confirm).
# NOTE(review): neither generator is created with featurewise_center=True, so
# it is worth verifying that Keras actually subtracts this mean.
mean = np.array([123.68, 116.779, 103.939], dtype="float32")
train_augmentation.mean = val_augmentation.mean = mean

In [5]:
# Sanity check: number of training images.
print(len(trainX))

25238


In [7]:
# Sanity check: number of training labels (should match the image count).
print(len(trainY))

25238


In [8]:
def get_model():
    """Build the network for the currently selected EXPERIMENT.

    Delegates to ``utils.model.get_model``, passing the experiment name and
    the number of entries in ``lb.classes_`` as the class count.
    """
    num_classes = len(lb.classes_)
    return utils.model.get_model(EXPERIMENT, num_classes)

In [9]:
def train():
    """Run one Weights & Biases tracked training run and return the model.

    Hyperparameters are read from ``wandb.config``; the values in
    ``config_defaults`` are passed to ``wandb.init`` as the starting config,
    and this same function is handed to ``wandb.agent`` for sweeps.

    Relies on notebook globals: ``EXPERIMENT``, ``dataset``, ``trainX``,
    ``trainY``, ``valX``, ``valY``, ``train_augmentation`` and
    ``val_augmentation``.

    Returns:
        The compiled Keras model after ``fit`` has completed.
    """
    # default hyperparameters
    config_defaults = {
        'batch_size' : 31,
        'learning_rate' : 0.0008475,
        'epochs': 49,
        'momentum' : 0.9,
        'decay': 1e-4
    }

    wandb.init(project='sentiment', entity='charlieisalright', config=config_defaults)
    config = wandb.config

    # Record which architecture/dataset produced this run.
    config.architecture_name = EXPERIMENT
    config.dataset = dataset

    # Compile the model, using stochastic gradient descent optimization.
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    opt = SGD(
        learning_rate=config.learning_rate,
        momentum=config.momentum,
        decay=config.decay / config.epochs,
    )
    model = get_model()
    model.compile(loss="categorical_crossentropy", optimizer=opt,
        metrics=["accuracy"])

    # Now we can start training!
    # The validation generator must use the same batch size that
    # `validation_steps` is computed from. With flow()'s default of 32 the two
    # disagree, and validation then covers only part of valX or wraps around
    # and counts some samples twice, which skews val_loss.
    model.fit(
        x = train_augmentation.flow(trainX, trainY, batch_size=config.batch_size),
        steps_per_epoch = len(trainX) // config.batch_size,
        validation_data = val_augmentation.flow(valX, valY, batch_size=config.batch_size),
        validation_steps = len(valX) // config.batch_size,
        epochs = config.epochs,
        callbacks = [WandbCallback()]
    )

    return model


Next, we set up a sweep of hyperparameters.

In [None]:
# Single training run; train() starts from its built-in default hyperparameters.
model = train()

  warn("The `IPython.html` package has been deprecated since IPython 4.0. "




In [None]:
# Bayesian hyperparameter search that minimises validation loss.
# Each entry under "parameters" is read back inside train() via wandb.config.
sweep_config = {
    "method": "bayes",
    "metric": {"name": "val_loss", "goal": "minimize"},
    "parameters": {
        "epochs": {"distribution": "int_uniform", "min": 13, "max": 50},
        "batch_size": {"distribution": "int_uniform", "min": 4, "max": 64},
        "learning_rate": {"distribution": "uniform", "min": 0.00001, "max": 0.01},
    },
}

# The sweep id is hard-coded (presumably an existing sweep), so sweep_config
# above is not submitted here. To register a fresh sweep instead, use:
#   sweep_id = wandb.sweep(sweep_config, project='sentiment')
sweep_id = 'ocuhuuax'
wandb.agent(sweep_id, project='sentiment', function=train)

Finally, evaluate the network, and plot some results.

In [None]:
# Evaluate a saved checkpoint on the held-out test split.
# NOTE(review): this replaces any `model` produced by train() above, and the
# checkpoint name suggests a ResNet-50 -- confirm it matches the layout that
# testX was loaded with for the current EXPERIMENT.
model = load_model("../models/best-models/resnet50-face.h5")

predictions = model.predict(x=testX.astype("float32"), batch_size=32)

# Labels and predictions are per-class score vectors; argmax turns both into
# class indices for the per-class precision/recall report.
report = classification_report(
    testY.argmax(axis=1),
    predictions.argmax(axis=1),
    target_names=lb.classes_,
)
print(report)


In [None]:
from sklearn.metrics import confusion_matrix
import itertools

# Confusion matrix normalised over the true labels (normalize='true'), so
# each row sums to 1. Rows are true classes, columns are predicted classes.
matrix = confusion_matrix(testY.argmax(axis=1), predictions.argmax(axis=1), normalize='true')

plt.imshow(matrix, interpolation="nearest")
plt.colorbar()
plt.title("Normalized confusion matrix")

target_names = lb.classes_
tick_marks = np.arange(len(target_names))
plt.xticks(tick_marks, target_names, rotation=45)
plt.yticks(tick_marks, target_names)
# Label the axes so the figure stands alone: imshow draws rows on the y-axis.
plt.ylabel("True label")
plt.xlabel("Predicted label")

# Annotate every cell with its value. Text colour flips around `thresh`:
# white on low-valued cells, black on high-valued ones (assumes the default
# colormap renders low values dark).
thresh = matrix.max() / 1.5
for i, j in itertools.product(range(matrix.shape[0]), range(matrix.shape[1])):
    plt.text(j, i, "{:0.2f}".format(matrix[i, j]),
             horizontalalignment="center",
             color="white" if matrix[i, j] < thresh else "black")

plt.tight_layout()
plt.show()

## NOTES
This works, but it only reaches an accuracy of about 0.3. The next thing to try is using a face details model first.

In [None]:
# Save model
# NOTE(review): `model` is whichever object was assigned last -- either the
# result of train() or the checkpoint loaded in the evaluation cell -- so
# check it is the intended one before overwriting this path.

model.save("../models/best-models/resnet50-direct")

In [None]:
# Optional: render the architecture diagram for the selected EXPERIMENT.
PLOT = False
if PLOT:
    from tensorflow import keras
    # Use the notebook's get_model() factory; get_RN50_model is not defined
    # anywhere in this notebook and would raise NameError.
    keras.utils.plot_model(get_model(), show_shapes=True)


In [None]:
# Optional smoke test: push two training images through a freshly built,
# untrained network to check that input shapes line up.
DEBUG = False
if DEBUG:
    # Use a separate name so a trained/loaded `model` is not overwritten.
    debug_model = get_model()
    # `data` is not defined in this notebook; trainX holds the loaded images.
    debug_model.predict(np.array(trainX[0:2]))