# Training workflow

In [None]:
# Enable IPython's autoreload extension so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload

# Mode 2: reload all modules before executing each cell.
%autoreload 2

In [None]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import sklearn
import os
import copy

from IPython.display import Video

## Constants

In [None]:
# Run-wide settings. CUDA, BATCH_SIZE, LEARNING_RATE and NUM_EPOCHS are passed
# to train_two_layer_nn by train_model; RANDOM_SEED seeds the RNGs.
CUDA = torch.cuda.is_available()  # whether PyTorch reports CUDA as available
RANDOM_SEED = 1
BATCH_SIZE = 32
LEARNING_RATE = 1e-3
NUM_EPOCHS = 15

print(f"CUDA: {CUDA}")

In [None]:
# Seed both the NumPy and the PyTorch random number generators with the same
# fixed value.
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

## Load Data

Extracted features, scores, metadata, etc.

In [None]:
# Load the scores table and index it by video_id; the feature loaders below
# are all called with this index.
data = pd.read_csv("training_set/scores_v2.csv").set_index("video_id")

In [None]:
from features.video import load_C3D_features
from features.image import load_ResNet152_features, load_LBP_features, load_HOG_features
from features.audio import load_VGGish_features
from features.emotion import load_Emotion_features, extract_emotions

In [None]:
# Store the emotion features loaded for the indexed video ids in the "emotion" column.
data["emotion"] = load_Emotion_features(data.index)

In [None]:
# Store the LBP features loaded for the indexed video ids in the "lbp" column.
data["lbp"] = load_LBP_features(data.index)

In [None]:
# Store the HOG features loaded for the indexed video ids in the "hog" column.
data["hog"] = load_HOG_features(data.index)

In [None]:
# Store the C3D features loaded for the indexed video ids in the "c3d" column.
data["c3d"] = load_C3D_features(data.index)

In [None]:
# Store the ResNet152 features loaded for the indexed video ids in the "resnet152" column.
data["resnet152"] = load_ResNet152_features(data.index)

In [None]:
# Store the VGGish features loaded for the indexed video ids in the "vggish" column.
data["vggish"] = load_VGGish_features(data.index)

## Target Prep

Khosla points out that the memorability score used in Isola's paper does not take the memory retention duration into account.
Cohendet uses the same idea as Khosla, which introduces a decay rate $\alpha$, estimated as

$$\alpha \leftarrow 
\frac{\sum^N_{i=1}\frac{1}{n^{(i)}} \sum^{n^{(i)}}_{j=1} \log(\frac{t^{(i)}_j}{T})[x^{(i)}_j - m^{(i)}_T] }
{\sum^N_{i=1}\frac{1}{n^{(i)}} \sum^{n^{(i)}}_{j=1}[ \log(\frac{t^{(i)}_j}{T})]^2}
$$

to calculate the memorability of image $i$
$$
m_T^{(i)} \leftarrow
\frac{1}{n^{(i)}} \sum^{n^{(i)}}_{j=1}[x^{(i)}_j - \alpha \log(\frac{t_j^{(i)}}{T})]
$$

where we have $n^{(i)}$ observations for image $i$, given by $x^{(i)}_j \in \{0,1\}$ and $t^{(i)}_j$, where $x^{(i)}_j = 1$ implies that the repeat of the image was correctly detected when it was shown after time $t^{(i)}_j$.

IDEA: potentially explore calculating $\alpha$ per user

In [None]:
from target_augmentation import add_position_delta, calculate_alpha_and_memorability

In [None]:
# Load the short-term annotations from which the adjusted target is computed.
annotations = pd.read_csv("training_set/short_term_annotations_v2.csv")

In [None]:
# The "t" column read below is presumably written in place by this call --
# TODO confirm against target_augmentation.add_position_delta.
add_position_delta(annotations)

mean_t = np.mean(annotations["t"])
print("Average t:", mean_t)

# T is the mean of "t" rounded to the nearest integer, so the adjusted score
# is taken at roughly the average value of t.
big_t = int(np.around(mean_t))
label = f"m_{big_t}"
print(f"Calculating adjusted value for {label}")

alpha, data[label] = calculate_alpha_and_memorability(annotations, T=big_t)
print(f"Alpha: {alpha}")

# Visual check: original score against the adjusted score.
plt.scatter(data["part_1_scores"], data[label])
plt.xlabel("Original memorability score")
plt.ylabel(f"Adjusted memorability score ({label})")
plt.show()

## Data Prep

Building datasets

potentially weighting samples based on annotations?



In [None]:
# Shape of the "lbp" entry stored for the first video in the table.
data["lbp"].iloc[0].shape

In [None]:
from train import split_training, build_matrixes

In [None]:
# Split the table into training and validation parts and report their sizes.
train_data, valid_data = split_training(data)
print("training:",len(train_data))
print("validation:", len(valid_data))

#### Pick Features and Target

In [None]:
# Column names handed to build_matrixes as target_name / feature_name.
# NOTE(review): the adjusted-score column is created earlier as f"m_{big_t}",
# so "m_75" only exists if big_t came out as 75 (or scores_v2.csv already has
# such a column) -- confirm, or reuse `label`.
target = "m_75"
features = "vggish"  # one of the feature columns added to `data` above

In [None]:
# Build the feature matrix, target vector and per-row video ids for each split.
features_train, targets_train, video_ids_train = build_matrixes(
    train_data, target_name=target, feature_name=features
)
features_valid, targets_valid, video_ids_valid = build_matrixes(
    valid_data, target_name=target, feature_name=features
)

print("features_train shape:", features_train.shape)
print("targets_train shape:", targets_train.shape)
print("features_valid shape:", features_valid.shape)
# Label corrected: this line reports the validation targets, not the features.
print("targets_valid shape:", targets_valid.shape)

# Count rows per video id across both splits (presumably a video can
# contribute more than one feature row -- verify against build_matrixes).
total_features = len(features_train) + len(features_valid)
unique, count = np.unique(np.concatenate([video_ids_valid, video_ids_train]), return_counts=True)
print("Total features:", total_features)
print("Total videos:", len(unique))
print("Avg features per video:", total_features / len(unique))
print("Min features per video:", min(count))
print("Max features per video:", max(count))

## Model training

In [None]:
from train import train_two_layer_nn, train_svr

def train_model(model_type, features_train, targets_train, features_valid, targets_valid):
    """Train the model selected by ``model_type`` and return it.

    ``"two_layer_nn"`` trains through ``train_two_layer_nn`` with the
    notebook-level constants and plots the training and validation loss
    curves. ``"svr"`` trains through ``train_svr`` on the training split only.

    Raises:
        ValueError: if ``model_type`` is neither ``"two_layer_nn"`` nor ``"svr"``.
    """
    if model_type == "svr":
        # The validation split is not used on this path.
        return train_svr(features_train, targets_train)

    if model_type != "two_layer_nn":
        raise ValueError(f"'{model_type}' is not a valid model type")

    nn_settings = {
        "hidden_dim": 100,
        "num_epochs": NUM_EPOCHS,
        "cuda": CUDA,
        "batch_size": BATCH_SIZE,
        "learning_rate": LEARNING_RATE,
    }
    model, train_losses, valid_losses = train_two_layer_nn(
        features_train, targets_train, features_valid, targets_valid, **nn_settings
    )

    # Draw both loss curves on one figure.
    for losses, curve_name in ((train_losses, "Training Loss"), (valid_losses, "Validation Loss")):
        plt.plot(losses, label=curve_name)
    plt.legend()
    plt.show()

    return model
        

In [None]:
# Choose which model train_model builds: "two_layer_nn" or "svr".
model_type = "two_layer_nn"

# Train on the matrices built above; model_type is reused by get_predictions.
model = train_model(model_type, features_train, targets_train, features_valid, targets_valid)

## Test

Spearman's rank correlation, ROC curves, etc

In [None]:
from train import get_predictions

# Predict on the validation split. The video ids are passed as well,
# presumably so that rows are grouped per video -- verify in train.get_predictions.
predictions, actuals = get_predictions(model_type, model, features_valid, targets_valid, video_ids_valid)

In [None]:
# Number of predictions returned for the validation split.
len(predictions)

In [None]:
# Spearman rank correlation between actual and predicted scores; the p-value
# returned alongside it is discarded.
spearman_rank, _ = stats.spearmanr(actuals, predictions)
print(spearman_rank)

In [None]:
# Spearman rank correlation between actual and predicted scores.
spearman_rank, _ = stats.spearmanr(actuals, predictions)
print("SPEARMAN RANK:", spearman_rank)

# Common value range of both series, used as the end points of the identity line.
min_mem = min(np.min(actuals), np.min(predictions))
max_mem = max(np.max(actuals), np.max(predictions))
diagonal = [min_mem, max_mem]

fig, ax = plt.subplots(1, figsize=(8, 8))
ax.scatter(actuals, predictions, label=f"Spearman rank correlation = {spearman_rank}")
ax.plot(diagonal, diagonal, label="1 to 1")
ax.set_title(f"actual {target} vs predicted {target}")
ax.set_xlabel("actual")
ax.set_ylabel("predictions")
ax.legend()
plt.show()