### Friends canned laughter identification

In this notebook, we use annotated data from the available Friends
episodes to train a canned-laughter detection model on VGGish embeddings.

In [None]:
import sys

# Make the project's helper modules in ../utils/ importable. The path is
# relative, so it is resolved against the kernel's working directory.
# The membership check keeps this cell idempotent: re-running it does not
# stack duplicate entries onto sys.path.
if '../utils/' not in sys.path:
    sys.path.append('../utils/')

In [None]:
# local imports
import utils
import episode
import color
import stats
import modelbuilder
# stdlib and package imports
import numpy as np
from pathlib import Path 
from matplotlib import pyplot as plt
from collections import Counter
# keras and ML imports
from keras.models import Sequential, Model, model_from_yaml
from keras.callbacks import ModelCheckpoint
from keras.layers import Input, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize as sknormalize
from sklearn.metrics import confusion_matrix
from imblearn import under_sampling

#### Extracting data segments
Here we will extract labelled embeddings of wav data for each of the episodes

In [None]:
# Episodes with annotation data available: season 2, episodes 1 through 4,
# followed by season 3, episode 9.
episodes = list(map('friends-s02-e{:0>2d}'.format, range(1, 5)))
episodes.append('friends-s03-e09')

In [None]:
# Build the labelled data set for every episode listed in `episodes`.
# use_vggish=True requests VGGish embeddings; with preserve_length=False the
# data is split into chunks of 0.96s (per the original author's note).
# NOTE(review): the archive check that avoids re-running the expensive
# extraction is presumably done inside episode.get_data -- confirm.
X_raw, Y_raw, refs = episode.get_data(
    which_episodes=episodes,
    use_vggish=True,
    preserve_length=False,
)

In [None]:
# Sanity check: display the shapes of the features, labels and references.
X_raw.shape, Y_raw.shape, refs.shape

In [None]:
# Peek at one label and the reference entry at the same index
# (fourth from the end).
Y_raw[-4], refs[-4]

#### Now we'll use the extracted data to generate balanced training and testing data sets

First, resample the data to have an equal number of 'laugh' and 'no-laugh' examples

In [None]:
# Undersample every class except the minority one so the classes end up with
# equal counts. The fixed random_state makes the random choice of retained
# samples reproducible across kernel restarts, in line with the seeded
# train/test split used in this notebook.
rus = under_sampling.RandomUnderSampler(sampling_strategy='not minority', random_state=1)
X_res, Y_res = rus.fit_resample(X_raw, Y_raw)

In [None]:
# did the resampling work? Count the examples per label in the resampled
# targets; the counts should be equal.
Counter(Y_res)

Next, split the data into training and testing sets so that no example ends up in both

In [None]:
# Hold out a quarter of the balanced data for testing. Stratifying on the
# flattened labels keeps the class proportions the same in both partitions,
# and the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_res,
    Y_res,
    test_size=0.25,
    random_state=1,
    stratify=Y_res.reshape(-1),
)

In [None]:
# check again: did the split go as expected? Show the per-label counts of
# the training and testing targets side by side.
Counter(Y_train), Counter(Y_test)

#### Now we'll attempt to model the balanced data using a Keras dense model

In [None]:
# Checkpoint file for this experiment. With save_best_only=True the callback
# only overwrites the file when the monitored quantity improves.
ckpt_path = 'task:{task}-ckpt.hdf5'.format(task='all-mixed-together')
checkpoint = ModelCheckpoint(filepath=ckpt_path, save_best_only=True)

# Fresh model from the project's model builder, using the rmsprop optimizer.
model = modelbuilder.build_laugh_model(optimizer='rmsprop')

In [None]:
# train model
# validation_data is passed as an (x, y) tuple, which is the form the Keras
# `fit` documentation specifies; a list is not unpacked into inputs/targets
# by every Keras version.
# The checkpoint callback saves the best weights seen during the 50 epochs.
H = model.fit(X_train, Y_train.reshape(-1), epochs=50,
              validation_data=(X_test, Y_test.reshape(-1)),
              callbacks=[checkpoint])

In [None]:
# Restore the weights stored in this experiment's checkpoint file.
best_weights = 'task:{task}-ckpt.hdf5'.format(task='all-mixed-together')
model.load_weights(filepath=best_weights)

# NOTE(review): _compile_binary is a private modelbuilder helper; presumably
# it recompiles the model for binary classification -- confirm.
model = modelbuilder._compile_binary(model)

In [None]:
# plot training history (H is the object returned by model.fit)
stats.plot_history(H)

In [None]:
# plot the ROC curve for this model and data, evaluated on the held-out
# test partition with the labels flattened to 1-D
stats.plot_roc_curve(model=model, x=X_test, y_true=Y_test.reshape(-1))

### Per episode train/valid split (Taylor)

The above code is cheating a bit by taking one laugh incident and allowing
adjacent chunks to be in both the training and validation sets. It's easy
to fix this though by constructing the training and testing sets from entirely
different episodes. This also guards against the case that laughter is different
in some episodes compared to others.

Oddly enough, the model seems to perform worse on the training data but just as
well on the validation set. These results also seem to depend a lot on how long
the model is trained for. Hopefully more tuning will fix this; we still need to
work out why this is the case.

In [None]:
# Episode number parsed from the episode id in each reference entry,
# e.g. 'friends-s02-e01'[13:15] == '01'.
eps_num = [int(x[13:15]) for x, _, _ in refs]
# Chunks from episodes numbered 1, 2 or 3 go to training; everything else
# goes to validation.
train_flag = np.array([x in [1, 2, 3] for x in eps_num])
print(Counter(train_flag))

# Boolean-mask the raw data into disjoint training / validation partitions.
X_raw_train = X_raw[train_flag]
X_raw_valid = X_raw[~train_flag]
Y_raw_train = Y_raw[train_flag]
Y_raw_valid = Y_raw[~train_flag]

# Balance each partition separately by undersampling all classes except the
# minority one. The fixed random_state makes the random selection of
# retained samples reproducible across kernel restarts.
rus = under_sampling.RandomUnderSampler(sampling_strategy='not minority', random_state=1)
X_train, Y_train = rus.fit_resample(X_raw_train, Y_raw_train)
X_valid, Y_valid = rus.fit_resample(X_raw_valid, Y_raw_valid)

# Per-label counts after balancing.
print(Counter(Y_train))
print(Counter(Y_valid))

In [None]:
# Checkpoint file for the per-episode experiment. With save_best_only=True
# the callback only overwrites the file when the monitored quantity improves.
ckpt_path = 'task:{task}-ckpt.hdf5'.format(task='per-episode-split')
checkpoint = ModelCheckpoint(filepath=ckpt_path, save_best_only=True)

# Fresh model from the project's model builder, using the rmsprop optimizer.
model = modelbuilder.build_laugh_model(optimizer='rmsprop')

In [None]:
# Train on the balanced training episodes and validate on the balanced
# validation episodes. validation_data is passed as an (x, y) tuple, which
# is the form the Keras `fit` documentation specifies; a list is not
# unpacked into inputs/targets by every Keras version.
H = model.fit(X_train, Y_train.reshape(-1), epochs=50,
              validation_data=(X_valid, Y_valid.reshape(-1)),
              callbacks=[checkpoint])

In [None]:
# Restore the weights stored in the per-episode experiment's checkpoint file.
best_weights = 'task:{task}-ckpt.hdf5'.format(task='per-episode-split')
model.load_weights(filepath=best_weights)

# NOTE(review): _compile_binary is a private modelbuilder helper; presumably
# it recompiles the model for binary classification -- confirm.
model = modelbuilder._compile_binary(model)

In [None]:
# Turn the model outputs into hard labels by thresholding at 0.5, then
# tabulate them against the true labels of the balanced validation set.
Y_pred = np.greater(model.predict(X_valid), 0.5)
confusion_matrix(Y_valid, Y_pred)

In [None]:
# Plot the training history of the per-episode run (H is the object
# returned by model.fit).
stats.plot_history(H)

In [None]:
# ROC curve for the per-episode model.
# NOTE(review): this evaluates on the raw (not undersampled) validation
# partition, whereas the confusion matrix uses the balanced X_valid/Y_valid
# -- confirm this is intended.
stats.plot_roc_curve(model=model, x=X_raw_valid, y_true=Y_raw_valid)

### Per season train/valid split

In the code block below, we'll try to train our model on data from one season,
and validate it on data from a whole different season.

In [None]:
# Season number parsed from the episode id in each reference entry,
# e.g. 'friends-s02-e01'[9:11] == '02'.
szn_num = [int(x[9:11]) for x, _, _ in refs]
# Chunks from season 2 go to training; every other season goes to validation.
train_flag = np.array([x == 2 for x in szn_num])
print(Counter(train_flag))

# Boolean-mask the raw data into disjoint training / validation partitions.
X_raw_train = X_raw[train_flag]
X_raw_valid = X_raw[~train_flag]
Y_raw_train = Y_raw[train_flag]
Y_raw_valid = Y_raw[~train_flag]

# Balance each partition separately by undersampling all classes except the
# minority one. The fixed random_state makes the random selection of
# retained samples reproducible across kernel restarts.
rus = under_sampling.RandomUnderSampler(sampling_strategy='not minority', random_state=1)
X_train, Y_train = rus.fit_resample(X_raw_train, Y_raw_train)
X_valid, Y_valid = rus.fit_resample(X_raw_valid, Y_raw_valid)

# Per-label counts after balancing.
print(Counter(Y_train))
print(Counter(Y_valid))

In [None]:
# Reference architecture, kept commented out for documentation only; it is
# not executed.
# NOTE(review): presumably this mirrors what modelbuilder.build_laugh_model
# constructs -- confirm.
#
# inp = Input(shape=(128,), name='in0')

# layer = Dense(16, activation='relu', name='d0')(inp)
# layer = Dropout(.4, name='dr0')(layer)

# layer = Dense(8, activation='relu', name='d1')(layer)
# layer = Dropout(.4, name='dr1')(layer)

# layer = Dense(1, activation='sigmoid', name='out')(layer)

# model = Model(inputs=[inp], outputs=[layer])
# model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['binary_accuracy'])

# model.summary()

# Checkpoint file for the per-season experiment. With save_best_only=True
# the callback only overwrites the file when the monitored quantity improves.
ckpt_path = 'task:{task}-ckpt.hdf5'.format(task='per-season-split')
checkpoint = ModelCheckpoint(filepath=ckpt_path, save_best_only=True)

# Fresh model from the project's model builder, using the rmsprop optimizer.
model = modelbuilder.build_laugh_model(optimizer='rmsprop')

In [None]:
# Train on the balanced training season and validate on the balanced
# validation season. validation_data is passed as an (x, y) tuple, which is
# the form the Keras `fit` documentation specifies; a list is not unpacked
# into inputs/targets by every Keras version.
H = model.fit(X_train, Y_train.reshape(-1), epochs=50,
              validation_data=(X_valid, Y_valid.reshape(-1)),
              callbacks=[checkpoint])

In [None]:
# Restore the weights stored in the per-season experiment's checkpoint file.
best_weights = 'task:{task}-ckpt.hdf5'.format(task='per-season-split')
model.load_weights(filepath=best_weights)

# NOTE(review): _compile_binary is a private modelbuilder helper; presumably
# it recompiles the model for binary classification -- confirm.
model = modelbuilder._compile_binary(model)

In [None]:
# Turn the model outputs into hard labels by thresholding at 0.5, then
# tabulate them against the true labels of the balanced validation set.
Y_pred = np.greater(model.predict(X_valid), 0.5)
confusion_matrix(Y_valid, Y_pred)

In [None]:
# Plot the training history of the per-season run (H is the object
# returned by model.fit).
stats.plot_history(H)

In [None]:
# ROC curve for the per-season model; the three returned values are kept
# as fpr, tpr and thr.
# NOTE(review): this evaluates on the raw (not undersampled) validation
# partition, whereas the confusion matrix uses the balanced X_valid/Y_valid
# -- confirm this is intended.
fpr, tpr, thr = stats.plot_roc_curve(model=model, x=X_raw_valid, y_true=Y_raw_valid)

### Mark an entire episode with labels

In [None]:
# Run detection over a whole episode with the `model` currently in scope,
# using only the 'threshold' algorithm.
# NOTE(review): the meaning of precision and of params n / t is defined by
# episode.detect_in_episode and is not visible here -- confirm.
decoded, preds = episode.detect_in_episode(
    episode='friends-s03-e09',
    model=model,
    precision=2,
    algorithms=['threshold'],
    params={'n': 3, 't': .6},
)

In [None]:
# Convert the predictions with a private helper of the episode module.
# NOTE(review): the exact layout of the result is not visible here;
# presumably one probability column per class -- confirm.
probs = episode._binary_probs_to_multiclass(preds)

In [None]:
# Average of entries 10..19 of probs along axis 0, shown next to the
# entries themselves.
np.average(probs[10:20], axis=0), probs[10:20]

In [None]:
# Show the first 16 predictions.
preds[:16]

In [None]:
# For every key of `decoded`, build a list of (key, value) pairs, then
# transpose those lists with zip so each output tuple holds the i-th pair
# of every key. zip stops at the shortest list, so extra values of longer
# lists are not shown.
[*zip(*[[(key,val) for val in decoded[key]] for key in decoded])]

In [None]:
# Load the annotations for the same episode, presumably to compare them
# with the detections shown above.
episode.load_annotations(episode='friends-s03-e09')