### Canned laughter identification using pre-trained models

In this notebook, we use model(s) trained on laughter data from *Friends*
to detect canned laughter in any media file.

In [None]:
import sys

# Put the helper directory on the import path (presumably where the local
# modules imported in the next cell live - confirm).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
if '../utils/' not in sys.path:
    sys.path.append('../utils/')

In [None]:
# local imports
import utils
import episode
import color
import stats
import modelbuilder
# stdlib and package imports
import numpy as np
import pandas as pd
from pathlib import Path 
from matplotlib import pyplot as plt
from collections import Counter
# keras and ML imports
from keras.models import Sequential, Model, model_from_yaml
from keras.callbacks import ModelCheckpoint
from keras.layers import Input, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize as sknormalize
from sklearn.metrics import confusion_matrix
from imblearn import under_sampling

### Load an existing model

In [None]:
# Build the network through the project's modelbuilder helper, restore the
# weights stored in the checkpoint file, then rebind `model` to whatever the
# compile helper returns.
model = modelbuilder.build_laugh_model()
# NOTE(review): the checkpoint path has no directory component, so it is
# resolved against the kernel's working directory; 'task:' is part of the
# file name itself.
model.load_weights(filepath='task:per-season-split-ckpt.hdf5')
# NOTE(review): this calls a private (underscore-prefixed) helper; presumably
# it compiles the model for binary classification - confirm in modelbuilder.
model = modelbuilder._compile_binary(model)

### Mark an entire episode with labels

In [None]:
# Run the loaded model over the whole episode. The call yields two values:
# `decoded` (turned into a DataFrame in the next cell) and `preds`
# (stored later as the 'raw_preds' column).
decoded, preds = episode.detect_in_episode(
    episode='friends-s03-e09',
    model=model,
    precision=2,
    algorithms=['threshold', 'hmm'],
    params={'n': 3, 't': .6},
)

In [None]:
# Tabulate the decoded detections so later cells can add columns to them.
df = pd.DataFrame(data=decoded)

In [None]:
# Load the annotations for the same episode and keep only the entry stored
# under the 'laughter' key (iterated as (start, end) pairs by in_any_patch).
episode_annotations = episode.load_annotations(episode='friends-s03-e09')
annot = episode_annotations['laughter']

In [None]:
def in_any_patch(time, annot=None, precision=2):
    """Return the fraction of one prediction window covered by annotated patches.

    The window starts at ``time`` and is ``.96e3 / precision`` long. Every
    annotated ``(start, end)`` patch is clipped to that window and the union
    of the clipped pieces is measured, so that

    * a patch lying strictly inside the window counts (it used to yield 0),
    * several patches touching the same window are all counted (only the
      first one used to be), and
    * overlapping patches are never counted twice.

    Parameters
    ----------
    time : number
        Start of the window, in the same unit as the patch boundaries
        (presumably milliseconds, given the ``.96e3`` window - confirm).
    annot : iterable of (start, end) pairs, optional
        Annotated patches. When omitted, the notebook-level ``annot`` is
        looked up at call time, so re-loading annotations for another
        episode is picked up without re-defining this function.
    precision : number
        Divisor applied to the ``.96e3`` base window length.

    Returns
    -------
    float
        Covered share of the window, between 0. and 1. inclusive.

    Raises
    ------
    NameError
        If ``annot`` is omitted and no notebook-level ``annot`` exists.
    """
    if annot is None:
        # Late lookup instead of a def-time default: avoids freezing whatever
        # `annot` happened to be when this cell was executed.
        try:
            annot = globals()['annot']
        except KeyError:
            raise NameError("name 'annot' is not defined; pass annot= explicitly") from None

    window = .96e3 / precision
    window_end = time + window

    # Keep only the patches that really intersect [time, window_end),
    # clipped to the window and ordered by their start.
    clipped = sorted(
        (max(s, time), min(e, window_end))
        for s, e in annot
        if s < window_end and e > time
    )

    covered = 0.
    cursor = time  # right edge of what has already been counted
    for lo, hi in clipped:
        lo = max(lo, cursor)  # skip any part an earlier patch already covered
        if hi > lo:
            covered += hi - lo
            cursor = hi

    # Guard against tiny float overshoot; coverage can never exceed the window.
    return min(covered / window, 1.)

In [None]:
# Attach the second value returned by detect_in_episode as a new column, in
# place, next to the decoded columns.
# NOTE(review): assumes `preds` has one entry per row of `df` - confirm.
df['raw_preds'] = preds

In [None]:
# Ground truth per row: the value in_any_patch computes for each timestamp,
# using that function's default annotations and precision.
df['y_true_annot'] = df['timestamp'].apply(in_any_patch)
# Disabled experiment, kept for reference:
# df['positive_label'] = df[df['threshold'] == 1]['timestamp'].apply(lambda x: in_any_patch(x))
# df['negative_label'] = df[df['threshold'] == 0]['timestamp'].apply(lambda x: not in_any_patch(x))

In [None]:
# Show the rows from position 2900 to the end of the frame.
df.iloc[2900:]

In [None]:
# Sanity check, in the order (hmm, threshold): is the column element-wise
# identical to the raw predictions? False means the algorithm did change
# something, which is the desired outcome.
tuple(all(df[algo] == df['raw_preds']) for algo in ('hmm', 'threshold'))

In [None]:
# Draw one figure per block of 200 consecutive rows.
# The window starts come from the frame's own length instead of a fixed count
# of 20, so no empty figures are drawn past the end of the data and no rows
# are silently left out when the frame has more than 4000 rows.
for w, start in enumerate(range(0, len(df), 200)):
    s = slice(start, start + 200)
    # Shared x axis for all four series; timestamps are divided by 1e3
    # (presumably milliseconds -> seconds - confirm).
    seconds = df['timestamp'][s] / 1e3

    plt.figure(figsize=(18, 7))
    # The two binary series are divided by .9 / .95 so their "1" markers sit
    # slightly above 1.0 at different heights and do not hide each other or
    # the continuous curves.
    plt.plot(seconds, df['raw_preds'][s], 'bo-',
             seconds, df['y_true_annot'][s], 'ro-',
             seconds, df['hmm'][s]/.9, 'g.',
             seconds, (df['raw_preds'][s]>.5)/.95, 'c.')

    plt.legend(['raw_preds', 'y_true', 'hmm_smoothing\n(0 or 1)', 'pred>.5? (0 or 1)'],
               loc='center left', bbox_to_anchor=(1, 0.5))
    plt.title('visualization of predictions in window {} of width 100sec in s03e09'.format(w))
    plt.xlabel('time (s)')
    plt.show()