### Friends canned laughter identification

In this notebook, we look at some of the annotated data
from Friends and explore it in basic ways, such as
plotting histograms and computing simple statistics about the data.

#### Preamble: imports

In [None]:
import sys
# extend the module search path with ../utils/ -- presumably where the local
# modules imported in the next cell (utils, episode) live; verify the layout
sys.path.append('../utils/')

In [None]:
# local imports
import utils
import episode
# stdlib and package imports
import numpy as np
from matplotlib import pyplot as plt

#### Load annotations

In [None]:
# accumulator for the laughter patches gathered from every loaded episode
annotations = list()

In [None]:
# annotations are currently available for episodes 1--4
# start from an empty list so that re-running this cell does not append the
# same episodes a second time (which would double-count every patch in the
# histograms further down)
annotations = []
for epnum in range(1, 5):
    epname = 'friends-s02-e{:0>2d}'.format(epnum)
    this_annotations = episode.load_annotations(epname)
    annotations += this_annotations['laughter'] # pick only the annotations tagged 'laughter'

#### Plot histograms

1. Histogram of durations of laugh track patches
2. Histogram of durations of non-laughter patches
3. Overview of laugh track patches over the length of the episode

In [None]:
# length of each laughter patch: its end time minus its start time, divided by 1e3
durations = []
for start, end in annotations:
    durations.append((end-start)/1e3)

# the number of bins grows sub-linearly (power 0.6) with the number of patches
n_duration_bins = int(len(annotations)**.6)
plt.hist(durations, bins=n_duration_bins, color='cyan')
plt.show()

In [None]:
# a stretch without laughter runs from the end of one patch to the start of the following one
nonpatches = []
for (s1, e1), (s2, e2) in zip(annotations, annotations[1:]):
    gap = s2-e1
    # negative gaps are dropped.
    # NOTE(review): these presumably occur where one episode's patches end and the
    # next episode's begin, since `annotations` concatenates several episodes -- confirm
    if gap >= 0:
        nonpatches.append(gap/1e3)

# the number of bins grows sub-linearly (power 0.6) with the number of gaps
n_gap_bins = int(len(nonpatches)**.6)
plt.hist(nonpatches, bins=n_gap_bins, color='orange')
plt.show()

In [None]:
# in this cell we sample times and record whether there is canned laughter at each of them
# for each episode, plot '1' when the time is in a laughter patch, '0' when not
def get_patch_plot_data(annotations):
    """Sample 500 evenly spaced times and flag those that fall inside a laughter patch.

    Parameters
    ----------
    annotations : sequence of (start, end) pairs
        Laughter patches. The start of the first pair and the end of the last
        pair delimit the sampled range, so the pairs are assumed to be in
        chronological order -- TODO confirm against episode.load_annotations.

    Returns
    -------
    xvals : list of two lists
        xvals[0] holds the sample times (divided by 1e3) that lie outside every
        patch, xvals[1] those that lie inside some patch.
    yvals : list of two lists
        yvals[0] contains one 0 per entry of xvals[0], yvals[1] one 1 per
        entry of xvals[1].
    mintime, maxtime
        Start of the first patch and end of the last patch, not rescaled.

    Raises
    ------
    IndexError
        If `annotations` is empty.
    """
    mintime = annotations[0][0]
    maxtime = annotations[-1][1]
    xvals = [[], []] # sample times: index 0 = outside a patch, index 1 = inside a patch
    yvals = [[], []] # matching 0/1 markers, same layout as xvals
    step = (maxtime-mintime)/5e2 # split into 500 equally spaced out sample points
    for time in np.arange(mintime, maxtime, step):
        # Test membership directly; a patch is treated as half-open [start, end).
        # The previous "nearest end vs. nearest start" comparison mislabelled
        # every sample inside the first patch, because no end time had passed
        # yet and the lookup fell through to the last patch.
        inside = int(any(start <= time < end for start, end in annotations))
        xvals[inside].append(time/1e3)
        yvals[inside].append(inside)
    return xvals, yvals, mintime, maxtime

In [None]:
# one thin strip per episode: blue dots at sampled times with canned laughter, red dots elsewhere
episode_names = ['friends-s02-e{:0>2d}'.format(i) for i in range(1, 5)]
episode_names = episode_names + ['friends-s03-e09']

for epname in episode_names:

    this_annotations = episode.load_annotations(epname)['laughter']
    xvals, yvals, mintime, maxtime = get_patch_plot_data(this_annotations)
    quiet_times, laugh_times = xvals
    quiet_marks, laugh_marks = yvals

    plt.figure(figsize=(16,.8))
    plt.plot(laugh_times, laugh_marks, 'b.', label='canned laughter')
    plt.plot(quiet_times, quiet_marks, 'r.', label='no canned laughs')
    # the right x-limit is 20% past the last patch end (presumably to leave room
    # for the legend); the y-range is fixed to [-6, 6]
    plt.axis([None, maxtime/1e3*1.2, -6, 6])
    plt.legend()
    plt.title(epname)

# show every figure created in the loop
plt.show()