Modules needed for this script (trying to keep as minimal as possible).

In [None]:
import numpy as np
import sklearn.linear_model
from matplotlib import pyplot as plt
from scipy.io import wavfile
from scipy import signal

import yaml

Load in the audio data and extract a spectrogram of the entire episode.

In [None]:
# Read the episode audio: `sr` is the sample rate and `wav` the samples.
# NOTE(review): the spectrogram below is computed along the last axis, so this
# presumably expects a mono recording -- confirm before using multi-channel files.
sr, wav = wavfile.read("../wav/friends-s03-e09.wav")

# Spectrogram of the whole episode from 256-sample segments: `f` holds the
# frequency bins, `t` the segment times and `Sxx` one column per entry of `t`.
f, t, Sxx = signal.spectrogram(wav, fs=sr, nperseg=256)

Load the annotated laughter data as a numpy array (two columns:
start and stop time in milliseconds). Create an array y_vals that
indicates whether each spectrogram time step contains laughter (1) or not (0).

In [None]:
with open("../data/friends-s03-e09_laughter.yml", 'r') as file:
    # safe_load is sufficient for plain annotation data and never constructs
    # arbitrary Python objects. A bare yaml.load(file) without an explicit Loader
    # raises TypeError on PyYAML >= 6 and warns on 5.x.
    # Each record is (start, stop, <ignored third field>); keep the two times only.
    laughter = np.array([[start_ms, stop_ms] for start_ms, stop_ms, _ in yaml.safe_load(file)])

# One label per spectrogram time step, initially "no laughter" everywhere.
y_vals = np.zeros(t.shape)

# Annotations are in milliseconds; convert to the time base of `t` and add 1 to
# every time step that falls inside the (inclusive) interval.
for start_s, stop_s in laughter / 1000:
    y_vals += (t >= start_s) & (t <= stop_s)

# A maximum above 1 would mean two annotated intervals overlap; a maximum of 0
# would mean no time step was labelled at all.
assert np.max(y_vals) == 1, "laughter intervals overlap or none matched the spectrogram times"

Construct training data from the spectrogram and the hand-labeled laughter annotations. Right
now, we use the first half of the episode to train and the second half to test.

In [None]:
# Time steps as rows, frequency bins as columns; labels stay aligned with the rows.
X = Sxx.transpose()
y = y_vals

# First half of the time steps trains the model, the second half validates it.
# The training slice starts at row 0: the earlier `1:` start silently dropped
# the first time step from the training data.
split_idx = X.shape[0] // 2

X_train = X[:split_idx, :]
y_train = y[:split_idx]
X_valid = X[split_idx:, :]
y_valid = y[split_idx:]

Build an l1-penalized logistic regression model with the training data. Report some
common metrics on the output.

In [None]:
# Fit an l1-penalised logistic regression (C=0.01) on the training half only.
model = sklearn.linear_model.LogisticRegression(
    penalty="l1", C=0.01, solver='liblinear'
).fit(X_train, y_train)

# Predicted probabilities for every time step of the episode (training and
# validation halves alike); column 1 is kept as the laughter probability.
yhat_logit = model.predict_proba(X)[:, 1]

# Validation accuracy, shown next to the share of positives in the labels and
# in the predictions so it can be compared against always predicting one class.
print("Model accuracy: {0:f}".format(model.score(X_valid, y_valid)))
print("Proportion of 1's in validation data: {0:f}".format(np.mean(y_valid)))
print("Proportion of 1's in validation pred: {0:f}".format(np.mean(model.predict(X_valid))))

Create a function to plot the model output for a specific period of time in the show over
the spectrogram.

In [None]:
def plot_predictions(start, end, yhat, times, Sxx, f, size=1, y_true=None):
    """Plot smoothed predictions and annotations over a window of the spectrogram.

    Args:
        start: Start time in seconds (same units as `times`).
        end: End time in seconds (same units as `times`).
        yhat: Array of predicted probabilities, one per entry of `times`.
        times: Array of the time codes, one per spectrogram column. These are
            compared directly against `start` and `end`, so they must share
            their units.
        Sxx: Spectrogram to plot, indexed as (frequency, time).
        f: Array of frequency codes.
        size: Size of the smoothing window; set to 1 (default) for no smoothing.
        y_true: Optional array of 0/1 annotations, one per entry of `times`.
            When omitted, the notebook-level variable `y` is used, which is
            what this function read before the parameter existed.

    Output:
        Produces a plot with the spectrogram (drawn as log10(1 + Sxx)), the
        smoothed predicted probabilities given as a red dashed line, and the
        annotated laughter as a blue dashed line. The probability scale sets
        a probability of 1 to 20000 Hz.
    """
    if y_true is None:
        # Backward-compatible fallback to the global label vector.
        y_true = y

    # Select the time steps in (start, end]. The bounds are used as given: the
    # earlier `[start, end] * 1000` repeated the list 1000 times instead of
    # scaling the values, so no unit conversion was ever applied.
    window = (times > start) & (times <= end)

    # Moving average computed over the full series, then windowed, so that
    # values near the window edges are smoothed with their real neighbours.
    kernel = np.full(size, 1 / size)
    yhat_smooth = np.convolve(yhat, kernel, 'same')

    # produce the plot
    plt.figure(figsize=(15, 8))
    plt.pcolormesh(times[window], f, np.log10(1 + Sxx[:, window]))
    plt.plot(times[window], yhat_smooth[window] * 20000, 'r--')
    plt.plot(times[window], y_true[window] * 20000, 'b--')
    # The axis itself is linear in Hz; only the colour values are log-scaled.
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [sec]')

Modify the arguments of the function call here to look at the model's performance over other parts of the episode.

In [None]:
# Inspect seconds 300-320 of the episode with a 30-step smoothing window.
plot_predictions(
    start=300,
    end=320,
    yhat=yhat_logit,
    times=t,
    Sxx=Sxx,
    f=f,
    size=30,
)