In [66]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (12,8)
import numpy as np
import pandas as pd

### For traditional ML, the ten 1-second embedding frames of each clip are flattened into a single feature vector before being fed to the classifier.

In [21]:
# Modified from data_generator used in Keras models
from tensorflow.keras.preprocessing.sequence import pad_sequences

def data_generator(batch_size, tfrecord, start_frac=0, end_frac=1, seed=None):
    '''
    Loads one shuffled batch of Audioset embeddings and boolean target labels from a tfrecord file.

    Despite the name (kept from the Keras version this was adapted from), this is NOT a
    generator: it reads the file once and returns a single (X, y) batch.

    batch_size: maximum number of examples to return; pass a value >= the number of records
                to get the whole (shuffled) data set in one call
    tfrecord: filestring of the tfrecord file to read
    start_frac: the starting point of the data set to use, as a fraction of total record length (used for CV)
    end_frac: the ending point of the data set to use, as a fraction of total record length (used for CV)
    seed: optional int. When given, the shuffle order is reproducible. When None (default),
          NumPy's global random state is used, exactly as before.

    Returns a tuple (X, y):
        X: float32 array of shape (n_examples, 10 * 128). Each row is the example's frames
           laid end to end; examples with fewer than 10 frames are zero-padded at the end and
           examples with more than 10 frames are truncated to the first 10.
        y: bool array of shape (n_examples,), True when the example carries label 66 or 67.
    '''
    # Imported locally because the notebook never binds the name `tf` at module level
    # (it only has a `from tensorflow.keras...` import), which made the original version
    # fail with NameError on a fresh kernel.
    import tensorflow as tf

    max_len = 10           # frames kept per example
    embedding_dim = 128    # values per frame
    laugh_labels = {66, 67}  # changed to labels for cheer and applause

    records = list(tf.compat.v1.python_io.tf_record_iterator(tfrecord))
    records = records[int(start_frac*len(records)):int(end_frac*len(records))]

    # Shuffle the record indices, then keep at most `batch_size` of them.
    if seed is None:
        order = np.random.permutation(len(records))
    else:
        order = np.random.RandomState(seed).permutation(len(records))
    order = order[:batch_size]

    # Pre-allocating with zeros gives the end-of-row padding for short examples for free.
    X = np.zeros((len(order), max_len * embedding_dim), dtype=np.float32)
    y = np.zeros(len(order), dtype=bool)

    for row, idx in enumerate(order):
        tf_seq_example = tf.train.SequenceExample.FromString(records[idx])

        example_labels = tf_seq_example.context.feature['labels'].int64_list.value
        y[row] = any(label in laugh_labels for label in example_labels)

        frames = tf_seq_example.feature_lists.feature_list['audio_embedding'].feature
        # Cap at max_len so every row has the same width; without the cap an over-long
        # example produced a ragged row and broke the final array construction.
        for i in range(min(len(frames), max_len)):
            start = i * embedding_dim
            # Each frame is stored as raw bytes; read them as uint8 and let the
            # assignment cast to float32.
            X[row, start:start + embedding_dim] = np.frombuffer(
                frames[i].bytes_list.value[0], np.uint8)

    return X, y

## Trying out LinearSVC

In [22]:
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report

In [64]:
# 12658 is the number of samples in the training subset, so one call returns all of them.
batch_size = 12658
X, y = data_generator(batch_size, 'unbal_laugh_speech_subset.tfrecord', 0, 1)
# Turn the boolean labels into plain 0/1 integers for the classifiers below.
y = [int(bool(is_positive)) for is_positive in y]

In [65]:
# Stratified 70/30 hold-out split. random_state=42 fixes the partition only for a given
# row order; data_generator is called without a seed and shuffles with NumPy's global
# random state, so the exact train/test membership can differ between kernel sessions.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42, stratify=y)

In [67]:
# Linear SVM baseline on the flattened embeddings. The features are the raw byte values
# cast to float32; no scaling step appears anywhere in this notebook.
# NOTE(review): LinearSVC may converge slowly on unscaled inputs — check for a
# ConvergenceWarning before trusting the scores.
clf = LinearSVC(random_state=0, tol=1e-5)
clf.fit(X_train, y_train)



LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=0, tol=1e-05, verbose=0)

In [68]:
# LinearSVC predictions for the 30% hold-out split created by train_test_split.
pred = clf.predict(X_test)

In [69]:
# Per-class precision / recall / F1 of the LinearSVC on the hold-out split (labels are 0/1 here).
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.88      0.90      0.89      1899
           1       0.89      0.87      0.88      1899

   micro avg       0.88      0.88      0.88      3798
   macro avg       0.88      0.88      0.88      3798
weighted avg       0.88      0.88      0.88      3798



In [76]:
# Load the eval subset in this cell. In the saved notebook X_eval / y_eval were only
# created by a later cell (the eval load in the CatBoost section), so running the
# notebook top to bottom on a fresh kernel failed here with NameError.
X_eval, y_eval = data_generator(batch_size, 'eval_laugh_speech_subset.tfrecord', 0, 1)

# Score the LinearSVC on the eval subset. y_eval is left as booleans, which is why the
# report rows are labelled False / True rather than 0 / 1.
svc_eval = clf.predict(X_eval)
print(classification_report(y_eval, svc_eval))

              precision    recall  f1-score   support

       False       0.84      0.89      0.86       120
        True       0.88      0.82      0.85       120

   micro avg       0.86      0.86      0.86       240
   macro avg       0.86      0.86      0.86       240
weighted avg       0.86      0.86      0.86       240



## Trying out CatBoost

In [32]:
import catboost as cab

In [57]:
# Gradient-boosted trees on the same flattened features. task_type='GPU' trains on the
# GPU (presumably requires a CUDA-capable device — confirm for your environment);
# verbose=300 prints the training metric every 300 iterations.
# NOTE(review): the target is binary (0/1) yet loss_function='MultiClass' is used.
# 'Logloss' is presumably the conventional binary objective — confirm before changing,
# since switching alters the trained model and the scores reported below.
model = cab.CatBoostClassifier(verbose=300, loss_function='MultiClass', task_type='GPU')
model.fit(X_train, y_train)

0:	learn: -0.6742237	total: 21.9ms	remaining: 21.8s
300:	learn: -0.1193711	total: 4.87s	remaining: 11.3s
600:	learn: -0.0838463	total: 9.62s	remaining: 6.39s
900:	learn: -0.0623176	total: 14.3s	remaining: 1.57s
999:	learn: -0.0569968	total: 15.9s	remaining: 0us


<catboost.core.CatBoostClassifier at 0x7fe178b0ab38>

In [58]:
# CatBoost predictions for the same 30% hold-out split used for the LinearSVC.
cab_pred = model.predict(X_test)

In [59]:
# Per-class precision / recall / F1 of CatBoost on the hold-out split (labels are 0/1 here).
print(classification_report(y_test, cab_pred))

              precision    recall  f1-score   support

           0       0.97      0.93      0.95      1899
           1       0.93      0.97      0.95      1899

   micro avg       0.95      0.95      0.95      3798
   macro avg       0.95      0.95      0.95      3798
weighted avg       0.95      0.95      0.95      3798



In [71]:
# Load the separate eval subset. batch_size (12658) is reused from the training load; it
# exceeds the 240 eval samples, so the slice inside data_generator returns all of them.
# y_eval is kept as booleans (unlike the training labels, which were converted to 0/1),
# so reports on this set show False / True as class names.
X_eval, y_eval = data_generator(batch_size, 'eval_laugh_speech_subset.tfrecord', 0, 1)

In [75]:
# Score the CatBoost model on the eval subset; y_eval is boolean, hence the False / True rows.
cab_eval = model.predict(X_eval)
print(classification_report(y_eval, cab_eval))

              precision    recall  f1-score   support

       False       0.89      0.95      0.92       120
        True       0.95      0.88      0.91       120

   micro avg       0.92      0.92      0.92       240
   macro avg       0.92      0.92      0.92       240
weighted avg       0.92      0.92      0.92       240

