In [12]:
import time
import librosa
import pandas as pd
from sklearn.externals import joblib
from sklearn.neural_network import MLPClassifier

In [13]:
# Step between consecutive predictions, expressed as a fraction of the sample rate:
# get_total_laugh and to_srt both advance int(sr * SHIFT_FACTOR) samples per prediction.
SHIFT_FACTOR = 1 / 5

In [14]:
def read_meta(display_id, meta_path='meta.csv'):
    """Look up summary metadata for one entry of the metadata CSV.

    Parameters
    ----------
    display_id
        Value matched against the ``display_id`` column.
    meta_path : str, optional
        CSV file to read; defaults to ``'meta.csv'``. The file is re-read on
        every call.

    Returns
    -------
    tuple or None
        ``(title, views_per_like, view_count, like_count, duration)`` taken
        from the first matching row, where ``views_per_like`` is
        ``view_count / like_count`` rounded to 2 decimals. ``None`` when no
        row matches ``display_id``.
    """
    meta = pd.read_csv(meta_path)
    info = meta[meta['display_id'] == display_id]
    if info.empty:
        return None

    def first(column):
        # Read through the column (not the row) so each value keeps its column dtype.
        return info[column].values[0]

    view_count = first('view_count')
    like_count = first('like_count')
    # NOTE(review): a like_count of 0 is not guarded here; confirm how callers
    # want that case reported before adding special handling.
    views_per_like = view_count / like_count

    return (
        first('title'),
        round(views_per_like, 2),
        view_count,
        like_count,
        first('duration'),
    )

In [15]:
def get_total_laugh(y_preds, sr, shift_factor=None):
    """Return the total duration, in seconds, of the laugh runs in ``y_preds``.

    Each prediction advances time by ``int(sr * shift_factor)`` samples. A run
    starts at the first prediction equal to 1 and is closed by the next
    prediction equal to 0; the closing frame's own step is included in the run
    (``stop = t + shift``). Runs shorter than 1 second are ignored.

    A run that is still open when the predictions end is closed as if one more
    non-laugh (0) prediction followed, so trailing laughter is counted with the
    same length convention as any other run.

    Parameters
    ----------
    y_preds : iterable
        Per-step predictions; 1 marks laughter, 0 marks no laughter.
    sr : number
        Sample rate used to convert sample offsets to seconds.
    shift_factor : float, optional
        Fraction of ``sr`` to advance per prediction. Defaults to the
        module-level ``SHIFT_FACTOR``.

    Returns
    -------
    number
        Sum of the durations of all runs lasting at least 1 second
        (``0`` when there are none).
    """
    if shift_factor is None:
        shift_factor = SHIFT_FACTOR
    shift = int(sr * shift_factor)

    total_laugh = 0
    start = None  # sample offset where the currently open run began, if any
    t = 0

    # The appended 0 is a sentinel that closes a run left open at end of input.
    for y_pred in list(y_preds) + [0]:
        if start is None:
            if y_pred == 1:
                start = t
        elif y_pred == 0:
            duration = (t + shift - start) / sr
            if duration >= 1:
                total_laugh += duration
            start = None
        t += shift

    return total_laugh

In [16]:
def _srt_timestamp(seconds):
    """Format a non-negative number of seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
    total_ms = int(round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    secs, millis = divmod(remainder, 1000)
    return '{:02d}:{:02d}:{:02d},{:03d}'.format(hours, minutes, secs, millis)


def to_srt(outfile, y_preds, sr, shift_factor=None):
    """Write the laugh runs found in ``y_preds`` to ``outfile + '.srt'``.

    Each prediction advances time by ``int(sr * shift_factor)`` samples. A run
    starts at the first prediction equal to 1 and is closed by the next
    prediction equal to 0, with the closing frame's own step included
    (``stop = t + shift``). Every run lasting at least 1 second becomes one
    numbered ``[Laugh]`` cue; shorter runs are skipped.

    A run still open when the predictions end is closed as if one more
    non-laugh (0) prediction followed, so trailing laughter also gets a cue.
    Cue times keep millisecond precision, because the step between
    predictions is normally a fraction of a second.

    Parameters
    ----------
    outfile : str
        Output path without extension; ``'.srt'`` is appended.
    y_preds : iterable
        Per-step predictions; 1 marks laughter, 0 marks no laughter.
    sr : number
        Sample rate used to convert sample offsets to seconds.
    shift_factor : float, optional
        Fraction of ``sr`` to advance per prediction. Defaults to the
        module-level ``SHIFT_FACTOR``.
    """
    if shift_factor is None:
        shift_factor = SHIFT_FACTOR
    shift = int(sr * shift_factor)

    subtitles = []
    start = None  # sample offset where the currently open run began, if any
    t = 0

    # The appended 0 is a sentinel that closes a run left open at end of input.
    for y_pred in list(y_preds) + [0]:
        if start is None:
            if y_pred == 1:
                start = t
        elif y_pred == 0:
            stop = t + shift
            if (stop - start) / sr >= 1:
                subtitles.append("{}\n{} --> {}\n{}\n\n".format(
                    len(subtitles) + 1,
                    _srt_timestamp(start / sr),
                    _srt_timestamp(stop / sr),
                    "[Laugh]",
                ))
            start = None
        t += shift

    with open(outfile + '.srt', 'w') as f:
        f.write(''.join(subtitles))

In [17]:
def get_sample_rate(audio_file):
    """Return the native sample rate of ``audio_file``.

    Uses ``librosa.get_samplerate`` when the installed librosa provides it,
    which avoids decoding the whole file just to read its sample rate. On
    librosa versions without that function, falls back to loading the audio
    with ``sr=None`` (no resampling) and returning the rate reported by
    ``librosa.load``.
    """
    probe = getattr(librosa, 'get_samplerate', None)
    if probe is not None:
        return probe(audio_file)

    _, sr = librosa.load(audio_file, sr=None, mono=True)
    return sr

In [18]:
# Training data: the two CSV files stacked row-wise (each keeps its own row index).
tr = pd.concat(
    [pd.read_csv(csv_path) for csv_path in ("2aaM63uawjo.csv", "ao2LuQwi4is.csv")]
)

In [19]:
# Split the training frame into the target column and the remaining feature columns.
y_train = tr['label']
X_train = tr.drop(['label'], axis='columns')

In [21]:
# Train an MLP with a single hidden layer of 900 units and persist it to disk.
# hidden_layer_sizes is spelled as a one-element tuple: a bare `(900)` is just the int 900.
# random_state fixes the weight initialisation and the early-stopping validation
# split, so re-running the notebook reproduces the same model.
model = MLPClassifier(
    hidden_layer_sizes=(900,),
    early_stopping=True,
    random_state=42,
    verbose=False,
)
model.fit(X_train, y_train)
joblib.dump(model, 'model.pkl')

['model.pkl']

In [22]:
# Identifiers of the recordings to run the trained model on; each one needs both
# '<id>.csv' (features) and '<id>.m4a' (audio) to be readable from the working directory.
test_filenames = [
    'wvzLATU8M04',
    'jaOEY8iRo6c',
    'Q2LDobhGHm4',
    'NcAZdATqkxA',
    'e8mvLIXoIug',
    'WIVVb87JPog',
    'UpX_kurvAXM',
    'IAzZnHwmBfc',
    'EEFInk9wlzs',
]

# For each identifier: load its feature CSV, predict with the trained model, and
# write the predictions out as '<id>.srt' via to_srt.
for filename in test_filenames:
    print("Detecting laughs in '{}'".format(filename))
    X_test = pd.read_csv(filename + '.csv')
    # Sample rate of the audio file; to_srt uses it to convert prediction steps into times.
    ts_sr = get_sample_rate(filename + '.m4a')    
    y_preds  = model.predict(X_test)
    to_srt(filename, y_preds, ts_sr)
    

Detecting laughs in 'wvzLATU8M04'
Detecting laughs in 'jaOEY8iRo6c'
Detecting laughs in 'Q2LDobhGHm4'
Detecting laughs in 'NcAZdATqkxA'
Detecting laughs in 'e8mvLIXoIug'
Detecting laughs in 'WIVVb87JPog'
Detecting laughs in 'UpX_kurvAXM'
Detecting laughs in 'IAzZnHwmBfc'
Detecting laughs in 'EEFInk9wlzs'
