In [1]:
#@title Install Software

!git clone https://github.com/jrgillick/laughter-detection.git

%cd laughter-detection/
!pip install tgt
!pip install pyloudnorm
!pip install praatio==3.8.0
!pip install tensorboardX==1.9

from google.colab import files

Cloning into 'laughter-detection'...
remote: Enumerating objects: 432, done.[K
remote: Counting objects: 100% (208/208), done.[K
remote: Compressing objects: 100% (108/108), done.[K
remote: Total 432 (delta 120), reused 153 (delta 91), pack-reused 224[K
Receiving objects: 100% (432/432), 85.84 MiB | 27.80 MiB/s, done.
Resolving deltas: 100% (228/228), done.
/content/laughter-detection
Collecting tgt
  Downloading tgt-1.4.4.tar.gz (21 kB)
Building wheels for collected packages: tgt
  Building wheel for tgt (setup.py) ... [?25l[?25hdone
  Created wheel for tgt: filename=tgt-1.4.4-py3-none-any.whl size=28929 sha256=536363d7c3448fad68502b2d0fce03c329d649b3739187c7443fef85144e3cff
  Stored in directory: /root/.cache/pip/wheels/23/26/00/05f728381a2620ac79029acb7eb117631a8d1046d0c603ab5e
Successfully built tgt
Installing collected packages: tgt
Successfully installed tgt-1.4.4
Collecting pyloudnorm
  Downloading pyloudnorm-0.1.0-py3-none-any.whl (9.3 kB)
Installing collected packages: p

# **GPU is not enabled by default**

In order to load the model without errors:

Go to Runtime > Change runtime type > Hardware accelerator > GPU

In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [3]:
#@title Setup and Load Model

import os, sys, pickle, time, librosa, argparse, torch, numpy as np, pandas as pd, scipy
from tqdm import tqdm
import tgt
sys.path.append('./utils/')
import laugh_segmenter
import models, configs
import dataset_utils, audio_utils, data_loaders, torch_utils
from tqdm import tqdm
from torch import optim, nn
from functools import partial
from distutils.util import strtobool

# Sampling rate handed to the inference dataset when it reads the audio.
sample_rate = 8000

model_path = 'checkpoints/in_use/resnet_with_augmentation'
config = configs.CONFIG_MAP['resnet_with_augmentation']

# Use the GPU when one is attached to the runtime; otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device {device}")

##### Load the Model

# The model class and its hyper-parameters all come from the selected config
# entry; dropout is set to 0.0 here.
model = config['model'](dropout_rate=0.0, linear_layer_size=config['linear_layer_size'], filter_sizes=config['filter_sizes'])
feature_fn = config['feature_fn']
model.set_device(device)

# Check for the checkpoint *file* that is about to be loaded, not merely its
# directory, so a missing or partially downloaded checkpoint is reported
# clearly before torch_utils tries to open it.
checkpoint_path = model_path + '/best.pth.tar'
if not os.path.isfile(checkpoint_path):
    raise FileNotFoundError(f"Model checkpoint not found at {checkpoint_path}")

torch_utils.load_checkpoint(checkpoint_path, model)
model.eval()  # switch to evaluation mode for inference

Using device cuda
training with dropout=0.0
Loading checkpoint at: checkpoints/in_use/resnet_with_augmentation/best.pth.tar
Loading checkpoint at step:  60600


In [10]:
# Recording to scan for laughter (a file under the /content/drive mount).
audio_path = "/content/drive/MyDrive/Depression_Detection/Without_Slicing/Depressed_People_Audio/AI_108_result.mp3"


In [11]:
#@title Choose settings and run Laughter Detection

#@markdown This will take a minute

threshold = 0.5 #@param {type:"slider", min:0.1, max:1.0, step:0.1}
min_length = 0.2 #@param {type:"slider", min:0.1, max:1.0, step:0.1}
save_to_audio_files = True #@param {type:"boolean"}
save_to_textgrid = False #@param {type:"boolean"}
output_dir = 'laughter_detection_output' #@param {type:"string"}

# A bare `import scipy` does not guarantee that the `io.wavfile` submodule is
# loaded; import it explicitly since it is used below to write the clips.
import scipy.io.wavfile

# Fail before the slow inference pass, not after it, when results are
# supposed to be saved but no directory was given (None or empty string).
if (save_to_audio_files or save_to_textgrid) and not output_dir:
    raise ValueError("Need to specify an output directory to save audio files or TextGrids")


##### Load the audio file and features

inference_dataset = data_loaders.SwitchBoardLaughterInferenceDataset(
    audio_path=audio_path, feature_fn=feature_fn, sr=sample_rate)

collate_fn = partial(audio_utils.pad_sequences_with_labels,
                     expand_channel_dim=config['expand_channel_dim'])

inference_generator = torch.utils.data.DataLoader(
    inference_dataset, num_workers=4, batch_size=8, shuffle=False, collate_fn=collate_fn)


##### Make Predictions

probs = []
# Inference only: disable autograd so no graph is built for each batch.
with torch.no_grad():
    for model_inputs, _ in tqdm(inference_generator):
        x = torch.from_numpy(model_inputs).float().to(device)
        preds = model(x).cpu().detach().numpy().squeeze()
        if preds.ndim == 0:
            # squeeze() collapses a one-element batch to a 0-d array.
            probs.append(float(preds))
        else:
            probs.extend(preds)
probs = np.array(probs)

file_length = audio_utils.get_audio_length(audio_path)

# Number of predictions per unit of audio length, as expected by the segmenter.
fps = len(probs)/float(file_length)

probs = laugh_segmenter.lowpass(probs)
instances = laugh_segmenter.get_laughter_instances(probs, threshold=threshold, min_length=float(min_length), fps=fps)

print(); print("found %d laughs." % (len (instances)))

if len(instances) > 0:
    wav_paths = []
    maxv = np.iinfo(np.int16).max

    if save_to_audio_files:
        # os.makedirs avoids passing the user-supplied name through a shell.
        os.makedirs(output_dir, exist_ok=True)
        # Reload the audio at 44.1 kHz only when clips are actually written.
        full_res_y, full_res_sr = librosa.load(audio_path, sr=44100)
        for index, instance in enumerate(instances):
            segment = laugh_segmenter.cut_laughter_segments([instance], full_res_y, full_res_sr)
            wav_path = output_dir + "/laugh_" + str(index) + ".wav"
            # Scale by the int16 maximum and cast before writing the WAV file.
            scipy.io.wavfile.write(wav_path, full_res_sr, (segment * maxv).astype(np.int16))
            wav_paths.append(wav_path)
        print(laugh_segmenter.format_outputs(instances, wav_paths))

    if save_to_textgrid:
        # The directory may not exist yet when audio export is switched off.
        os.makedirs(output_dir, exist_ok=True)
        tg = tgt.TextGrid()
        laughs_tier = tgt.IntervalTier(name='laughter', objects=[
            tgt.Interval(inst[0], inst[1], 'laugh') for inst in instances])
        tg.add_tier(laughs_tier)
        fname = os.path.splitext(os.path.basename(audio_path))[0]
        textgrid_path = os.path.join(output_dir, fname + '_laughter.TextGrid')
        tgt.write_to_file(tg, textgrid_path)

        print('Saved laughter segments in {}'.format(textgrid_path))

100%|██████████| 8472/8472 [06:45<00:00, 20.89it/s]



found 1 laughs.
[{'filename': 'laughter_detection_output/laugh_0.wav', 'start': 1111.5228909973587, 'end': 1112.1742920804497}]


In [12]:
#@title Listen to the detected laughter

import IPython
from IPython.display import Audio

# Ask librosa for the audio files under the output directory, then show one
# playback widget per file in the order they were returned.
detected_laughs = librosa.util.find_files(output_dir)
for laugh in detected_laughs:
    player = Audio(laugh)
    IPython.display.display(player)