# Audio Preprocessing

This notebook takes in the raw audio files for 20 episodes, processes them, and stores the result as X. It then takes the laughter "labels" that I hand-made for each episode and stores them as y.

In [None]:
# Install pydub, which provides the AudioSegment class imported in the next cell.
!pip install pydub

In [None]:
from pydub import AudioSegment
import os
from scipy.io import wavfile
import matplotlib.pyplot as plt
import operator
import numpy as np
from sklearn.model_selection import train_test_split
import shutil

### Checking kaggle notebook folder for input data

In [None]:
# Show the entries found directly under ../input/
inputcontents = os.listdir("../input/")
print(inputcontents)

### Deciding how long each clip should be, how many timesteps it should be split into, and how much overlap each clip should have. 

In [None]:
# Clip configuration.
# predtimesteps is dictated by the model; cliplen is the clip length in ms.
predtimesteps = 847
cliplen = 10000
lag = 0  # number of timesteps of lag; this dictates the overlap needed between clips

# Length of a single predicted timestep in ms.
mspertimestep = cliplen / predtimesteps

# Overlap is the lag expressed in ms, plus 1, rounded to the nearest integer.
# Note that the "+ 1" makes the overlap 1 ms even when lag is 0.
overlap = round((mspertimestep * lag) + 1)

print("overlap in ms is " + str(overlap))
print("length of each timestep in ms is " + str(mspertimestep))

### Defining folders

In [None]:
audiofolder = '/kaggle/input/labeledepisodes/'
laughlabelsfolder = '/kaggle/input/laughlabels/'
clipsfolder = '/kaggle/working/clips/'

### Creating 10-sec wav clips out of episodes

In [None]:
# Start from an empty clips folder: delete any existing one (and every clip in it), then recreate it.
if os.path.exists(clipsfolder):
    shutil.rmtree(clipsfolder)
os.makedirs(clipsfolder)

# Distance in ms between the start of one clip and the start of the next.
clipstep = cliplen - overlap

for filename in os.listdir(audiofolder):
    if filename.startswith('.'):
        continue  # skip hidden files

    # Season and episode are taken from fixed character positions in the file name.
    season = filename[9:11]
    episode = filename[12:14]
    episodeaudio = AudioSegment.from_file(audiofolder + filename)
    episodelen = len(episodeaudio)

    startcut = 0
    count = 1
    while startcut < episodelen:
        endcut = min(startcut + cliplen, episodelen)
        clip = episodeaudio[startcut:endcut]
        if len(clip) < cliplen:
            # What remains of the episode is shorter than a full clip, so it is dropped.
            break

        # The clip name encodes season, episode, clip number and the begin/end offsets.
        clipname = "s" + str(season) + "e" + str(episode) + "n" + str(count) + "beg" + str(startcut) + "end" + str(endcut) + ".wav"
        clip.export(clipsfolder + clipname, format="wav")

        # advance to the next clip
        startcut += clipstep
        count += 1

### Sorting all clips by episode and beginning timestep

In [None]:
# Collect [season, episode, begtime, endtime, filename] for every clip, parsed from
# the clip file name, then order the clips by season, episode and begin time.
listtosort = []

for filename in os.listdir(clipsfolder):
    if filename.startswith('.'):
        continue  # skip hidden files

    begmarker = filename.find("beg")
    endmarker = filename.find("end")
    extensiondot = filename.find(".")

    listtosort.append([
        int(filename[1:3]),                          # season
        int(filename[4:6]),                          # episode
        int(filename[begmarker + 3:endmarker]),      # begin time, between "beg" and "end"
        int(filename[endmarker + 3:extensiondot]),   # end time, between "end" and the first "."
        filename,
    ])

sortedclips = sorted(listtosort, key=lambda clipinfo: (clipinfo[0], clipinfo[1], clipinfo[2]))

### Preprocessing X

Turning 10-second clip into spectrogram data. Splitting clip into defined number of timesteps and frequencies. Getting each clip into the right shape to be fed into the model. 

In [None]:
# Creating the preX numpy array of shape
# (number of clips, number of spectrogram timesteps, number of frequencies).
# Each clip's spectrogram is transposed so that its first axis is time.

if not sortedclips:
    # Without any clip preX would never be created and a later cell would fail with a NameError.
    raise RuntimeError("no clips found in sortedclips; cannot build preX")

sampleclip = 40  # index of the clip whose details and spectrogram are shown as a sanity check

for count, info in enumerate(sortedclips):
    clipfilename = info[4]
    clipfilepath = clipsfolder + clipfilename
    FS, data = wavfile.read(clipfilepath)  # read wav file: sample rate and samples

    # Mono data is used as-is; for any multi-channel clip only the first channel is used.
    # Handling every channel count here means a clip can never silently reuse the
    # spectrogram computed for the previous clip.
    samples = data if data.ndim == 1 else data[:, 0]

    # plt.specgram draws onto the current figure, so clear it first; otherwise one
    # image per clip accumulates on the same axes.
    plt.clf()
    pxx, freqs, bins, im = plt.specgram(samples, Fs=FS, NFFT=512, noverlap=0)  # building plot and spectrogram

    pxxtransposed = pxx.T
    Tx = pxx.shape[1]  # represents number of time steps in spectrogram
    n_freq = pxx.shape[0]  # represents number of frequencies in spectrogram

    if count == sampleclip:
        print("file duration is " + str(len(data) / float(FS)))
        print("filename is " + str(clipfilename))
        print("number of channels is " + str(data.ndim))  # this is data.ndim of the file as read
        print("num of time steps in spectrogram is " + str(Tx))
        print("num of frequencies in spectrogram is " + str(n_freq))
        print("shape of pxxtransposed is " + str(pxxtransposed.shape))
        plt.show()  # plot the spectrogram

    if count == 0:
        # Allocate preX once, sized from the first clip's spectrogram.
        preX = np.zeros((len(sortedclips), Tx, n_freq))
        print("preX zeros shape is " + str(preX.shape))

    preX[count] = pxxtransposed
    # The clip file is no longer needed once its spectrogram is stored.
    os.remove(clipfilepath)




### Preprocessing laugh labels

* Creating dictionary of laugh labels for hand-labeled episodes.

In [None]:
# Build labeldict. Keys are the first four characters of each label file name
# (season + episode); values are lists of numeric rows read from that file, with
# every number multiplied by 1000 (converting to ms).
labeldict = {}

for filename in os.listdir(laughlabelsfolder):
    if filename.startswith('.'):
        continue  # skip hidden files

    season = filename[0:2]
    episode = filename[2:4]
    episodelabels = []

    with open(laughlabelsfolder + filename) as f:
        for line in f:
            if '\\' in line:
                continue  # lines containing a backslash are not label rows

            # converting to ms
            pair = [float(i) * 1000 for i in line.split()]
            if not pair:
                # Blank line: storing an empty row would make the later
                # pair[0] / pair[1] lookups fail with an IndexError.
                continue
            episodelabels.append(pair)

    labeldict[season + episode] = episodelabels
print(labeldict)

### Creating Y numpy array of shape (number of clips, number of predicted timesteps, 1)

In [None]:
# For every clip, build one 0/1 label per predicted timestep and append that list
# to the clip's own entry in sortedclips (it becomes element 5 of the entry).
for info in sortedclips:
    season, episode, begtime, endtime = info[0], info[1], info[2], info[3]

    # labeldict keys are zero-padded two-digit season + episode strings
    dictseason = str(season).zfill(2)
    dictepisode = str(episode).zfill(2)
    workinglabels = labeldict[dictseason + dictepisode]

    increment = (endtime - begtime) / predtimesteps
    cliplabels = []
    for count in range(predtimesteps):
        # the first timestep is begtime + increment, so t-0 itself is skipped
        timestep = begtime + increment + (count * increment)
        # label is 1 when the timestep falls inside at least one labelled interval
        matches = [pair for pair in workinglabels if pair[0] <= timestep <= pair[1]]
        cliplabels.append(1 if matches else 0)

    info.append(cliplabels)
print(sortedclips)

### Add lag to y-values (if any)

The original thought was that some lag between the laughter and the laughter label could help the model learn faster. This didn't seem to be the case, so we are not using any lag.

In [None]:
# Delay every clip's labels by `lag` timesteps: the last `lag` labels are dropped
# and `lag` zeros are put in front.
sortedlagclips = []

for info in sortedclips:
    # "-lag or None" makes lag == 0 keep the whole list instead of slicing it to nothing
    trimmed = info[5][:-lag or None]
    afterlag = ([0] * lag) + trimmed
    # same first five fields as the original entry, with the lagged labels as element 5
    sortedlagclips.append(info[:5] + [afterlag])
print(sortedlagclips)

### Create y numpy array to be fed to model

In [None]:
# Gather each clip's label list (element 5), stack them into one array along
# axis 0, then add a trailing axis of size 1.
preYlist = [info[5] for info in sortedlagclips]
preY = np.expand_dims(np.stack(preYlist, axis=0), axis=2)
print(preY.shape)

### Saving X and y locally in Kaggle (to be manipulated in the next notebook)

In [None]:
prexfolder = '/kaggle/working/prex/'
preyfolder = '/kaggle/working/prey/'

print("shape of preX is " + str(preX.shape))
print("shape of preY is " + str(preY.shape))

# Remove the clips folder (and all clips inside) along with any previous output folders.
for stalefolder in (clipsfolder, prexfolder, preyfolder):
    if os.path.exists(stalefolder):
        shutil.rmtree(stalefolder)

# Recreate the two output folders empty.
for outfolder in (prexfolder, preyfolder):
    os.makedirs(outfolder)

# np.save appends ".npy" to a file name that does not already end with it.
np.save(prexfolder + 'prex.txt', preX)
np.save(preyfolder + 'prey.txt', preY)
