<img src="./slides/Slide1.JPG">

<img src="./slides/Slide2.JPG">

# Data Processing

In [3]:
# importing modules
import threading

import librosa
import numpy as np
import pandas as pd
import sklearn
import sklearn.preprocessing

In [2]:
def csvtodf(path):
    """Read a tab-separated event list into a pandas DataFrame.

    The file is read without a header row and its columns are named
    'file', 'start', 'stop' and 'label'. Blank lines are not skipped;
    they are kept as rows of missing values.

    Parameters
    ----------
    path : str
        Location of the tab-separated file.

    Returns
    -------
    pandas.DataFrame
        One row per line of the file.
    """
    column_names = ['file', 'start', 'stop', 'label']
    return pd.read_csv(
        path,
        sep="\t",
        names=column_names,
        skip_blank_lines=False,
    )

In [3]:
# Load the training event lists: df1 = babycry, df2 = glassbreak, df3 = gunshot.
df1, df2, df3 = (
    csvtodf(csv_path)
    for csv_path in (
        "./train/csv/event_list_devtrain_babycry.csv",
        "./train/csv/event_list_devtrain_glassbreak.csv",
        "./train/csv/event_list_devtrain_gunshot.csv",
    )
)
df1.head()

Unnamed: 0,file,start,stop,label
0,mixture_devtrain_babycry_000_07a75692b15446e9f...,20.463606,23.503606,babycry
1,mixture_devtrain_babycry_001_6c193b823c29f4ee3...,20.757498,22.897498,babycry
2,mixture_devtrain_babycry_002_4f125ae66e5687404...,,,
3,mixture_devtrain_babycry_003_2c5dc0c186ac0a771...,,,
4,mixture_devtrain_babycry_004_2e748c922c0762182...,,,


In [4]:
def cleaner(df):
    """Set the label to "none" on every row whose start time is missing.

    The frame is modified in place: wherever the column at position 1
    (the start time) is missing, the column at position 3 (the label)
    is overwritten with the string "none". Other rows are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the start time at column position 1 and the label at
        column position 3.

    Returns
    -------
    None
    """
    # isna() handles NaN, None and object-dtype columns; np.isnan raises
    # TypeError on non-numeric values.
    no_event = df.iloc[:, 1].isna().to_numpy()
    # One vectorised positional write instead of a Python loop of scalar
    # iloc assignments; skipped entirely when there is nothing to relabel.
    if no_event.any():
        df.iloc[no_event, 3] = "none"

In [5]:
# cleaner works in place, so each per-class frame is simply passed through it.
for frame in (df1, df2, df3):
    cleaner(frame)

In [6]:
# Stack the three per-class frames into one frame with a fresh 0..n-1 index,
# then remove the per-class names, which are no longer needed.
df = pd.concat((df1, df2, df3), ignore_index=True)
del df1
del df2
del df3
df

Unnamed: 0,file,start,stop,label
0,mixture_devtrain_babycry_000_07a75692b15446e9f...,20.463606,23.503606,babycry
1,mixture_devtrain_babycry_001_6c193b823c29f4ee3...,20.757498,22.897498,babycry
2,mixture_devtrain_babycry_002_4f125ae66e5687404...,,,none
3,mixture_devtrain_babycry_003_2c5dc0c186ac0a771...,,,none
4,mixture_devtrain_babycry_004_2e748c922c0762182...,,,none
5,mixture_devtrain_babycry_005_02c94942f42f96cdc...,,,none
6,mixture_devtrain_babycry_006_a9196cb1f180ff3c9...,,,none
7,mixture_devtrain_babycry_007_63be1137309c3f059...,,,none
8,mixture_devtrain_babycry_008_1c6b876d43bd46e08...,28.369433,29.389433,babycry
9,mixture_devtrain_babycry_009_fe63017a3f5302e60...,26.828925,28.708925,babycry


In [7]:
# multithreaded operation to load the audio files and convert them into numpy arrays
%%time
# One result list per worker thread. The names l1..l8 are kept because they
# are concatenated in this order afterwards to rebuild the row order of df.
l1, l2, l3, l4, l5, l6, l7, l8 = [], [], [], [], [], [], [], []
buckets = [l1, l2, l3, l4, l5, l6, l7, l8]


def arraymaker(a,b,l):
    """Load the audio files of rows a..b-1 of the global df and append them to l.

    Parameters
    ----------
    a, b : int
        Half-open range [a, b) of positional row indices into df.
    l : list
        Output list; one waveform is appended per row, in row order.
    """
    for i in range(a,b):
        # Column 0 of df holds the wav file name. librosa.load also returns
        # the sampling rate, which is not used here.
        sound,_=librosa.load("./train/trainset/"+df.iloc[i,0])
        l.append(sound)


# Split the rows into contiguous ranges of equal size; the last worker also
# takes the remainder when len(df) is not a multiple of the worker count.
A=len(df)//len(buckets)
starts=[k*A for k in range(len(buckets))]
stops=starts[1:]+[len(df)]

workers=[
    threading.Thread(target=arraymaker,args=(start,stop,bucket))
    for start,stop,bucket in zip(starts,stops,buckets)
]
for worker in workers:
    worker.start()
for worker in workers:
    worker.join()

# An exception raised inside a thread does not propagate to this cell, so
# verify that every row produced a waveform before reporting success.
loaded=sum(len(bucket) for bucket in buckets)
if loaded!=len(df):
    raise RuntimeError(
        "loaded %d of %d audio files; see the worker traceback above" % (loaded,len(df))
    )

print("done")

done
CPU times: user 42min 13s, sys: 2min 40s, total: 44min 54s
Wall time: 8min 15s


In [8]:
# Join the per-thread lists in thread order (which is the row order of df),
# store them as the "music" column and drop the "file" column.
l = [*l1, *l2, *l3, *l4, *l5, *l6, *l7, *l8]
df = (
    df.assign(music=l)
    .infer_objects()
    .drop(columns="file")
)
df.head()


Unnamed: 0,start,stop,label,music
0,20.463606,23.503606,babycry,"[0.001950416, 0.0038487082, 0.0039996826, 0.00..."
1,20.757498,22.897498,babycry,"[0.0015649316, 0.0023856005, 0.0022190004, 0.0..."
2,,,none,"[-0.00021317093, -0.0003537956, -0.00051814277..."
3,,,none,"[-0.0096916035, -0.014332312, -0.013025637, -0..."
4,,,none,"[-7.560667e-05, -2.443353e-05, 1.2316083e-05, ..."


# Feature Extraction
 <ul>
    <li> computing MFCCs (mel-frequency cepstral coefficients) from the processed audio files</li>
    <li> storing the MFCCs in a pickle file that is used to train or test the model</li>
 </ul>

In [9]:
# multithreaded operation to convert audio files to mfcc
%%time
# Sampling rate handed to the MFCC computation; 22050 Hz is the default
# target rate of librosa.load.
rate=22050

# One result list per worker thread; concatenated in this order further down
# so that the features line up with the rows of df.
l1, l2, l3, l4, l5, l6, l7, l8 = [], [], [], [], [], [], [], []
buckets = [l1, l2, l3, l4, l5, l6, l7, l8]


def fun(a,b,l):
    """Compute scaled MFCCs for rows a..b-1 of the global df and append them to l.

    Parameters
    ----------
    a, b : int
        Half-open range [a, b) of positional row indices into df.
    l : list
        Output list; one scaled MFCC matrix is appended per row, in row order.
    """
    for i in range(a,b):
        # Column 3 of df is "music", which holds the waveform at this point.
        mfccs=librosa.feature.mfcc(y=df.iloc[i,3],sr=rate,n_mfcc=40)
        # axis=1 standardises each row of the MFCC matrix (one coefficient
        # over time) to zero mean and unit variance.
        mfccscaled=sklearn.preprocessing.scale(mfccs,axis=1)
        l.append(mfccscaled)


# Split the rows into contiguous ranges of equal size; the last worker also
# takes the remainder when len(df) is not a multiple of the worker count.
A=len(df)//len(buckets)
starts=[k*A for k in range(len(buckets))]
stops=starts[1:]+[len(df)]

workers=[
    threading.Thread(target=fun,args=(start,stop,bucket))
    for start,stop,bucket in zip(starts,stops,buckets)
]
for worker in workers:
    worker.start()
for worker in workers:
    worker.join()

# An exception raised inside a thread does not propagate to this cell, so
# verify that every row produced a feature matrix before reporting success.
computed=sum(len(bucket) for bucket in buckets)
if computed!=len(df):
    raise RuntimeError(
        "computed %d of %d MFCC matrices; see the worker traceback above" % (computed,len(df))
    )

print("done")

l=l1+l2+l3+l4+l5+l6+l7+l8
print(df.dtypes)
print(type(df.iloc[0,3]))
# Replace the waveforms in "music" with the scaled MFCC matrices.
df=df.assign(music=l)
df=df.infer_objects()
# NOTE: despite the ".csv" extension this file is a pickle and must be read
# with pd.read_pickle. The path is left unchanged for existing consumers.
df.to_pickle("./train/featurised.csv") # storing mfcc in a pickle file that will be used in model training
df.head()

done
start    float64
stop     float64
label     object
music     object
dtype: object
<class 'numpy.ndarray'>
CPU times: user 16min 46s, sys: 7.9 s, total: 16min 54s
Wall time: 2min 46s


Unnamed: 0,start,stop,label,music
0,20.463606,23.503606,babycry,"[[0.7639337425550246, 0.6550242365052964, 0.42..."
1,20.757498,22.897498,babycry,"[[-0.543597299526563, -0.9428291340167716, -1...."
2,,,none,"[[-0.1903899039644559, 0.1827240966906672, 0.2..."
3,,,none,"[[1.5529618435211259, 1.3576098636501042, 1.47..."
4,,,none,"[[1.3362740413911116, 1.4712855405769114, 1.28..."


In [4]:
%matplotlib notebook
import matplotlib.pyplot as plt
import librosa.display
import sklearn

## Amplitude vs Time plot for an audio file

In [5]:
# Load one training clip; librosa.load returns the waveform and its sampling rate.
y,rate=librosa.load("./train/trainset/mixture_devtrain_babycry_000_07a75692b15446e9fbf6cc3afaf96097.wav")
plt.figure(figsize=(10,5))
# librosa.display.waveplot was removed in librosa 0.10 in favour of waveshow
# (available since 0.8.1); use whichever the installed version provides.
plot_waveform=getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
# Pass the rate explicitly so the time axis does not rely on the plotting default.
plot_waveform(y,sr=rate)

<IPython.core.display.Javascript object>

<matplotlib.collections.PolyCollection at 0x7f96dd29dcc0>

## unscaled MFCC for the corresponding audio file

In [6]:
# Draw the raw 40-coefficient MFCC matrix of the waveform y against time.
plt.figure(figsize=(10, 5))
mfcc_unscaled = librosa.feature.mfcc(y=y, sr=rate, n_mfcc=40)
librosa.display.specshow(mfcc_unscaled, sr=rate, x_axis='time')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f96dd291c50>

## scaled MFCC for the corresponding audio file

In [13]:
# Draw the MFCC matrix of the waveform y after scaling along axis 1.
plt.figure(figsize=(10, 5))
mfcc_scaled = sklearn.preprocessing.scale(
    librosa.feature.mfcc(y=y, sr=rate, n_mfcc=40),
    axis=1,
)
librosa.display.specshow(mfcc_scaled, sr=rate, x_axis='time')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7fc6bb353e10>