In [13]:
import os
import numpy as np
import scipy
from scipy.io import wavfile
import scipy.fftpack as fft
from scipy.signal import get_window
import matplotlib.pyplot as plt
import math
import librosa

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
import warnings
from sklearn.exceptions import DataConversionWarning
from sklearn.model_selection import RandomizedSearchCV

lut=pd.read_csv('../resources/dataset/Annotations/final/annotation.csv')

# Pre-Processing Data
- Load Ground Truth from annotation.csv at 44.1kHz
- Load audio files for each song
    1. Average L+R to convert to mono audio
    2. Divide into 5 second windows with 1 second hop\
    3. Label each hop based on whether its midpoint is within the start and end times annotated 

In [24]:
# BLOCK AUDIO
def block_audio(x,blockSize,hopSize,fs):    
    # allocate memory    
    numBlocks = math.ceil(x.size / hopSize)    
    xb = np.zeros([numBlocks, blockSize])    
    # compute time stamps    
    t = (np.arange(0, numBlocks) * hopSize) / fs   
    t_mid = t + (0.5*blockSize/fs)
    x = np.concatenate((x, np.zeros(blockSize)),axis=0)    
    for n in range(0, numBlocks):        
        i_start = n * hopSize        
        i_stop = np.min([x.size - 1, i_start + blockSize - 1])        
        xb[n][np.arange(0,blockSize)] = x[np.arange(i_start, i_stop + 1)]    
    return (xb,t,t_mid)


## Labelling the blocks based on ground truth

In [None]:
lut=pd.read_csv('../resources/dataset/Annotations/final/annotation.csv')
i=0
for video_id in lut['video_id'].unique():
    x,sr = librosa.load('../resources/dataset/Audio/processed/'+video_id+'.wav',sr=44100,mono=True)
    blockSize = int(sr * 1)
    hopSize = int(sr * 0.5)

    xb,t,t_mid = block_audio(x,blockSize,hopSize,sr)
    labels=[]
    for ts in t_mid:
        for idx,row in lut[lut['video_id'] == video_id].reset_index().iterrows():
            annotated_start = row['timestamp_start']
            annotated_end = row['timestamp_end']
            if annotated_start <= ts <= annotated_end:
                labels.append(row['scream_type'])
                break
        if ~(annotated_start <= ts <= annotated_end):
            labels.append('no_vocals')
    # Create new dataframs
    if i == 0:
        df=pd.DataFrame()

        df['t'] = t
        df['t_mid'] = t_mid
        blocks=[]
        for blk in xb:
            blocks.append(blk)
        df.insert(0,'video_id',video_id)
        df.insert(3,'label',labels)
        df['xb'] = blocks
        i+=1
    else:
        tmp=pd.DataFrame()
        tmp['t'] = t
        tmp['t_mid'] = t_mid
        blocks=[]
        for blk in xb:
            blocks.append(blk)
        tmp['video_id'] = video_id
        tmp['label'] = labels
        tmp['xb'] = blocks
        df=df.append(tmp)
out = df.to_numpy()
np.save('../resources/working_data/data.npy', out)
    #df.to_csv('./resources/working_data/'+video_id+'.csv',header=True, index=False,encoding='utf-8-sig',sep='\t')


In [4]:
df=pd.DataFrame(np.load('../resources/working_data/data.npy',allow_pickle = True),columns=['video_id','start_time','mid_ts','label','audio'])

In [5]:
df

Unnamed: 0,video_id,start_time,mid_ts,label,audio
0,4600fGWcn9o,0.0,0.5,no_vocals,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,4600fGWcn9o,0.5,1.0,no_vocals,"[0.6896209716796875, 0.7178192138671875, 0.738..."
2,4600fGWcn9o,1.0,1.5,no_vocals,"[0.10858154296875, 0.0853118896484375, 0.02359..."
3,4600fGWcn9o,1.5,2.0,no_vocals,"[0.0962982177734375, 0.0923919677734375, 0.083..."
4,4600fGWcn9o,2.0,2.5,no_vocals,"[0.5100250244140625, 0.488677978515625, 0.4525..."
...,...,...,...,...,...
33815,0m5fIHHfJTw,217.5,218.0,no_vocals,"[0.5259857177734375, 0.569580078125, 0.6193847..."
33816,0m5fIHHfJTw,218.0,218.5,no_vocals,"[-0.0138702392578125, -0.013885498046875, -0.0..."
33817,0m5fIHHfJTw,218.5,219.0,no_vocals,"[0.0004730224609375, 0.0006256103515625, 0.000..."
33818,0m5fIHHfJTw,219.0,219.5,no_vocals,"[0.0001983642578125, 0.000213623046875, 0.0002..."


# Extract Features
## 13 delta_mfccs, ZCR, Spectral Crest, Spectral Centroid
- Normalize the features across the entire dataset
- Extract mean, std dev of the feature value per block 
- Calculate change in feature from one block to another


### Bandpass Filtering

In [7]:
from scipy.signal import butter, lfilter

def butter_bandpass(lowcut, highcut, fs, order=5):
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    return b, a

def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = lfilter(b, a, data)
    return y

In [22]:
# video_id = '-xYZM04JxnQ'
# x,fs = librosa.load('./resources/dataset/Audio/processed/'+video_id+'.wav',sr=44100,mono=True)#scipy.io.wavfile.read('./resources/dataset/Audio/processed/'+file_name)
# y=butter_bandpass_filter(x,200,4500,fs)
# sf.write('./resources/working_data/'+video_id+'_bandpass.wav', y, fs)
# sf.write('./resources/working_data/'+video_id+'_original.wav', x, fs)

In [9]:
def agg_mfccs(x):
    mfccs = librosa.feature.mfcc(x,n_mfcc = 13)
    mean = [np.mean(feature) for feature in mfccs]
    std = [np.std(feature) for feature in mfccs]
    mfcc_delta = librosa.feature.delta(mfccs)
    delta_mean=[np.mean(feature) for feature in mfcc_delta]
    delta_std=[np.std(feature) for feature in mfcc_delta]
    return mean,std,delta_mean,delta_std

In [27]:
def extract_features(x,fs):
    x_filtered = butter_bandpass_filter(x,200,4500,fs)
    #MFCCs
    mfcc_mean,mfcc_std,delta_mfcc_mean,delta_mfcc_std = agg_mfccs(x_filtered)
    #ZCR
    zcr=librosa.feature.zero_crossing_rate(x_filtered)
    return mfcc_mean,mfcc_std,delta_mfcc_mean,delta_mfcc_std,zcr

Aggregating features blockwise (mean and std dev)

In [10]:
df['zcr'] = ''
df['average_zcr'] = ''
df['zcr_stddev'] = ''

#df['mfccs'] = ''
df['mfcc_mean'] = ''
df['mfcc_std'] = ''

df['delta_mfcc_mean'] = ''
df['delta_mfcc_std'] = ''

#Row wise (block wise) aggregation of features by mean and std dev
for i in range(len(df)):
    audio = butter_bandpass_filter(df['audio'][i],200,4500,44100)
    # Calculate ZCR (3 features)
    zcr=librosa.feature.zero_crossing_rate(audio)
    df['zcr'][i] = zcr
    df['average_zcr'][i] = np.mean(zcr)
    df['zcr_stddev'][i] = np.std(zcr)

    # Extract 13 MFCCs - get mean and std deviation for each (26 features) + Delta MFCCs (26 features) = total 52 Features
    mean,std,delta_mean,delta_std = agg_mfccs(audio)
    #df['mfccs'][i] = mfccs[0]
    df['mfcc_mean'][i] = mean
    df['mfcc_std'][i] = std

    df['delta_mfcc_mean'][i] = delta_mean
    df['delta_mfcc_std'][i] = delta_std


## Normalize features
This normalizes over entire dataset

### TODO: Normalize over training set (get training set mean and std dev for every feature, and apply these to z-score normalization for both training and test)

In [15]:
df[['mfcc1_mean','mfcc2_mean','mfcc3_mean','mfcc4_mean','mfcc5_mean','mfcc6_mean','mfcc7_mean','mfcc8_mean','mfcc9_mean','mfcc10_mean','mfcc11_mean','mfcc12_mean','mfcc13_mean']]=pd.DataFrame(df.mfcc_mean.tolist(), index= df.index)
df[['mfcc1_std','mfcc2_std','mfcc3_std','mfcc4_std','mfcc5_std','mfcc6_std','mfcc7_std','mfcc8_std','mfcc9_std','mfcc10_std','mfcc11_std','mfcc12_std','mfcc13_std']]=pd.DataFrame(df.mfcc_std.tolist(), index= df.index)

df[['delta_mfcc1_mean','delta_mfcc2_mean','delta_mfcc3_mean','delta_mfcc4_mean','delta_mfcc5_mean','delta_mfcc6_mean','delta_mfcc7_mean','delta_mfcc8_mean','delta_mfcc9_mean','delta_mfcc10_mean','delta_mfcc11_mean','delta_mfcc12_mean','delta_mfcc13_mean']]=pd.DataFrame(df.delta_mfcc_mean.tolist(), index= df.index)
df[['delta_mfcc1_std','delta_mfcc2_std','delta_mfcc3_std','delta_mfcc4_std','delta_mfcc5_std','delta_mfcc6_std','delta_mfcc7_std','delta_mfcc8_std','delta_mfcc9_std','delta_mfcc10_std','delta_mfcc11_std','delta_mfcc12_std','delta_mfcc13_std']]=pd.DataFrame(df.delta_mfcc_std.tolist(), index= df.index)

In [16]:
df.columns

Index(['video_id', 'start_time', 'mid_ts', 'label', 'audio', 'zcr',
       'average_zcr', 'zcr_stddev', 'mfcc_mean', 'mfcc_std', 'delta_mfcc_mean',
       'delta_mfcc_std', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean',
       'mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean',
       'mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std',
       'mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std',
       'mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std',
       'delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean',
       'delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean',
       'delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean',
       'delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean',
       'delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std',
       'delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std',
       'delta_mfcc6_std', 'delta

In [17]:
selected_cols=['video_id', 'start_time', 'mid_ts', 'label', 'average_zcr',
       'zcr_stddev', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean',
       'mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean',
       'mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std',
       'mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std',
       'mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std',
       'delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean',
       'delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean',
       'delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean',
       'delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean',
       'delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std',
       'delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std',
       'delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std',
       'delta_mfcc12_std', 'delta_mfcc13_std']
np.save('../resources/working_data/bandpassed_features.npy', df[selected_cols].to_numpy())

# Classify!

In [5]:
cols=['video_id', 'ts', 'mid_ts', 'label', 'average_zcr',
       'zcr_stddev', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean',
       'mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean',
       'mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std',
       'mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std',
       'mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std',
       'delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean',
       'delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean',
       'delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean',
       'delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean',
       'delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std',
       'delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std',
       'delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std',
       'delta_mfcc12_std', 'delta_mfcc13_std']

In [6]:
d=np.load('../resources/working_data/bandpassed_features.npy',allow_pickle=True)
df = pd.DataFrame(d,columns=cols)

lut = pd.read_csv('../resources/dataset/lookup_new.csv')

## Undersampling data to even out class distribution

In [7]:
from imblearn.under_sampling import RandomUnderSampler
undersample = RandomUnderSampler(sampling_strategy='not minority',random_state=0)
from collections import Counter
X = df[cols].to_numpy()
y=df[['label']].to_numpy()

X_under, y_under = undersample.fit_resample(X, y)

undersampled_data = pd.DataFrame(X_under,columns=cols)
undersampled_data['label'] = y_under
undersampled_data

Unnamed: 0,video_id,ts,mid_ts,label,average_zcr,zcr_stddev,mfcc1_mean,mfcc2_mean,mfcc3_mean,mfcc4_mean,...,delta_mfcc4_std,delta_mfcc5_std,delta_mfcc6_std,delta_mfcc7_std,delta_mfcc8_std,delta_mfcc9_std,delta_mfcc10_std,delta_mfcc11_std,delta_mfcc12_std,delta_mfcc13_std
0,-2WqQY_xSSM,170.0,170.5,clean,0.073893,0.011337,-152.753272,284.981878,-93.477,-15.155162,...,1.900268,0.939742,1.367665,1.287651,1.33179,1.429769,0.690348,0.810736,1.136817,0.744197
1,FNdC_3LR2AI,219.0,219.5,clean,0.051573,0.008637,-162.772371,272.799532,-70.236482,-4.821217,...,1.383114,0.733027,0.876068,0.761469,0.604008,0.569817,0.611702,0.751562,0.580362,0.539497
2,4600fGWcn9o,280.5,281.0,clean,0.052398,0.007181,-153.846348,280.376,-64.06919,-30.836762,...,0.680709,0.492429,0.534133,0.647866,0.528231,0.533929,0.503313,0.601414,0.394102,0.51978
3,get0cXOsSXg,80.0,80.5,clean,0.051926,0.006304,-152.557575,254.43004,-70.693706,-15.705866,...,1.84092,0.788524,0.785359,0.788353,0.698425,0.706137,0.675212,0.774093,1.115977,0.874281
4,74nTzbgDGWM,121.5,122.0,clean,0.082823,0.01458,-131.354576,259.734565,-92.604535,-16.989386,...,1.184945,1.060576,0.892398,0.776234,0.806224,0.860167,0.78622,0.958681,0.807963,0.783989
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3325,B7iIS91fMAc,117.0,117.5,no_vocals,0.076475,0.008886,-89.760135,271.705597,-92.606483,-19.669077,...,0.857525,1.018744,0.685387,0.955924,0.460226,0.653125,0.695206,0.671385,0.634551,0.587681
3326,ainbICPRV8Y,25.5,26.0,no_vocals,0.029875,0.012476,-263.921251,244.006279,-20.215523,-23.054255,...,1.22011,1.094766,1.453475,0.898146,0.902559,0.988358,0.642122,0.90724,0.943337,0.707029
3327,Bh_5ofa__pY,192.5,193.0,no_vocals,0.043923,0.015109,-288.004774,235.199194,-23.476881,-16.760224,...,0.877283,0.617634,0.690396,0.544516,0.725523,0.677132,0.944769,0.813373,0.628507,0.563559
3328,C_voh9WFbsM,193.0,193.5,no_vocals,0.053992,0.008646,-280.271697,291.041747,-57.55072,-39.034453,...,1.293991,1.134028,0.969015,1.280277,0.638989,0.674139,0.683723,0.737367,0.659187,0.702026


## Plot scatter plot of features

In [5]:
import plotly.express as px
fig = px.scatter_matrix(undersampled_data[['label', 'average_zcr',
       'zcr_stddev', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean',
       'mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean',
       'mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std',
       'mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std',
       'mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std',
       'delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean',
       'delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean',
       'delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean',
       'delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean',
       'delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std',
       'delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std',
       'delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std',
       'delta_mfcc12_std', 'delta_mfcc13_std']],dimensions=["average_zcr", "zcr_stddev", "mfcc1_mean", "delta_mfcc1_mean"],color='label')
fig.show()

## Train-test split

In [8]:
from sklearn.model_selection import GroupShuffleSplit
train_inds, test_inds = next(GroupShuffleSplit(test_size=.2, n_splits=2, random_state = 0).split(lut, groups=lut['band_name']))

train = lut.iloc[train_inds]
test = lut.iloc[test_inds]

train_ids = train['video_id'].to_numpy()
test_ids = test['video_id'].to_numpy()

#df_final = df
df_final = undersampled_data
train = df_final[df_final.video_id.isin(train_ids)]
test = df_final[df_final.video_id.isin(test_ids)]

## Normalizing features

In [9]:
cols = ['average_zcr',
       'zcr_stddev', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean',
       'mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean',
       'mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std',
       'mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std',
       'mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std',
       'delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean',
       'delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean',
       'delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean',
       'delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean',
       'delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std',
       'delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std',
       'delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std',
       'delta_mfcc12_std', 'delta_mfcc13_std']

for col in cols:
    #df[col] = df[col]/max(np.abs(df[col]))
    df[col] = (df[col] - np.mean(df[col]))/np.std(df[col]) # z-score normalization

## Convert df to numpy array for input to classifier

In [10]:
X_train = train[['average_zcr','zcr_stddev','mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean','mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean','mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean','mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std','mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std','mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std','delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean','delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean','delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean','delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean','delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std','delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std','delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std','delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std','delta_mfcc12_std', 'delta_mfcc13_std']].to_numpy()
y_train=train[['label']].to_numpy()

X_test = test[['average_zcr','zcr_stddev', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean','mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean','mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean','mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std','mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std','mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std','delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean','delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean','delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean','delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean','delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std','delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std','delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std','delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std','delta_mfcc12_std', 'delta_mfcc13_std']].to_numpy()
y_test = test[['label']].to_numpy()

# kNN Classifier

## Classification for bandwidth limited input

In [8]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
k=4
KNN_model = KNeighborsClassifier(n_neighbors=k)
KNN_model.fit(X_train, y_train)
KNN_prediction = KNN_model.predict(X_test)
score=precision_score(y_test, KNN_prediction, average='macro')
cm = confusion_matrix(y_test, KNN_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

[[16 16  4  5 41 20]
 [30 18 21  3 37  4]
 [69 11 49 11 62 29]
 [56 58 32 18 75 29]
 [35 28 30  9 47  4]
 [25 19 12 33 25 69]]
0.2261862680226074



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().



# SVM

## SVM for bandwidth limited input

In [15]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
SVM_model = make_pipeline(StandardScaler(), SVC(gamma='auto',C=1.0, kernel='rbf', degree=3,random_state=None))
SVM_model.fit(X_train, y_train)
SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test, SVM_prediction, average='macro')
cm = confusion_matrix(y_test, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

[[ 34   2   3   2  18  43]
 [ 16  47   3   4  39   4]
 [ 62  30  62   4  63  10]
 [ 48  49   8  14 105  44]
 [ 29  38  10   5  65   6]
 [ 21  15   1   8  10 128]]
0.3789904740769749


# Random Forest

In [17]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score

RF_model = RandomForestClassifier(n_estimators=100,criterion='gini',max_depth=None,\
    min_samples_split=2,min_samples_leaf=1,max_features='auto',max_leaf_nodes=None,class_weight='balanced',)
RF_model.fit(X_train, y_train)
RF_prediction = RF_model.predict(X_test)
score=precision_score(y_test, RF_prediction, average='macro')
cm = confusion_matrix(y_test, RF_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

[[ 25   0   2   2  10  63]
 [ 29  44  13   0  24   3]
 [ 98  34  47   2  34  16]
 [ 76  86   4   4  63  35]
 [ 22  36  14   3  77   1]
 [ 25  14   0   5  13 126]]
0.33313771662671937


# 3 Class Problem

## 3 class KNN

In [16]:
y_train_3class = []
y_test_3class = []
for i in range(len(y_train)):
    if y_train[i] in ['midfry','lowfry','highfry','layered']:
        y_train_3class.append('scream')
    elif y_train[i] == 'clean':
        y_train_3class.append('sing')
    else:
        y_train_3class.append('no vocal')

for i in range(len(y_test)):
    if y_test[i] in ['midfry','lowfry','highfry','layered']:
        y_test_3class.append('scream')
    elif y_test[i] == 'clean':
        y_test_3class.append('sing')
    else:
        y_test_3class.append('no vocal')

y_train_3class = np.array(y_train_3class)
y_test_3class = np.array(y_test_3class)

from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
k=5
KNN_model = KNeighborsClassifier(n_neighbors=k)
KNN_model.fit(X_train, y_train_3class)
KNN_prediction = KNN_model.predict(X_test)
score=precision_score(y_test_3class, KNN_prediction, average='macro')
cm = confusion_matrix(y_test_3class, KNN_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

[[ 79  90  14]
 [ 66 607  92]
 [ 19  76   7]]
0.44296882787829817


### KNN Classify 3 class problem

## 3 class SVM

In [13]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
SVM_model = make_pipeline(StandardScaler(),\
    SVC(gamma='auto',C=1.0, kernel='rbf', degree=3,random_state=None)\
        )
SVM_model.fit(X_train, y_train_3class)
SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test_3class, SVM_prediction, average='macro')
cm = confusion_matrix(y_test_3class, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

[[113  64   6]
 [ 40 677  48]
 [ 29  65   8]]
0.5299539170506913


## 3 class RF

# WIP- Optimization

In [14]:
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

selected_cols = ['average_zcr','zcr_stddev','mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',\
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', \
       'mfcc8_mean','mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', \
       'mfcc12_mean','mfcc13_mean', 'mfcc1_std', \
       'mfcc2_std', 'mfcc3_std', 'mfcc4_std','mfcc5_std', \
       'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std','mfcc10_std', \
       'mfcc11_std', 'mfcc12_std', 'mfcc13_std','delta_mfcc1_mean', \
       'delta_mfcc2_mean', 'delta_mfcc3_mean','delta_mfcc4_mean', \
       'delta_mfcc5_mean', 'delta_mfcc6_mean','delta_mfcc7_mean', \
       'delta_mfcc8_mean', 'delta_mfcc9_mean','delta_mfcc10_mean', \
       'delta_mfcc11_mean', 'delta_mfcc12_mean','delta_mfcc13_mean',\
       'delta_mfcc1_std', 'delta_mfcc2_std','delta_mfcc3_std', 'delta_mfcc4_std', \
       'delta_mfcc5_std','delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',\
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std','delta_mfcc12_std', \
       'delta_mfcc13_std']
X_train = train[selected_cols].to_numpy()
y_train=train[['label']].to_numpy()

X_test = test[selected_cols].to_numpy()
y_test = test[['label']].to_numpy()




print("-------------------------------------------------------------------------------")
print("            KNN with k=4              ")
print("-------------------------------------------------------------------------------")
k=4
KNN_model = KNeighborsClassifier(n_neighbors=k)
KNN_model.fit(X_train, y_train)

KNN_prediction = KNN_model.predict(X_test)
score=precision_score(y_test, KNN_prediction, average='macro')
cm = confusion_matrix(y_test, KNN_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            SVM with C=1, kernel = rbf, degree = 3              ")
print("-------------------------------------------------------------------------------")

SVM_model = make_pipeline(StandardScaler(), SVC(gamma='auto',C=1.0, kernel='rbf', degree=3,random_state=None))
SVM_model.fit(X_train, y_train)

SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test, SVM_prediction, average='macro')
cm = confusion_matrix(y_test, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            RF with n_estimators = 90, criterion=gini, max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',\nmax_leaf_nodes=None,class_weight='balanced'              ")
print("-------------------------------------------------------------------------------")

RF_model = RandomForestClassifier(n_estimators=90,criterion='gini',max_depth=None,\
    min_samples_split=2,min_samples_leaf=1,max_features='auto',max_leaf_nodes=None,class_weight='balanced',random_state=0)
RF_model.fit(X_train, y_train)
RF_prediction = RF_model.predict(X_test)
score=precision_score(y_test, RF_prediction, average='macro')
cm = confusion_matrix(y_test, RF_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

-------------------------------------------------------------------------------
            KNN with k=4              
-------------------------------------------------------------------------------
[[16 16  4  5 41 20]
 [30 18 21  3 37  4]
 [69 11 49 11 62 29]
 [56 58 32 18 75 29]
 [35 28 30  9 47  4]
 [25 19 12 33 25 69]]
0.2261862680226074
-------------------------------------------------------------------------------
            SVM with C=1, kernel = rbf, degree = 3              
-------------------------------------------------------------------------------
[[ 34   2   3   2  18  43]
 [ 16  47   3   4  39   4]
 [ 62  30  62   4  63  10]
 [ 48  49   8  14 105  44]
 [ 29  38  10   5  65   6]
 [ 21  15   1   8  10 128]]
0.3789904740769749
-------------------------------------------------------------------------------
            RF with n_estimators = 90, criterion=gini, max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',
max_leaf_nodes=None,class_weight='bal