# Imports

In [55]:
import os
import pandas as pd
import librosa
import numpy as np
import math
import scipy.io.wavfile, scipy.signal
from scipy.spatial import distance
#from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
#from sklearn.naive_bayes import GaussianNB
#from sklearn.tree import DecisionTreeClassifier
#from sklearn.svm import SVC

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score


# Pre-Processing Data
- Load Ground Truth from annotation.csv at 44.1kHz
- Load audio files for each song
    1. Average L+R to convert to mono audio
    2. Divide into 5 second windows with 1 second hop\
    3. Label each hop based on whether its midpoint is within the start and end times annotated 

In [8]:
# BLOCK AUDIO
def block_audio(x,blockSize,hopSize,fs):    
    # allocate memory    
    numBlocks = math.ceil(x.size / hopSize)    
    xb = np.zeros([numBlocks, blockSize])    
    # compute time stamps    
    t = (np.arange(0, numBlocks) * hopSize) / fs   
    t_mid = t + (0.5*blockSize/fs)
    x = np.concatenate((x, np.zeros(blockSize)),axis=0)    
    for n in range(0, numBlocks):        
        i_start = n * hopSize        
        i_stop = np.min([x.size - 1, i_start + blockSize - 1])        
        xb[n][np.arange(0,blockSize)] = x[np.arange(i_start, i_stop + 1)]    
    return (xb,t,t_mid)


In [9]:
video_id = '_duhhVa-dk8'

In [10]:
x,sr = librosa.load('./resources/dataset/Audio/processed/'+video_id+'.wav',sr=44100,mono=True)#scipy.io.wavfile.read('./resources/dataset/Audio/processed/'+file_name)

In [11]:
blockSize = int(sr * 1)
hopSize = int(sr * 0.5)

xb,t,t_mid = block_audio(x,blockSize,hopSize,sr)

In [12]:
lut=pd.read_csv('./resources/dataset/Annotations/final/annotation.csv')

In [13]:
# Testing logic for labelling data based on ground truth

blocks = t_mid.size
i=0

for ts in t_mid:
    for idx,row in lut[lut['video_id'] == video_id].reset_index().iterrows():
        annotated_start = row['timestamp_start']
        annotated_end = row['timestamp_end']
        if annotated_start <= ts <= annotated_end:
            #print(f"ts - {ts}, start - {annotated_start} and end - {annotated_end}")
            i+=1
            break
    if ~(annotated_start <= ts <= annotated_end):
        i+=1         


Below paragraph takes about 282 s

In [14]:
lut=pd.read_csv('./resources/dataset/Annotations/final/annotation.csv')
i=0
for video_id in lut['video_id'].unique():
    x,sr = librosa.load('./resources/dataset/Audio/processed/'+video_id+'.wav',sr=44100,mono=True)
    blockSize = int(sr * 1)
    hopSize = int(sr * 0.5)

    xb,t,t_mid = block_audio(x,blockSize,hopSize,sr)
    labels=[]
    for ts in t_mid:
        for idx,row in lut[lut['video_id'] == video_id].reset_index().iterrows():
            annotated_start = row['timestamp_start']
            annotated_end = row['timestamp_end']
            if annotated_start <= ts <= annotated_end:
                labels.append(row['scream_type'])
                break
        if ~(annotated_start <= ts <= annotated_end):
            labels.append('no_vocals')
    # Create new dataframs
    if i == 0:
        df=pd.DataFrame()

        df['t'] = t
        df['t_mid'] = t_mid
        blocks=[]
        for blk in xb:
            blocks.append(blk)
        df.insert(0,'video_id',video_id)
        df.insert(3,'label',labels)
        df['xb'] = blocks
        i+=1
    else:
        tmp=pd.DataFrame()
        tmp['t'] = t
        tmp['t_mid'] = t_mid
        blocks=[]
        for blk in xb:
            blocks.append(blk)
        tmp['video_id'] = video_id
        tmp['label'] = labels
        tmp['xb'] = blocks
        df=df.append(tmp)
out = df.to_numpy()
np.save('./resources/working_data/data.npy', out)
    #df.to_csv('./resources/working_data/'+video_id+'.csv',header=True, index=False,encoding='utf-8-sig',sep='\t')


# Extract Features
## 13 delta_mfccs, ZCR, Spectral Crest, Spectral Centroid
- Normalize the features across the entire dataset
- Extract mean, std dev of the feature value per block 
- Calculate change in feature from one block to another


In [59]:
def agg_mfccs(x):
    mfccs = librosa.feature.mfcc(x,n_mfcc = 13)
    mean = [np.mean(feature) for feature in mfccs]
    std = [np.std(feature) for feature in mfccs]
    mfcc_delta = librosa.feature.delta(mfccs)
    delta_mean=[np.mean(feature) for feature in mfcc_delta]
    delta_std=[np.std(feature) for feature in mfcc_delta]
    return mean,std,delta_mean,delta_std

In [65]:
def extract_features(x):
    #MFCCs
    mfcc_mean,mfcc_std,delta_mfcc_mean,delta_mfcc_std = agg_mfccs(x)
    #ZCR
    zcr=librosa.feature.zero_crossing_rate(x)


    
    return mfcc_mean,mfcc_std,delta_mfcc_mean,delta_mfcc_std,zcr

Below paragraph takes about 420 s

In [61]:
df['zcr'] = ''
df['average_zcr'] = ''
df['zcr_stddev'] = ''

#df['mfccs'] = ''
df['mfcc_mean'] = ''
df['mfcc_std'] = ''

df['delta_mfcc_mean'] = ''
df['delta_mfcc_std'] = ''

for i in range(len(df)):
    # Calculate ZCR (3 features)
    zcr=librosa.feature.zero_crossing_rate(df['audio'][i])
    df['zcr'][i] = zcr
    df['average_zcr'][i] = np.mean(zcr)
    df['zcr_stddev'][i] = np.std(zcr)

    # Extract 13 MFCCs - get mean and std deviation for each (26 features) + Delta MFCCs (26 features) = total 52 Features
    mean,std,delta_mean,delta_std = agg_mfccs(df['audio'][i])
    #df['mfccs'][i] = mfccs[0]
    df['mfcc_mean'][i] = mean
    df['mfcc_std'][i] = std

    df['delta_mfcc_mean'][i] = delta_mean
    df['delta_mfcc_std'][i] = delta_std

# Find change in MFCC from one block to another (group by video_id)


In [62]:
df[['mfcc1_mean','mfcc2_mean','mfcc3_mean','mfcc4_mean','mfcc5_mean','mfcc6_mean','mfcc7_mean','mfcc8_mean','mfcc9_mean','mfcc10_mean','mfcc11_mean','mfcc12_mean','mfcc13_mean']]=pd.DataFrame(df.mfcc_mean.tolist(), index= df.index)
df[['mfcc1_std','mfcc2_std','mfcc3_std','mfcc4_std','mfcc5_std','mfcc6_std','mfcc7_std','mfcc8_std','mfcc9_std','mfcc10_std','mfcc11_std','mfcc12_std','mfcc13_std']]=pd.DataFrame(df.mfcc_std.tolist(), index= df.index)

df[['delta_mfcc1_mean','delta_mfcc2_mean','delta_mfcc3_mean','delta_mfcc4_mean','delta_mfcc5_mean','delta_mfcc6_mean','delta_mfcc7_mean','delta_mfcc8_mean','delta_mfcc9_mean','delta_mfcc10_mean','delta_mfcc11_mean','delta_mfcc12_mean','delta_mfcc13_mean']]=pd.DataFrame(df.delta_mfcc_mean.tolist(), index= df.index)
df[['delta_mfcc1_std','delta_mfcc2_std','delta_mfcc3_std','delta_mfcc4_std','delta_mfcc5_std','delta_mfcc6_std','delta_mfcc7_std','delta_mfcc8_std','delta_mfcc9_std','delta_mfcc10_std','delta_mfcc11_std','delta_mfcc12_std','delta_mfcc13_std']]=pd.DataFrame(df.delta_mfcc_std.tolist(), index= df.index)

## Normalize features

In [57]:
cols = ['average_zcr',
       'zcr_stddev', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean',
       'mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean',
       'mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std',
       'mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std',
       'mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std',
       'delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean',
       'delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean',
       'delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean',
       'delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean',
       'delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std',
       'delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std',
       'delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std',
       'delta_mfcc12_std', 'delta_mfcc13_std']

for col in cols:
    df[col] = df[col]/max(np.abs(df[col]))

In [63]:
df.columns

Index(['video_id', 'ts', 'mid_ts', 'label', 'audio', 'zcr', 'average_zcr',
       'zcr_stddev', 'mfcc_mean', 'mfcc_std', 'delta_mfcc_mean',
       'delta_mfcc_std', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean',
       'mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean',
       'mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std',
       'mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std',
       'mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std',
       'delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean',
       'delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean',
       'delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean',
       'delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean',
       'delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std',
       'delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std',
       'delta_mfcc6_std', 'delta_mfcc7_s

In [37]:
df

Unnamed: 0,video_id,ts,mid_ts,label,audio,zcr,average_zcr,zcr_stddev,mfcc_mean,mfcc_std,...,delta_mfcc4_std,delta_mfcc5_std,delta_mfcc6_std,delta_mfcc7_std,delta_mfcc8_std,delta_mfcc9_std,delta_mfcc10_std,delta_mfcc11_std,delta_mfcc12_std,delta_mfcc13_std
0,4600fGWcn9o,0.0,0.5,no_vocals,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[[0.0, 0.0029296875, 0.03466796875, 0.0625, 0....",0.095039,0.11232,"[-129.30442472392588, 120.84185637231003, -42....","[153.86292168258274, 51.389458611989106, 24.00...",...,0.378322,0.362442,0.255812,0.216369,0.197342,0.270354,0.256349,0.17529,0.240528,0.429757
1,4600fGWcn9o,0.5,1.0,no_vocals,"[0.6896209716796875, 0.7178192138671875, 0.738...","[[0.001953125, 0.00732421875, 0.01220703125, 0...",0.030553,0.024637,"[-111.62375869978057, 148.0224260673816, -44.4...","[44.227963654699344, 19.079967098386888, 16.58...",...,0.246632,0.260994,0.231468,0.192004,0.194783,0.273414,0.291844,0.168888,0.195023,0.423875
2,4600fGWcn9o,1.0,1.5,no_vocals,"[0.10858154296875, 0.0853118896484375, 0.02359...","[[0.0087890625, 0.01123046875, 0.013671875, 0....",0.061321,0.086889,"[-90.45160174337661, 151.44191500944592, -48.0...","[59.2113875017919, 21.34803042085795, 17.82077...",...,0.238022,0.369578,0.295717,0.230257,0.211923,0.384199,0.33963,0.172804,0.233265,0.369526
3,4600fGWcn9o,1.5,2.0,no_vocals,"[0.0962982177734375, 0.0923919677734375, 0.083...","[[0.00732421875, 0.01318359375, 0.02734375, 0....",0.103292,0.073054,"[-40.750934579140804, 151.8890810594238, -59.9...","[43.66095542689978, 17.427200032963146, 12.853...",...,0.181022,0.313454,0.240912,0.174939,0.24846,0.355483,0.318322,0.203571,0.27357,0.256327
4,4600fGWcn9o,2.0,2.5,no_vocals,"[0.5100250244140625, 0.488677978515625, 0.4525...","[[0.0146484375, 0.02587890625, 0.03515625, 0.0...",0.110753,0.042473,"[-27.540076624559322, 159.0698455536097, -66.0...","[14.19266859603115, 10.165987966773274, 5.5215...",...,0.172422,0.128003,0.13986,0.124283,0.242181,0.2392,0.353877,0.214575,0.241836,0.275868
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33815,0m5fIHHfJTw,217.5,218.0,no_vocals,"[0.5259857177734375, 0.569580078125, 0.6193847...","[[0.0224609375, 0.0263671875, 0.03369140625, 0...",0.03751,0.042927,"[-379.1829176292383, 100.50875734676428, 21.88...","[138.55693522606055, 54.45983632757572, 34.383...",...,0.169655,0.202067,0.167204,0.159067,0.137605,0.25053,0.204719,0.164546,0.238999,0.218127
33816,0m5fIHHfJTw,218.0,218.5,no_vocals,"[-0.0138702392578125, -0.013885498046875, -0.0...","[[0.001953125, 0.00537109375, 0.005859375, 0.0...",0.058937,0.03881,"[-615.9036073044142, 135.52941009389406, 27.96...","[44.69026342277424, 18.359997856909146, 11.164...",...,0.145635,0.202335,0.160063,0.100675,0.160747,0.22733,0.21804,0.19375,0.208642,0.17897
33817,0m5fIHHfJTw,218.5,219.0,no_vocals,"[0.0004730224609375, 0.0006256103515625, 0.000...","[[0.02783203125, 0.0380859375, 0.0498046875, 0...",0.082163,0.034879,"[-704.9911777841647, 136.4628461787571, 22.106...","[46.577167835073745, 12.020080039354543, 13.29...",...,0.142813,0.196156,0.158167,0.089661,0.140134,0.211731,0.254929,0.206402,0.215636,0.252988
33818,0m5fIHHfJTw,219.0,219.5,no_vocals,"[0.0001983642578125, 0.000213623046875, 0.0002...","[[0.0244140625, 0.03564453125, 0.04736328125, ...",0.086027,0.075293,"[-832.7066016133175, 96.57763219382491, 26.467...","[146.6767357704523, 47.752663648316016, 13.327...",...,0.167977,0.160818,0.121757,0.104552,0.118002,0.185847,0.165261,0.151475,0.184445,0.241599


In [58]:
selected_cols=['video_id', 'ts', 'mid_ts', 'label', 'average_zcr',
       'zcr_stddev', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean',
       'mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean',
       'mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std',
       'mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std',
       'mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std',
       'delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean',
       'delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean',
       'delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean',
       'delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean',
       'delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std',
       'delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std',
       'delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std',
       'delta_mfcc12_std', 'delta_mfcc13_std']
np.save('./resources/working_data/data_with_features.npy', df[selected_cols].to_numpy())

In [68]:
df.shape

(33820, 64)

# Classify!

Train the classifier with the X_train and Y_train, and then make predictions based on X_test\
Compare the predictions with Y_test and calculate accuracy score and show the confusion matrix

## Load data with features

In [45]:
cols=['video_id', 'ts', 'mid_ts', 'label', 'average_zcr',
       'zcr_stddev', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean',
       'mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean',
       'mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std',
       'mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std',
       'mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std',
       'delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean',
       'delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean',
       'delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean',
       'delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean',
       'delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std',
       'delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std',
       'delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std',
       'delta_mfcc12_std', 'delta_mfcc13_std']

Below paragraph takes about 16 s

In [2]:
d=np.load('./resources/working_data/data_with_features.npy',allow_pickle=True)
df = pd.DataFrame(d,columns=cols)

lut = pd.read_csv('./resources/dataset/lookup_new.csv')



In [46]:
from imblearn.under_sampling import RandomUnderSampler
undersample = RandomUnderSampler(sampling_strategy='not minority',random_state=0)
from collections import Counter
X = df[cols].to_numpy()
y=df[['label']].to_numpy()

X_under, y_under = undersample.fit_resample(X, y)

In [47]:
undersampled_data = pd.DataFrame(X_under,columns=cols)
undersampled_data['label'] = y_under
undersampled_data

Unnamed: 0,video_id,ts,mid_ts,label,average_zcr,zcr_stddev,mfcc1_mean,mfcc2_mean,mfcc3_mean,mfcc4_mean,...,delta_mfcc4_std,delta_mfcc5_std,delta_mfcc6_std,delta_mfcc7_std,delta_mfcc8_std,delta_mfcc9_std,delta_mfcc10_std,delta_mfcc11_std,delta_mfcc12_std,delta_mfcc13_std
0,-2WqQY_xSSM,170.0,170.5,clean,0.118491,0.03782,-0.0296,0.601753,-0.525863,0.507023,...,0.258681,0.199542,0.271011,0.228729,0.405648,0.401938,0.331607,0.211656,0.37926,0.28405
1,FNdC_3LR2AI,219.0,219.5,clean,0.076993,0.033511,-0.042997,0.585207,-0.363184,0.591661,...,0.20641,0.266282,0.220918,0.231036,0.228568,0.236466,0.174915,0.172336,0.193141,0.215727
2,4600fGWcn9o,280.5,281.0,clean,0.095738,0.021124,-0.049838,0.6778,-0.431767,0.412706,...,0.103531,0.103153,0.116279,0.117942,0.13046,0.181046,0.176319,0.174433,0.129422,0.223523
3,get0cXOsSXg,80.0,80.5,clean,0.111308,0.044755,-0.027565,0.494452,-0.344048,0.517288,...,0.230225,0.220953,0.296749,0.292011,0.301978,0.474694,0.298779,0.329122,0.374311,0.319355
4,74nTzbgDGWM,121.5,122.0,clean,0.175599,0.095507,0.00864,0.416546,-0.323433,0.290742,...,0.175767,0.174746,0.17976,0.157361,0.220805,0.314724,0.179664,0.280147,0.273909,0.257404
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3325,B7iIS91fMAc,117.0,117.5,no_vocals,0.123218,0.029629,0.034601,0.514027,-0.43047,0.371001,...,0.193159,0.18078,0.166407,0.119742,0.156025,0.167708,0.196472,0.177082,0.168337,0.220691
3326,ainbICPRV8Y,25.5,26.0,no_vocals,0.052216,0.037502,-0.163743,0.627249,-0.183702,0.459418,...,0.289458,0.47157,0.457618,0.377304,0.483696,0.412237,0.253132,0.229593,0.383528,0.37384
3327,Bh_5ofa__pY,192.5,193.0,no_vocals,0.075113,0.045458,-0.184054,0.583448,-0.192367,0.497646,...,0.158606,0.16079,0.143837,0.163422,0.151393,0.181318,0.289548,0.244237,0.165313,0.203696
3328,C_voh9WFbsM,193.0,193.5,no_vocals,0.0401,0.020788,-0.157526,0.686169,-0.269531,0.147087,...,0.234284,0.482024,0.470637,0.315412,0.376187,0.373598,0.225737,0.277297,0.289864,0.348925


## Create Train Test Split

In [67]:
from sklearn.model_selection import GroupShuffleSplit
train_inds, test_inds = next(GroupShuffleSplit(test_size=.2, n_splits=2, random_state = 0).split(lut, groups=lut['band_name']))

train = lut.iloc[train_inds]
test = lut.iloc[test_inds]

train_ids = train['video_id'].to_numpy()
test_ids = test['video_id'].to_numpy()

#df_final = df
df_final = undersampled_data
train = df_final[df_final.video_id.isin(train_ids)]
test = df_final[df_final.video_id.isin(test_ids)]

### Training set breakout

In [68]:
train.groupby('label')['mid_ts'].count()

label
clean        453
highfry      442
layered      324
lowfry       287
midfry       402
no_vocals    372
Name: mid_ts, dtype: int64

### Test set breakout

In [69]:
test.groupby('label')['mid_ts'].count()

label
clean        102
highfry      113
layered      231
lowfry       268
midfry       153
no_vocals    183
Name: mid_ts, dtype: int64

In [53]:
X_train = train[['average_zcr','zcr_stddev','mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean','mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean','mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean','mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std','mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std','mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std','delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean','delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean','delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean','delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean','delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std','delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std','delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std','delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std','delta_mfcc12_std', 'delta_mfcc13_std']].to_numpy()
y_train=train[['label']].to_numpy()

X_test = test[['average_zcr','zcr_stddev', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean','mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean','mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean','mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std','mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std','mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std','delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean','delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean','delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean','delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean','delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std','delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std','delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std','delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std','delta_mfcc12_std', 'delta_mfcc13_std']].to_numpy()
y_test = test[['label']].to_numpy()

## Trying 3 class- scream, sing, nothing

In [87]:
y_train_3class = []
y_test_3class = []
for i in range(len(y_train)):
    if y_train[i] in ['midfry','lowfry','highfry','layered']:
        y_train_3class.append('scream')
    elif y_train[i] == 'clean':
        y_train_3class.append('sing')
    else:
        y_train_3class.append('no vocal')

for i in range(len(y_test)):
    if y_test[i] in ['midfry','lowfry','highfry','layered']:
        y_test_3class.append('scream')
    elif y_test[i] == 'clean':
        y_test_3class.append('sing')
    else:
        y_test_3class.append('no vocal')

y_train_3class = np.array(y_train_3class)
y_test_3class = np.array(y_test_3class)

In [68]:
(test.shape[0] + train.shape[0])**0.5

183.90214789392755

In [57]:
t=pd.DataFrame()
t['class'] = y_test_3class
t['blah'] = ''
t.groupby('class').count()

NameError: name 'y_test_3class' is not defined

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
k=5
KNN_model = KNeighborsClassifier(n_neighbors=k)
KNN_model.fit(X_train, y_train_3class)
KNN_prediction = KNN_model.predict(X_test)
score=precision_score(y_test_3class, KNN_prediction, average='macro')
cm = confusion_matrix(y_test_3class, KNN_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

# Classify

## Dummy Classifier 

In [60]:
from sklearn.dummy import DummyClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
dummy = DummyClassifier(strategy="uniform")
dummy.fit(X_train,y_train)
random_output=dummy.predict(X_test)

score=precision_score(y_test, random_output, average='macro')
cm = confusion_matrix(y_test, random_output)

print(cm)
print(score)

[[17 15 21 24  9 25]
 [54 42 53 45 51 50]
 [52 47 53 55 54 47]
 [66 58 69 80 58 63]
 [45 31 38 45 34 34]
 [47 46 36 36 41 36]]
0.16534279689110595


## Actual Classifier

In [63]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
k=4
KNN_model = KNeighborsClassifier(n_neighbors=k)
KNN_model.fit(X_train, y_train)
KNN_prediction = KNN_model.predict(X_test)
score=precision_score(y_test, KNN_prediction, average='macro')
cm = confusion_matrix(y_test, KNN_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

[[ 33   5  10   2  37  24]
 [114  84  33   9  40  15]
 [ 95  73  30   4  39  67]
 [ 87  26 111  49  60  61]
 [ 89  39  23  11  39  26]
 [ 62  31  10  12  21 106]]
0.26926132025584365



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().



### Plotting macro accuracy vs k

In [61]:
from matplotlib import pyplot as plt 
k_plt=[]
score_plt=[]
k=1
while 1:
    if k <=15:
        print(k)
        k_plt.append(k)
        KNN_model = KNeighborsClassifier(n_neighbors=k)
        KNN_model.fit(X_train, y_train)
        KNN_prediction = KNN_model.predict(X_test)
        score=precision_score(y_test, KNN_prediction, average='macro')
        score_plt.append(score)
        k += 1
    else:
        break
#plt.plot(k_plt,score_plt)

1
2
3


  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


4
5
6


  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


7
8
9


  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


10
11
12


  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


13
14
15


  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


In [62]:
import plotly.express as px
p=pd.DataFrame()
p['k'] = k_plt
p['score'] = score_plt
fig = px.line(p,x='k',y='score',title='k value vs accuracy')
fig.show()