In [3]:
! pip install pandas librosa scipy sklearn

You should consider upgrading via the '/Users/vedant/Desktop/Programming/scream-detection/.venv/bin/python3.9 -m pip install --upgrade pip' command.[0m


In [4]:
import os
import pandas as pd
import librosa
import soundfile as sf
import numpy as np
import math
import scipy.io.wavfile, scipy.signal
from scipy.spatial import distance
#from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

#from sklearn.naive_bayes import GaussianNB
#from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

lut=pd.read_csv('../resources/dataset/Annotations/final/annotation.csv')

# Pre-Processing Data
- Load Ground Truth from annotation.csv at 44.1kHz
- Load audio files for each song
    1. Average L+R to convert to mono audio
    2. Divide into 5 second windows with 1 second hop\
    3. Label each hop based on whether its midpoint is within the start and end times annotated 

In [5]:
# BLOCK AUDIO
def block_audio(x,blockSize,hopSize,fs):    
    # allocate memory    
    numBlocks = math.ceil(x.size / hopSize)    
    xb = np.zeros([numBlocks, blockSize])    
    # compute time stamps    
    t = (np.arange(0, numBlocks) * hopSize) / fs   
    t_mid = t + (0.5*blockSize/fs)
    x = np.concatenate((x, np.zeros(blockSize)),axis=0)    
    for n in range(0, numBlocks):        
        i_start = n * hopSize        
        i_stop = np.min([x.size - 1, i_start + blockSize - 1])        
        xb[n][np.arange(0,blockSize)] = x[np.arange(i_start, i_stop + 1)]    
    return (xb,t,t_mid)


## Labelling the blocks based on ground truth

In [6]:
lut=pd.read_csv('../resources/dataset/Annotations/final/annotation.csv')
i=0
for video_id in lut['video_id'].unique():
    x,sr = librosa.load('../resources/dataset/Audio/processed/'+video_id+'.wav',sr=44100,mono=True)
    blockSize = int(sr * 1)
    hopSize = int(sr * 0.5)

    xb,t,t_mid = block_audio(x,blockSize,hopSize,sr)
    labels=[]
    for ts in t_mid:
        for idx,row in lut[lut['video_id'] == video_id].reset_index().iterrows():
            annotated_start = row['timestamp_start']
            annotated_end = row['timestamp_end']
            if annotated_start <= ts <= annotated_end:
                labels.append(row['scream_type'])
                break
        if ~(annotated_start <= ts <= annotated_end):
            labels.append('no_vocals')
    # Create new dataframs
    if i == 0:
        df=pd.DataFrame()

        df['t'] = t
        df['t_mid'] = t_mid
        blocks=[]
        for blk in xb:
            blocks.append(blk)
        df.insert(0,'video_id',video_id)
        df.insert(3,'label',labels)
        df['xb'] = blocks
        i+=1
    else:
        tmp=pd.DataFrame()
        tmp['t'] = t
        tmp['t_mid'] = t_mid
        blocks=[]
        for blk in xb:
            blocks.append(blk)
        tmp['video_id'] = video_id
        tmp['label'] = labels
        tmp['xb'] = blocks
        df=df.append(tmp)
out = df.to_numpy()
np.save('../resources/working_data/data.npy', out)
    #df.to_csv('./resources/working_data/'+video_id+'.csv',header=True, index=False,encoding='utf-8-sig',sep='\t')


# Extract Features
## 13 delta_mfccs, ZCR, Spectral Crest, Spectral Centroid
- Normalize the features across the entire dataset
- Extract mean, std dev of the feature value per block 
- Calculate change in feature from one block to another


In [9]:
def block_audio(x,blockSize,hopSize,fs):    
    # allocate memory    
    numBlocks = math.ceil(x.size / hopSize)    
    xb = np.zeros([numBlocks, blockSize])    
    # compute time stamps    
    t = (np.arange(0, numBlocks) * hopSize) / fs   
    x = np.concatenate((x, np.zeros(blockSize)),axis=0)    
    for n in range(0, numBlocks):        
        i_start = n * hopSize        
        i_stop = np.min([x.size - 1, i_start + blockSize - 1])        
        xb[n][np.arange(0,blockSize)] = x[np.arange(i_start, i_stop + 1)]    
    return (xb,t)

def calc_stft(xb,fs=44100):
    stft = np.zeros((xb.shape[0],(int(xb[0].shape[0]/2)+1)))
    freqs = np.zeros((xb.shape[0],(int(xb[0].shape[0]/2)+1)))
    window = np.hanning(xb[0].shape[0])
    for i in range(xb.shape[0]):
        block= xb[i]
        # Apply Window to the block
        windowed_block = window * block 
        stft_blk = np.fft.fft(windowed_block)
        #stft_blk = np.fft.rfft(windowed_block)
        freq=np.fft.fftfreq(block.size,1/fs)
        freqs[i]=freq[:int(block.size/2)+1]
        stft_blk = np.abs(stft_blk)
        #stft_block = stft_blk[int((stft_blk.shape[0])/2):]
        stft_block = stft_blk[:int(((stft_blk.shape[0])/2)+1)]
        #stft_db = 10*np.log10(stft_block) # IS THIS CORRECT??
        stft[i]=stft_block#stft_db
    stft = np.array(stft)
    freqs=np.array(freqs)
    return stft,freqs

def extract_spectral_centroid(xb, fs):
    centroids = np.zeros(xb.shape[0])
    stft,freqs = calc_stft(xb,fs)
    #np.sum(magnitudes*freqs) / np.sum(magnitudes)
    for i in range(freqs.shape[0]):
        if np.all(stft[i] == 0):
            pass
        else:
            centroid = np.sum(stft[i]*freqs[i]) / np.sum(stft[i])
            centroids[i]=centroid
        #centroids.append(centroid)
    #centroids=np.array(centroids)
    return centroids
def calc_stft(xb,fs=44100):
    stft = np.zeros((xb.shape[0],(int(xb[0].shape[0]/2)+1)))
    freqs = np.zeros((xb.shape[0],(int(xb[0].shape[0]/2)+1)))
    window = np.hanning(xb[0].shape[0])
    for i in range(xb.shape[0]):
        block= xb[i]
        # Apply Window to the block
        windowed_block = window * block 
        stft_blk = np.fft.fft(windowed_block)
        #stft_blk = np.fft.rfft(windowed_block)
        freq=np.fft.fftfreq(block.size,1/fs)
        freqs[i]=freq[:int(block.size/2)+1]
        stft_blk = np.abs(stft_blk)
        #stft_block = stft_blk[int((stft_blk.shape[0])/2):]
        stft_block = stft_blk[:int(((stft_blk.shape[0])/2)+1)]
        #stft_db = 10*np.log10(stft_block) # IS THIS CORRECT??
        stft[i]=stft_block#stft_db
    stft = np.array(stft)
    freqs=np.array(freqs)
    return stft,freqs

def extract_spectral_centroid(xb, fs):
    centroids = np.zeros(xb.shape[0])
    stft,freqs = calc_stft(xb,fs)
    #np.sum(magnitudes*freqs) / np.sum(magnitudes)
    for i in range(freqs.shape[0]):
        if np.all(stft[i] == 0):
            pass
        else:
            centroid = np.sum(stft[i]*freqs[i]) / np.sum(stft[i])
            centroids[i]=centroid
        #centroids.append(centroid)
    #centroids=np.array(centroids)
    return centroids
def calc_stft(xb,fs=44100):
    stft = np.zeros((xb.shape[0],(int(xb[0].shape[0]/2)+1)))
    freqs = np.zeros((xb.shape[0],(int(xb[0].shape[0]/2)+1)))
    window = np.hanning(xb[0].shape[0])
    for i in range(xb.shape[0]):
        block= xb[i]
        # Apply Window to the block
        windowed_block = window * block 
        stft_blk = np.fft.fft(windowed_block)
        #stft_blk = np.fft.rfft(windowed_block)
        freq=np.fft.fftfreq(block.size,1/fs)
        freqs[i]=freq[:int(block.size/2)+1]
        stft_blk = np.abs(stft_blk)
        #stft_block = stft_blk[int((stft_blk.shape[0])/2):]
        stft_block = stft_blk[:int(((stft_blk.shape[0])/2)+1)]
        #stft_db = 10*np.log10(stft_block) # IS THIS CORRECT??
        stft[i]=stft_block#stft_db
    stft = np.array(stft)
    freqs=np.array(freqs)
    return stft,freqs

def extract_rms(xb):
    rms = np.zeros(xb.shape[0])
    for i in range(xb.shape[0]):
        if np.all(xb[i] == 0):
            r = 0.00001
        else:
            block = xb[i]
            r = np.sqrt(np.sum(block**2)/xb.shape[0])
            if r <= 0.00001: # Done to handle case when rms is 0 (for a block of all zeros
                r = 0.00001
            #rms.append(r)
        rms[i] = r
    #rms=np.array(rms)
    return 20*np.log10(rms)#rms,20*np.log10(rms) # What is reference value to convert to dB? Is it 1?


def extract_spectral_crest(xb):
    crest = np.zeros(xb.shape[0])
    stft,freqs = calc_stft(xb,44100)
    for i in range(stft.shape[0]):
        if np.all(stft[i] == 0):
            pass
        else:
            #crest.append((np.max(stft[i])/np.sum(stft[i])))
            crest[i] = np.max(stft[i])/np.sum(stft[i])
    #crest = np.array(crest)
    return crest

def extract_spectral_flux(xb):
    num_blocks=xb.shape[0] 
    blockSize = xb.shape[1]
    spectral_flux = np.zeros(num_blocks)
    stft,freqs = calc_stft(xb,44100)
    fft_len= stft.shape[1]
    n = 0
    k = 0
    for n in range(1,num_blocks):
        if np.all(stft[n] == 0):
            pass
        else:
            sum_flux = 0
            for k in range(fft_len):
                f = (abs(stft[n, k]) - abs(stft[n-1, k]))**2
                sum_flux += f
            flux = np.sqrt(sum_flux)/((blockSize/2)+1)
            spectral_flux[n] = flux #first flux value will be 0
    return spectral_flux

# Extracting RMS, Spectral Crest, Spectral Flux
def extract_manual_features(x,fs):

    xb,ts = block_audio(x,2048,1024,fs)
    #centroid = extract_spectral_centroid(xb,fs)
    rms = extract_rms(xb)
    #zcr = extract_zerocrossingrate(xb)
    crest = extract_spectral_crest(xb)
    flux = extract_spectral_flux(xb)

    #centroid_mean = np.mean(centroid)
    #centroid_std = np.std(centroid)

    rms_mean = np.mean(rms)
    rms_std = np.std(rms)

    #zcr_mean = np.mean(zcr)
    #zcr_std = np.std(zcr)

    crest_mean = np.mean(crest)
    crest_std = np.std(crest)

    flux_mean = np.mean(flux)
    flux_std = np.std(flux)

    return rms_mean, rms_std, crest_mean, crest_std, flux_mean,flux_std

def agg_mfccs(x):
    mfccs = librosa.feature.mfcc(x,n_mfcc = 13)
    mean = [np.mean(feature) for feature in mfccs]
    std = [np.std(feature) for feature in mfccs]
    mfcc_delta = librosa.feature.delta(mfccs)
    delta_mean=[np.mean(feature) for feature in mfcc_delta]
    delta_std=[np.std(feature) for feature in mfcc_delta]
    return mean,std,delta_mean,delta_std

def extract_features(x,fs=44100):
    #MFCCs
    mfcc_mean,mfcc_std,delta_mfcc_mean,delta_mfcc_std = agg_mfccs(x)
    #ZCR
    zcr=librosa.feature.zero_crossing_rate(x)
    #Spectral Centroid
    centroid = librosa.feature.spectral_centroid(x,44100)
    #Spectral Contrast
    contrast = librosa.feature.spectral_contrast(x,44100)
    #Spectral Flatness
    flatness = librosa.feature.spectral_flatness(x)
    #Spectral Roll-off
    rolloff = librosa.feature.spectral_rolloff(x,44100)
    
    
    return mfcc_mean,mfcc_std,delta_mfcc_mean,delta_mfcc_std,zcr,centroid,contrast,flatness,rolloff

df=pd.DataFrame(np.load('../resources/working_data/data.npy',allow_pickle = True),columns=['video_id','start_time','mid_ts','label','audio'])

df['zcr'] = ''
df['average_zcr'] = ''
df['zcr_stddev'] = ''

#df['mfccs'] = ''
df['mfcc_mean'] = ''
df['mfcc_std'] = ''

df['delta_mfcc_mean'] = ''
df['delta_mfcc_std'] = ''

df['centroid'] = ''
df['centroid_mean']=''
df['centroid_std'] = ''

df['contrast'] = ''
df['contrast_mean']=''
df['contrast_std'] = ''

df['flatness'] = ''
df['flatness_mean']=''
df['flatness_std'] = ''

df['rolloff'] = ''
df['rolloff_mean']=''
df['rolloff_std'] = ''

#Row wise (block wise) aggregation of features by mean and std dev
for i in range(len(df)):
    #audio = butter_bandpass_filter(df['audio'][i],200,4500,44100)
    #audio = df['audio'][i]
    mean, std, delta_mean, delta_std, zcr, centroid, contrast, flatness, rolloff = extract_features(df['audio'][i],44100)
    # Calculate ZCR - mean and std
    df['zcr'][i] = zcr
    df['average_zcr'][i] = np.mean(zcr)
    df['zcr_stddev'][i] = np.std(zcr)
    # Extract 13 MFCCs - get mean and std deviation for each (26 features) + Delta MFCCs (26 features) = total 52 Features
    df['mfcc_mean'][i] = mean
    df['mfcc_std'][i] = std
    df['delta_mfcc_mean'][i] = delta_mean
    df['delta_mfcc_std'][i] = delta_std

    #Calculate Spectral Centroid Mean and Std
    df['centroid'][i] = centroid
    df['centroid_mean'][i]=np.mean(centroid)
    df['centroid_std'][i] = np.std(centroid)

    #Calculate Spectral Contrast - Mean and Std
    df['contrast'][i] = contrast
    df['contrast_mean'][i]=np.mean(contrast)
    df['contrast_std'][i] = np.std(contrast)

    #Calculate spectral flatness - mean and std
    df['flatness'][i] = flatness
    df['flatness_mean'][i] = np.mean(flatness)
    df['flatness_std'][i] = np.std(flatness)

    #Calculate spectral flatness - mean and std
    df['rolloff'][i] = rolloff
    df['rolloff_mean'][i] = np.mean(rolloff)
    df['rolloff_std'][i] = np.std(rolloff)


df[['mfcc1_mean','mfcc2_mean','mfcc3_mean','mfcc4_mean','mfcc5_mean','mfcc6_mean','mfcc7_mean','mfcc8_mean','mfcc9_mean','mfcc10_mean','mfcc11_mean','mfcc12_mean','mfcc13_mean']]=pd.DataFrame(df.mfcc_mean.tolist(), index= df.index)
df[['mfcc1_std','mfcc2_std','mfcc3_std','mfcc4_std','mfcc5_std','mfcc6_std','mfcc7_std','mfcc8_std','mfcc9_std','mfcc10_std','mfcc11_std','mfcc12_std','mfcc13_std']]=pd.DataFrame(df.mfcc_std.tolist(), index= df.index)

df[['delta_mfcc1_mean','delta_mfcc2_mean','delta_mfcc3_mean','delta_mfcc4_mean','delta_mfcc5_mean','delta_mfcc6_mean','delta_mfcc7_mean','delta_mfcc8_mean','delta_mfcc9_mean','delta_mfcc10_mean','delta_mfcc11_mean','delta_mfcc12_mean','delta_mfcc13_mean']]=pd.DataFrame(df.delta_mfcc_mean.tolist(), index= df.index)
df[['delta_mfcc1_std','delta_mfcc2_std','delta_mfcc3_std','delta_mfcc4_std','delta_mfcc5_std','delta_mfcc6_std','delta_mfcc7_std','delta_mfcc8_std','delta_mfcc9_std','delta_mfcc10_std','delta_mfcc11_std','delta_mfcc12_std','delta_mfcc13_std']]=pd.DataFrame(df.delta_mfcc_std.tolist(), index= df.index)

selected_cols=['video_id', 'start_time', 'mid_ts', 'label', 'average_zcr',
       'zcr_stddev', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean',
       'mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean',
       'mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std',
       'mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std',
       'mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std',
       'delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean',
       'delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean',
       'delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean',
       'delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean',
       'delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std',
       'delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std',
       'delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std',
       'delta_mfcc12_std', 'delta_mfcc13_std',
       'centroid_mean','centroid_std',
       'contrast_mean','contrast_std',
       'flatness_mean','flatness_std',
       'rolloff_mean','rolloff_std']
np.save('../resources/working_data/fullaudio_features.npy', df[selected_cols].to_numpy())

Aggregating features blockwise (mean and std dev)

In [22]:
df['zcr'] = ''
df['average_zcr'] = ''
df['zcr_stddev'] = ''

#df['mfccs'] = ''
df['mfcc_mean'] = ''
df['mfcc_std'] = ''

df['delta_mfcc_mean'] = ''
df['delta_mfcc_std'] = ''

df['centroid'] = ''
df['centroid_mean']=''
df['centroid_std'] = ''

df['contrast'] = ''
df['contrast_mean']=''
df['contrast_std'] = ''

df['flatness'] = ''
df['flatness_mean']=''
df['flatness_std'] = ''

df['rolloff'] = ''
df['rolloff_mean']=''
df['rolloff_std'] = ''

#Row wise (block wise) aggregation of features by mean and std dev
for i in range(len(df)):
    #audio = butter_bandpass_filter(df['audio'][i],200,4500,44100)
    #audio = df['audio'][i]
    mean, std, delta_mean, delta_std, zcr, centroid, contrast, flatness, rolloff = extract_features(df['audio'][i],44100)
    # Calculate ZCR - mean and std
    df['zcr'][i] = zcr
    df['average_zcr'][i] = np.mean(zcr)
    df['zcr_stddev'][i] = np.std(zcr)
    # Extract 13 MFCCs - get mean and std deviation for each (26 features) + Delta MFCCs (26 features) = total 52 Features
    df['mfcc_mean'][i] = mean
    df['mfcc_std'][i] = std
    df['delta_mfcc_mean'][i] = delta_mean
    df['delta_mfcc_std'][i] = delta_std

    #Calculate Spectral Centroid Mean and Std
    df['centroid'][i] = centroid
    df['centroid_mean'][i]=np.mean(centroid)
    df['centroid_std'][i] = np.std(centroid)

    #Calculate Spectral Contrast - Mean and Std
    df['contrast'][i] = contrast
    df['contrast_mean'][i]=np.mean(contrast)
    df['contrast_std'][i] = np.std(contrast)

    #Calculate spectral flatness - mean and std
    df['flatness'][i] = flatness
    df['flatness_mean'][i] = np.mean(flatness)
    df['flatness_std'][i] = np.std(flatness)

    #Calculate spectral flatness - mean and std
    df['rolloff'][i] = rolloff
    df['rolloff_mean'][i] = np.mean(rolloff)
    df['rolloff_std'][i] = np.std(rolloff)

In [23]:
df[['mfcc1_mean','mfcc2_mean','mfcc3_mean','mfcc4_mean','mfcc5_mean','mfcc6_mean','mfcc7_mean','mfcc8_mean','mfcc9_mean','mfcc10_mean','mfcc11_mean','mfcc12_mean','mfcc13_mean']]=pd.DataFrame(df.mfcc_mean.tolist(), index= df.index)
df[['mfcc1_std','mfcc2_std','mfcc3_std','mfcc4_std','mfcc5_std','mfcc6_std','mfcc7_std','mfcc8_std','mfcc9_std','mfcc10_std','mfcc11_std','mfcc12_std','mfcc13_std']]=pd.DataFrame(df.mfcc_std.tolist(), index= df.index)

df[['delta_mfcc1_mean','delta_mfcc2_mean','delta_mfcc3_mean','delta_mfcc4_mean','delta_mfcc5_mean','delta_mfcc6_mean','delta_mfcc7_mean','delta_mfcc8_mean','delta_mfcc9_mean','delta_mfcc10_mean','delta_mfcc11_mean','delta_mfcc12_mean','delta_mfcc13_mean']]=pd.DataFrame(df.delta_mfcc_mean.tolist(), index= df.index)
df[['delta_mfcc1_std','delta_mfcc2_std','delta_mfcc3_std','delta_mfcc4_std','delta_mfcc5_std','delta_mfcc6_std','delta_mfcc7_std','delta_mfcc8_std','delta_mfcc9_std','delta_mfcc10_std','delta_mfcc11_std','delta_mfcc12_std','delta_mfcc13_std']]=pd.DataFrame(df.delta_mfcc_std.tolist(), index= df.index)

In [24]:
selected_cols=['video_id', 'start_time', 'mid_ts', 'label', 'average_zcr',
       'zcr_stddev', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean',
       'mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean',
       'mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std',
       'mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std',
       'mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std',
       'delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean',
       'delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean',
       'delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean',
       'delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean',
       'delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std',
       'delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std',
       'delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std',
       'delta_mfcc12_std', 'delta_mfcc13_std',
       'centroid_mean','centroid_std',
       'contrast_mean','contrast_std',
       'flatness_mean','flatness_std',
       'rolloff_mean','rolloff_std']
np.save('../resources/working_data/fullaudio_features.npy', df[selected_cols].to_numpy())

# Classify!

In [2]:
import os
import pandas as pd
import librosa
import soundfile as sf
import numpy as np
import math
import scipy.io.wavfile, scipy.signal
from scipy.spatial import distance
#from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

#from sklearn.naive_bayes import GaussianNB
#from sklearn.tree import DecisionTreeClassifier



cols=['video_id', 'ts', 'mid_ts', 'label', 'average_zcr',
       'zcr_stddev', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean',
       'mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean',
       'mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std',
       'mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std',
       'mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std',
       'delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean',
       'delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean',
       'delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean',
       'delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean',
       'delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std',
       'delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std',
       'delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std',
       'delta_mfcc12_std', 'delta_mfcc13_std',
       'centroid_mean','centroid_std',
       'contrast_mean','contrast_std',
       'flatness_mean','flatness_std',
       'rolloff_mean','rolloff_std']

d=np.load('../resources/working_data/fullaudio_features.npy',allow_pickle=True)
df = pd.DataFrame(d,columns=cols)

lut = pd.read_csv('../resources/dataset/lookup_new.csv')

## Undersampling data to even out class distribution

In [3]:
from imblearn.under_sampling import RandomUnderSampler
undersample = RandomUnderSampler(sampling_strategy='not minority',random_state=0)
from collections import Counter
X = df[cols].to_numpy()
y=df[['label']].to_numpy()

X_under, y_under = undersample.fit_resample(X, y)

undersampled_data = pd.DataFrame(X_under,columns=cols)
undersampled_data['label'] = y_under
undersampled_data

Unnamed: 0,video_id,ts,mid_ts,label,average_zcr,zcr_stddev,mfcc1_mean,mfcc2_mean,mfcc3_mean,mfcc4_mean,...,delta_mfcc12_std,delta_mfcc13_std,centroid_mean,centroid_std,contrast_mean,contrast_std,flatness_mean,flatness_std,rolloff_mean,rolloff_std
0,-2WqQY_xSSM,170.0,170.5,clean,0.064711,0.013141,-33.488647,182.43503,-68.516147,52.649062,...,1.252244,0.772238,2589.444663,289.475144,18.311937,11.541943,0.000369,0.000644,5137.525256,822.999841
1,FNdC_3LR2AI,219.0,219.5,clean,0.042048,0.011644,-48.645003,177.418657,-47.320168,61.437749,...,0.637714,0.58649,2252.670184,406.041184,17.230606,8.328512,0.000491,0.000447,4936.548693,990.689924
2,4600fGWcn9o,280.5,281.0,clean,0.052285,0.007339,-56.385804,205.49042,-56.256098,42.855143,...,0.427326,0.607686,2086.736319,120.325,18.614503,11.205558,0.000146,0.000165,4004.185749,321.599205
3,get0cXOsSXg,80.0,80.5,clean,0.060788,0.01555,-31.186549,149.904298,-44.826998,53.714973,...,1.235904,0.868223,3160.926718,606.770609,19.015291,10.009012,0.001047,0.001308,7013.636517,1573.32826
4,74nTzbgDGWM,121.5,122.0,clean,0.0959,0.033184,9.77477,126.285521,-42.140997,30.190494,...,0.904395,0.699799,3626.052662,794.687833,18.398823,11.184099,0.007947,0.006261,7274.015019,1734.575346
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3325,B7iIS91fMAc,117.0,117.5,no_vocals,0.067293,0.010294,39.146283,155.839,-56.087105,38.524538,...,0.555818,0.599987,2986.670758,330.790543,15.02568,6.168961,0.004636,0.002975,5983.012864,709.657769
3326,ainbICPRV8Y,25.5,26.0,no_vocals,0.028517,0.01303,-185.253985,190.164722,-23.935036,47.705691,...,1.266337,1.01635,1581.209737,376.576557,21.499275,10.728373,0.000061,0.000111,3118.84934,928.814896
3327,Bh_5ofa__pY,192.5,193.0,no_vocals,0.041021,0.015794,-208.23298,176.885573,-25.064032,51.67533,...,0.545833,0.553783,1755.389776,353.973399,22.834608,10.198527,0.00005,0.00009,3487.388874,846.340486
3328,C_voh9WFbsM,193.0,193.5,no_vocals,0.0219,0.007223,-178.220519,208.027771,-35.117866,15.273391,...,0.957075,0.948613,1458.968299,307.19258,19.224315,10.167267,0.000026,0.00007,2965.641837,854.055893


## Plot scatter plot of features

In [4]:
import plotly.express as px
fig = px.scatter_matrix(undersampled_data[['label', 'average_zcr',
       'zcr_stddev', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean',
       'mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean',
       'mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std',
       'mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std',
       'mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std',
       'delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean',
       'delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean',
       'delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean',
       'delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean',
       'delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std',
       'delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std',
       'delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std',
       'delta_mfcc12_std', 'delta_mfcc13_std','centroid_mean','centroid_std',
       'contrast_mean','contrast_std',
       'flatness_mean','flatness_std',
       'rolloff_mean','rolloff_std']],dimensions=["average_zcr", "zcr_stddev", "mfcc1_mean", "delta_mfcc1_mean","flatness_std","flatness_std"],color='label')
fig.show()

## Train-test split

In [5]:
from sklearn.model_selection import GroupShuffleSplit
train_inds, test_inds = next(GroupShuffleSplit(test_size=.2, n_splits=2, random_state = 0).split(lut, groups=lut['band_name']))

train = lut.iloc[train_inds]
test = lut.iloc[test_inds]

train_ids = train['video_id'].to_numpy()
test_ids = test['video_id'].to_numpy()

#df_final = df
df_final = undersampled_data
train = df_final[df_final.video_id.isin(train_ids)]
test = df_final[df_final.video_id.isin(test_ids)]

## Normalizing features and convert df to numpy array for input to classifier

In [6]:
cols = ['average_zcr',
       'zcr_stddev', 'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean',
       'mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', 'mfcc12_mean',
       'mfcc13_mean', 'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std',
       'mfcc5_std', 'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std',
       'mfcc10_std', 'mfcc11_std', 'mfcc12_std', 'mfcc13_std',
       'delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean',
       'delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean',
       'delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean',
       'delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean',
       'delta_mfcc13_mean', 'delta_mfcc1_std', 'delta_mfcc2_std',
       'delta_mfcc3_std', 'delta_mfcc4_std', 'delta_mfcc5_std',
       'delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std',
       'delta_mfcc12_std', 'delta_mfcc13_std','centroid_mean','centroid_std',
       'contrast_mean','contrast_std',
       'flatness_mean','flatness_std',
       'rolloff_mean','rolloff_std']

for col in cols:
    #df[col] = df[col]/max(np.abs(df[col]))
    mean = np.mean(train[col])
    std = np.std(train[col])
    df[col] = (df[col] - mean)/std # z-score normalization

train = df_final[df_final.video_id.isin(train_ids)]
test = df_final[df_final.video_id.isin(test_ids)]

# selected_cols = ['average_zcr','zcr_stddev','mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',\
#        'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', \
#        'mfcc8_mean','mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', \
#        'mfcc12_mean','mfcc13_mean', 'mfcc1_std', \
#        'mfcc2_std', 'mfcc3_std', 'mfcc4_std','mfcc5_std', \
#        'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std','mfcc10_std', \
#        'mfcc11_std', 'mfcc12_std', 'mfcc13_std','delta_mfcc1_mean', \
#        'delta_mfcc2_mean', 'delta_mfcc3_mean','delta_mfcc4_mean', \
#        'delta_mfcc5_mean', 'delta_mfcc6_mean','delta_mfcc7_mean', \
#        'delta_mfcc8_mean', 'delta_mfcc9_mean','delta_mfcc10_mean', \
#        'delta_mfcc11_mean', 'delta_mfcc12_mean','delta_mfcc13_mean',\ 
#        'delta_mfcc1_std', 'delta_mfcc2_std','delta_mfcc3_std', 'delta_mfcc4_std', \
#        'delta_mfcc5_std','delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',\
#        'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std','delta_mfcc12_std', \
#        'delta_mfcc13_std','centroid_mean','centroid_std',\
#        'contrast_mean','contrast_std',\
#        'flatness_mean','flatness_std',\
#        'rolloff_mean','rolloff_std']
selected_cols = ['mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',\
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', \
       'mfcc8_mean','mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', \
       'mfcc12_mean','mfcc13_mean', 'mfcc1_std', \
       'mfcc2_std', 'mfcc3_std', 'mfcc4_std','mfcc5_std', \
       'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std','mfcc10_std', \
       'mfcc11_std', 'mfcc12_std', 'mfcc13_std']
#        ,'centroid_mean','centroid_std',\
#         'contrast_mean','contrast_std',\
#         'flatness_mean','flatness_std',\
#         'rolloff_mean','rolloff_std']

X_train = train[selected_cols].to_numpy()
y_train=train[['label']].to_numpy()

X_test = test[selected_cols].to_numpy()
y_test = test[['label']].to_numpy()

In [7]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score


from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

# kNN Classifier

## Classification for full bandwidth input

In [57]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
k=4
KNN_model = KNeighborsClassifier(n_neighbors=k)
KNN_model.fit(X_train, y_train)
KNN_prediction = KNN_model.predict(X_test)
score=precision_score(y_test, KNN_prediction, average='macro')
cm = confusion_matrix(y_test, KNN_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

[[  4   4   0   8  60  26]
 [  7   9  10   0  73  14]
 [ 28   4   5   7 163  24]
 [ 49   5   1  26 183   4]
 [ 29  34  18   3  56  13]
 [ 28  22   8  19  30  76]]
0.20965138856600093



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().



## 3 class problem: sing, scream, nothing

### KNN Classify 3 class problem

In [38]:
y_train_3class = []
y_test_3class = []
for i in range(len(y_train)):
    if y_train[i] in ['midfry','lowfry','highfry','layered']:
        y_train_3class.append('scream')
    elif y_train[i] == 'clean':
        y_train_3class.append('sing')
    else:
        y_train_3class.append('no vocal')

for i in range(len(y_test)):
    if y_test[i] in ['midfry','lowfry','highfry','layered']:
        y_test_3class.append('scream')
    elif y_test[i] == 'clean':
        y_test_3class.append('sing')
    else:
        y_test_3class.append('no vocal')

y_train_3class = np.array(y_train_3class)
y_test_3class = np.array(y_test_3class)

In [39]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
k=5
KNN_model = KNeighborsClassifier(n_neighbors=k)
KNN_model.fit(X_train, y_train_3class)
KNN_prediction = KNN_model.predict(X_test)
score=precision_score(y_test_3class, KNN_prediction, average='macro')
cm = confusion_matrix(y_test_3class, KNN_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

[[ 89  82  12]
 [ 59 629  77]
 [ 43  59   0]]
0.4276172344235172


# SVM

## SVM for full signal

In [58]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
SVM_model = make_pipeline(StandardScaler(), SVC(gamma='auto',C=1.0, kernel='rbf', degree=3,random_state=None))
SVM_model.fit(X_train, y_train)
SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test, SVM_prediction, average='macro')
cm = confusion_matrix(y_test, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().



[[ 11   0   3   3  44  41]
 [  4  13   5   0  82   9]
 [ 44   8   4   2 159  14]
 [ 10   0  13   9 178  58]
 [ 26  28   4   1  77  17]
 [ 12   9   0   5  16 141]]
0.25948889017325344


## 3 class SVM

In [54]:
SVM_model = make_pipeline(StandardScaler(),\
    SVC(gamma='auto',C=1.0, kernel='rbf', degree=3,random_state=None)\
        )
SVM_model.fit(X_train, y_train_3class)
SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test_3class, SVM_prediction, average='macro')
cm = confusion_matrix(y_test_3class, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

[[125  46  12]
 [ 67 664  34]
 [ 56  38   8]]
0.5132936469906741


# Random Forest

In [59]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score

RF_model = RandomForestClassifier(n_estimators=100,criterion='gini',max_depth=None,\
    min_samples_split=2,min_samples_leaf=1,max_features='auto',max_leaf_nodes=None,class_weight='balanced',)
RF_model.fit(X_train, y_train)
RF_prediction = RF_model.predict(X_test)
score=precision_score(y_test, RF_prediction, average='macro')
cm = confusion_matrix(y_test, RF_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().



[[  6   1   0   1  34  60]
 [  3  12   1   0  92   5]
 [ 14  16   1   3 148  49]
 [  7   1   1  11 208  40]
 [ 27  29   4   2  78  13]
 [  9  14   0   5  22 133]]
0.24591729121668396


### RF 3 class problem

In [45]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score

RF_model = RandomForestClassifier(n_estimators=100,criterion='gini',max_depth=None,\
    min_samples_split=2,min_samples_leaf=1,max_features='auto',max_leaf_nodes=None,class_weight='balanced',)
RF_model.fit(X_train, y_train_3class)
RF_prediction = RF_model.predict(X_test)
score=precision_score(y_test_3class, RF_prediction, average='macro')
cm = confusion_matrix(y_test_3class, RF_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

[[ 85  96   2]
 [ 25 739   1]
 [ 18  80   4]]
0.6810471148842051


# Experiment 1: Only 13 MFCCs for 6 class problem

## Results:
Classifier : Accuracy
- kNN : 0.20965138856600093
- SVM : 0.25948889017325344
- RF : 0.26579033320585255

In [63]:
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

selected_cols = ['mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',\
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', \
       'mfcc8_mean','mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', \
       'mfcc12_mean','mfcc13_mean', 'mfcc1_std', \
       'mfcc2_std', 'mfcc3_std', 'mfcc4_std','mfcc5_std', \
       'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std','mfcc10_std', \
       'mfcc11_std', 'mfcc12_std', 'mfcc13_std']
#        ,'centroid_mean','centroid_std',\
#         'contrast_mean','contrast_std',\
#         'flatness_mean','flatness_std',\
#         'rolloff_mean','rolloff_std']
X_train = train[selected_cols].to_numpy()
y_train=train[['label']].to_numpy()

X_test = test[selected_cols].to_numpy()
y_test = test[['label']].to_numpy()

k=4
KNN_model = KNeighborsClassifier(n_neighbors=k)
SVM_model = make_pipeline(StandardScaler(), SVC(gamma='auto',C=1.0, kernel='rbf', degree=3,random_state=None))
RF_model = RandomForestClassifier(n_estimators=100,criterion='gini',max_depth=None,\
    min_samples_split=2,min_samples_leaf=1,max_features='auto',max_leaf_nodes=None,class_weight='balanced')

KNN_model.fit(X_train, y_train)
SVM_model.fit(X_train, y_train)
RF_model.fit(X_train, y_train)

print("-------------------------------------------------------------------------------")
print("            KNN with k=4              ")
print("-------------------------------------------------------------------------------")

KNN_prediction = KNN_model.predict(X_test)
score=precision_score(y_test, KNN_prediction, average='macro')
cm = confusion_matrix(y_test, KNN_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            SVM with C=1, kernel = rbf, degree = 3              ")
print("-------------------------------------------------------------------------------")

SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test, SVM_prediction, average='macro')
cm = confusion_matrix(y_test, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            RF with n_estimators = 100, criterion=gini, \nmax_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',\nmax_leaf_nodes=None,class_weight='balanced'              ")
print("-------------------------------------------------------------------------------")

RF_prediction = RF_model.predict(X_test)
score=precision_score(y_test, RF_prediction, average='macro')
cm = confusion_matrix(y_test, RF_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

-------------------------------------------------------------------------------
            KNN with k=4              
-------------------------------------------------------------------------------
[[  4   4   0   8  60  26]
 [  7   9  10   0  73  14]
 [ 28   4   5   7 163  24]
 [ 49   5   1  26 183   4]
 [ 29  34  18   3  56  13]
 [ 28  22   8  19  30  76]]
0.20965138856600093
-------------------------------------------------------------------------------
            SVM with C=1, kernel = rbf, degree = 3              
-------------------------------------------------------------------------------
[[ 11   0   3   3  44  41]
 [  4  13   5   0  82   9]
 [ 44   8   4   2 159  14]
 [ 10   0  13   9 178  58]
 [ 26  28   4   1  77  17]
 [ 12   9   0   5  16 141]]
0.25948889017325344
-------------------------------------------------------------------------------
            RF with n_estimators = 100, criterion=gini, 
max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto

# Experiment 2: 13 MFCCs + Delta MFCCs

## Results:
Classifier : Accuracy
- kNN : 0.21160954793326447
- SVM : 0.320572916667593
- RF : 0.2990929498490106

In [65]:
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

selected_cols = ['mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',\
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', \
       'mfcc8_mean','mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', \
       'mfcc12_mean','mfcc13_mean', 'mfcc1_std', \
       'mfcc2_std', 'mfcc3_std', 'mfcc4_std','mfcc5_std', \
       'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std','mfcc10_std', \
       'mfcc11_std', 'mfcc12_std', 'mfcc13_std','delta_mfcc1_mean', \
       'delta_mfcc2_mean', 'delta_mfcc3_mean','delta_mfcc4_mean', \
       'delta_mfcc5_mean', 'delta_mfcc6_mean','delta_mfcc7_mean', \
       'delta_mfcc8_mean', 'delta_mfcc9_mean','delta_mfcc10_mean', \
       'delta_mfcc11_mean', 'delta_mfcc12_mean','delta_mfcc13_mean',\
       'delta_mfcc1_std', 'delta_mfcc2_std','delta_mfcc3_std', 'delta_mfcc4_std', \
       'delta_mfcc5_std','delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',\
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std','delta_mfcc12_std', \
       'delta_mfcc13_std']
X_train = train[selected_cols].to_numpy()
y_train=train[['label']].to_numpy()

X_test = test[selected_cols].to_numpy()
y_test = test[['label']].to_numpy()

k=4
KNN_model = KNeighborsClassifier(n_neighbors=k)
SVM_model = make_pipeline(StandardScaler(), SVC(gamma='auto',C=1.0, kernel='rbf', degree=3,random_state=None))
RF_model = RandomForestClassifier(n_estimators=100,criterion='gini',max_depth=None,\
    min_samples_split=2,min_samples_leaf=1,max_features='auto',max_leaf_nodes=None,class_weight='balanced')

KNN_model.fit(X_train, y_train)
SVM_model.fit(X_train, y_train)
RF_model.fit(X_train, y_train)

print("-------------------------------------------------------------------------------")
print("            KNN with k=4              ")
print("-------------------------------------------------------------------------------")

KNN_prediction = KNN_model.predict(X_test)
score=precision_score(y_test, KNN_prediction, average='macro')
cm = confusion_matrix(y_test, KNN_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            SVM with C=1, kernel = rbf, degree = 3              ")
print("-------------------------------------------------------------------------------")

SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test, SVM_prediction, average='macro')
cm = confusion_matrix(y_test, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            RF with n_estimators = 100, criterion=gini, \nmax_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',\nmax_leaf_nodes=None,class_weight='balanced'              ")
print("-------------------------------------------------------------------------------")

RF_prediction = RF_model.predict(X_test)
score=precision_score(y_test, RF_prediction, average='macro')
cm = confusion_matrix(y_test, RF_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

-------------------------------------------------------------------------------
            KNN with k=4              
-------------------------------------------------------------------------------
[[  4   4   0   8  60  26]
 [  7   9  10   0  73  14]
 [ 26   5   6   7 162  25]
 [ 49   5   1  25 184   4]
 [ 29  35  17   3  54  15]
 [ 27  23   9  17  30  77]]
0.21160954793326447
-------------------------------------------------------------------------------
            SVM with C=1, kernel = rbf, degree = 3              
-------------------------------------------------------------------------------
[[ 14   1   4   5  24  54]
 [ 14  23  11   1  58   6]
 [ 80   9  35   2  85  20]
 [ 19   8  13  17 166  45]
 [ 34  30   8   3  63  15]
 [ 12  15   1  10  12 133]]
0.320572916667593
-------------------------------------------------------------------------------
            RF with n_estimators = 100, criterion=gini, 
max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',

# Experiment 3: Librosa Features only

## Results:
Classifier : Accuracy
- kNN : 0.14598487359185544
- SVM : 0.20628745281990477
- RF : 0.3013805675780411

In [66]:
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

selected_cols = ['average_zcr','zcr_stddev',\
        'centroid_mean','centroid_std',\
       'contrast_mean','contrast_std',\
       'flatness_mean','flatness_std',\
       'rolloff_mean','rolloff_std']
X_train = train[selected_cols].to_numpy()
y_train=train[['label']].to_numpy()

X_test = test[selected_cols].to_numpy()
y_test = test[['label']].to_numpy()

k=4
KNN_model = KNeighborsClassifier(n_neighbors=k)
SVM_model = make_pipeline(StandardScaler(), SVC(gamma='auto',C=1.0, kernel='rbf', degree=3,random_state=None))
RF_model = RandomForestClassifier(n_estimators=100,criterion='gini',max_depth=None,\
    min_samples_split=2,min_samples_leaf=1,max_features='auto',max_leaf_nodes=None,class_weight='balanced')

KNN_model.fit(X_train, y_train)
SVM_model.fit(X_train, y_train)
RF_model.fit(X_train, y_train)

print("-------------------------------------------------------------------------------")
print("            KNN with k=4              ")
print("-------------------------------------------------------------------------------")

KNN_prediction = KNN_model.predict(X_test)
score=precision_score(y_test, KNN_prediction, average='macro')
cm = confusion_matrix(y_test, KNN_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            SVM with C=1, kernel = rbf, degree = 3              ")
print("-------------------------------------------------------------------------------")

SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test, SVM_prediction, average='macro')
cm = confusion_matrix(y_test, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            RF with n_estimators = 100, criterion=gini, \nmax_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',\nmax_leaf_nodes=None,class_weight='balanced'              ")
print("-------------------------------------------------------------------------------")

RF_prediction = RF_model.predict(X_test)
score=precision_score(y_test, RF_prediction, average='macro')
cm = confusion_matrix(y_test, RF_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

-------------------------------------------------------------------------------
            KNN with k=4              
-------------------------------------------------------------------------------
[[23 39  0 18 17  5]
 [12 46 17  3 22 13]
 [33 75 15 27 67 14]
 [80 56 38  3 66 25]
 [39 39 23 22 22  8]
 [37 51 22 16 21 36]]
0.14598487359185544
-------------------------------------------------------------------------------
            SVM with C=1, kernel = rbf, degree = 3              
-------------------------------------------------------------------------------
[[ 41  13   1   7  34   6]
 [ 13  17  13   0  61   9]
 [ 40  21   7  17 116  30]
 [ 70  74  23   6  91   4]
 [  8  14  20  12  96   3]
 [ 41  16  13  10  48  55]]
0.20628745281990477
-------------------------------------------------------------------------------
            RF with n_estimators = 100, criterion=gini, 
max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',
max_leaf_nodes=None,class_weight=

# Experiment 4: All Features

## Results:
Classifier : Accuracy
- kNN : 0.15090983576491654
- SVM : 0.35187628931391995
- RF : 0.2962571928783088

In [68]:
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

selected_cols = ['average_zcr','zcr_stddev','mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',\
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', \
       'mfcc8_mean','mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', \
       'mfcc12_mean','mfcc13_mean', 'mfcc1_std', \
       'mfcc2_std', 'mfcc3_std', 'mfcc4_std','mfcc5_std', \
       'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std','mfcc10_std', \
       'mfcc11_std', 'mfcc12_std', 'mfcc13_std','delta_mfcc1_mean', \
       'delta_mfcc2_mean', 'delta_mfcc3_mean','delta_mfcc4_mean', \
       'delta_mfcc5_mean', 'delta_mfcc6_mean','delta_mfcc7_mean', \
       'delta_mfcc8_mean', 'delta_mfcc9_mean','delta_mfcc10_mean', \
       'delta_mfcc11_mean', 'delta_mfcc12_mean','delta_mfcc13_mean',\
       'delta_mfcc1_std', 'delta_mfcc2_std','delta_mfcc3_std', 'delta_mfcc4_std', \
       'delta_mfcc5_std','delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',\
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std','delta_mfcc12_std', \
       'delta_mfcc13_std','centroid_mean','centroid_std',\
       'contrast_mean','contrast_std',\
       'flatness_mean','flatness_std',\
       'rolloff_mean','rolloff_std']
X_train = train[selected_cols].to_numpy()
y_train=train[['label']].to_numpy()

X_test = test[selected_cols].to_numpy()
y_test = test[['label']].to_numpy()

k=4
KNN_model = KNeighborsClassifier(n_neighbors=k)
SVM_model = make_pipeline(StandardScaler(), SVC(gamma='auto',C=1.0, kernel='rbf', degree=3,random_state=None))


KNN_model.fit(X_train, y_train)
SVM_model.fit(X_train, y_train)


print("-------------------------------------------------------------------------------")
print("            KNN with k=4              ")
print("-------------------------------------------------------------------------------")

KNN_prediction = KNN_model.predict(X_test)
score=precision_score(y_test, KNN_prediction, average='macro')
cm = confusion_matrix(y_test, KNN_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            SVM with C=1, kernel = rbf, degree = 3              ")
print("-------------------------------------------------------------------------------")

SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test, SVM_prediction, average='macro')
cm = confusion_matrix(y_test, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            RF with n_estimators = 100, criterion=gini, \nmax_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',\nmax_leaf_nodes=None,class_weight='balanced'              ")
print("-------------------------------------------------------------------------------")

RF_model = RandomForestClassifier(n_estimators=100,criterion='gini',max_depth=None,\
    min_samples_split=2,min_samples_leaf=1,max_features='auto',max_leaf_nodes=None,class_weight='balanced')
RF_model.fit(X_train, y_train)
RF_prediction = RF_model.predict(X_test)
score=precision_score(y_test, RF_prediction, average='macro')
cm = confusion_matrix(y_test, RF_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

-------------------------------------------------------------------------------
            KNN with k=4              
-------------------------------------------------------------------------------
[[25 33  1 19 19  5]
 [13 43 18  3 24 12]
 [36 77 14 30 64 10]
 [78 57 42  4 63 24]
 [35 35 23 26 26  8]
 [37 55 23 13 20 35]]
0.15090983576491654
-------------------------------------------------------------------------------
            SVM with C=1, kernel = rbf, degree = 3              
-------------------------------------------------------------------------------
[[ 22   2   1   3  22  52]
 [ 23  23  17   3  44   3]
 [ 91  22  34   6  63  15]
 [ 79  26  29  30  66  38]
 [ 15  22   6   5  94  11]
 [ 21   9   2   4  16 131]]
0.35187628931391995
-------------------------------------------------------------------------------
            RF with n_estimators = 100, criterion=gini, 
max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',
max_leaf_nodes=None,class_weight=

# Experiment 5: Tweaking SVM parameters

## Results:

Confusion Matrix:

[[ 47 - 1 - 0 - 1 - 39 - 14]  
[ 23 - 47 - 0 - 6 - 37 - 0]  
[106 - 56 - 2 - 3 - 62 - 2]  
[ 96 - 59 - 0 - 49 - 63 - 1]  
[ 47 - 22 - 1 - 0 - 82 - 1]  
[ 77 - 17 - 0 - 6 - 27 - 56]]  

Macro Accuracy:  
0.4655243070525728

In [23]:
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

selected_cols = ['average_zcr','zcr_stddev','mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',\
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', \
       'mfcc8_mean','mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', \
       'mfcc12_mean','mfcc13_mean', 'mfcc1_std', \
       'mfcc2_std', 'mfcc3_std', 'mfcc4_std','mfcc5_std', \
       'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std','mfcc10_std', \
       'mfcc11_std', 'mfcc12_std', 'mfcc13_std','delta_mfcc1_mean', \
       'delta_mfcc2_mean', 'delta_mfcc3_mean','delta_mfcc4_mean', \
       'delta_mfcc5_mean', 'delta_mfcc6_mean','delta_mfcc7_mean', \
       'delta_mfcc8_mean', 'delta_mfcc9_mean','delta_mfcc10_mean', \
       'delta_mfcc11_mean', 'delta_mfcc12_mean','delta_mfcc13_mean',\
       'delta_mfcc1_std', 'delta_mfcc2_std','delta_mfcc3_std', 'delta_mfcc4_std', \
       'delta_mfcc5_std','delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',\
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std','delta_mfcc12_std', \
       'delta_mfcc13_std','centroid_mean','centroid_std',\
       'contrast_mean','contrast_std',\
       'flatness_mean','flatness_std',\
       'rolloff_mean','rolloff_std']
X_train = train[selected_cols].to_numpy()
y_train=train[['label']].to_numpy()

X_test = test[selected_cols].to_numpy()
y_test = test[['label']].to_numpy()


print("-------------------------------------------------------------------------------")
print("            SVM with C=1, kernel = rbf, degree = 3              ")
print("-------------------------------------------------------------------------------")
SVM_model = make_pipeline(StandardScaler(), SVC(gamma='auto',C=1.0, kernel='rbf', degree=3,random_state=0))
SVM_model.fit(X_train, y_train)
SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test, SVM_prediction, average='macro')
cm = confusion_matrix(y_test, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
#print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            SVM with C=0.855, kernel = linear, degree = 3              ")
print("-------------------------------------------------------------------------------")
SVM_model = make_pipeline(StandardScaler(), SVC(gamma='auto',C=0.855, kernel='linear', degree=3,random_state=0))
SVM_model.fit(X_train, y_train)
SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test, SVM_prediction, average='macro')
cm = confusion_matrix(y_test, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
#print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            SVM with C=0.855, kernel = poly, degree = 3              ")
print("-------------------------------------------------------------------------------")
SVM_model = make_pipeline(StandardScaler(), SVC(gamma='auto',C=0.855, kernel='poly', degree=3,random_state=0))
SVM_model.fit(X_train, y_train)
SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test, SVM_prediction, average='macro')
cm = confusion_matrix(y_test, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
#print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            SVM with C=0.855, kernel = sigmoid           ")
print("-------------------------------------------------------------------------------")
SVM_model = make_pipeline(StandardScaler(), SVC(gamma='auto',C=0.855, kernel='sigmoid', random_state=0))
SVM_model.fit(X_train, y_train)
SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test, SVM_prediction, average='macro')
cm = confusion_matrix(y_test, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
#print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            TEST - SVM with C=0.855, kernel = poly, degree = 3, gamma=scale              ")
print("-------------------------------------------------------------------------------")
SVM_model = make_pipeline(StandardScaler(), SVC(gamma='auto',C=0.855, kernel='poly', degree=3,random_state=0,decision_function_shape='ovr',shrinking=False))
SVM_model.fit(X_train, y_train)
SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test, SVM_prediction, average='macro')
cm = confusion_matrix(y_test, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)





-------------------------------------------------------------------------------
            SVM with C=1, kernel = rbf, degree = 3              
-------------------------------------------------------------------------------
0.35187628931391995
-------------------------------------------------------------------------------
            SVM with C=0.855, kernel = linear, degree = 3              
-------------------------------------------------------------------------------
0.36347318198151474
-------------------------------------------------------------------------------
            SVM with C=0.855, kernel = poly, degree = 3              
-------------------------------------------------------------------------------
0.4655243070525728
-------------------------------------------------------------------------------
            SVM with C=0.855, kernel = sigmoid           
-------------------------------------------------------------------------------
0.3429896180361958
-----------------

# Experiment 6: Tweaking RF

In [22]:
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

selected_cols = ['average_zcr','zcr_stddev','mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',\
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', \
       'mfcc8_mean','mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', \
       'mfcc12_mean','mfcc13_mean', 'mfcc1_std', \
       'mfcc2_std', 'mfcc3_std', 'mfcc4_std','mfcc5_std', \
       'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std','mfcc10_std', \
       'mfcc11_std', 'mfcc12_std', 'mfcc13_std','delta_mfcc1_mean', \
       'delta_mfcc2_mean', 'delta_mfcc3_mean','delta_mfcc4_mean', \
       'delta_mfcc5_mean', 'delta_mfcc6_mean','delta_mfcc7_mean', \
       'delta_mfcc8_mean', 'delta_mfcc9_mean','delta_mfcc10_mean', \
       'delta_mfcc11_mean', 'delta_mfcc12_mean','delta_mfcc13_mean',\
       'delta_mfcc1_std', 'delta_mfcc2_std','delta_mfcc3_std', 'delta_mfcc4_std', \
       'delta_mfcc5_std','delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',\
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std','delta_mfcc12_std', \
       'delta_mfcc13_std','centroid_mean','centroid_std',\
       'contrast_mean','contrast_std',\
       'flatness_mean','flatness_std',\
       'rolloff_mean','rolloff_std']
X_train = train[selected_cols].to_numpy()
y_train=train[['label']].to_numpy()

X_test = test[selected_cols].to_numpy()
y_test = test[['label']].to_numpy()


print("-------------------------------------------------------------------------------")
print("            RF with n_estimators = 100, criterion=gini, max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',\nmax_leaf_nodes=None,class_weight='balanced'              ")
print("-------------------------------------------------------------------------------")

RF_model = RandomForestClassifier(n_estimators=100,criterion='gini',max_depth=None,\
    min_samples_split=2,min_samples_leaf=1,max_features='auto',max_leaf_nodes=None,class_weight='balanced',random_state=0)
RF_model.fit(X_train, y_train)
RF_prediction = RF_model.predict(X_test)
score=precision_score(y_test, RF_prediction, average='macro')
cm = confusion_matrix(y_test, RF_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(score)

print("-------------------------------------------------------------------------------")
print("            RF with n_estimators = 90, criterion=gini, max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',\nmax_leaf_nodes=None,class_weight='balanced'              ")
print("-------------------------------------------------------------------------------")

RF_model = RandomForestClassifier(n_estimators=90,criterion='gini',max_depth=None,\
    min_samples_split=2,min_samples_leaf=1,max_features='auto',max_leaf_nodes=None,class_weight='balanced',random_state=0)
RF_model.fit(X_train, y_train)
RF_prediction = RF_model.predict(X_test)
score=precision_score(y_test, RF_prediction, average='macro')
cm = confusion_matrix(y_test, RF_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(score)

print("-------------------------------------------------------------------------------")
print("            RF with n_estimators = 90, criterion=entropy, max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',\nmax_leaf_nodes=None,class_weight='balanced'              ")
print("-------------------------------------------------------------------------------")

RF_model = RandomForestClassifier(n_estimators=90,criterion='entropy',max_depth=None,\
    min_samples_split=2,min_samples_leaf=1,max_features='auto',max_leaf_nodes=None,class_weight='balanced',random_state=0)
RF_model.fit(X_train, y_train)
RF_prediction = RF_model.predict(X_test)
score=precision_score(y_test, RF_prediction, average='macro')
cm = confusion_matrix(y_test, RF_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(score)

print("-------------------------------------------------------------------------------")
print("            RF with n_estimators = 600, criterion=gini, max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',\nmax_leaf_nodes=None,class_weight='balanced'              ")
print("-------------------------------------------------------------------------------")

RF_model = RandomForestClassifier(n_estimators=800,criterion='gini',max_depth=None,\
    min_samples_split=2,min_samples_leaf=1,max_features='auto',max_leaf_nodes=None,class_weight='balanced',random_state=0)
RF_model.fit(X_train, y_train)
RF_prediction = RF_model.predict(X_test)
score=precision_score(y_test, RF_prediction, average='macro')
cm = confusion_matrix(y_test, RF_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(score)

-------------------------------------------------------------------------------
            RF with n_estimators = 100, criterion=gini, max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',
max_leaf_nodes=None,class_weight='balanced'              
-------------------------------------------------------------------------------
0.33544940850316224
-------------------------------------------------------------------------------
            RF with n_estimators = 90, criterion=gini, max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',
max_leaf_nodes=None,class_weight='balanced'              
-------------------------------------------------------------------------------
0.3475368085074988
-------------------------------------------------------------------------------
            RF with n_estimators = 90, criterion=entropy, max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',
max_leaf_nodes=None,class_weight='balanced'     

# Experiments with Optimized Classifiers

Done using hyperparameters optimized for the entire feature-set

## All Features

In [23]:
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

selected_cols = ['average_zcr','zcr_stddev','mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',\
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', \
       'mfcc8_mean','mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', \
       'mfcc12_mean','mfcc13_mean', 'mfcc1_std', \
       'mfcc2_std', 'mfcc3_std', 'mfcc4_std','mfcc5_std', \
       'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std','mfcc10_std', \
       'mfcc11_std', 'mfcc12_std', 'mfcc13_std','delta_mfcc1_mean', \
       'delta_mfcc2_mean', 'delta_mfcc3_mean','delta_mfcc4_mean', \
       'delta_mfcc5_mean', 'delta_mfcc6_mean','delta_mfcc7_mean', \
       'delta_mfcc8_mean', 'delta_mfcc9_mean','delta_mfcc10_mean', \
       'delta_mfcc11_mean', 'delta_mfcc12_mean','delta_mfcc13_mean',\
       'delta_mfcc1_std', 'delta_mfcc2_std','delta_mfcc3_std', 'delta_mfcc4_std', \
       'delta_mfcc5_std','delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',\
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std','delta_mfcc12_std', \
       'delta_mfcc13_std','centroid_mean','centroid_std',\
       'contrast_mean','contrast_std',\
       'flatness_mean','flatness_std',\
       'rolloff_mean','rolloff_std']
X_train = train[selected_cols].to_numpy()
y_train=train[['label']].to_numpy()

X_test = test[selected_cols].to_numpy()
y_test = test[['label']].to_numpy()




print("-------------------------------------------------------------------------------")
print("            KNN with k=4              ")
print("-------------------------------------------------------------------------------")
k=4
KNN_model = KNeighborsClassifier(n_neighbors=k)
KNN_model.fit(X_train, y_train)

KNN_prediction = KNN_model.predict(X_test)
score=precision_score(y_test, KNN_prediction, average='macro')
cm = confusion_matrix(y_test, KNN_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            SVM with C=1, kernel = rbf, degree = 3              ")
print("-------------------------------------------------------------------------------")

SVM_model = make_pipeline(StandardScaler(), SVC(gamma='auto',C=1.0, kernel='rbf', degree=3,random_state=None))
SVM_model.fit(X_train, y_train)

SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test, SVM_prediction, average='macro')
cm = confusion_matrix(y_test, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            RF with n_estimators = 90, criterion=gini, max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',\nmax_leaf_nodes=None,class_weight='balanced'              ")
print("-------------------------------------------------------------------------------")

RF_model = RandomForestClassifier(n_estimators=90,criterion='gini',max_depth=None,\
    min_samples_split=2,min_samples_leaf=1,max_features='auto',max_leaf_nodes=None,class_weight='balanced',random_state=0)
RF_model.fit(X_train, y_train)
RF_prediction = RF_model.predict(X_test)
score=precision_score(y_test, RF_prediction, average='macro')
cm = confusion_matrix(y_test, RF_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

-------------------------------------------------------------------------------
            KNN with k=4              
-------------------------------------------------------------------------------
[[25 33  1 19 19  5]
 [13 43 18  3 24 12]
 [36 77 14 30 64 10]
 [78 57 42  4 63 24]
 [35 35 23 26 26  8]
 [37 55 23 13 20 35]]
0.15090983576491654
-------------------------------------------------------------------------------
            SVM with C=1, kernel = rbf, degree = 3              
-------------------------------------------------------------------------------
[[ 22   2   1   3  22  52]
 [ 23  23  17   3  44   3]
 [ 91  22  34   6  63  15]
 [ 79  26  29  30  66  38]
 [ 15  22   6   5  94  11]
 [ 21   9   2   4  16 131]]
0.35187628931391995
-------------------------------------------------------------------------------
            RF with n_estimators = 90, criterion=gini, max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',
max_leaf_nodes=None,class_weight='b

## MFCCs + Delta MFCCs Only

In [24]:
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

selected_cols = ['mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean',\
       'mfcc4_mean', 'mfcc5_mean', 'mfcc6_mean', 'mfcc7_mean', \
       'mfcc8_mean','mfcc9_mean', 'mfcc10_mean', 'mfcc11_mean', \
       'mfcc12_mean','mfcc13_mean', 'mfcc1_std', \
       'mfcc2_std', 'mfcc3_std', 'mfcc4_std','mfcc5_std', \
       'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std','mfcc10_std', \
       'mfcc11_std', 'mfcc12_std', 'mfcc13_std','delta_mfcc1_mean', \
       'delta_mfcc2_mean', 'delta_mfcc3_mean','delta_mfcc4_mean', \
       'delta_mfcc5_mean', 'delta_mfcc6_mean','delta_mfcc7_mean', \
       'delta_mfcc8_mean', 'delta_mfcc9_mean','delta_mfcc10_mean', \
       'delta_mfcc11_mean', 'delta_mfcc12_mean','delta_mfcc13_mean',\
       'delta_mfcc1_std', 'delta_mfcc2_std','delta_mfcc3_std', 'delta_mfcc4_std', \
       'delta_mfcc5_std','delta_mfcc6_std', 'delta_mfcc7_std', 'delta_mfcc8_std',\
       'delta_mfcc9_std', 'delta_mfcc10_std', 'delta_mfcc11_std','delta_mfcc12_std', \
       'delta_mfcc13_std']
X_train = train[selected_cols].to_numpy()
y_train=train[['label']].to_numpy()

X_test = test[selected_cols].to_numpy()
y_test = test[['label']].to_numpy()

k=4
KNN_model = KNeighborsClassifier(n_neighbors=k)
SVM_model = make_pipeline(StandardScaler(), SVC(gamma='auto',C=1.0, kernel='rbf', degree=3,random_state=None))


KNN_model.fit(X_train, y_train)
SVM_model.fit(X_train, y_train)


print("-------------------------------------------------------------------------------")
print("            KNN with k=4              ")
print("-------------------------------------------------------------------------------")

KNN_prediction = KNN_model.predict(X_test)
score=precision_score(y_test, KNN_prediction, average='macro')
cm = confusion_matrix(y_test, KNN_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            SVM with C=1, kernel = rbf, degree = 3              ")
print("-------------------------------------------------------------------------------")

SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test, SVM_prediction, average='macro')
cm = confusion_matrix(y_test, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            RF with n_estimators = 90, criterion=gini, max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',\nmax_leaf_nodes=None,class_weight='balanced'              ")
print("-------------------------------------------------------------------------------")

RF_model = RandomForestClassifier(n_estimators=90,criterion='gini',max_depth=None,\
    min_samples_split=2,min_samples_leaf=1,max_features='auto',max_leaf_nodes=None,class_weight='balanced',random_state=0)
RF_model.fit(X_train, y_train)
RF_prediction = RF_model.predict(X_test)
score=precision_score(y_test, RF_prediction, average='macro')
cm = confusion_matrix(y_test, RF_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

-------------------------------------------------------------------------------
            KNN with k=4              
-------------------------------------------------------------------------------
[[  4   4   0   8  60  26]
 [  7   9  10   0  73  14]
 [ 26   5   6   7 162  25]
 [ 49   5   1  25 184   4]
 [ 29  35  17   3  54  15]
 [ 27  23   9  17  30  77]]
0.21160954793326447
-------------------------------------------------------------------------------
            SVM with C=1, kernel = rbf, degree = 3              
-------------------------------------------------------------------------------
[[ 14   1   4   5  24  54]
 [ 14  23  11   1  58   6]
 [ 80   9  35   2  85  20]
 [ 19   8  13  17 166  45]
 [ 34  30   8   3  63  15]
 [ 12  15   1  10  12 133]]
0.320572916667593
-------------------------------------------------------------------------------
            RF with n_estimators = 90, criterion=gini, max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',
m

## Centroid, ZCR, Contrast, Roll-off, Flatness

In [25]:
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

selected_cols = ['average_zcr','zcr_stddev',\
        'centroid_mean','centroid_std',\
        'contrast_mean','contrast_std',\
        'flatness_mean','flatness_std',\
        'rolloff_mean','rolloff_std']
X_train = train[selected_cols].to_numpy()
y_train=train[['label']].to_numpy()

X_test = test[selected_cols].to_numpy()
y_test = test[['label']].to_numpy()

k=4
KNN_model = KNeighborsClassifier(n_neighbors=k)
SVM_model = make_pipeline(StandardScaler(), SVC(gamma='auto',C=1.0, kernel='rbf', degree=3,random_state=None))


KNN_model.fit(X_train, y_train)
SVM_model.fit(X_train, y_train)


print("-------------------------------------------------------------------------------")
print("            KNN with k=4              ")
print("-------------------------------------------------------------------------------")

KNN_prediction = KNN_model.predict(X_test)
score=precision_score(y_test, KNN_prediction, average='macro')
cm = confusion_matrix(y_test, KNN_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            SVM with C=1, kernel = rbf, degree = 3              ")
print("-------------------------------------------------------------------------------")

SVM_prediction = SVM_model.predict(X_test)
score=precision_score(y_test, SVM_prediction, average='macro')
cm = confusion_matrix(y_test, SVM_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

print("-------------------------------------------------------------------------------")
print("            RF with n_estimators = 90, criterion=gini, max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',\nmax_leaf_nodes=None,class_weight='balanced'              ")
print("-------------------------------------------------------------------------------")

RF_model = RandomForestClassifier(n_estimators=90,criterion='gini',max_depth=None,\
    min_samples_split=2,min_samples_leaf=1,max_features='auto',max_leaf_nodes=None,class_weight='balanced',random_state=0)
RF_model.fit(X_train, y_train)
RF_prediction = RF_model.predict(X_test)
score=precision_score(y_test, RF_prediction, average='macro')
cm = confusion_matrix(y_test, RF_prediction) # clean, highfry, layered, lowfry, midfry, no vocals
print(cm)
print(score)

-------------------------------------------------------------------------------
            KNN with k=4              
-------------------------------------------------------------------------------
[[23 39  0 18 17  5]
 [12 46 17  3 22 13]
 [33 75 15 27 67 14]
 [80 56 38  3 66 25]
 [39 39 23 22 22  8]
 [37 51 22 16 21 36]]
0.14598487359185544
-------------------------------------------------------------------------------
            SVM with C=1, kernel = rbf, degree = 3              
-------------------------------------------------------------------------------
[[ 41  13   1   7  34   6]
 [ 13  17  13   0  61   9]
 [ 40  21   7  17 116  30]
 [ 70  74  23   6  91   4]
 [  8  14  20  12  96   3]
 [ 41  16  13  10  48  55]]
0.20628745281990477
-------------------------------------------------------------------------------
            RF with n_estimators = 90, criterion=gini, max_depth=None, min_samples_split=2,min_samples_leaf=1,max_features='auto',
max_leaf_nodes=None,class_weight='b