In [93]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

from pathlib import Path
import librosa
import scipy
import IPython.display

In [94]:
# Root folder that contains the "audio" directory ("" = current directory).
data_path = ""

# Upper bound, in seconds, on how much of each file is read.
CLIP_SECONDS = 2

KICK_FILES = [
    "RGRS_KICK_HDFL_HT_01.wav",
    "RGRS_KICK_HDFL_HT_02.wav",
    "RGRS_KICK_HDFL_HT_03.wav",
    "RGRS_KICK_SFFL_HT_01.wav",
    "RGRS_KICK_SFFL_HT_02.wav",
]

SNARE_FILES = [
    "L400_SNR_DCFL_HT_01.wav",
    "L400_SNR_DCFL_HT_02.wav",
    "L400_SNR_DCFL_HT_03.wav",
    "L400_SNR_DEFL_HT_01.wav",
    "L400_SNR_DEFL_HT_02.wav",
    "L400_SNR_DEFL_HT_03.wav",
    "L400_SNR_LCFL_HT_01.wav",
    "L400_SNR_LCFL_HT_02.wav",
    "L400_SNR_LCFL_HT_03.wav",
    "L400_SNR_LEFL_HT_01.wav",
    "L400_SNR_LEFL_HT_02.wav",
]


def load_samples(folder, file_names, duration=CLIP_SECONDS):
    """Load audio clips from ``<data_path>/audio/<folder>``.

    Parameters
    ----------
    folder : str
        Sub-directory of ``audio`` holding the clips (e.g. ``"Kick"``).
    file_names : sequence of str
        File names to load, in the order they should appear in the result.
    duration : float
        Maximum number of seconds read from each file. ``librosa.load``
        only truncates; clips shorter than this are NOT padded.

    Returns
    -------
    np.ndarray
        Object array of shape ``(len(file_names), 2)``; row ``x`` holds
        ``[signal, sample_rate]`` for clip ``x``.
    """
    # Build paths with pathlib instead of backslash literals: sequences such
    # as "\K" or "\S" are invalid string escapes and only work on Windows.
    sample_dir = Path(data_path) / "audio" / folder
    loaded = [librosa.load(str(sample_dir / name), duration=duration) for name in file_names]

    # Fill an object array explicitly: np.array() on (signal, rate) pairs is
    # ragged and raises ValueError on recent NumPy unless dtype=object is used.
    samples = np.empty((len(loaded), 2), dtype=object)
    for row, (signal, rate) in enumerate(loaded):
        samples[row, 0] = signal
        samples[row, 1] = rate
    return samples


kickSamples = load_samples("Kick", KICK_FILES)
snareSamples = load_samples("Snare", SNARE_FILES)

# Layout of the loaded arrays (same for kickSamples):
# snareSamples[x][0] --> audio data of sample x
# snareSamples[x][1] --> sample rate of sample x

In [95]:
# Inspect the loaded snare clips; each row is [signal, sample rate].
snareSamples

array([[array([ 4.9886043e-05,  1.2957437e-04,  3.0208417e-04, ...,
       -3.1988835e-05, -2.8691908e-05, -3.3227388e-05], dtype=float32),
        22050],
       [array([ 1.6417895e-05, -6.4880284e-04, -5.2967430e-03, ...,
       -2.9894332e-05, -3.0361671e-05, -3.1850708e-05], dtype=float32),
        22050],
       [array([-6.95978160e-05,  2.64925271e-04, -2.99041579e-03, ...,
       -1.10152854e-04, -9.74262293e-05, -1.09219036e-04], dtype=float32),
        22050],
       [array([ 3.9245795e-05,  5.5018445e-06, -1.9419534e-03, ...,
       -8.5174946e-08,  7.2827156e-08, -6.0356690e-08], dtype=float32),
        22050],
       [array([-1.4754027e-04,  2.1151219e-04, -2.2895583e-03, ...,
       -8.0466918e-07,  8.3107415e-07, -8.3744078e-07], dtype=float32),
        22050],
       [array([-1.7993708e-04,  3.2073408e-04, -1.9383996e-03, ...,
       -4.3390723e-05, -5.0770428e-05, -5.4644875e-05], dtype=float32),
        22050],
       [array([-2.3793144e-05,  1.4991555e-04, -2.2596093e

In [96]:
# Listen to the second snare clip, played back at that same clip's sample rate
# (previously the rate was taken from a different clip than the audio).
IPython.display.Audio(snareSamples[1][0], rate=snareSamples[1][1])

In [97]:
def extract_features(signal, fs):
    """Compute frame-level descriptors for one audio clip.

    Parameters
    ----------
    signal : np.ndarray
        Audio time series, e.g. the first value returned by ``librosa.load``.
    fs : number
        Sampling rate of ``signal``. It must always be supplied so the
        frequency-based features are computed on the correct scale.

    Returns
    -------
    list
        ``[zcr, spec_cent, tontz, S, mfc]``: zero-crossing rate, spectral
        centroid, tonal centroid (tonnetz) features, mel spectrogram, and
        mel-frequency cepstral coefficients, in that order.
    """
    # Audio and sample rate are passed by keyword throughout: recent librosa
    # releases no longer accept them positionally for these functions.
    zcr = librosa.feature.zero_crossing_rate(y=signal)  # Zero crossing rate
    spec_cent = librosa.feature.spectral_centroid(y=signal, sr=fs)  # Spectral centroid
    tontz = librosa.feature.tonnetz(y=signal, sr=fs)  # Tonal centroid features
    S = librosa.feature.melspectrogram(y=signal, sr=fs)  # Mel spectrogram (power)

    # mfcc() expects ``S`` to be a log-power mel spectrogram, so the power
    # spectrogram is converted to dB first. When ``S`` is given the raw signal
    # is not needed, so it is not passed again.
    mfc = librosa.feature.mfcc(S=librosa.power_to_db(S), sr=fs)  # Mel-frequency cepstral coefficients

    return [zcr, spec_cent, tontz, S, mfc]


In [98]:
# zcr = librosa.feature.zero_crossing_rate(snareSamples[0][0])

In [99]:
# Sample rate stored alongside the first kick clip.
kickSamples[0][1]

22050

In [111]:
# Run the extractor over every loaded clip. Each entry of kickFeat / snareFeat
# is the list returned by extract_features for one sample k:
#   kickFeat[k][0] -> zero-crossing rate
#   kickFeat[k][1] -> spectral centroid
#   kickFeat[k][2] -> tonal centroid
#   kickFeat[k][3] -> mel spectrogram
#   kickFeat[k][4] -> MFCCs
kickFeat = [extract_features(audio, rate) for audio, rate in kickSamples]
snareFeat = [extract_features(audio, rate) for audio, rate in snareSamples]

# Quick look at the shape of the spectral centroid of the first snare clip.
np.shape(snareFeat[0][1])

(1, 87)

In [112]:
# Organize extractions by feature class rather than by sample.

# One class label per clip, kicks first: 1.0 = kick, 0.0 = snare.
class_tags = np.concatenate((np.ones(len(kickSamples)), np.zeros(len(snareSamples))))


def _pack_feature(labelled_sets, feature_index):
    """Flatten one feature across all clips into ``[feature, label, feature, label, ...]``.

    Parameters
    ----------
    labelled_sets : iterable of (list, int)
        Pairs of (per-sample feature lists, class label); each per-sample
        entry is indexable by ``feature_index``.
    feature_index : int
        Position of the wanted feature inside each per-sample list.

    Returns
    -------
    np.ndarray
        1-D object array alternating a feature value and its class label,
        in the order the sets and samples were given.
    """
    flat = []
    for feature_lists, label in labelled_sets:
        for sample_features in feature_lists:
            flat.extend((sample_features[feature_index], label))

    # Fill a pre-sized object array slot by slot. Repeated
    # np.append(arr, [matrix, label]) asks NumPy to build a ragged array,
    # which raises ValueError on recent NumPy, and it re-copies the whole
    # array on every iteration.
    packed = np.empty(len(flat), dtype=object)
    for slot, item in enumerate(flat):
        packed[slot] = item
    return packed


# Kicks are tagged 1 and snares 0, kicks first.
_labelled_sets = ((kickFeat, 1), (snareFeat, 0))

zero_co = _pack_feature(_labelled_sets, 0)       # zero-crossing rate
cent_freq = _pack_feature(_labelled_sets, 1)     # spectral centroid
tonal_ctroid = _pack_feature(_labelled_sets, 2)  # tonal centroid
mfccs = _pack_feature(_labelled_sets, 4)         # MFCCs
    


In [116]:
# Uncomment to write the extracted features to text files.
# Paths use forward slashes: with backslashes, the "\t" in
# "Extracted_Features\tonal_features.txt" would be read as a TAB character.

# zero_co.tofile("Extracted_Features/zcr_features.txt", sep=",", format="%s")
# cent_freq.tofile("Extracted_Features/cfreq_features.txt", sep=",", format="%s")
# tonal_ctroid.tofile("Extracted_Features/tonal_features.txt", sep=",", format="%s")
# mfccs.tofile("Extracted_Features/mfcc_features.txt", sep=",", format="%s")
