### Importing Libraries

In [1]:
import numpy as np
import pandas as pd

import os
import librosa

from scipy.stats import skew
from tqdm import tqdm

tqdm.pandas()

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC

import warnings
warnings.filterwarnings('ignore')

### Loading in the data

In [2]:
audio_train_files = os.listdir('audio_train')
audio_test_files = os.listdir('audio_test')


train = pd.read_csv('train_full.csv')
submission = pd.read_csv('results_template.csv')

### Defining the necessary functions

In [3]:
# functions taken from https://www.kaggle.com/amlanpraharaj/random-forest-using-mfcc-features
SAMPLE_RATE = 44100

def clean_filename(fname, string):   
    file_name = fname.split('/')[1]
    if file_name[:2] == '__':        
        file_name = string + file_name
    return file_name

# Generate mfcc features with mean and standard deviation
def get_mfcc(name, path):
    data, _ = librosa.core.load(path + name, sr = SAMPLE_RATE)
    assert _ == SAMPLE_RATE
    try:
        ft1 = librosa.feature.mfcc(data, sr = SAMPLE_RATE, n_mfcc=30)
        ft2 = librosa.feature.zero_crossing_rate(data)[0]
        ft3 = librosa.feature.spectral_rolloff(data)[0]
        ft4 = librosa.feature.spectral_centroid(data)[0]
        ft5 = librosa.feature.spectral_contrast(data)[0]
        ft6 = librosa.feature.spectral_bandwidth(data)[0]
        ft1_trunc = np.hstack((np.mean(ft1, axis=1), np.std(ft1, axis=1), skew(ft1, axis = 1), np.max(ft1, axis = 1), np.median(ft1, axis = 1), np.min(ft1, axis = 1)))
        ft2_trunc = np.hstack((np.mean(ft2), np.std(ft2), skew(ft2), np.max(ft2), np.median(ft2), np.min(ft2)))
        ft3_trunc = np.hstack((np.mean(ft3), np.std(ft3), skew(ft3), np.max(ft3), np.median(ft3), np.min(ft3)))
        ft4_trunc = np.hstack((np.mean(ft4), np.std(ft4), skew(ft4), np.max(ft4), np.median(ft4), np.min(ft4)))
        ft5_trunc = np.hstack((np.mean(ft5), np.std(ft5), skew(ft5), np.max(ft5), np.median(ft5), np.min(ft5)))
        ft6_trunc = np.hstack((np.mean(ft6), np.std(ft6), skew(ft6), np.max(ft6), np.median(ft6), np.max(ft6)))
        return pd.Series(np.hstack((ft1_trunc, ft2_trunc, ft3_trunc, ft4_trunc, ft5_trunc, ft6_trunc)))
    except:
        print('bad file')
        return pd.Series([0]*210)

In [4]:
def convert_to_labels(preds, i2c, k=3):
    ans = []
    ids = []
    for p in preds:
        idx = np.argsort(p)[::-1]
        ids.append([i for i in idx[:k]])
        ans.append(' '.join([i2c[i] for i in idx[:k]]))

    return ans, ids

### Training 

In [5]:
train_data = pd.DataFrame()
train_data['fname'] = train['fname']
test_data = pd.DataFrame()
test_data['fname'] = audio_test_files

train_data = train_data['fname'].progress_apply(get_mfcc, path='audio_train/')
print('done loading train mfcc')
test_data = test_data['fname'].progress_apply(get_mfcc, path='audio_test/')
print('done loading test mfcc')

train_data['fname'] = train['fname']
test_data['fname'] = audio_test_files

train_data['label'] = train['label']
test_data['label'] = np.zeros((len(audio_test_files)))

100%|██████████| 9473/9473 [19:53<00:00,  7.94it/s]


done loading train mfcc


  4%|▍         | 391/9400 [00:39<12:10, 12.33it/s]

bad file


 43%|████▎     | 4013/9400 [06:54<06:21, 14.11it/s]

bad file


 70%|███████   | 6606/9400 [11:28<03:13, 14.43it/s]

bad file


100%|██████████| 9400/9400 [16:31<00:00,  9.48it/s]

done loading test mfcc





In [6]:
# Functions from Random Foresth using MFCC ttps://www.kaggle.com/amlanpraharaj/random-forest-using-mfcc-features
# Construct features set
X = train_data.drop(['label', 'fname'], axis=1)
feature_names = list(X.columns)
X = X.values
labels = np.sort(np.unique(train_data.label.values))
num_class = len(labels)
c2i = {}
i2c = {}
for i, c in enumerate(labels):
    c2i[c] = i
    i2c[i] = c
y = np.array([c2i[x] for x in train_data.label.values])

In [7]:
X_test = test_data.drop(['label', 'fname'], axis=1)
X_test = X_test.values

In [8]:
print(X.shape)
print(X_test.shape)

(9473, 210)
(9400, 210)


In [9]:
# Apply scaling for PCA
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_test_scaled = scaler.transform(X_test)

In [10]:
# Apply PCA for dimension reduction
pca = PCA(n_components=65).fit(X_scaled)
X_pca = pca.transform(X_scaled)
X_test_pca = pca.transform(X_test_scaled)

print(sum(pca.explained_variance_ratio_)) 

0.9144277526966157


In [11]:
# Fit an SVM model
X_train, X_val, y_train, y_val = train_test_split(X_pca, y, test_size = 0.2, random_state = 42, shuffle = True)

clf = SVC(kernel = 'rbf', probability=True)

clf.fit(X_train, y_train)

print(accuracy_score(clf.predict(X_val), y_val))

0.6722955145118733


In [12]:
# # Define the paramter grid for C from 0.001 to 10, gamma from 0.001 to 10
# C_grid = [0.001, 0.01, 0.1, 1, 10]
# gamma_grid = [0.001, 0.01, 0.1, 1, 10]
# param_grid = {'C': C_grid, 'gamma' : gamma_grid}
#
# grid = GridSearchCV(SVC(kernel='rbf'), param_grid, cv = 3, scoring = "accuracy")
# grid.fit(X_train, y_train)
#
# # Find the best model
# print(grid.best_score_)
#
# print(grid.best_params_)
#
# print(grid.best_estimator_)

### Accuracy

In [13]:
# Optimal model
clf = SVC(kernel = 'rbf', C = 4, gamma = 0.01, probability=True)

clf.fit(X_train, y_train)

print(accuracy_score(clf.predict(X_val), y_val))

0.7182058047493404


### Generating output file

In [14]:
# Fit the entire training sets
clf.fit(X_pca, y)
str_preds, _ = convert_to_labels(clf.predict_proba(X_test_pca), i2c, k=3)

# Write to outputs
subm = pd.DataFrame()
subm['fname'] = audio_test_files
subm['label'] = str_preds
subm.to_csv('results.csv', index=False)

In [15]:
output_file = pd.read_csv('results.csv')
output_file

Unnamed: 0,fname,label
0,00063640.wav,Scissors Shatter Finger_snapping
1,0013a1db.wav,Flute Violin_or_fiddle Electric_piano
2,002bb878.wav,Bass_drum Computer_keyboard Knock
3,002d392d.wav,Bass_drum Double_bass Knock
4,00326aa9.wav,Oboe Saxophone Electric_piano
...,...,...
9395,ffe76321.wav,Bass_drum Snare_drum Gunshot_or_gunfire
9396,ffe8d42d.wav,Snare_drum Writing Bass_drum
9397,ffeba2cd.wav,Cello Violin_or_fiddle Harmonica
9398,fff6d073.wav,Bass_drum Flute Electric_piano
