In [2]:
import glob
import numpy as np
import librosa
import os
import vega
import altair as alt
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import Lasso
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

In [10]:
#File reading
all_dirs = []
for root, dirs, files in os.walk('./Dataset'): #change directory here
        for name in files:
            if '.wav' in name:
                filedir = os.path.join(root, name)
                all_dirs.append(filedir)
file_no = len(all_dirs)

#Feature Computation
all_mfcc = []
file_names = []

print(file_no)
for i in range(file_no):
    if (librosa.get_duration(filename=all_dirs[i]) == 15.):
        file_names.append(all_dirs[i])
        #Progress Report
        if (i==file_no-1):
            print('100%')
        elif (i==int(file_no*0.75)):
            print('75%')
        elif (i==int(file_no*0.5)):
            print('50%')
        elif (i==int(file_no*0.25)):
            print('25%')
        #Load file
        y, sr = librosa.core.load(all_dirs[i], duration=15.)
        #Features
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        all_mfcc.append(mfcc.flatten())

#Standardization
scl1 = StandardScaler()
X = np.asarray(scl1.fit_transform(all_mfcc))

#Labels
y = []
y_a = 0
y_e = 0
for i in range(len(file_names)):
    if 'ArabMashriq' in file_names[i]:
        y.append(0)
        y_a += 1
    elif 'EisenbergCollection' in file_names[i]:
        y.append(1)
        y_e += 1
print('Arab: ' + str(y_a))
print('Eisenberg: ' + str(y_e))

y = np.asarray(y)

#KFold
cv = KFold(n_splits=6, shuffle=True)

#Classifiers
svc = SVC(gamma='auto')
mlp =  MLPClassifier()

svc_scores = []
mlp_scores = []
for train_index, test_index in cv.split(X):
    print("Train Index: ", train_index, "\n")
    print("Test Index: ", test_index)

    X_train, X_test, y_train, y_test = X[train_index], X[test_index], y[train_index], y[test_index]
    svc.fit(X_train, y_train)
    svc_scores.append(svc.score(X_test, y_test))
    mlp.fit(X_train, y_train)
    mlp_scores.append(mlp.score(X_test, y_test))

print(svc_scores)
print(mlp_scores)

2770
25%
50%
75%
100%
Arab: 2328
Eisenberg: 438
Train Index:  [   0    1    2 ... 2763 2764 2765] 

Test Index:  [   3    5   27   30   33   35   38   56   59   64   68   71  100  111
  139  156  159  168  170  172  174  181  182  183  185  189  196  198
  206  209  210  211  217  231  232  242  247  250  252  256  258  260
  274  282  287  290  294  298  303  316  322  327  331  332  335  339
  343  346  347  350  354  358  363  372  375  381  391  393  421  426
  431  439  444  453  454  462  463  471  492  496  519  533  535  537
  540  549  550  553  564  572  573  582  587  588  589  593  599  604
  605  607  618  633  664  676  679  681  684  687  691  698  700  704
  711  717  718  719  723  726  727  738  741  744  753  759  764  768
  772  776  779  784  786  802  806  807  810  819  824  828  830  841
  844  851  862  865  871  875  878  880  884  900  910  912  913  920
  921  922  928  931  934  950  951  954  956  962  963  972  977 1006
 1007 1009 1020 1023 1024 1025 1028