In [1]:
import glob
import numpy as np
import librosa
import os
import vega
import altair as alt
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import Lasso
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

In [5]:
#File reading
def find_wav_files(root_dir):
    """Recursively collect the paths of all ``.wav`` files below ``root_dir``.

    Parameters
    ----------
    root_dir : str
        Directory to walk. A directory that does not exist yields an
        empty list, because ``os.walk`` produces nothing for it.

    Returns
    -------
    list of str
        Paths built with ``os.path.join(root, name)``, sorted so that the
        result (and every sample index derived from it) does not depend on
        the order in which the filesystem lists directory entries.
    """
    wav_paths = []
    for root, _dirs, files in os.walk(root_dir):
        for name in files:
            # Match on the extension only: a plain substring test would also
            # accept names such as 'take.wav.bak'.
            if name.endswith('.wav'):
                wav_paths.append(os.path.join(root, name))
    wav_paths.sort()
    return wav_paths

all_dirs = find_wav_files('./Dataset') #change directory here
file_no = len(all_dirs)

#Feature Computation
# all_mel collects one flattened mel spectrogram per accepted file;
# file_names collects the matching paths in the same order.
all_mel = []
file_names = []

print(file_no)

# Index -> progress message. Entries are inserted from lowest to highest
# priority so that, when several milestones land on the same index (small
# file counts), the later assignment wins: 100% > 75% > 50% > 25%.
progress_marks = {}
progress_marks[int(file_no*0.25)] = '25%'
progress_marks[int(file_no*0.5)] = '50%'
progress_marks[int(file_no*0.75)] = '75%'
progress_marks[file_no-1] = '100%'

for i, wav_path in enumerate(all_dirs):
    #Progress Report
    # Reported for every milestone index, independent of whether the file at
    # that index passes the duration filter below.
    if i in progress_marks:
        print(progress_marks[i])

    # Only files whose reported duration is exactly 15 seconds are used
    # (presumably so every flattened spectrogram has the same length).
    # NOTE(review): recent librosa releases rename the `filename` keyword to
    # `path`; confirm against the installed version before changing it.
    if librosa.get_duration(filename=wav_path) != 15.:
        continue
    file_names.append(wav_path)

    #Load file
    # The signal is not stored in `y`, which this notebook uses for the labels.
    signal, sr = librosa.core.load(wav_path, duration=15.)
    #Features
    mel = librosa.feature.melspectrogram(y=signal, sr=sr)
    all_mel.append(mel.flatten())

#Standardization
# Fit a single scaler on all extracted feature vectors and apply it to those
# same vectors; the result is stored as the design matrix X (one row per
# entry of all_mel).
scl1 = StandardScaler()
standardized = scl1.fit_transform(all_mel)
X = np.asarray(standardized)

#Labels
# Substring of the file path that identifies a collection, paired with its
# integer class label. The first matching marker wins.
label_by_collection = (
    ('ArabMashriq', 0),
    ('EisenbergCollection', 1),
)

y = []
for path in file_names:
    for marker, label in label_by_collection:
        if marker in path:
            y.append(label)
            break
    else:
        # Skipping the file silently would leave y shorter than the feature
        # rows and shift every following label onto the wrong sample.
        raise ValueError('Cannot derive a label from path: ' + path)

# Per-class sample counts.
y_a = y.count(0)
y_e = y.count(1)
print('Arab: ' + str(y_a))
print('Eisenberg: ' + str(y_e))

y = np.asarray(y)

#KFold
# Fixed seed so the fold assignment and the MLP weight initialisation are
# the same on every run.
RANDOM_STATE = 42

# Stratified folds keep the class proportions of y in every fold; the
# recorded run of this notebook shows strongly imbalanced classes
# (2328 vs 438 samples).
cv = StratifiedKFold(n_splits=6, shuffle=True, random_state=RANDOM_STATE)

#Classifiers
svc = SVC(gamma='auto')
mlp =  MLPClassifier(random_state=RANDOM_STATE)

# One held-out accuracy per fold and classifier.
svc_scores = []
mlp_scores = []
for train_index, test_index in cv.split(X, y):
    print("Train Index: ", train_index, "\n")
    print("Test Index: ", test_index)

    y_train, y_test = y[train_index], y[test_index]

    # Re-estimate mean/variance on the training fold only and apply them to
    # both parts, so test-fold statistics do not influence the features the
    # models are trained on. X[...] with an index array is already a fresh
    # copy, hence copy=False to avoid holding a second one.
    fold_scaler = StandardScaler(copy=False)
    X_train = fold_scaler.fit_transform(X[train_index])
    X_test = fold_scaler.transform(X[test_index])

    svc.fit(X_train, y_train)
    svc_scores.append(svc.score(X_test, y_test))
    mlp.fit(X_train, y_train)
    mlp_scores.append(mlp.score(X_test, y_test))

print(svc_scores)
print(mlp_scores)

2770
25%
50%
75%
100%
Arab: 2328
Eisenberg: 438
Train Index:  [   1    2    3 ... 2763 2764 2765] 

Test Index:  [   0   10   14   21   26   29   31   39   43   46   47   51   55   61
   65   75   80   86   88   97   98  132  144  152  156  157  166  169
  174  175  176  178  185  188  200  206  210  211  212  215  216  229
  230  233  234  237  241  243  246  248  250  251  257  259  260  272
  274  284  289  296  298  310  320  329  331  336  341  342  348  349
  373  384  386  406  409  412  414  437  447  452  453  457  459  469
  472  475  486  489  500  505  514  515  522  524  526  533  544  546
  547  553  560  561  565  582  585  597  599  609  618  620  623  629
  630  638  649  656  664  686  690  693  700  701  702  708  714  717
  718  729  740  741  742  745  753  755  767  771  783  794  797  801
  804  809  812  814  818  826  829  831  832  838  850  852  857  858
  862  869  872  873  875  883  887  888  891  897  904  907  910  913
  918  922  935  943  945  947  952