In [4]:
import glob
import numpy as np
import librosa
import os
import vega
import altair as alt
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import Lasso
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

In [15]:
#File reading
all_dirs = []
for root, dirs, files in os.walk('./Dataset'): #change directory here
        for name in files:
            if '.wav' in name:
                filedir = os.path.join(root, name)
                all_dirs.append(filedir)
file_no = len(all_dirs)

#Feature Computation
all_mfcc = []
file_names = []

print(file_no)
for i in range(file_no):
    if (librosa.get_duration(filename=all_dirs[i]) == 15.):
        file_names.append(all_dirs[i])
        #Progress Report
        if (i==file_no-1):
            print('100%')
        elif (i==int(file_no*0.75)):
            print('75%')
        elif (i==int(file_no*0.5)):
            print('50%')
        elif (i==int(file_no*0.25)):
            print('25%')
        #Load file
        y, sr = librosa.core.load(all_dirs[i], duration=15.)
        #Features
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        all_mfcc.append(mfcc.flatten())

#Standardization
scl1 = StandardScaler()
X = np.asarray(scl1.fit_transform(all_mfcc))

#Labels
y = []
y_a = 0
y_e = 0
for i in range(len(file_names)):
    if 'ArabMashriq' in file_names[i]:
        y.append(0)
        y_a += 1
    elif 'EisenbergCollection' in file_names[i]:
        y.append(1)
        y_e += 1
print('Arab: ' + str(y_a))
print('Eisenberg: ' + str(y_e))

y = np.asarray(y)

#KFold
cv = KFold(n_splits=6, shuffle=True)

#Classifiers
lasso = Lasso()
svc = SVC(gamma='auto')
mlp =  MLPClassifier()

# lasso_scores = []
# svc_scores = []
# mlp_scores = []
# for train_index, test_index in cv.split(X):
#     print("Train Index: ", train_index, "\n")
#     print("Test Index: ", test_index)

#     X_train, X_test, y_train, y_test = X[train_index], X[test_index], y[train_index], y[test_index]
#     lasso.fit(X_train, y_train)
#     lasso_scores.append(lasso.score(X_test, y_test))
# #     svc.fit(X_train, y_train)
# #     svc_scores.append(lasso.score(X_test, y_test))
#     mlp.fit(X_train, y_train)
#     mlp_scores.append(lasso.score(X_test, y_test))
    
print(cross_val_score(lasso, X, y, cv=6))
print(cross_val_score(svc, X, y, cv=6))
print(cross_val_score(mlp, X, y, cv=6))


2770
25%
50%
75%
100%
Arab: 2328
Eisenberg: 438
[-19.04347826   0.           0.           0.           0.
   0.        ]
[0.98481562 0.92841649 0.9132321  0.96746204 0.90021692 0.88286334]
[0.98047722 0.90455531 0.87852495 0.96095445 0.90672451 0.89154013]
