# Classification of Eisenberg and Arab Mashriq Collections

Read the audio files, compute MFCC and mel-spectrogram features, and classify each recording by the collection it belongs to.



## Importing

In [None]:
import glob
import os
import sys

import librosa
import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

## Reading

In [None]:
# Read audio file paths and their collection labels in a single pass, so that
# all_dirs[i] and col[i] always describe the same recording.
DATA_ROOT = '/Volumes/Extreme SSD/Collections_classification'  # change directory here
CLIP_SECONDS = 15.  # only recordings of exactly this duration are kept
COLLECTIONS = ('ArabMashriq', 'EisenbergCollection')


def collection_label(path):
    """Return the collection name contained in *path*, or None if there is none.

    Names are tried in the order given by COLLECTIONS, so a path containing
    both names is labelled with the first one ('ArabMashriq').
    """
    for collection in COLLECTIONS:
        if collection in path:
            return collection
    return None


all_dirs = []
col = []
skipped = 0  # .wav files whose path names neither collection
for root, dirs, files in os.walk(DATA_ROOT):
    for name in files:
        # Match the extension itself rather than any name containing '.wav'.
        if not name.endswith('.wav'):
            continue
        filedir = os.path.join(root, name)

        # A file without a label cannot be used for training; dropping it here
        # (instead of only omitting its label) keeps paths and labels aligned.
        label = collection_label(filedir)
        if label is None:
            skipped += 1
            continue

        # NOTE(review): recent librosa releases rename `filename` to `path`;
        # confirm against the installed version before changing the keyword.
        if librosa.get_duration(filename=filedir) == CLIP_SECONDS:
            all_dirs.append(filedir)
            col.append(label)
            # '\r' rewrites the same console line as the count grows.
            print("\rLoading %i recordings." % (len(all_dirs)), end='', flush=True)
print()
if skipped:
    print('Skipped', skipped, 'recordings that belong to neither collection.')
file_no = len(all_dirs)

# Get Labels
col_a = col.count('ArabMashriq')
col_e = col.count('EisenbergCollection')
col = np.asarray(col)
print('Arab Mashriq:', col_a, 'recordings.')
print('Eisenberg:', col_e, 'recordings.')

## Computing features

In [None]:
# Per-recording feature vectors, in the same order as all_dirs.
all_mfcc = []
all_mel = []

for done, path in enumerate(all_dirs, start=1):

    #Load file (at most the first 15 seconds are read)
    y, sr = librosa.core.load(path, duration=15.)

    #Features
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mel = librosa.feature.melspectrogram(y=y, sr=sr)
    # Each 2-D feature matrix is flattened into one row per recording.
    # NOTE(review): this assumes every recording yields the same number of
    # frames, otherwise the rows below cannot be stacked.
    all_mfcc.append(mfcc.flatten())
    all_mel.append(mel.flatten())

    # '\r' rewrites the same console line as the count grows.
    print("\rComputed for %i/%i recordings." % (done, len(all_dirs)), end='', flush=True)
print()

# Fail with a clear message instead of an opaque scaler error on empty input.
if not all_mfcc:
    raise ValueError('No recordings were loaded; nothing to standardize.')

#Standardization
# Each feature column is scaled to zero mean and unit variance over all recordings.
scl_mfcc = StandardScaler()
X_mfcc = np.asarray(scl_mfcc.fit_transform(all_mfcc))
scl_mel = StandardScaler()
X_mel = np.asarray(scl_mel.fit_transform(all_mel))
print('Completed standardization.')

## Classification

In [None]:
#KFold
# A fixed seed makes cv.split() return the same partition on every call, so the
# MFCC and mel-spectrogram experiments are scored on identical folds and the
# fold assignment is repeatable between runs.
cv = KFold(n_splits=6, shuffle=True, random_state=0)


def fold_scores(model, X, y, splitter):
    """Return the held-out accuracy of *model* for every fold of *splitter*.

    The model is refitted from scratch on each training fold; after the call
    it is left fitted on the training part of the last fold.
    """
    scores = []
    for train_index, test_index in splitter.split(X):
        model.fit(X[train_index], y[train_index])
        scores.append(model.score(X[test_index], y[test_index]))
    return scores


#Classifiers
# Every classifier carries its own StandardScaler, so the scaling statistics
# are learned from the training fold only and the held-out fold never
# influences preprocessing. The inputs were already standardized over the
# whole data set; standardizing again per fold overrides that earlier
# per-feature rescaling.
svc_mfcc = make_pipeline(StandardScaler(), SVC(gamma='auto'))
mlp_mfcc = make_pipeline(StandardScaler(), MLPClassifier())
svc_mel = make_pipeline(StandardScaler(), SVC(gamma='auto'))
mlp_mel = make_pipeline(StandardScaler(), MLPClassifier())

# classification using MFCC
svc_scores_mfcc = fold_scores(svc_mfcc, X_mfcc, col, cv)
mlp_scores_mfcc = fold_scores(mlp_mfcc, X_mfcc, col, cv)
print('Classification for MFCC completed.')

# classification using Mel Spectrogram
svc_scores_mel = fold_scores(svc_mel, X_mel, col, cv)
mlp_scores_mel = fold_scores(mlp_mel, X_mel, col, cv)
print('Classification for Mel Spectrogram completed.')

print('SVC MFCC Scores:', svc_scores_mfcc)
print('MLP MFCC Scores:', mlp_scores_mfcc)
print('SVC Mel Scores:', svc_scores_mel)
print('MLP Mel Scores:', mlp_scores_mel)

# Second, independent estimate: an integer `cv` lets scikit-learn choose its
# own splitter (see the cross_val_score documentation), and each call works
# on a clone of the estimator, so the fitted models above are untouched.
print('SVC MFCC Cross Validation Scores:', cross_val_score(svc_mfcc, X_mfcc, col, cv=6, verbose=0))
print('MLP MFCC Cross Validation Scores:', cross_val_score(mlp_mfcc, X_mfcc, col, cv=6, verbose=0))
print('SVC Mel Cross Validation Scores:', cross_val_score(svc_mel, X_mel, col, cv=6, verbose=0))
print('MLP Mel Cross Validation Scores:', cross_val_score(mlp_mel, X_mel, col, cv=6, verbose=0))