In [17]:
import librosa as lb
import os
import numpy as np
import sklearn as sk
import sklearn.ensemble

from glob import glob
from librosa import load
from librosa.feature import mfcc

# SVM Classifier

## Define constants

In [3]:
DEFAULT_BITRATE = 22050
DESIRED_DURATION = 25
PADDED_LENGTH = DEFAULT_BITRATE * DESIRED_DURATION

## Helper functions

In [4]:
def load_audio(folder):
    # load original mp3s
    loaded_mp3_files = [load(f) for f in glob(folder + '*.mp3')[:40]]
    
    # make same length
    fix_length_mp3 = normalize_audio(loaded_mp3_files)
    
    mfccs = [mfcc(y=y, sr=DEFAULT_BITRATE, n_mfcc=40) for y in fix_length_mp3]
    return mfccs

In [5]:
def normalize_audio(source_files):
    fixed_lengths = [lb.util.fix_length(y, PADDED_LENGTH) for y, _ in source_files]
    return [(f - np.mean(f)) / np.std(f) for f in fixed_lengths]

In [6]:
def flatten_input(input):
    return np.array([x.reshape(x.shape[0] * x.shape[1]) for x in input])

## Load data

In [7]:
us_X = load_audio('../data/speech-accent-archive/recordings/usa/')
uk_X = load_audio('../data/speech-accent-archive/recordings/uk/')
hk_X = load_audio('../data/speech-accent-archive/recordings/hongkong/')
cn_X = load_audio('../data/speech-accent-archive/recordings/china/')
ger_X = load_audio('../data/speech-accent-archive/recordings/germany/')

## Flatten MFCC data into vectors

In [8]:
us_X[0].shape
us_X = flatten_input(us_X)
uk_X = flatten_input(uk_X)
hk_X = flatten_input(hk_X)
cn_X = flatten_input(cn_X)
ger_X = flatten_input(ger_X)

In [9]:
X = np.concatenate([us_X, uk_X, hk_X, cn_X, ger_X])
Y = np.zeros(len(us_X) + len(uk_X) + len(hk_X) + len(cn_X) + len(ger_X))
Y[21:40] = 1
Y[41:60] = 2
Y[61:80] = 3
Y[81:100] = 4

## Train / Test split

In [10]:
X_train, X_test, y_train, y_test = sk.model_selection.train_test_split(X, Y, test_size=0.3)

In [11]:
C_params = [0.1, 10, 25, 50, 100, 200, 500]
accuracies = [0, 0, 0, 0, 0, 0, 0]

## Train SVM

In [12]:
for idx, C_param in enumerate(C_params):
    svm_rbf = sk.svm.SVC(C=C_param, kernel='rbf', probability=True)
    svm_rbf.fit(X_train, y_train)
    y_test_predict = svm_rbf.predict(X_test)
    accuracies[idx] = sk.metrics.accuracy_score(y_true=y_test, y_pred=y_test_predict)

In [13]:
print accuracies

[0.67924528301886788, 0.67924528301886788, 0.67924528301886788, 0.67924528301886788, 0.67924528301886788, 0.67924528301886788, 0.67924528301886788]


# Train Gradient Boosting

In [24]:
accuracies = [0, 0, 0, 0, 0, 0]
C_params = C_params[1:]

In [25]:
for idx, C_param in enumerate(C_params):
    rf = sk.ensemble.GradientBoostingClassifier(n_estimators=C_param)
    rf.fit(X_train, y_train)
    y_test_predict = rf.predict(X_test)
    accuracies[idx] = sk.metrics.accuracy_score(y_true=y_test, y_pred=y_test_predict)

In [26]:
print accuracies

[0.45283018867924529, 0.54716981132075471, 0.52830188679245282, 0.54716981132075471, 0.56603773584905659, 0.62264150943396224]


# Train AdaBoost

In [28]:
accuracies = [0, 0, 0, 0, 0, 0, 0]
for idx, C_param in enumerate(C_params):
    rf = sk.ensemble.AdaBoostClassifier(n_estimators=C_param)
    rf.fit(X_train, y_train)
    y_test_predict = rf.predict(X_test)
    accuracies[idx] = sk.metrics.accuracy_score(y_true=y_test, y_pred=y_test_predict)
print accuracies

[0.67924528301886788, 0.67924528301886788, 0.67924528301886788, 0.67924528301886788, 0.67924528301886788, 0.67924528301886788, 0]


# Train Random Forest

In [32]:
accuracies = [0, 0, 0, 0, 0, 0]
for idx, C_param in enumerate(C_params):
    rf = sk.ensemble.RandomForestClassifier(n_estimators=C_param)
    rf.fit(X_train, y_train)
    y_test_predict = rf.predict(X_test)
    accuracies[idx] = sk.metrics.accuracy_score(y_true=y_test, y_pred=y_test_predict)
print accuracies

[0.64150943396226412, 0.660377358490566, 0.660377358490566, 0.67924528301886788, 0.67924528301886788, 0.67924528301886788]
