In [1]:
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
%matplotlib inline
plt.style.use('ggplot')

plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = 'Ubuntu'
plt.rcParams['font.monospace'] = 'Ubuntu Mono'
plt.rcParams['font.size'] = 12
plt.rcParams['axes.labelsize'] = 11
plt.rcParams['axes.labelweight'] = 'bold'
plt.rcParams['axes.titlesize'] = 14
plt.rcParams['xtick.labelsize'] = 10
plt.rcParams['ytick.labelsize'] = 10
plt.rcParams['legend.fontsize'] = 11
plt.rcParams['figure.titlesize'] = 13

In [2]:
X_all = np.loadtxt('voice_merge_features.csv', delimiter=',')
y_all = np.array(np.loadtxt('voice_merge_labels.csv', delimiter=','), dtype=np.int)

In [36]:
from sklearn.preprocessing import normalize, MinMaxScaler
from sklearn.model_selection import train_test_split
import time

In [90]:
X_sub, _, y_sub, _ = train_test_split(
        X_all, y_all, stratify=y_all, train_size=0.2, random_state=round(time.time()))



In [91]:
X_train, X_test, y_train, y_test = train_test_split(
        X_sub, y_sub, stratify=y_sub, train_size=0.8, random_state=round(time.time()))

X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, stratify=y_train, train_size=0.8, random_state=round(time.time()))



In [92]:
print(X_train.shape)
print(y_train.shape)
print(X_val.shape)
print(y_val.shape)
print(X_test.shape)
print(y_test.shape)

(1814, 1280)
(1814,)
(454, 1280)
(454,)
(567, 1280)
(567,)


In [93]:
min_max_scalar = MinMaxScaler()

X_train = min_max_scalar.fit_transform(X_train)
X_val = min_max_scalar.transform(X_val)

In [214]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import RidgeClassifier, LogisticRegression

In [247]:
# clf = SVC(kernel='rbf', C=5, gamma=0.001, decision_function_shape='ovo')
# clf = RandomForestClassifier(n_estimators=1000, criterion='entropy')
clf = RidgeClassifier(alpha=100.)
clf.fit(X_train, y_train)
print("Training set accuracy: {:.2f}".format(clf.score(X_train, y_train)))
print("Validation set accuracy: {:.2f}".format(clf.score(X_val, y_val)))

Training set accuracy: 0.77
Validation set accuracy: 0.63


In [94]:
from sklearn.decomposition import PCA

In [228]:
pca = PCA(n_components=128)

In [229]:
pca.fit(X_train)

PCA(copy=True, iterated_power='auto', n_components=128, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [230]:
pca.explained_variance_ratio_[:13]

array([ 0.09110854,  0.04636927,  0.0302626 ,  0.02749712,  0.0212544 ,
        0.01792796,  0.01538577,  0.01297454,  0.0122737 ,  0.0114458 ,
        0.01050015,  0.01021754,  0.00976342])

In [232]:
X_train_pca = pca.transform(X_train)
X_val_pca = pca.transform(X_val)

In [316]:
clf = SVC(kernel='rbf', C=10, gamma=0.01, decision_function_shape='ovo')
# clf = RandomForestClassifier(n_estimators=10, criterion='entropy')
# clf = RidgeClassifier(alpha=10.)
clf.fit(X_train_pca, y_train)
print("Training set accuracy: {:.2f}".format(clf.score(X_train_pca, y_train)))
print("Validation set accuracy: {:.2f}".format(clf.score(X_val_pca, y_val)))

Training set accuracy: 0.84
Validation set accuracy: 0.66
