In [2]:
import datetime, os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import re
import numpy as np
from scipy.stats import kurtosis, skew
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedShuffleSplit

import pandas as pd

# Set the seed value for experiment reproducibility.
seed = 0
np.random.seed(seed)

In [3]:
DATA_DIR = "./data/SAVEE"
CHECKPOINT_DIR ='./tmp/checkpoint'


TRAIN_SIZE, VAL_SIZE, TEST_SIZE = (300, 100, 80)

SAMPLE_RATE_HZ = 44100

In [4]:
def get_label(file_path):
  parts = re.sub('.+\_|[0-9]+.wav', '', file_path)
  return parts

def get_speaker_name(file_path):
  parts = re.sub('.*[/]+|\_|[a-z]+[0-9]+.wav', '', file_path)
  return parts

In [5]:
filenames = [f'{DATA_DIR}/{p}' for p in os.listdir(DATA_DIR)]
labels_true = list(map(get_label, filenames))
labels = pd.get_dummies(labels_true).to_numpy()
df = pd.DataFrame({'filenames': filenames, 'speaker': map(get_speaker_name, filenames), 'label': labels_true})
df = pd.get_dummies(df['label'], prefix="label").join(df[['filenames', 'speaker', 'label']])
one_hot_lables = [col for col in df if col.startswith('label_')]

In [6]:
import librosa
def get_mfccs_values(filename):
  y, sr = librosa.load(filename)
  mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=12)
  means, vars, stds, kurtosiss, skewness = [], [], [], [], []
  for mfcc in mfccs:
    means.append(np.mean(mfcc))
    vars.append(np.var(mfcc))
    stds.append(np.std(mfcc))
    kurtosiss.append(kurtosis(mfcc))
    skewness.append(skew(mfcc))
  return means, vars, stds, kurtosiss, skewness

In [7]:
from IPython.display import clear_output

for n, row in df.iterrows():
  means, vars, stds, kurtosiss, skewness = get_mfccs_values(row['filenames'])
  for i in range(1, 13):
    df.loc[n, f'mean_{i}'] = means[i - 1]
    df.loc[n, f'var_{i}'] = vars[i - 1]
    df.loc[n, f'std_{i}'] = stds[i - 1]
    df.loc[n, f'kurtosis_{i}'] = kurtosiss[i - 1]
    df.loc[n, f'skewnes_{i}'] = skewness[i - 1]
  clear_output(wait=True)
  perc = float(n + 1) / df.shape[0] * 100
  print("{:d} / {:d} - {:.2f}% [{:s}]".format(n + 1, df.shape[0], perc, ((int(perc / 100 * 50) * '=') + ">").ljust(50, '.')))
df



Unnamed: 0,label_a,label_d,label_f,label_h,label_n,label_sa,label_su,filenames,speaker,label,...,mean_11,var_11,std_11,kurtosis_11,skewnes_11,mean_12,var_12,std_12,kurtosis_12,skewnes_12
0,1,0,0,0,0,0,0,./data/SAVEE/JK_a13.wav,JK,a,...,-6.646858,165.243103,12.854692,-0.879058,-0.261332,-6.741989,99.565918,9.978272,-0.610047,-0.394239
1,0,0,0,0,1,0,0,./data/SAVEE/JE_n14.wav,JE,n,...,0.138198,144.018234,12.000760,-0.396406,-0.164542,2.160614,148.127335,12.170757,0.419638,-0.885636
2,0,0,0,0,1,0,0,./data/SAVEE/DC_n16.wav,DC,n,...,-2.694603,226.153198,15.038391,-0.935509,-0.053739,0.693370,74.217918,8.614983,-0.368126,-0.638332
3,0,0,0,0,1,0,0,./data/SAVEE/KL_n30.wav,KL,n,...,-7.937403,158.766617,12.600263,-0.267623,-0.699280,-1.072644,167.212433,12.931065,-0.362790,-0.396109
4,0,1,0,0,0,0,0,./data/SAVEE/DC_d12.wav,DC,d,...,1.636026,89.347778,9.452395,-0.130789,-0.435178,-0.288571,64.167778,8.010479,0.069534,-0.802489
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
475,0,0,1,0,0,0,0,./data/SAVEE/JE_f08.wav,JE,f,...,-1.519831,138.699570,11.777078,-0.269425,-0.661269,-4.461388,209.952225,14.489728,0.018996,-0.599243
476,1,0,0,0,0,0,0,./data/SAVEE/JE_a02.wav,JE,a,...,-0.255791,107.803078,10.382826,-0.198744,-0.291963,-2.482802,78.361244,8.852189,-0.167043,-0.404736
477,0,0,0,0,1,0,0,./data/SAVEE/JK_n24.wav,JK,n,...,-2.984914,237.634888,15.415411,0.033531,-0.059147,-5.123469,136.137314,11.667789,-0.122876,-0.305884
478,0,1,0,0,0,0,0,./data/SAVEE/JE_d07.wav,JE,d,...,5.010161,112.984978,10.629439,1.284389,-0.533607,1.250057,84.182808,9.175119,0.268180,-0.673294


In [8]:
df.drop(['filenames'], axis=1, inplace=True)

In [9]:
df.drop(['label_a', 'label_d', 'label_f', 'label_h', 'label_n', 'label_sa', 'label_su'], axis=1, inplace=True)

In [10]:
df

Unnamed: 0,speaker,label,mean_1,var_1,std_1,kurtosis_1,skewnes_1,mean_2,var_2,std_2,...,mean_11,var_11,std_11,kurtosis_11,skewnes_11,mean_12,var_12,std_12,kurtosis_12,skewnes_12
0,JK,a,-199.594589,16804.917969,129.633789,-0.906781,-0.270596,100.260063,4159.931641,64.497536,...,-6.646858,165.243103,12.854692,-0.879058,-0.261332,-6.741989,99.565918,9.978272,-0.610047,-0.394239
1,JE,n,-453.183502,7893.224609,88.843819,-1.271009,0.077585,110.863014,5183.675781,71.997749,...,0.138198,144.018234,12.000760,-0.396406,-0.164542,2.160614,148.127335,12.170757,0.419638,-0.885636
2,DC,n,-427.463348,7212.339355,84.925491,-0.748990,-0.119880,142.868271,5073.224121,71.226570,...,-2.694603,226.153198,15.038391,-0.935509,-0.053739,0.693370,74.217918,8.614983,-0.368126,-0.638332
3,KL,n,-638.402771,5842.780762,76.438087,-1.232317,-0.070780,132.120758,4284.165527,65.453537,...,-7.937403,158.766617,12.600263,-0.267623,-0.699280,-1.072644,167.212433,12.931065,-0.362790,-0.396109
4,DC,d,-366.166534,10924.393555,104.519821,-1.611043,-0.054946,115.484131,6481.183105,80.505798,...,1.636026,89.347778,9.452395,-0.130789,-0.435178,-0.288571,64.167778,8.010479,0.069534,-0.802489
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
475,JE,f,-361.477844,13524.562500,116.295151,-1.311135,0.080970,85.935600,4055.468994,63.682564,...,-1.519831,138.699570,11.777078,-0.269425,-0.661269,-4.461388,209.952225,14.489728,0.018996,-0.599243
476,JE,a,-267.097961,16899.519531,129.998154,-1.426013,-0.135080,103.743088,5193.733398,72.067558,...,-0.255791,107.803078,10.382826,-0.198744,-0.291963,-2.482802,78.361244,8.852189,-0.167043,-0.404736
477,JK,n,-334.993347,11030.074219,105.024162,-0.713784,-0.425978,128.489075,4980.812012,70.574867,...,-2.984914,237.634888,15.415411,0.033531,-0.059147,-5.123469,136.137314,11.667789,-0.122876,-0.305884
478,JE,d,-402.019012,11040.783203,105.075134,-1.184002,0.618307,82.270081,4243.263184,65.140335,...,5.010161,112.984978,10.629439,1.284389,-0.533607,1.250057,84.182808,9.175119,0.268180,-0.673294


In [11]:
from sklearn.model_selection import StratifiedShuffleSplit
import warnings
warnings.filterwarnings("ignore", category=FutureWarning) 

X_train, X_val, X_test = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
y_train, y_val, y_test = pd.Series(), pd.Series(), pd.Series()


for speaker in df['speaker'].unique():
  data = df[df['speaker'] == speaker]

  for train_val_index, test_index in StratifiedShuffleSplit(n_splits=10, test_size=20, random_state=seed).split(data.drop(['speaker', 'label'], axis=1), data['label']):
    X_train_val_candidate, X_test_candidate = data.drop(['speaker', 'label'], axis=1).iloc[train_val_index, :], data.drop(['speaker', 'label'], axis=1).iloc[test_index, :]
    y_train_val_candidate, y_test_candidate = data['label'].iloc[train_val_index], data['label'].iloc[test_index]
  
  #  take 20 samples per speaker for val set
  for train_index, val_index in StratifiedShuffleSplit(n_splits=10, test_size=25, random_state=seed).split(X_train_val_candidate, y_train_val_candidate):
    X_train_candidate, X_val_candidate = X_train_val_candidate.iloc[train_index, :], X_train_val_candidate.iloc[val_index, :]
    y_train_candidate, y_val_candidate = y_train_val_candidate.iloc[train_index], y_train_val_candidate.iloc[val_index]



  X_train = X_train.append(X_train_candidate)
  y_train = y_train.append(y_train_candidate)
  X_val = X_val.append(X_val_candidate)
  y_val = y_val.append(y_val_candidate)
  X_test = X_test.append(X_test_candidate)
  y_test = y_test.append(y_test_candidate)

print(f'train: {X_train.shape} , {y_train.shape}')
print(f'val: {X_val.shape} , {y_val.shape}')
print(f'test: {X_test.shape} , {y_test.shape}')

train: (300, 60) , (300,)
val: (100, 60) , (100,)
test: (80, 60) , (80,)


In [12]:

from itertools import product
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVC

scalers = [StandardScaler().fit(X_train), MinMaxScaler().fit(X_train)]
cs = np.logspace(1, 3, 5, dtype=np.float32)

results = pd.DataFrame()

# linear
print('kernel: linear')
for scaler, c in product(scalers, cs):
  # print(f'  scaler: {scaler} , c: {c}')
  model = SVC(kernel='linear', C=c)
  model.fit(scaler.transform(X_train), y_train)
  score = model.score(scaler.transform(X_val), y_val)
  results = results.append({'classifier': 'svc', 'kernel': 'linear', 'scaler': scaler, 'c': c, 'score': score }, ignore_index=True)
  # results.append({'classifier': 'svc', 'kernel': 'linear', 'scaler': scaler, 'c': c, 'score': score }, ignore_index=True)
  # print(f'  score: {score}')

# poly
print('kernel: poly')
degrees = range(2, 5)
for scaler, c, degree in product(scalers, cs, degrees):
  # print(f'  scaler: {scaler} , c: {c}, degree: {degree}')
  model = SVC(kernel='poly', C=c, degree=degree)
  model.fit(scaler.transform(X_train), y_train)
  score = model.score(scaler.transform(X_val), y_val)
  results = results.append({'classifier': 'svc', 'kernel': 'poly', 'scaler': scaler, 'c': c, 'degree': degree, 'score': score }, ignore_index=True)
  # results.append({'classifier': 'svc', 'kernel': 'poly', 'scaler': scaler, 'c': c, 'degree': degree, 'score': score }, ignore_index=True)
  # print(f'  score: {score}')

# rbf
print('kernel: rbf')
gammas = np.logspace(-2, 2, 5, dtype=np.float32)
for scaler, c, gamma in product(scalers, cs, gammas):
  # print(f'  scaler: {scaler} , c: {c}, gamma: {gamma}')
  model = SVC(kernel='rbf', C=c, gamma=gamma)
  model.fit(scaler.transform(X_train), y_train)
  score = model.score(scaler.transform(X_val), y_val)
  results = results.append({'classifier': 'svc', 'kernel': 'rbf', 'scaler': scaler, 'c': c, 'gamma': gamma, 'score': score }, ignore_index=True)
  # results.append({'classifier': 'svc', 'kernel': 'rbf', 'scaler': scaler, 'c': c, 'gamma': gamma, 'score': score }, ignore_index=True)
  # print(f'  score: {score}')

cols = list(results.columns.values)
cols.pop(cols.index('score'))
results = results[cols + ['score']] 


kernel: linear
kernel: poly
kernel: rbf


In [13]:
cs = np.logspace(1, 3, 5, dtype=np.float32)

# linear
print('kernel: linear')
for c in cs:
  # print(f'  scaler: none , c: {c}')
  model = SVC(kernel='linear', C=c)
  model.fit(X_train, y_train)
  score = model.score(X_val, y_val)
  results = results.append({'classifier': 'svc', 'kernel': 'linear', 'scaler': 'none', 'c': c, 'score': score }, ignore_index=True)
  # print(f'  score: {score}')

# poly
print('kernel: poly')
degrees = range(2, 5)
for c, degree in product(cs, degrees):
  # print(f'  scaler: none , c: {c}, degree: {degree}')
  model = SVC(kernel='poly', C=c, degree=degree)
  model.fit(X_train, y_train)
  score = model.score(X_val, y_val)
  results = results.append({'classifier': 'svc', 'kernel': 'linear', 'scaler': 'none', 'c': c, 'degree': degree, 'score': score }, ignore_index=True)
  # print(f'  score: {score}')

# rbf
print('kernel: rbf')
gammas = np.logspace(-2, 2, 5, dtype=np.float32)
for c, gamma in product(cs, gammas):
  # print(f'  scaler: none , c: {c}, gamma: {gamma}')
  model = SVC(kernel='rbf', C=c, gamma=gamma)
  model.fit(X_train, y_train)
  score = model.score(X_val, y_val)
  results = results.append({'classifier': 'svc', 'kernel': 'linear', 'scaler': 'none', 'c': c, 'gamma': gamma, 'score': score }, ignore_index=True)
  # print(f'  score: {score}')

cols = list(results.columns.values)
cols.pop(cols.index('score'))
results = results[cols + ['score']] 

kernel: linear
kernel: poly
kernel: rbf


In [19]:
results.sort_values(by=['score'], ascending=False).head(10)

Unnamed: 0,classifier,kernel,scaler,c,degree,gamma,score
36,svc,poly,MinMaxScaler(),316.227753,4.0,,0.7
33,svc,poly,MinMaxScaler(),100.0,4.0,,0.7
39,svc,poly,MinMaxScaler(),1000.0,4.0,,0.7
27,svc,poly,MinMaxScaler(),10.0,4.0,,0.7
30,svc,poly,MinMaxScaler(),31.622776,4.0,,0.7
40,svc,rbf,StandardScaler(),10.0,,0.01,0.67
31,svc,poly,MinMaxScaler(),100.0,2.0,,0.66
81,svc,rbf,MinMaxScaler(),316.227753,,0.1,0.66
34,svc,poly,MinMaxScaler(),316.227753,2.0,,0.66
76,svc,rbf,MinMaxScaler(),100.0,,0.1,0.66
