# ecapatdnn Features

Related methods.

> Desplanques, B., Thienpondt, J., & Demuynck, K. (2020). ECAPA-TDNN: Emphasized Channel Attention, Propagation and Aggregation in TDNN Based Speaker Verification. Proc. Interspeech 2020, 3830-3834.

In [6]:
import pandas as pd

# Load the precomputed feature table from CSV and order its rows by the
# path of the audio file each row was extracted from.
df_ecapatdnn = (
    pd.read_csv('ecapatdnn_features.csv')
    .sort_values(by='sound_filepath')
)
# Bare last expression so the notebook renders the (truncated) frame.
df_ecapatdnn

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,184,185,186,187,188,189,190,191,sound_filepath,label
0,1.775928,33.939613,-1.255207,20.727728,-24.638384,11.650988,-11.616547,-8.657018,-16.473078,-18.036320,...,21.401560,-14.570674,-10.431826,-21.781740,-12.113140,-27.092291,15.228859,-19.804628,../../data/train/bfamcv01_segment163_neutral.wav,neutral
1,19.241816,13.259179,9.128108,-5.033468,-22.560678,5.658721,17.572769,-12.740870,-1.310747,8.753231,...,10.904234,-1.946110,13.530838,-9.567898,-21.670910,-42.901207,28.016594,3.097153,../../data/train/bfamcv01_segment168_non-neutr...,non-neutral-male
2,-22.071964,29.279020,-31.540411,4.363389,-11.664008,11.250226,3.126503,-9.531599,17.802624,-12.044325,...,29.976244,-2.596224,-2.955117,15.218815,29.883274,1.182688,-13.540177,10.954507,../../data/train/bfamcv01_segment170_non-neutr...,non-neutral-male
3,13.510550,19.781698,-21.485544,6.591959,-1.028654,30.273680,21.171167,-18.135620,-2.706141,-2.042898,...,12.084364,-16.065744,-9.216212,-17.524847,9.124699,-10.265999,15.116346,-22.886625,../../data/train/bfamcv01_segment173_neutral.wav,neutral
4,8.291192,18.211643,-1.159041,41.646988,-22.960949,14.939216,-11.423697,-5.032195,-8.175071,-4.690393,...,2.687372,-20.794718,-1.655759,-10.727356,-32.339008,-13.840977,18.429293,-8.271866,../../data/train/bfamcv01_segment177_neutral.wav,neutral
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
620,5.344962,1.517670,-7.755565,-5.839283,28.485147,-20.233873,12.346156,17.666477,-5.593419,32.408909,...,4.545439,19.579426,-17.956511,-12.893394,12.547358,-36.556561,30.745852,5.659055,../../data/train/bpubmn14_segment87_neutral.wav,neutral
621,-4.140793,8.729709,-17.619707,-15.676598,17.560957,-20.921505,20.434975,19.679485,-9.771207,9.943653,...,1.737341,23.627665,-10.329123,-9.265481,5.112249,-25.908445,26.856272,3.520078,../../data/train/bpubmn14_segment89_neutral.wav,neutral
622,-6.924351,20.706198,-20.963850,-6.675403,40.434940,-35.556854,7.372674,18.567125,-25.480097,0.682347,...,13.318611,21.930140,21.847281,-9.193823,25.662962,-14.353584,12.686361,2.389238,../../data/train/bpubmn14_segment92_neutral.wav,neutral
623,13.067240,11.781367,-11.009836,-10.825350,18.395868,-29.970449,13.194541,16.161209,-23.027617,18.811863,...,-13.974383,25.025301,-16.120775,-10.001205,6.724483,-24.922993,28.144653,-7.678248,../../data/train/bpubmn14_segment95_neutral.wav,neutral


In [7]:
import numpy as np
# Split the loaded frame into targets (y) and a feature matrix (X).
#
# The frame itself is left untouched: deleting the columns in place made this
# cell fail when executed a second time, because 'label' and
# 'sound_filepath' were already gone from df_ecapatdnn.
y = df_ecapatdnn['label'].to_list()

# Every column except the label and the source file path is used as a feature.
X = np.array(df_ecapatdnn.drop(columns=['label', 'sound_filepath']))
X.shape

(625, 192)

# Model Selection

In [8]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.dummy import DummyClassifier

import numpy as np


# Candidate models: one MLP per hidden-layer activation, followed by three
# DummyClassifier strategies that serve as reference points.
# All of them share random_state=1.
mlp_candidates = [
    MLPClassifier(activation=activation, random_state=1, max_iter=3000)
    for activation in ('logistic', 'tanh', 'relu')
]
dummy_baselines = [
    DummyClassifier(strategy=strategy, random_state=1)
    for strategy in ('most_frequent', 'stratified', 'uniform')
]
classifiers = mlp_candidates + dummy_baselines

In [9]:
# One shuffled, stratified 5-fold splitter is shared by every candidate; its
# random_state is a fixed integer, so each candidate sees the same folds.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

for classifier in classifiers:
    print('Running ', classifier)
    # Per-fold macro-averaged F1 for this candidate.
    scores = cross_val_score(classifier, X, y, scoring='f1_macro', cv=kf)
    print(scores, 'f1_macro=', np.mean(scores))
    print('-------')

Running  MLPClassifier(activation='logistic', max_iter=3000, random_state=1)
[0.47301587 0.50915751 0.4497169  0.40656737 0.49752085] f1_macro= 0.46719569873986116
-------
Running  MLPClassifier(activation='tanh', max_iter=3000, random_state=1)
[0.43893637 0.33699634 0.38821311 0.42867885 0.3771322 ] f1_macro= 0.39399137248575655
-------
Running  MLPClassifier(max_iter=3000, random_state=1)
[0.43826978 0.44517254 0.51374831 0.44315334 0.40139788] f1_macro= 0.4483483697077789
-------
Running  DummyClassifier(random_state=1, strategy='most_frequent')
[0.29464286 0.29297459 0.29297459 0.29297459 0.29297459] f1_macro= 0.293308242579543
-------
Running  DummyClassifier(random_state=1, strategy='stratified')
[0.25215889 0.3655754  0.25545635 0.31498016 0.29563492] f1_macro= 0.2967611440085533
-------
Running  DummyClassifier(random_state=1, strategy='uniform')
[0.33344038 0.29380901 0.22442136 0.28958045 0.31497303] f1_macro= 0.29124484573188997
-------


# Model Training

In [11]:
# Final model: the logistic-activation MLP configuration, fitted on all of
# X and y (no held-out split is kept back in this cell).
clf = MLPClassifier(
    activation='logistic',
    max_iter=3000,
    random_state=1,
)
# fit() is the last expression so the fitted estimator is displayed.
clf.fit(X, y)

MLPClassifier(activation='logistic', max_iter=3000, random_state=1)

In [14]:
# Score the fitted model on the same X and y it was trained on. This is a
# training-set figure only; it says nothing about performance on unseen data.
train_score = clf.score(X, y)
train_score

1.0

In [34]:
import pickle

# Serialise the fitted classifier to a pickle file in the working directory.
# NOTE: pickle files should only ever be loaded from trusted sources.
model_path = 'model.ecapatdnn.pkl'
with open(model_path, mode='wb') as fid_model:
    pickle.dump(clf, file=fid_model)
