## Building NN for speech recogniton and optimize result 

In [2]:
import numpy as np
import os
from scipy.io import wavfile
from python_speech_features import mfcc, logfbank
import matplotlib.pyplot as plt
import pickle
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from keras.models import Sequential
from sklearn.utils import shuffle
from keras.layers import Dropout
from keras.layers import Dense
from sklearn import preprocessing
%matplotlib inline
import pandas as pd

from utils import *

### Get data

In [3]:
# read data
features_df = pd.read_csv('datagirl.csv')

In [4]:
# get train label and data
all_labels = features_df.values[:,0]
x_data = features_df.values[:,1:]

In [5]:
# prepare categorical target values (e.g [0,0,0,1,0])
target = np.zeros((len(all_labels),41),dtype=int)
for i,_ in enumerate(all_labels):
    target[i][int(_)]=1

In [39]:
# read ADA data
features_df_ada = pd.read_csv('data41ADA.csv')

In [40]:
# get train label and data for ADA
all_labels_ada = features_df_ada.values[:,0]
x_data_ada = features_df_ada.values[:,1:]

In [41]:
# prepare categorical target values (e.g [0,0,0,1,0])
target_ada = np.zeros((len(all_labels_ada),41),dtype=int)
for i,_ in enumerate(all_labels_ada):
    target_ada[i][int(_)]=1

In [42]:
x_data_ada, target_ada = shuffle(x_data_ada, target_ada, random_state=0)

In [44]:
x_data = np.vstack((x_data, x_data_ada[0:4500]))
target = np.vstack((target, target_ada[0:4500]))

## Create train and test dataset

In [6]:
from sklearn.model_selection import StratifiedShuffleSplit
sss = StratifiedShuffleSplit(n_splits=10, test_size=0.15, random_state=0)

for train_index, test_index in sss.split(x_data,target):
    X_train, X_test = x_data[train_index], x_data[test_index]
    y_train, y_test = target[train_index], target[test_index]
    

print('Size of training matrix:', X_train.shape)
print('Size of testing matrix:', X_test.shape)

Size of training matrix: (6040, 2808)
Size of testing matrix: (1067, 2808)


#### Normalize values

In [7]:
# normalize train
scaler = preprocessing.StandardScaler().fit(X_train)
X_train=scaler.transform(X_train)

In [8]:
# normalize test
scaler = preprocessing.StandardScaler().fit(X_test)
X_test=scaler.transform(X_test)

In [9]:
#get number of columns in training data
n_cols = x_data.shape[1]

In [10]:
model = Sequential()

#add layers to model
model.add(Dense(200, activation='sigmoid', input_shape=(n_cols,)))
model.add(Dense(200, activation='sigmoid'))
model.add(Dense(41, activation='softmax'))

In [11]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 200)               561800    
_________________________________________________________________
dense_2 (Dense)              (None, 200)               40200     
_________________________________________________________________
dense_3 (Dense)              (None, 41)                8241      
Total params: 610,241
Trainable params: 610,241
Non-trainable params: 0
_________________________________________________________________


In [12]:
# compile model parameters
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [24]:
#train model
model.fit(X_train, y_train, epochs=20, batch_size=20,validation_split=0.2)

Train on 4832 samples, validate on 1208 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2cb0a635208>

### Evaluate model

In [25]:
scores = model.evaluate(X_test, y_test, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

acc: 97.38%


In [26]:
def calculate_acc(X_test,y_test,h1,h2):
    '''
    how to evaluate evaluate model:
        select the ones with larget prob.
        if prob of ones are euqal for any model then select the model with min others prob
    '''
    prob = model.predict_proba(X_test)
    predicted_label = np.argmax(y_test, axis=1)
    result=np.zeros((y_test.shape[0]), dtype=int)

    for i in range(y_test.shape[0]):
        max_array=[]
        max_n=-100
        idx=0
        for j in range(41):
            max_array.append(prob[i][j])

            if prob[i][j]>max_n and prob[i][j]>=h1:
                max_n=prob[i][j]
                idx=j   

        #sort max array
        max_array.sort()

        # compare result with the actual labels
        if(int(predicted_label[i])==int(idx) and max_array[-1]-max_array[-2]>=h2):
            result[i]=1
            
    return result

In [34]:
overall_acc = np.mean(calculate_acc(X_test,y_test,h1=.9,h2=.0))*100
print('Overall unseen test accuracy: %.2f percent' % overall_acc)

Overall unseen test accuracy: 96.06 percent


**Accuracy on unseen test data (normal model)**

* Normal Accuracy: 98.79

* Accuracy where (h1>=0.9): 98.38

* Accuracy where (h1>=0.9 and h1-h2>=0.5): 98.38

**Accuracy on unseen test data (normal+ada model)**

* Normal Accuracy: 98.61

* Accuracy where (h1>=0.9): 97.84

* Accuracy where (h1>=0.9 and h1-h2>=0.5): 97.81

**Accuracy on unseen test data (ada model girls)**

* Normal Accuracy: 97.38

* Accuracy where (h1>=0.9): 96.06

* Accuracy where (h1>=0.9 and h1-h2>=0.5): 96.06

### Get ADA Data

In [28]:
# read data
features_df_ada = pd.read_csv('databoy.csv')

In [29]:
# get train label and data
all_labels_ada = features_df_ada.values[:,0]
x_data_ada = features_df_ada.values[:,1:]

In [30]:
# prepare categorical target values (e.g [0,0,0,1,0])
target_ada = np.zeros((len(all_labels_ada),41),dtype=int)
for i,_ in enumerate(all_labels_ada):
    target_ada[i][int(_)]=1

In [31]:
# normalize train
# x_data_ada=x_data_ada[4500:]
# target_ada=target_ada[4500:]

x_data_ada, target_ada = shuffle(x_data_ada, target_ada, random_state=0)

scaler = preprocessing.StandardScaler().fit(x_data_ada)
x_data_ada=scaler.transform(x_data_ada)

In [32]:
scores = model.evaluate(x_data_ada, target_ada, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

acc: 76.22%


In [36]:
overall_acc = np.mean(calculate_acc(x_data_ada,target_ada,h1=.9,h2=.5))*100
print('Overall unseen test accuracy: %.2f percent' % overall_acc)

Overall unseen test accuracy: 69.21 percent


**Accuracy on unseen test ada data (normal model)**

* Normal Accuracy: 83.1

* Accuracy where (h1>=0.9): 76.63

* Accuracy where (h1>=0.9 and h1-h2>=0.5): 76.61

**Accuracy on unseen test ada data (normal + ada model)**

* Normal Accuracy: 95.80

* Accuracy where (h1>=0.9): 93.41

* Accuracy where (h1>=0.9 and h1-h2>=0.5): 93.38

**Accuracy on unseen test ada boys data (ada girl model)**

* Normal Accuracy: 76.22

* Accuracy where (h1>=0.9): 69.26

* Accuracy where (h1>=0.9 and h1-h2>=0.5): 69.21