## Building NN for speech recogniton and optimize result 

In [1]:
import numpy as np
import os
from scipy.io import wavfile
from python_speech_features import mfcc, logfbank
import matplotlib.pyplot as plt
import pickle
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from keras.models import Sequential
from sklearn.utils import shuffle
from keras.layers import Dropout
from keras.layers import Dense
from sklearn import preprocessing
%matplotlib inline
import pandas as pd

from utils import *

Using TensorFlow backend.


### Get data

In [2]:
# read data
features_df = pd.read_csv('data.csv')

In [3]:
# get train label and data
all_labels = features_df.values[:,0]
x_data = features_df.values[:,1:]

In [4]:
# prepare categorical target values (e.g [0,0,0,1,0])
target = np.zeros((len(all_labels),41),dtype=int)
for i,_ in enumerate(all_labels):
    target[i][int(_)]=1

## Create train and test dataset

In [5]:
from sklearn.model_selection import StratifiedShuffleSplit
sss = StratifiedShuffleSplit(n_splits=10, test_size=0.15, random_state=0)

for train_index, test_index in sss.split(x_data,target):
    X_train, X_test = x_data[train_index], x_data[test_index]
    y_train, y_test = target[train_index], target[test_index]
    

print('Size of training matrix:', X_train.shape)
print('Size of testing matrix:', X_test.shape)

Size of training matrix: (18217, 2808)
Size of testing matrix: (3215, 2808)


#### Normalize values

In [6]:
# normalize train
scaler = preprocessing.StandardScaler().fit(X_train)
X_train=scaler.transform(X_train)

In [7]:
# normalize test
scaler = preprocessing.StandardScaler().fit(X_test)
X_test=scaler.transform(X_test)

In [8]:
#get number of columns in training data
n_cols = x_data.shape[1]

In [9]:
model = Sequential()

#add layers to model
model.add(Dense(200, activation='sigmoid', input_shape=(n_cols,)))
model.add(Dense(200, activation='sigmoid'))
model.add(Dense(41, activation='softmax'))

In [10]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 200)               561800    
_________________________________________________________________
dense_2 (Dense)              (None, 200)               40200     
_________________________________________________________________
dense_3 (Dense)              (None, 41)                8241      
Total params: 610,241
Trainable params: 610,241
Non-trainable params: 0
_________________________________________________________________


In [11]:
# compile model parameters
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [12]:
#train model
model.fit(X_train, y_train, epochs=50, batch_size=20,validation_split=0.2)

Train on 14573 samples, validate on 3644 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x2479cfbfcf8>

### Evaluate model

In [13]:
scores = model.evaluate(X_test, y_test, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

acc: 98.79%


In [22]:
prob = model.predict_proba(X_test)

In [45]:
'''
how to evaluate evaluate model:
    select the ones with larget prob.
    if prob of ones are euqal for any model then select the model with min others prob
'''

predicted_label = np.argmax(y_test, axis=1)
 
result=np.zeros((y_test.shape[0]), dtype=int)

for i in range(y_test.shape[0]):
    max_array=[]
    max_n=-100
    idx=0
    for j in range(41):
        max_array.append(prob[i][j])
        
        if prob[i][j]>max_n and prob[i][j]>=0.9:
            max_n=prob[i][j]
            idx=j   
       
    #sort max array
    max_array.sort()
    
    # compare result with the actual labels
    if(int(predicted_label[i])==int(idx) and max_array[-1]-max_array[-2]>=0.5):
        result[i]=1
        
        
overall_acc = np.mean(result)*100
print('Overall unseen test accuracy: %.2f percent' % overall_acc)

Overall unseen test accuracy: 98.38 percent


**Accuracy on unseen test data (normal model)**

* Normal Accuracy: 98.79

* Accuracy where (h1>=0.9): 98.38

* Accuracy where (h1>=0.9 and h2-h2>=0.5): 98.38

In [35]:
d=[[3,3,3,5,5,2],[12434,7,4,3,2,6]]
d=np.array(d)
predicted_label = np.argmax(d, axis=1)
predicted_label

array([3, 0], dtype=int64)