## Building a neural network for speech recognition and optimizing the results

In [2]:
import numpy as np
import os
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dropout
from keras.layers import Dense
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
%matplotlib inline
import pandas as pd

In [37]:
# our utils functions
from src.utils import *

# our classes
from classes.PreprocessData import *
from classes.EvaluateModel import *

In [5]:
# Output location of the trained network: sub-folder below models/ and base file name
folder_name, model_name = "NNs/mix", "mix"

### Get data

In [6]:
# Load the pre-split training and testing tables from CSV (train first, then test)
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ('data/mix/data41mix_train.csv', 'data/mix/data41mix_test.csv')
)

### PREPROCESS DATA

In [7]:
# Create the project's preprocessing helper (imported from classes.PreprocessData).
# The same instance is reused below for preprocess_data, categorize_y and uncategorize_y.
preprocess = PreprocessData()

In [8]:
# Turn each raw table into a feature matrix X and a label vector y.
# Per the original note, this step splits, normalizes and shuffles the data.
# NOTE(review): train and test go through separate preprocess_data calls; if the
# normalization statistics are computed inside that call, the test set is scaled
# with its own statistics rather than the training ones — confirm in PreprocessData.
X_train, y_train = preprocess.preprocess_data(train_df)
X_test, y_test = preprocess.preprocess_data(test_df)

In [9]:
# Report the (rows, columns) shape of both feature matrices
for caption, matrix in (
    ('Size of training matrix:', X_train),
    ('Size of testing matrix:', X_test),
):
    print(caption, matrix.shape)

Size of training matrix: (26027, 2808)
Size of testing matrix: (4593, 2808)


In [10]:
# Distinct label values that occur in the training targets
unique_words = {label for label in y_train}
print(unique_words)

{0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0}


In [11]:
# Number of classes, taken as the count of distinct labels in the training targets.
# NOTE(review): assumes every class occurs at least once in y_train — confirm.
class_size = len(set(y_train))

In [12]:
# Convert both label vectors with categorize_y, sized by class_size.
# NOTE(review): presumably one-hot encoding, since the model is compiled with
# categorical_crossentropy — confirm in PreprocessData.
# NOTE(review): y_train / y_test are overwritten in place, so re-running this cell
# without re-running the preprocessing cell feeds already-categorized labels back in.
y_train = preprocess.categorize_y(y_train, class_size)
y_test = preprocess.categorize_y(y_test, class_size)

#### BUILD MODEL

In [13]:
# Number of feature columns (second axis of X_train); used as the network's input size
n_cols = X_train.shape[1]

In [14]:
# Fully connected classifier: two sigmoid hidden layers of 200 units each,
# followed by a softmax output layer with one unit per class.
model = Sequential()

# Hidden layers; the first one also declares the input size (n_cols features).
model.add(Dense(200, activation='sigmoid', input_shape=(n_cols,)))
model.add(Dense(200, activation='sigmoid'))
# The output width follows class_size (derived from the training labels) instead of
# a hard-coded 41, so it stays in step with the class_size passed to categorize_y.
model.add(Dense(class_size, activation='softmax'))

In [15]:
# Print the layer-by-layer overview (output shapes and parameter counts)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 200)               561800    
_________________________________________________________________
dense_2 (Dense)              (None, 200)               40200     
_________________________________________________________________
dense_3 (Dense)              (None, 41)                8241      
Total params: 610,241
Trainable params: 610,241
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Configure training: Adam optimizer, categorical cross-entropy loss,
# and accuracy reported as the tracked metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [17]:
# Train for 50 epochs with mini-batches of 20 samples; validation_split=0.2 makes
# Keras hold out 20% of the training data for validation.
# The returned History object is kept so the per-epoch metrics stay available
# (history.history) and the cell no longer ends with a bare object repr.
history = model.fit(X_train, y_train, epochs=50, batch_size=20, validation_split=0.2)

Train on 20821 samples, validate on 5206 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x2126ccbeda0>

#### SAVE MODEL

In [18]:
# Write the trained network to models/<folder_name>/<model_name>.h5 (HDF5 format).
model_dir = 'models/{path}'.format(path=folder_name)
# Create the target directory first so that saving does not fail on a fresh checkout
# where the folder does not exist yet; exist_ok keeps re-runs harmless.
os.makedirs(model_dir, exist_ok=True)
model.save('{directory}/{model}.h5'.format(directory=model_dir, model=model_name))
# Drop the in-memory model; evaluation below works from the saved file
del model

### Evaluate model

In [19]:
# Pass both categorized label sets back through uncategorize_y.
# NOTE(review): presumably this restores plain class ids — confirm in PreprocessData.
# NOTE(review): the labels are categorized again further down when they are assigned
# to evaluate.y_train / evaluate.y_test, so this round trip may be redundant.
y_train = preprocess.uncategorize_y(y_train)
y_test = preprocess.uncategorize_y(y_test)

In [21]:
# Create the project's evaluation helper for the saved network.
# The model directory is derived from folder_name so it cannot drift from the
# location the model was saved to (it evaluates to "models/NNs/mix" here).
# NOTE(review): the meaning of the "MixModel_NN" and "normal" arguments is defined
# by EvaluateModel — confirm there.
evaluate = EvaluateModel(
    "MixModel_NN",
    "normal",
    "models/{path}".format(path=folder_name),
    class_size=class_size,
)

In [22]:
# Attach the label sets to the evaluator, categorized with the evaluator's own class_size
evaluate.y_train = preprocess.categorize_y(y_train, evaluate.class_size)
evaluate.y_test = preprocess.categorize_y(y_test, evaluate.class_size)

In [23]:
# Fetch the model(s) through the evaluator.
# NOTE(review): presumably loads the saved .h5 file(s) from the directory handed to
# EvaluateModel — confirm in EvaluateModel.get_models.
models = evaluate.get_models()

In [24]:
# Ground truth for scoring: evaluate.y_test passed back through uncategorize_y
target = preprocess.uncategorize_y(evaluate.y_test)

# Score the model(s) on the test features using the thresholds h1 and h2
result, predicted_labels = evaluate.calculate_res(
    models,
    X_test=X_test,
    target=target,
    h1=0.9,
    h2=0.5,
)

In [25]:
# Report the mean of `result` as the accuracy figure
mean_result = np.mean(result)
print("Accuracy: " + str(mean_result))

Accuracy: 0.9762682342695406


**Accuracy on unseen test data (mix model)**

* Normal accuracy: 98.62%

* Accuracy where (h1>=0.9): 97.69%

* Accuracy where (h1>=0.9 and h1-h2>=0.5): 97.62%