## Building a NN for speech recognition and optimizing the results

In [13]:
import numpy as np
import os
from scipy.io import wavfile
from python_speech_features import mfcc, logfbank
import matplotlib.pyplot as plt
import pickle
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from keras.models import load_model
from keras.models import Sequential
from sklearn.utils import shuffle
from keras.layers import Dropout
from keras.layers import Dense
from sklearn import preprocessing
%matplotlib inline
import pandas as pd

from utils import *

In [2]:
# Sub-folder (below models/) and base file name used when the trained network is saved and re-loaded.
folder_name = 'NNs/normal'
model_name = 'normal_model'

### Get data

In [3]:
# Load the train and test tables from their CSV files.
train_csv = 'data/normal/data41normal_train.csv'
test_csv = 'data/normal/data41normal_test.csv'
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)

### PREPROCESS DATA

In [4]:
#     """
#     get data and return preprocessed data
    
#     Parameters
#     ----------
#     train_df: train data
#     test_df: test data
#     label_increment: increment model index only for one vs. all models, default False
#     categorical: categorical preparation of data, default True
#     category_size: category size for categorical preparation, default: 41
#     normalize: normalize data, default True
    
#     Return
#     ------
#     X_out, y_out
#     """

In [5]:
# Run both frames through the shared preprocessing helper (41 categories),
# then unpack each result into its feature matrix and target array.
# NOTE(review): presumably each call normalizes its own frame independently — confirm in utils.
prepared_train = utils_prepare_data(train_df, category_size=41)
prepared_test = utils_prepare_data(test_df, category_size=41)
X_train, y_train = prepared_train
X_test, y_test = prepared_test

In [6]:
# Report the (rows, columns) shape of each prepared feature matrix.
for shape_label, feature_matrix in (
    ('Size of training matrix:', X_train),
    ('Size of testing matrix:', X_test),
):
    print(shape_label, feature_matrix.shape)

Size of training matrix: (18218, 2808)
Size of testing matrix: (3215, 2808)


In [7]:
# Split the raw training table: column 0 holds the label, the remaining columns the features.
train_values = train_df.values
y_out = train_values[:, 0]
X_out = train_values[:, 1:]

In [8]:
# Shape of the raw training frame, for comparison with X_train (column 0 is the label, per the slicing in the previous cell).
train_df.shape

(18218, 2809)

#### BUILD MODEL

In [9]:
# Number of feature columns in the training matrix; used as the input width of the first Dense layer below.
n_cols = X_train.shape[1]

In [10]:
# Fully connected classifier: two 200-unit sigmoid hidden layers followed by
# a 41-unit softmax output layer.
model = Sequential([
    Dense(200, activation='sigmoid', input_shape=(n_cols,)),
    Dense(200, activation='sigmoid'),
    Dense(41, activation='softmax'),
])

In [11]:
# Print each layer's output shape and parameter count, plus the totals.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 200)               561800    
_________________________________________________________________
dense_2 (Dense)              (None, 200)               40200     
_________________________________________________________________
dense_3 (Dense)              (None, 41)                8241      
Total params: 610,241
Trainable params: 610,241
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy as the reported metric.
# NOTE(review): this loss expects one-hot target rows — presumably what utils_prepare_data returns; confirm.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [14]:
# Train for 50 epochs with mini-batches of 20 samples; validation_split=0.2
# holds out 20% of the training rows for per-epoch validation.
# NOTE(review): Keras takes the validation slice from the end of the arrays, before any shuffling —
# confirm the rows are not ordered by class, otherwise the validation set is not representative.
model.fit(X_train, y_train, epochs=50, batch_size=20,validation_split=0.2)

Train on 14574 samples, validate on 3644 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1ecaf2049e8>

### Evaluate model

In [15]:
# Evaluate on the held-out test set; entry 1 of the result pairs with entry 1 of model.metrics_names.
scores = model.evaluate(X_test, y_test, verbose=0)
metric_name = model.metrics_names[1]
metric_percent = scores[1]*100
print("%s: %.7f%%" % (metric_name, metric_percent))

acc: 98.9424572%


In [16]:
def calculate_acc(X_test,y_test,h1,h2,classifier=None):
    '''
    Flag each sample as correctly (1) or incorrectly (0) classified under
    two confidence thresholds.

    A sample counts as correct only when all of the following hold:
        * the class with the largest predicted probability equals the true
          class (argmax of the one-hot row in y_test),
        * that largest probability is at least h1,
        * it leads the second-largest probability by at least h2.

    Parameters
    ----------
    X_test: input samples, passed unchanged to classifier.predict
    y_test: one-hot encoded true labels, one row per sample
    h1: minimum probability required for the winning class
    h2: minimum gap between the two largest class probabilities
    classifier: object whose predict(X) returns one row of class
        probabilities per sample; defaults to the notebook-level `model`

    Return
    ------
    1-D int array of 0/1 flags (its mean is the accuracy)
    '''
    if classifier is None:
        # Looked up at call time, so a model re-created by load_model is picked up.
        classifier = model

    # `predict` returns the class probabilities of the softmax output;
    # the older `predict_proba` alias is deprecated in Keras.
    prob = np.asarray(classifier.predict(X_test))
    true_label = np.argmax(y_test, axis=1)

    # np.argmax keeps the first maximum, like the strict '>' scan it replaces.
    top_label = np.argmax(prob, axis=1)

    ranked = np.sort(prob, axis=1)
    top_prob = ranked[:, -1]
    if prob.shape[1] > 1:
        margin = top_prob - ranked[:, -2]
    else:
        # Only one class: there is no runner-up to compare against.
        margin = top_prob

    # A sample whose best probability is below h1 is rejected outright.
    # (Previously the predicted index silently stayed 0 in that case, so
    # rejected samples whose true class was 0 could be counted as correct.)
    accepted = (top_prob >= h1) & (margin >= h2)

    return ((top_label == true_label) & accepted).astype(int)

In [19]:
# Plain accuracy: both thresholds are zero, so no prediction is rejected for low confidence.
correct_flags = calculate_acc(X_test, y_test, h1=.0, h2=.0)
overall_acc = 100 * np.mean(correct_flags)
print('Overall unseen test accuracy: %.2f percent' % overall_acc)

Overall unseen test accuracy: 98.94 percent


#### SAVE MODEL

In [None]:
# Write the trained network to the HDF5 file models/<folder_name>/<model_name>.h5
model_path = 'models/{path}/{model}.h5'.format(path=folder_name,model=model_name)
# Make sure the target folder exists first; writing the HDF5 file can fail if it is missing.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)
# Drop the in-memory model so the next cell has to restore it from the saved file
del model

In [None]:
# Restore the network from the HDF5 file written by the save cell.
saved_model_path = 'models/{path}/{model}.h5'.format(path=folder_name, model=model_name)
model = load_model(saved_model_path)

In [None]:
# Thresholded accuracy: h1=0.9 (minimum winning probability) and h2=0.5 (minimum lead over the runner-up).
correct_flags = calculate_acc(X_test, y_test, h1=.9, h2=.5)
overall_acc = 100 * np.mean(correct_flags)
print('Overall unseen test accuracy: %.2f percent' % overall_acc)

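**Accuracy on unseen test data (normal model)**

In the result lists below, `h1>=0.9` means the winning class probability had to be at least 0.9, and `h1-h2>=0.5` means the gap between the two largest class probabilities had to be at least 0.5 (the `h1` and `h2` arguments of `calculate_acc`).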

* Normal Accuracy: 98.94

* Accuracy where (h1>=0.9): 98.51

* Accuracy where (h1>=0.9 and h1-h2>=0.5): 98.51

**Accuracy on unseen test data (normal+ada model)**

* Normal Accuracy: 98.61

* Accuracy where (h1>=0.9): 97.84

* Accuracy where (h1>=0.9 and h1-h2>=0.5): 97.81

**Accuracy on unseen test data (ada model girls)**

* Normal Accuracy: 97.28

* Accuracy where (h1>=0.9): 94.85

* Accuracy where (h1>=0.9 and h1-h2>=0.5): 94.85

### Get ADA Data

In [None]:
# Read the ADA feature table; the next cell takes column 0 as the label and the remaining columns as features.
features_df_ada = pd.read_csv('databoy.csv')

In [None]:
# Split the ADA table: column 0 holds the label, the remaining columns the features.
ada_values = features_df_ada.values
all_labels_ada = ada_values[:, 0]
x_data_ada = ada_values[:, 1:]

In [None]:
# prepare categorical (one-hot) target values, e.g. label 3 -> [0,0,0,1,0,...]
label_idx_ada = all_labels_ada.astype(int)  # truncates like int(label) did
target_ada = np.zeros((len(label_idx_ada), 41), dtype=int)
# Set, for every row, the column named by that row's label in one indexed assignment.
# This avoids a Python-level loop and no longer overwrites `_`, which IPython
# uses for the last cell output.
target_ada[np.arange(len(label_idx_ada)), label_idx_ada] = 1

In [None]:
# Shuffle samples and targets together (fixed seed, so the order is repeatable),
# then standardize the feature columns.
# NOTE(review): the scaler is fitted on this ADA data itself rather than reused from the
# training data — confirm this matches the normalization applied to the normal set.
# Disabled experiment: skip the first 4500 rows
# x_data_ada=x_data_ada[4500:]
# target_ada=target_ada[4500:]

x_data_ada, target_ada = shuffle(x_data_ada, target_ada, random_state=0)

scaler = preprocessing.StandardScaler()
x_data_ada = scaler.fit_transform(x_data_ada)

In [None]:
# Evaluate the loaded model on the ADA data; entry 1 of the result pairs with entry 1 of model.metrics_names.
scores = model.evaluate(x_data_ada, target_ada, verbose=0)
metric_name = model.metrics_names[1]
metric_percent = scores[1]*100
print("%s: %.2f%%" % (metric_name, metric_percent))

In [None]:
# Thresholded accuracy on the ADA data: h1=0.9 (minimum winning probability), h2=0.5 (minimum lead over the runner-up).
correct_flags = calculate_acc(x_data_ada, target_ada, h1=.9, h2=.5)
overall_acc = 100 * np.mean(correct_flags)
print('Overall unseen test accuracy: %.2f percent' % overall_acc)

**Accuracy on unseen test ada data (normal model)**

* Normal Accuracy: 84.04

* Accuracy where (h1>=0.9): 78.29

* Accuracy where (h1>=0.9 and h1-h2>=0.5): 78.21

**Accuracy on unseen test ada data (normal + ada model)**

* Normal Accuracy: 95.80

* Accuracy where (h1>=0.9): 93.41

* Accuracy where (h1>=0.9 and h1-h2>=0.5): 93.38

**Accuracy on unseen test ada boys data (ada girl model)**

* Normal Accuracy: 76.07

* Accuracy where (h1>=0.9): 66.82

* Accuracy where (h1>=0.9 and h1-h2>=0.5): 66.72