In [18]:
import glob
import os
# import librosa
# from librosa import display
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from matplotlib.pyplot import specgram
%matplotlib inline
plt.style.use('ggplot')
# import pydot
# import graphviz
plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = 'Ubuntu'
plt.rcParams['font.monospace'] = 'Ubuntu Mono'
plt.rcParams['font.size'] = 12
plt.rcParams['axes.labelsize'] = 11
plt.rcParams['axes.labelweight'] = 'bold'
plt.rcParams['axes.titlesize'] = 14
plt.rcParams['xtick.labelsize'] = 10
plt.rcParams['ytick.labelsize'] = 10
plt.rcParams['legend.fontsize'] = 11
plt.rcParams['figure.titlesize'] = 13

In [19]:
features = np.loadtxt('voice_merge_features.csv', delimiter=',')


In [20]:
labels = np.array(np.loadtxt('voice_merge_labels.csv', delimiter=','), dtype=np.int)

In [39]:
label_mapping = {
    0:3,
    1:1,
    2:5,
    3:4,
    4:0,
    5:2
}

In [40]:
labels = list(map(lambda label: label_mapping[label], labels))

In [42]:
np.savetxt("nn_simple_labels.csv", labels, delimiter=",")

In [21]:
from sklearn.preprocessing import normalize

X_all = features
labels = np.array(labels)

from sklearn.preprocessing import OneHotEncoder

enc = OneHotEncoder(sparse=False)
y_all = enc.fit_transform(labels.reshape((-1, 1)))

In [36]:
X_all.shape

(14178, 1280)

In [23]:
from sklearn.model_selection import train_test_split
import time

X_not_test, X_test, y_not_rest, y_test = train_test_split(
        X_all, y_all, stratify=y_all, train_size=.85, random_state=round(time.time()))

X_train, X_val, y_train, y_val = train_test_split(
    X_not_test, y_not_rest, stratify=y_not_rest, train_size=.95, random_state=round(time.time()))



In [24]:
print(X_train.shape)
print(y_train.shape)
print(X_val.shape)
print(y_val.shape)
print(X_test.shape)
print(y_test.shape)

(11448, 1280)
(11448, 6)
(603, 1280)
(603, 6)
(2127, 1280)
(2127, 6)


In [25]:
## Imports
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.utils import print_summary, plot_model

In [26]:
# Training parameters
batch_size = 32
num_classes = 6
epochs = 40
img_rows, img_cols = 40, 32
print(K.image_data_format())

X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
X_val = X_val.reshape(X_val.shape[0], img_rows, img_cols, 1)

input_shape = (img_rows, img_cols, 1)
print(X_train.shape)
print(y_train.shape)
print(X_val.shape)
print(y_val.shape)
print(X_test.shape)
print(y_test.shape)

channels_last
(11448, 40, 32, 1)
(11448, 6)
(603, 40, 32, 1)
(603, 6)
(2127, 40, 32, 1)
(2127, 6)


In [37]:
from keras import regularizers
from keras.layers import BatchNormalization

def build_model():
    # Model definition
    model = Sequential()
    model.add(Conv2D(4, kernel_size=(5, 5),
                     activation='relu',
                     input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    
    model.add(Conv2D(8, (3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(64, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    
    return model

In [28]:
from keras.models import load_model

model = load_model("voice_model.h5")

In [39]:
model.fit(X_train[:], y_train[:],
          batch_size=128,
          epochs=epochs,
          verbose=1,
          validation_data=(X_val, y_val))

Train on 11448 samples, validate on 603 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40

KeyboardInterrupt: 

In [43]:
score = model.evaluate(X_all.reshape(X_all.shape[0], img_rows, img_cols, 1), y_all, verbose=1)
print()
print('Test loss:', score[0])
print('Test accuracy:', score[1])


Test loss: 0.129940912857
Test accuracy: 0.961066440965
