In [1]:
import glob
import numpy as np
import random
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

from sklearn.preprocessing import OneHotEncoder

import keras
from keras.layers import LSTM, Dense, Dropout, Flatten
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint

In [2]:
SEED = 2017
DATA_DIR = 'Data/newData/' 

In [3]:
files = glob.glob(DATA_DIR + "*.wav")
X_train, X_val = train_test_split(files, test_size=0.2, random_state=SEED)

print('# Training examples: {}'.format(len(X_train)))
print('# Validation examples: {}'.format(len(X_val)))

# Training examples: 480
# Validation examples: 120


In [4]:
labels = []
for i in range(len(X_train)):
    label = X_train[i].split('/')[-1].split('_')[0]
    if label not in labels:
        labels.append(label)
print(labels)


['kareem', 'Hadeer', 'sawy', 'gendy', 'mohamed', 'atwa', 'Emad', 'Hady', 'kareemBrother', 'Metwally']


In [5]:
label_binarizer = LabelBinarizer()
label_binarizer.fit(list(set(labels)))

def one_hot_encode(x): return label_binarizer.transform(x)

In [6]:
n_features = 20
max_length = 80
n_classes = len(labels)

In [7]:
def batch_generator(data, batch_size=16):
    while 1:
        random.shuffle(data)
        X, y = [], []
        for i in range(batch_size):
            wav = data[i]
            wave, sr = librosa.load(wav, mono=True)
            label = wav.split('/')[-1].split('_')[0]
            y.append(one_hot_encode([label])[-1])
            mfcc = librosa.feature.mfcc(wave, sr)
            mfcc = np.pad(mfcc, ((0,0), (0, max_length-
            len(mfcc[0]))), mode='constant', constant_values=0) 
            X.append(np.array(mfcc))
        yield np.array(X), np.array(y)


In [8]:
learning_rate = 0.001
batch_size = 64
n_epochs = 50
dropout = 0.5

input_shape = (n_features, max_length)
steps_per_epoch = 50


In [9]:
model = Sequential()
model.add(LSTM(256, return_sequences=True, input_shape=input_shape,
   dropout=dropout))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(dropout))
model.add(Dense(n_classes, activation='softmax'))


In [10]:
opt = Adam(lr=learning_rate)
model.compile(loss='categorical_crossentropy', optimizer=opt,
metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 20, 256)           345088    
_________________________________________________________________
flatten (Flatten)            (None, 5120)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               655488    
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 1,001,866
Trainable params: 1,001,866
Non-trainable params: 0
_________________________________________________________________


In [11]:
callbacks = [ModelCheckpoint('checkpoints/voice_recognition_best_model_{epoch:02d}.hdf5', save_best_only=True),
            EarlyStopping(monitor='val_accuracy', patience=2)]


In [12]:
history = model.fit_generator(
   generator=batch_generator(X_train, batch_size),
   steps_per_epoch=steps_per_epoch,
   epochs=n_epochs,
   verbose=1,
   validation_data=batch_generator(X_val, 32),
   validation_steps=5,
   callbacks=callbacks
 )


Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
