# Training of Recurrenct Neural Network (RNN) model on spectrograms

In [19]:
import numpy as np
import pandas as pd

from scipy import signal
from scipy.io import wavfile
import matplotlib.pyplot as plt

from PIL import Image
import matplotlib
import random

In [3]:
import tensorflow as tf
from tensorflow import keras
from keras import layers


tf.config.run_functions_eagerly

<function tensorflow.python.eager.polymorphic_function.polymorphic_function.run_functions_eagerly(run_eagerly)>

In [55]:
# Loading of datasets

spectr_train_list = pd.read_csv("spectr_train_list.txt", index_col = 0).iloc[:,0].values.tolist()
spectr_test_list = pd.read_csv("spectr_test_list.txt", index_col = 0).iloc[:,0].values.tolist()

# labels_train, labels_test
# spectrograms_train, spectrograms_test

labels_train = []
spectrograms_train = []

for path in spectr_train_list:
    labels_train.append(path.split("/")[1])
    
    
    rgb = matplotlib.image.imread(path)
    gray = np.mean(rgb, -1)
    spectrogram = (255*(gray - np.min(gray))/np.ptp(gray)).astype(int)  # [0, 255] scaled

    spectrograms_train.append(spectrogram)

labels_test = []
spectrograms_test = []

for path in spectr_test_list:
    labels_test.append(path.split("/")[1])
    
    rgb = matplotlib.image.imread(path)
    gray = np.mean(rgb, -1)
    spectrogram = (255*(gray - np.min(gray))/np.ptp(gray)).astype(int)  # [0, 255] scaled
    
    spectrograms_test.append(spectrogram)


In [58]:
labels_test[0:9]

['no', 'up', 'off', 'go', 'yes', 'stop', 'down', 'on', 'down']

In [66]:
# preparing datasets for models

dummies = pd.get_dummies(labels_train)
y_train = tf.one_hot(dummies,10)

dataset_train = tf.data.Dataset.from_tensor_slices((spectrograms_train, dummies)).shuffle(200).batch(8)

####################

dummies2 = pd.get_dummies(labels_test)
y_test = tf.one_hot(dummies2, 10)

dataset_test = tf.data.Dataset.from_tensor_slices((spectrograms_test, dummies2)).batch(8)

#####################

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy',
                           mode='min',
                           verbose=1,
                           patience=4)


# Model architecture

In [62]:
# model - LSTM
def get_model_RNN():
  model = tf.keras.models.Sequential()

  model.add(tf.keras.layers.LSTM(units = 64, return_sequences = True, input_shape=[64,64]))
  model.add(tf.keras.layers.LSTM(units = 64, return_sequences = True))

  model.add(tf.keras.layers.Dense(units = 32))
  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(units = 10, activation='softmax'))

  model.compile(loss = 'categorical_crossentropy',
             optimizer = keras.optimizers.Adam(learning_rate = 0.0001),
             metrics = ['accuracy'])
  return model

# Model training & ealuation

In [63]:
# model 1

model_RNN = get_model_RNN()

model_RNN.fit(dataset_train, epochs=50,  callbacks =[early_stop], validation_data=dataset_test)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 17: early stopping


<keras.callbacks.History at 0x1ef0e304310>

# Model 2 - more robust

In [64]:
# model - LSTM
def get_model_RNN2():
  model = tf.keras.models.Sequential()

  model.add(tf.keras.layers.LSTM(units = 128, return_sequences = True, input_shape=[64,64]))
  model.add(tf.keras.layers.LSTM(units = 128, return_sequences = True))

  model.add(tf.keras.layers.Dense(units = 64))
  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(units = 10, activation='softmax'))

  model.compile(loss = 'categorical_crossentropy',
             optimizer = keras.optimizers.Adam(learning_rate = 0.0001),
             metrics = ['accuracy'])
  return model

In [65]:
# model 2

model_RNN2 = get_model_RNN2()

model_RNN2.fit(dataset_train, epochs=50,  callbacks =[early_stop], validation_data=dataset_test)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 11: early stopping


<keras.callbacks.History at 0x1ef1efad0f0>

In [67]:
model_RNN2.fit(dataset_train, epochs=20,  callbacks =[early_stop], validation_data=dataset_test)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 6: early stopping


<keras.callbacks.History at 0x1ef19e0e4d0>

# Saving the model

In [68]:
model_RNN2.save('./model2.h5',save_format='h5')


In [69]:
model_RNN.save('./model.h5',save_format='h5')

In [70]:
model = tf.keras.models.load_model('./model2.h5')

# Conclusions

The first model obtained **74\% accuracy** on validation data set, whereas the second model obtained almost **80\% accuracy** with the same setting. 

Reminder: it was a 10 class classification problem with balanced classes.

Possible ideas to train better version of the final model:

- decreasing learning rate, as models started to overfit ( extended learning process)
- use more layers and units within each layer ( more robust models)
- prepare other version of spectrograms (better data means better model)