# Training of Recurrenct Neural Network (RNN) model on spectrograms ( second version of spectrograms)

In [1]:
import numpy as np
import pandas as pd

from scipy import signal
from scipy.io import wavfile
import matplotlib.pyplot as plt

from PIL import Image
import matplotlib
import random

In [2]:
import tensorflow as tf
from tensorflow import keras
from keras import layers


tf.config.run_functions_eagerly

<function tensorflow.python.eager.polymorphic_function.polymorphic_function.run_functions_eagerly(run_eagerly)>

In [3]:
# Loading of datasets

spectr_train_list = pd.read_csv("spectr2_train_list.txt", index_col = 0).iloc[:,0].values.tolist()
spectr_test_list = pd.read_csv("spectr2_test_list.txt", index_col = 0).iloc[:,0].values.tolist()

# labels_train, labels_test
# spectrograms_train, spectrograms_test

labels_train = []
spectrograms_train = []

for path in spectr_train_list:
    labels_train.append(path.split("/")[1])
    
    
    rgb = matplotlib.image.imread(path)
    gray = np.mean(rgb, -1)
    spectrogram = (255*(gray - np.min(gray))/np.ptp(gray)).astype(int)  # [0, 255] scaled

    spectrograms_train.append(spectrogram)

labels_test = []
spectrograms_test = []

for path in spectr_test_list:
    labels_test.append(path.split("/")[1])
    
    rgb = matplotlib.image.imread(path)
    gray = np.mean(rgb, -1)
    spectrogram = (255*(gray - np.min(gray))/np.ptp(gray)).astype(int)  # [0, 255] scaled
    
    spectrograms_test.append(spectrogram)


  spectrogram = (255*(gray - np.min(gray))/np.ptp(gray)).astype(int)  # [0, 255] scaled
  spectrogram = (255*(gray - np.min(gray))/np.ptp(gray)).astype(int)  # [0, 255] scaled


In [4]:
labels_test[0:9]

['no', 'up', 'off', 'go', 'yes', 'stop', 'down', 'on', 'down']

In [8]:
# preparing datasets for models

dummies = pd.get_dummies(labels_train)
y_train = tf.one_hot(dummies,10)

dataset_train = tf.data.Dataset.from_tensor_slices((spectrograms_train, dummies)).shuffle(200).batch(8)

####################

dummies2 = pd.get_dummies(labels_test)
y_test = tf.one_hot(dummies2, 10)

dataset_test = tf.data.Dataset.from_tensor_slices((spectrograms_test, dummies2)).batch(8)

#####################

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy',
                           mode='max',
                           verbose=1,
                           patience=4)


# Model architecture

In [12]:
# model - LSTM
def get_model_RNN():
  model = tf.keras.models.Sequential()

  model.add(tf.keras.layers.LSTM(units = 64, return_sequences = True, input_shape=[64,64]))
  model.add(tf.keras.layers.LSTM(units = 64, return_sequences = True))

  model.add(tf.keras.layers.Dense(units = 32))
  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(units = 10, activation='softmax'))

  model.compile(loss = 'categorical_crossentropy',
             optimizer = keras.optimizers.Adam(learning_rate = 0.0001),
             metrics = ['accuracy'])
  return model

# Model training & ealuation

In [63]:
# model 1 ( using spectrograms v1 - for comparison)

model_RNN = get_model_RNN()

model_RNN.fit(dataset_train, epochs=50,  callbacks =[early_stop], validation_data=dataset_test)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 17: early stopping


<keras.callbacks.History at 0x1ef0e304310>

In [9]:
# model 1 - using spectrograms v2 (after logarithmic scaling)

model_RNN = get_model_RNN()

model_RNN.fit(dataset_train, epochs=50,  callbacks =[early_stop], validation_data=dataset_test)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x221ad98de40>

Accuracy: 0.45\%

# Model 2 - more robust

In [14]:
# model - LSTM
def get_model_RNN2():
  model = tf.keras.models.Sequential()

  model.add(tf.keras.layers.LSTM(units = 128, return_sequences = True, input_shape=[64,64]))
  model.add(tf.keras.layers.LSTM(units = 128, return_sequences = True))

  model.add(tf.keras.layers.Dense(units = 64))
  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(units = 10, activation='softmax'))

  model.compile(loss = 'categorical_crossentropy',
             optimizer = keras.optimizers.Adam(learning_rate = 0.0001),
             metrics = ['accuracy'])
  return model

In [65]:
# model 2

model_RNN2 = get_model_RNN2()

model_RNN2.fit(dataset_train, epochs=50,  callbacks =[early_stop], validation_data=dataset_test)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 11: early stopping


<keras.callbacks.History at 0x1ef1efad0f0>

In [67]:
model_RNN2.fit(dataset_train, epochs=20,  callbacks =[early_stop], validation_data=dataset_test)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 6: early stopping


<keras.callbacks.History at 0x1ef19e0e4d0>

In [15]:
# model 2 more robust - using spectrograms v2 (after logarithmic scaling)

# model 2

model_RNN2 = get_model_RNN2()

model_RNN2.fit(dataset_train, epochs=50,  callbacks =[early_stop], validation_data=dataset_test)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 30: early stopping


<keras.callbacks.History at 0x221c6f0c4c0>

Accuracy: 0.51\%

# (old) Conclusions from training on old spectrograms:


The first model obtained **74\% accuracy** on validation data set, whereas the second model obtained almost **80\% accuracy** with the same spectrograms, optimizer and learning rate settings. 

Reminder: it was a 10 class classification problem with balanced classes.

Possible ideas to train better version of the final model:

- decreasing learning rate, as models started to overfit ( extended learning process)
- use more layers and units within each layer ( more robust models)
- prepare other version of spectrograms (better data means better model)

# Conclusions from training on new spectrograms ( v2):


The first model obtained **45\% accuracy** on validation data set, whereas the second model obtained almost **52\% accuracy** with the same spectrograms, optimizer and learning rate settings. 

Having considered this modification to the machine learning pipeline:

- prepare other version of spectrograms (better data means better model)


The models' training time was considerably longer, so the modification consisting of **creation of new version of spectrograms** -- preserving more information, **was**, in this case, **leading to worse results**. Perhaps it was caused by too much background noise in the audio files and the old version of spectrograms was sufficient.