<h2> Music Genre Classification with RNN-LSTM Model: </h2>

In [1]:
import json
import librosa
import numpy as np
import tensorflow as tf



In [3]:
DATA_PATH = "data.json"

def load_data(data_path = DATA_PATH):
  with open(data_path, "r") as fp:
    data = json.load(fp)

  mfcc = np.array(data["mfcc"])
  labels = np.array(data["labels"])
  return mfcc, labels

In [24]:
mfcc, labels = load_data(DATA_PATH)
delta_mfcc = librosa.feature.delta(mfcc, order = 1)
delta2_mfcc = librosa.feature.delta(mfcc, order = 2)

print(mfcc.shape)
print(delta_mfcc.shape)
print(delta2_mfcc.shape)

(9989, 132, 13)
(9989, 132, 13)
(9989, 132, 13)


In [26]:
from sklearn.model_selection import train_test_split

def split_data(features, labels):
  X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size = 0.30, random_state = 42)
  X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = 0.15, random_state = 42)
  return X_train, X_test, X_val, y_train, y_test, y_val

X_train, X_test, X_validation, y_train, y_test, y_validation = split_data(np.stack([mfcc, delta_mfcc, delta2_mfcc], axis = -1), labels)

input_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3])
print(input_shape)

(132, 13, 3)


In [40]:
import tensorflow.keras as keras

lstm_model = keras.Sequential()

lstm_model.add(keras.layers.TimeDistributed(keras.layers.Conv1D(filters = 32, kernel_size = 3, activation = 'relu'), 
                                  input_shape = input_shape))
lstm_model.add(keras.layers.TimeDistributed(keras.layers.BatchNormalization()))
lstm_model.add(keras.layers.TimeDistributed(keras.layers.Conv1D(filters = 64, kernel_size = 3, activation='relu')))
lstm_model.add(keras.layers.TimeDistributed(keras.layers.BatchNormalization()))
lstm_model.add(keras.layers.TimeDistributed(keras.layers.Flatten()))

lstm_model.add(keras.layers.LSTM(64, return_sequences = True, dropout = 0.2, recurrent_dropout = 0.2))
lstm_model.add(keras.layers.GRU(128, return_sequences = True, dropout = 0.3, recurrent_dropout = 0.3))
lstm_model.add(keras.layers.LSTM(128, dropout = 0.3, recurrent_dropout = 0.3))

lstm_model.add(keras.layers.Dense(256, activation = 'relu'))
lstm_model.add(keras.layers.Dropout(0.4))
lstm_model.add(keras.layers.Dense(128, activation = 'relu'))
lstm_model.add(keras.layers.Dropout(0.4))

lstm_model.add(keras.layers.Dense(10, activation = 'softmax'))

lstm_model.compile(optimizer = keras.optimizers.Adam(learning_rate = 0.001),
                   loss = 'sparse_categorical_crossentropy',
                   metrics = ['accuracy'])

lstm_model.summary()

  super().__init__(**kwargs)


In [41]:
early_stopping = keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 2, restore_best_weights = True)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor = 'val_loss', factor = 0.5, patience = 2, min_lr = 1e-6)

In [42]:
history = lstm_model.fit(X_train, y_train, 
                        validation_data = (X_validation, y_validation),
                        batch_size = 32,
                        epochs = 5,
                        callbacks = [early_stopping, reduce_lr])

Epoch 1/5
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m281s[0m 1s/step - accuracy: 0.2385 - loss: 2.0815 - val_accuracy: 0.3727 - val_loss: 1.7416 - learning_rate: 0.0010
Epoch 2/5
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m208s[0m 1s/step - accuracy: 0.3708 - loss: 1.7453 - val_accuracy: 0.4423 - val_loss: 1.4750 - learning_rate: 0.0010
Epoch 3/5
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 1s/step - accuracy: 0.4454 - loss: 1.5211 - val_accuracy: 0.4948 - val_loss: 1.3597 - learning_rate: 0.0010
Epoch 4/5
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 1s/step - accuracy: 0.4836 - loss: 1.4496 - val_accuracy: 0.5253 - val_loss: 1.3138 - learning_rate: 0.0010
Epoch 5/5
[1m186/186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 1s/step - accuracy: 0.5281 - loss: 1.3240 - val_accuracy: 0.5710 - val_loss: 1.1867 - learning_rate: 0.0010


In [43]:
_, accuracy = lstm_model.evaluate(X_test, y_test)
print("Model accuracy: " + str(round(100 * accuracy, 3)) + "%.")

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 134ms/step - accuracy: 0.6110 - loss: 1.1320
Model accuracy: 60.761%.
