In [5]:
# KerasClassifier below comes from the legacy tensorflow.keras.wrappers.scikit_learn
# module, which newer TensorFlow releases no longer ship - hence the pinned version.
#!pip install tensorflow==2.11.0
import os
from math import floor

import librosa
import numpy as np
import pandas as pd
import tensorflow as tf
from numpy import asarray
from PIL import Image
from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold
from tensorflow import keras
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, Flatten, Dense, Dropout, LSTM, GRU, TimeDistributed, BatchNormalization
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# CRNN Model

In [2]:
# Shape of one input sample handed to the first Conv2D layer, and the width
# of the final softmax layer.
input_shape = (235, 352, 4)
num_classes = 8

# Baseline CRNN: three Conv2D -> MaxPool2D -> Dropout -> BatchNormalization
# stages, two stacked LSTMs, then a dense classification head.
model = Sequential([
    # Conv Block 1
    Conv2D(32, (3, 3), activation="relu", input_shape=input_shape),
    MaxPool2D((2, 2)),
    Dropout(0.1),
    BatchNormalization(),

    # Conv Block 2
    Conv2D(64, (3, 3), activation="relu"),
    MaxPool2D((2, 2)),
    Dropout(0.1),
    BatchNormalization(),

    # Conv Block 3
    Conv2D(128, (3, 3), activation="relu"),
    MaxPool2D((2, 2)),
    Dropout(0.1),
    BatchNormalization(),

    # Recurrent layers: TimeDistributed(Flatten()) flattens every slice along
    # axis 1 so the LSTMs receive a (steps, features) sequence.
    TimeDistributed(Flatten()),
    LSTM(64, return_sequences=True),
    LSTM(32),

    # Fully connected layers
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(num_classes, activation="softmax"),
])

# Compile the model
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
#model.summary()


In [3]:
#model.fit(train_spectrogram, y_train, epochs = 1)

## Dataset split and load into numpy arrays

In [4]:
# Read the track metadata table.
fma_small = pd.read_csv("fma_small.csv")

# Row masks for the predefined partitions stored in the "Set" column.
is_training_row = fma_small["Set"] == "training"
is_test_row = fma_small["Set"] == "test"

# X_* keep the track ID next to its genre; y_* keep only the genre column.
X_train = fma_small.loc[is_training_row, ["ID", "Genre"]]
y_train = fma_small.loc[is_training_row, ["Genre"]]

X_test = fma_small.loc[is_test_row, ["ID", "Genre"]]
y_test = fma_small.loc[is_test_row, ["Genre"]]

# Report the shape of each split.
for shape_label, split_frame in (
    ("Training Features Shape:", X_train),
    ("Training Target Shape:", y_train),
    ("Testing Features Shape:", X_test),
    ("Testing Target Shape:", y_test),
):
    print(shape_label, split_frame.shape)


Training Features Shape: (7115, 2)
Training Target Shape: (7115, 1)
Testing Features Shape: (789, 2)
Testing Target Shape: (789, 1)


In [None]:
def normalize_image(path):
    """Load an image and return its pixel data scaled by 1/255.

    Args:
        path: Location of the image file, passed straight to ``Image.open``.

    Returns:
        A float32 NumPy array with the image's pixel values divided by 255.0
        (so 8-bit channel values end up in the range 0-1).
    """
    raw_pixels = asarray(Image.open(path))
    # Cast to float first so the division is done in float32, not integers.
    return raw_pixels.astype("float32") / 255.0



# Loading data into RAM
SPECTROGRAM_DIR = "SpectrogramData"


def index_spectrogram_files(directory):
    """Scan ``directory`` once and map each numeric file ID to its PNG paths.

    The ID is the part of the filename before the first ".", converted to int.
    Entries that do not end in ".png" are ignored. Paths for the same ID keep
    the order in which ``os.listdir`` returned them.
    """
    paths_by_id = {}
    for filename in os.listdir(directory):
        if filename.endswith(".png"):  # Not checking directories
            file_id = int(filename.split(".")[0])  # Extract ID from filename
            paths_by_id.setdefault(file_id, []).append(os.path.join(directory, filename))
    return paths_by_id


def load_spectrograms(track_ids, paths_by_id):
    """Return the normalized images for ``track_ids``, in the order of the IDs.

    IDs without any indexed file are skipped, but they are reported because
    every skipped ID shifts the images out of step with the label rows.
    """
    images = []
    missing_ids = []
    for track_id in track_ids:
        paths = paths_by_id.get(track_id, [])
        if not paths:
            missing_ids.append(track_id)
        images.extend(normalize_image(path) for path in paths)
    if missing_ids:
        print(
            "WARNING: %d IDs have no spectrogram PNG (first few: %s); "
            "images and labels will be misaligned." % (len(missing_ids), missing_ids[:5])
        )
    return images


# Index the directory a single time instead of re-listing it for every ID.
spectrogram_paths = index_spectrogram_files(SPECTROGRAM_DIR)
train_spectrogram = load_spectrograms(X_train["ID"], spectrogram_paths)
test_spectrogram = load_spectrograms(X_test["ID"], spectrogram_paths)


In [None]:
# Number of images loaded per split, then the integer train/test ratio.
n_train_images = len(train_spectrogram)
n_test_images = len(test_spectrogram)
print("Train Data:", n_train_images)
print("Test Data:", n_test_images)
n_train_images // n_test_images

In [None]:
# Combine the per-image arrays into one array per split, adding a leading
# sample axis (np.stack stacks along axis 0 by default), as the model expects.
train_spectrogram, test_spectrogram = (
    np.stack(train_spectrogram),
    np.stack(test_spectrogram),
)
train_spectrogram.shape

In [None]:
# Display the training labels to inspect them before they are one-hot encoded.
y_train

In [None]:
# One-hot encodes the genres for the CNN.
# Both splits are encoded against the same sorted genre list, so column i means
# the same genre in y_train and y_test even if a genre is absent from one split.
genre_classes = sorted(set(y_train["Genre"].dropna()) | set(y_test["Genre"].dropna()))


def one_hot_genres(labels, classes):
    """Return a float32 (n_samples, len(classes)) one-hot matrix for ``labels``.

    Column order follows ``classes``. A label that is not in ``classes``
    (including NaN) yields an all-zero row.
    """
    encoded = pd.get_dummies(pd.Categorical(labels, categories=classes))
    return encoded.to_numpy(dtype="float32")


y_train = one_hot_genres(y_train["Genre"], genre_classes)
y_test = one_hot_genres(y_test["Genre"], genre_classes)

In [None]:
# Defines function for model architecture:
def create_model(n_filters, kernel_sizes, pool_sizes, lstm_units, dropout_rate,
                 input_shape=(235, 352, 4), num_classes=8):
    """Build and compile the convolutional-recurrent classifier.

    Args:
        n_filters: Sequence with one Conv2D filter count per convolutional
            block, e.g. ``(16, 32, 64)`` for three blocks.
        kernel_sizes: Kernel size shared by every Conv2D layer.
        pool_sizes: Pool size shared by every MaxPool2D layer.
        lstm_units: Units of the second recurrent layer; the first one uses
            twice as many. The name is historical - the layers are GRUs.
        dropout_rate: Dropout rate used after every conv block and in the
            dense head.
        input_shape: Shape of a single input sample.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``Sequential`` model (categorical cross-entropy loss, Adam
        optimizer, accuracy metric).
    """
    model = Sequential()
    model.add(Input(shape=input_shape))

    # Conv blocks: Conv2D -> MaxPool2D -> Dropout -> BatchNormalization
    for filters in n_filters:
        model.add(Conv2D(filters=filters, kernel_size=kernel_sizes, activation="relu"))
        model.add(MaxPool2D(pool_size=pool_sizes))
        model.add(Dropout(dropout_rate))
        model.add(BatchNormalization())

    # Recurrent layers: flatten every slice along axis 1 so the GRUs receive a
    # (steps, features) sequence.
    model.add(TimeDistributed(Flatten()))
    model.add(GRU(lstm_units * 2, return_sequences=True))
    model.add(GRU(lstm_units))

    # Fully connected layers
    model.add(Dense(128, activation="relu"))
    model.add(Dropout(dropout_rate))
    model.add(Dense(num_classes, activation="softmax"))

    # Compile the model
    model.compile(loss="categorical_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])
    return model

In [None]:
tf.random.set_seed(42)
np.random.seed(42)

# Quick smoke-test settings; raise these for a real tuning run.
SEARCH_EPOCHS = 1
STEPS_PER_EPOCH = 10
CV_FOLDS = 2

# Step 3: Wrap create_model as a scikit-learn estimator and specify the hyperparameters to tune.
# Note: this rebinds `model`; the values given here are defaults that the grid overrides.
model = KerasClassifier(build_fn=create_model, n_filters=(16, 32, 64), kernel_sizes=(3, 3), pool_sizes=(2, 2), lstm_units=32, dropout_rate=0.1)

# Full search space (n_filters, kernel_sizes, pool_sizes, lstm_units, dropout_rate).
# Not used below; pass it as param_grid (with cv=5) for the complete search.
full_hyperparams = {"n_filters": [(16, 32, 64), (32, 64, 128), (64, 128, 256)],
                    "kernel_sizes": [(3, 3), (4, 4)],
                    "pool_sizes": [(2, 2), (3, 3)],
                    "lstm_units": [32, 64, 128],
                    "dropout_rate": [0.1, 0.3, 0.5]
                   }

# Single tiny configuration, just enough to exercise the pipeline end to end.
hyperparams = {"n_filters": [(2, 4, 6)],
               "kernel_sizes": [(3, 3)],
               "pool_sizes": [(2, 2)],
               "lstm_units": [8],
               "dropout_rate": [0.1]
              }

grid = GridSearchCV(estimator=model, param_grid=hyperparams, cv=CV_FOLDS, verbose=1)

# Step 4: Fit the GridSearchCV object to your data
grid_result = grid.fit(train_spectrogram, y_train, epochs=SEARCH_EPOCHS, steps_per_epoch=STEPS_PER_EPOCH)

# Step 5: Retrieve the best hyperparameters and train your final model
best_params = grid.best_params_
final_model = create_model(**best_params)
final_model.fit(train_spectrogram, y_train, epochs=SEARCH_EPOCHS, steps_per_epoch=STEPS_PER_EPOCH)



In [None]:
# Report the best configuration, then mean and std of the test score for every candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means, stds, params = (
    cv_results["mean_test_score"],
    cv_results["std_test_score"],
    cv_results["params"],
)
for candidate in zip(means, stds, params):
    # candidate is (mean, std, params) - exactly the three values the format string expects.
    print("%f (%f) with: %r" % candidate)

