In [2]:
import os
import numpy as np
import pandas as pd
from keras.models import Model
from keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, concatenate
from keras.optimizers import Adam
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from PIL import Image
import matplotlib.pyplot as plt
import cv2  # or use PIL for image handling
import joblib

In [3]:
working_OS = 'Windows'

if working_OS == 'MacOS':
    os.chdir(r"/Users/jordanlee/Code/School/CSCI416/music-genre-classification")
    print("Current Working Directory:", os.getcwd())

elif working_OS == 'Windows':
    os.chdir(r"C:\Code\School\CSCI416\music_genre_classification\music-genre-classification")
    print("Current Working Directory:", os.getcwd())

Current Working Directory: C:\Code\School\CSCI416\music_genre_classification\music-genre-classification


In [4]:
df = pd.read_csv('data/features/features_cleaned.csv')
X = df.drop(columns=['label'])
y = df['label']
label_encoder =LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train_tabular, X_test_tabular, y_train_tabular, y_test_tabular = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

In [5]:
spectrogram_dir = 'data/spectrograms'  # Path to the spectrograms folder

In [6]:
def load_spectrograms_and_labels(spectrogram_dir):
    spectrograms = []  # List to store spectrogram data
    labels = []        # List to store genre labels
    
    # Loop through all subdirectories (each representing a genre)
    for genre in os.listdir(spectrogram_dir):
        genre_folder = os.path.join(spectrogram_dir, genre)
        
        # Skip files, process only directories
        if os.path.isdir(genre_folder):
            for img_file in os.listdir(genre_folder):
                img_path = os.path.join(genre_folder, img_file)
                
                # Read the image using PIL (you can resize or preprocess here if needed)
                img = Image.open(img_path).convert('L')  # Convert to grayscale ('L')
                img = img.resize((128, 128))  # Resize to 128x128 (adjust as needed)
                
                # Convert image to numpy array
                img_array = np.array(img)
                
                # Append the image and its genre label
                spectrograms.append(img_array)
                labels.append(genre)  # The genre is the label for classification
    
    # Convert lists to numpy arrays
    X_spectrograms = np.array(spectrograms)
    y_labels = np.array(labels)
    
    return X_spectrograms, y_labels

In [7]:
# Load spectrograms and labels
X_spectrograms, y_labels = load_spectrograms_and_labels(spectrogram_dir)

# Normalize pixel values (between 0 and 1)
X_spectrograms = X_spectrograms.astype('float32') / 255.0

# Reshape to add channel dimension (since images are grayscale, the channel is 1)
X_spectrograms = X_spectrograms.reshape(-1, 128, 128, 1)

# Encode labels (genres) as numeric values
label_encoder = LabelEncoder()
y_labels_encoded = label_encoder.fit_transform(y_labels)

# Optional: one-hot encode the labels if needed (for multi-class classification)
y_labels_one_hot = np.eye(len(label_encoder.classes_))[y_labels_encoded]

In [8]:
# Split the data into training and test sets (80% train, 20% test)
X_train_spectrogram, X_test_spectrogram, y_train, y_test = train_test_split(
    X_spectrograms, y_labels_one_hot, test_size=0.2, random_state=42
)

In [64]:
# Define the input for tabular data
tabular_input = layers.Input(shape=(X_train_tabular.shape[1],))  # Adjust shape based on your tabular data

# Define the input for spectrogram images
spectrogram_input = layers.Input(shape=(128, 128, 1))  # Adjust shape based on your spectrogram size

# Tabular data processing: Dense layers
tabular_x = layers.Dense(64, activation='relu')(tabular_input)
tabular_x = layers.Dense(32, activation='relu')(tabular_x)

# Spectrogram data processing: Convolutional layers
spectrogram_x = layers.Conv2D(32, (3, 3), activation='relu')(spectrogram_input)
spectrogram_x = layers.MaxPooling2D((2, 2))(spectrogram_x)
spectrogram_x = layers.Conv2D(64, (3, 3), activation='relu')(spectrogram_x)
spectrogram_x = layers.MaxPooling2D((2, 2))(spectrogram_x)
spectrogram_x = layers.Flatten()(spectrogram_x)

# Concatenate the two branches (tabular and spectrogram)
combined = layers.concatenate([tabular_x, spectrogram_x])

# Dense layers after concatenation
x = layers.Dense(128, activation='relu')(combined)
x = layers.Dense(64, activation='relu')(x)

# Output layer (for classification)
output = layers.Dense(y_train.shape[1], activation='softmax')(x)  # Softmax for multi-class classification

# Define the model
small_raw_model = models.Model(inputs=[tabular_input, spectrogram_input], outputs=output)

# Compile the model
small_raw_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Model summary to visualize the architecture
small_raw_model.summary()


In [9]:
# Tabular input branch
tabular_input = layers.Input(shape=(X_train_tabular.shape[1],))
tabular_x = layers.BatchNormalization()(tabular_input)
tabular_x = layers.Dense(128, activation='relu')(tabular_x)
tabular_x = layers.Dropout(0.3)(tabular_x)
tabular_x = layers.Dense(64, activation='relu')(tabular_x)
tabular_x = layers.Dropout(0.2)(tabular_x)

# Spectrogram input branch
spectrogram_input = layers.Input(shape=(128, 128, 1))
spec_x = layers.BatchNormalization()(spectrogram_input)
spec_x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(spec_x)
spec_x = layers.BatchNormalization()(spec_x)
spec_x = layers.MaxPooling2D((2, 2))(spec_x)
spec_x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(spec_x)
spec_x = layers.BatchNormalization()(spec_x)
spec_x = layers.MaxPooling2D((2, 2))(spec_x)
spec_x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(spec_x)
spec_x = layers.BatchNormalization()(spec_x)
spec_x = layers.Flatten()(spec_x)

# Concatenate branches
combined = layers.concatenate([tabular_x, spec_x])

# More robust classification layers
x = layers.Dense(256, activation='relu')(combined)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.4)(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)

# Output layer
output = layers.Dense(y_train.shape[1], activation='softmax')(x)

# Create model
large_raw_model = models.Model(inputs=[tabular_input, spectrogram_input], outputs=output)

# Compile the model
large_raw_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Model summary to visualize the architecture
large_raw_model.summary()

In [76]:
small_raw_model.fit(
    [X_train_tabular, X_train_spectrogram], y_train, 
    epochs=10, 
    batch_size=32, 
    validation_data=([X_test_tabular, X_test_spectrogram], y_test)
)

Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 89ms/step - accuracy: 1.0000 - loss: 0.0037 - val_accuracy: 0.6800 - val_loss: 1.3526
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 91ms/step - accuracy: 1.0000 - loss: 0.0034 - val_accuracy: 0.6800 - val_loss: 1.3645
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 91ms/step - accuracy: 1.0000 - loss: 0.0028 - val_accuracy: 0.6650 - val_loss: 1.3825
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 91ms/step - accuracy: 1.0000 - loss: 0.0025 - val_accuracy: 0.6750 - val_loss: 1.4092
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 91ms/step - accuracy: 1.0000 - loss: 0.0022 - val_accuracy: 0.6850 - val_loss: 1.4356
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 169ms/step - accuracy: 1.0000 - loss: 0.0018 - val_accuracy: 0.6700 - val_loss: 1.4316
Epoch 7/10
[1m25/25[0m [32m━━━

<keras.src.callbacks.history.History at 0x2759e60ca00>

In [10]:
large_raw_model.fit(
    [X_train_tabular, X_train_spectrogram], 
    y_train, 
    epochs=10, 
    batch_size=32, 
    validation_data=([X_test_tabular, X_test_spectrogram], y_test)
)

Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 357ms/step - accuracy: 0.2669 - loss: 2.4368 - val_accuracy: 0.0950 - val_loss: 4.4471
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 341ms/step - accuracy: 0.5537 - loss: 1.3296 - val_accuracy: 0.0900 - val_loss: 13.5188
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 349ms/step - accuracy: 0.6772 - loss: 0.9437 - val_accuracy: 0.0900 - val_loss: 19.9366
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 339ms/step - accuracy: 0.8636 - loss: 0.5141 - val_accuracy: 0.0900 - val_loss: 23.8642
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 350ms/step - accuracy: 0.9117 - loss: 0.3150 - val_accuracy: 0.0900 - val_loss: 26.3892
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 342ms/step - accuracy: 0.9410 - loss: 0.2348 - val_accuracy: 0.0900 - val_loss: 29.4887
Epoch 7/10
[1m25/25[

<keras.src.callbacks.history.History at 0x24c60de2800>

In [75]:
joblib.dump(small_raw_model, 'models/small_raw_CNN_model.joblib')
joblib.dump(large_raw_model, 'models/large_raw_CNN_model.joblib')

['models/large_raw_CNN_model.joblib']

In [70]:
test_small_loss, test_small_accuracy = small_raw_model.evaluate([X_test_tabular, X_test_spectrogram], y_test)

# Print the results
print(f"Test loss: {test_small_loss}")
print(f"Test accuracy: {test_small_accuracy}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.6818 - loss: 1.1231
Test loss: 1.0134004354476929
Test accuracy: 0.6949999928474426


In [11]:
test_large_loss, test_large_accuracy = large_raw_model.evaluate([X_test_tabular, X_test_spectrogram], y_test)

# Print the results
print(f"Test loss: {test_large_loss}")
print(f"Test accuracy: {test_large_accuracy}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 0.1341 - loss: 30.8314
Test loss: 31.834091186523438
Test accuracy: 0.125


In [61]:
# Make predictions on the test data
y_pred = model.predict([X_test_tabular, X_test_spectrogram])

# Convert predictions to class labels (if using softmax, for classification)
y_pred_classes = y_pred.argmax(axis=1)

# Print predictions for the first few test samples
print(f"Predicted classes: {y_pred_classes[:5]}")
print(f"True classes: {y_test[:5]}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
Predicted classes: [9 9 9 9 9]
True classes: [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]]


In [62]:
# Convert y_test (one-hot encoded) to integer labels
y_test_classes = np.argmax(y_test, axis=1)

# Now compare the predicted classes with the true class labels
print(f"Predicted classes: {y_pred_classes[:5]}")
print(f"True classes: {y_test_classes[:5]}")

Predicted classes: [9 9 9 9 9]
True classes: [4 7 2 3 7]


In [63]:
# Confusion Matrix
cm = confusion_matrix(y_test_classes, y_pred_classes)
print("Confusion Matrix:")
print(cm)

# Classification Report (precision, recall, f1-score)
print("Classification Report:")
print(classification_report(y_test_classes, y_pred_classes))

Confusion Matrix:
[[ 0  0  1  0  0  0  0  0  0 20]
 [ 0  0  5  0  0  0  0  0  0  7]
 [ 0  0  0  0  0  0  0  0  0 24]
 [ 0  0  3  0  0  0  0  0  0 19]
 [ 0  0  6  0  0  0  0  0  0  9]
 [ 0  1 10  0  0  0  0  0  0 16]
 [ 0  0  1  0  0  0  0  0  0 17]
 [ 0  0  0  0  0  0  0  0  0 19]
 [ 0  0  3  0  0  0  0  0  0 19]
 [ 0  0  2  0  0  0  0  0  0 18]]
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        21
           1       0.00      0.00      0.00        12
           2       0.00      0.00      0.00        24
           3       0.00      0.00      0.00        22
           4       0.00      0.00      0.00        15
           5       0.00      0.00      0.00        27
           6       0.00      0.00      0.00        18
           7       0.00      0.00      0.00        19
           8       0.00      0.00      0.00        22
           9       0.11      0.90      0.19        20

    accuracy                    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
