In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, Masking
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
import numpy as np
import librosa
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from PIL import Image

In [2]:
def load_data(data_dir):
    """Load spectrogram PNGs grouped by label folder and convert each to MFCC frames.

    Every non-hidden sub-directory of ``data_dir`` is treated as one class
    label. Each ``.png`` file directly inside a label directory is read as a
    grayscale image, passed through ``librosa.power_to_db`` and then
    ``librosa.feature.mfcc`` (13 coefficients), and stored transposed.

    Directory listings are sorted so the order of the returned samples is
    deterministic; ``os.listdir`` makes no ordering guarantee, which would
    otherwise make any seeded split of the result differ between machines.

    Parameters
    ----------
    data_dir : str
        Root directory containing one sub-directory per label.

    Returns
    -------
    X : list of numpy.ndarray
        One transposed MFCC matrix per image (13 coefficients on the last axis).
    y : list of str
        Label directory name for the corresponding entry in ``X``.
    """
    X = []
    y = []
    labels = sorted(label for label in os.listdir(data_dir) if not label.startswith('.'))
    for label in labels:
        label_dir = os.path.join(data_dir, label)
        if not os.path.isdir(label_dir):
            continue  # Skip non-directory entries
        for file in sorted(os.listdir(label_dir)):
            file_path = os.path.join(label_dir, file)
            if os.path.isdir(file_path) or not file.endswith('.png'):
                continue  # Skip directories and non-png files
            # Context manager closes the underlying file handle right away
            # instead of leaving one open descriptor per image.
            with Image.open(file_path) as image:
                mel_spectrogram = np.array(image.convert('L'))  # Convert to grayscale
            # NOTE(review): pixel intensities are treated as a power spectrogram here.
            # If the PNGs were rendered from dB-scaled data this applies a log
            # scaling twice — confirm how the images were generated.
            mfcc = librosa.feature.mfcc(S=librosa.power_to_db(mel_spectrogram), n_mfcc=13)
            X.append(mfcc.T)
            y.append(label)
    return X, y

# Prepare data
data_dir = 'mel_spectrograms2'
X, y = load_data(data_dir)

# Fail early with a readable message rather than an opaque error from
# pad_sequences when the directory is missing samples.
if len(X) == 0:
    raise ValueError(f'No .png spectrograms found under {data_dir!r}')

# Check unique labels
print(f'Unique labels: {np.unique(y)}')

# Pad every MFCC sequence to a common length so they stack into one array.
X = tf.keras.preprocessing.sequence.pad_sequences(X, dtype='float32')
label_encoder = LabelEncoder()
# Keep the integer class ids: they drive the stratified split below.
y_encoded = label_encoder.fit_transform(y)
y = tf.keras.utils.to_categorical(y_encoded)

# Check the number of classes after encoding
print(f'Number of classes: {y.shape[1]}')

# Split data
# Stratify on the class ids so train and test keep the same genre proportions;
# a plain random split can under-represent a class in the small test set.
# (Stratification requires at least two samples per class.)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y_encoded
)

Unique labels: ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop'
 'reggae' 'rock']
Number of classes: 10


In [3]:
# Report the dimensions of each split, in a fixed order.
for split_name, split_array in (
    ('X_train', X_train),
    ('X_test', X_test),
    ('y_train', y_train),
    ('y_test', y_test),
):
    print(f'{split_name} shape: {split_array.shape}')

X_train shape: (755, 775, 13)
X_test shape: (189, 775, 13)
y_train shape: (755, 10)
y_test shape: (189, 10)


In [None]:
# Import callbacks from the same namespace as the model (tensorflow.keras) so
# the model and its callbacks are guaranteed to come from one Keras implementation.
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Masking, Dropout, BatchNormalization, Bidirectional, GlobalAveragePooling1D

# Define RNN model
# Input: variable-length sequences of 13 MFCC coefficients per timestep.
model = Sequential([
    # The sequences were zero-padded upstream; mask those timesteps.
    Masking(mask_value=0.0, input_shape=(None, 13)),
    Bidirectional(SimpleRNN(512, return_sequences=True)),
    BatchNormalization(),
    # Collapse the time axis so the classifier sees one vector per clip.
    GlobalAveragePooling1D(),
    # One output per genre; matches the 10 one-hot label columns.
    Dense(10, activation='softmax')
])


# Stop once val_loss has not improved for 3 epochs. restore_best_weights puts
# the best-epoch weights back into `model` when training stops, so a later
# evaluation of the in-memory model does not use a worse, later epoch.
callback = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model_checkpoint = ModelCheckpoint(
    'best_model.h5',  # Path where the model will be saved
    monitor='val_loss',  # Metric to monitor
    save_best_only=True,  # Save only the best model
    mode='min',  # Mode for the monitored metric
    verbose=1  # Verbosity mode
)

# categorical_crossentropy pairs with the one-hot encoded labels.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


Metal device set to: Apple M2

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2024-11-06 01:00:27.748299: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-11-06 01:00:27.748507: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [None]:
# Train model
# Validate on a held-out slice of the training data rather than the test set:
# early stopping and checkpoint selection both monitor val_loss, so using
# (X_test, y_test) here would leak test data into model selection and make the
# final test metrics optimistic.
history = model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.2,
    callbacks=[callback, model_checkpoint],
    verbose=1,
)

Epoch 1/5


2024-11-06 01:00:31.304704: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2024-11-06 01:00:32.765060: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f'Test Loss: {loss}')
print(f'Test Accuracy: {accuracy}')

# Predict the values from the test set
y_pred = model.predict(X_test)
# Convert probability rows / one-hot rows back to integer class ids.
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Compute the confusion matrix
# Pin the label set explicitly: without it, a class that appears in neither
# y_true nor the predictions is dropped and the matrix would no longer line up
# with the tick labels taken from label_encoder.classes_.
class_ids = np.arange(len(label_encoder.classes_))
conf_matrix = confusion_matrix(y_true, y_pred_classes, labels=class_ids)

# Plot the confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()