In [None]:
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical


In [None]:
# Function to load audio files from a directory
def load_audio_files(directory, sr=22050, duration=30):
    """Load every audio file found under ``directory/<genre>/`` as a fixed-length clip.

    Each sub-directory name is used as the label for the files it contains.

    Parameters
    ----------
    directory : str
        Root folder holding one sub-folder per genre.
    sr : int, default 22050
        Sample rate every clip is loaded at. The rest of this notebook
        hard-codes 22050 Hz, so loading at the file's native rate would
        silently disagree with the segment and spectrogram code.
    duration : float, default 30
        Number of seconds read from each file.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``audio`` of shape ``(n_clips, sr * duration)`` and ``labels`` of
        shape ``(n_clips,)`` holding the genre folder names. Both are empty
        1-D arrays when no audio file is found.
    """
    n_samples = int(sr * duration)
    audio_data = []
    labels = []
    # Sorted traversal keeps the clip order (and therefore any seeded
    # train/test split) identical from run to run.
    for genre in sorted(os.listdir(directory)):
        genre_path = os.path.join(directory, genre)
        if not os.path.isdir(genre_path):
            # Ignore stray files sitting next to the genre folders.
            continue
        for filename in sorted(os.listdir(genre_path)):
            filepath = os.path.join(genre_path, filename)
            audio, _ = librosa.load(filepath, duration=duration, sr=sr)
            # Files shorter than `duration` would otherwise make the stacked
            # array ragged; zero-pad (or trim) every clip to the same length.
            audio_data.append(librosa.util.fix_length(audio, size=n_samples))
            labels.append(genre)
    return np.array(audio_data), np.array(labels)

# Function to split data into training and testing sets
def split_train_test_data(X, y, test_size=0.2, random_state=42):
    """Return ``train_test_split``'s result for ``X`` and ``y``, stratified on ``y``.

    ``test_size`` and ``random_state`` are forwarded unchanged.
    """
    split_options = {
        "test_size": test_size,
        "random_state": random_state,
        "stratify": y,
    }
    return train_test_split(X, y, **split_options)

#Function to create 3-second audio segments from the original 30-second clips
def create_3s_audio_segments(audio_data, sr=22050, segment_seconds=3):
    """Cut each clip into consecutive, non-overlapping fixed-length segments.

    Parameters
    ----------
    audio_data : iterable of 1-D sequences
        Audio clips (sample arrays).
    sr : int, default 22050
        Samples per second of the clips.
    segment_seconds : float, default 3
        Length of every segment in seconds.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_segments, sr * segment_seconds)``. A trailing
        remainder shorter than one full segment is dropped. Only the segments
        are returned; labels are not replicated here.

    Raises
    ------
    ValueError
        If ``sr * segment_seconds`` is not at least one sample.
    """
    segment_len = int(segment_seconds * sr)
    if segment_len <= 0:
        raise ValueError("sr * segment_seconds must be at least one sample")

    audio_segments = []
    for audio in audio_data:
        # Integer division keeps only the complete segments of this clip.
        full_segments = len(audio) // segment_len
        audio_segments.extend(
            audio[k * segment_len:(k + 1) * segment_len] for k in range(full_segments)
        )
    return np.array(audio_segments)

# Function to generate mel spectrograms from audio data
def create_spectrograms_30s(audio_data, labels, save_path, sr=22050):
    """Render one mel-spectrogram PNG per clip into ``save_path``.

    Parameters
    ----------
    audio_data : iterable of 1-D arrays
        Audio clips.
    labels : iterable
        One label per clip; it becomes part of the file name.
    save_path : str
        Output folder. It is created if it does not exist yet.
    sr : int, default 22050
        Sample rate passed to the mel-spectrogram computation.

    Returns
    -------
    None
        Files are written as ``spectrogram_<index>_<label>.png``.
    """
    # savefig does not create missing folders, so make sure the target exists.
    os.makedirs(save_path, exist_ok=True)
    for i, (audio, genre) in enumerate(zip(audio_data, labels)):
        fig, ax = plt.subplots(figsize=(2, 2))
        try:
            S = librosa.feature.melspectrogram(y=audio, sr=sr)
            librosa.display.specshow(librosa.power_to_db(S, ref=np.max), ax=ax)
            ax.axis('off')
            spectrogram_filename = f'spectrogram_{i}_{genre}.png'
            fig.savefig(os.path.join(save_path, spectrogram_filename))
        finally:
            # Always release the figure, even if rendering or saving failed,
            # so a long loop cannot pile up open figures.
            plt.close(fig)

# Function to build a CNN model for music genre classification
def build_cnn_model(input_shape, num_classes):
    """Assemble and compile the genre-classification CNN.

    The network is two Conv2D + MaxPooling2D stages (32 then 64 filters,
    3x3 kernels, 2x2 pooling), a Flatten, a 128-unit ReLU Dense layer and a
    softmax Dense layer with ``num_classes`` outputs. It is compiled with
    the Adam optimizer, categorical cross-entropy loss and accuracy metric.
    """
    layer_stack = [
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]
    cnn = Sequential(layer_stack)
    cnn.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return cnn

# Main function for music genre classification
def _log_mel_features(audio_clips, n_samples, sr=22050, n_mels=128):
    """Convert raw clips into a (n_clips, n_mels, n_frames, 1) log-mel stack for the CNN."""
    features = []
    for clip in audio_clips:
        # Pad/trim the waveform first so every spectrogram has the same width.
        clip = librosa.util.fix_length(np.asarray(clip, dtype=np.float32), size=n_samples)
        mel = librosa.feature.melspectrogram(y=clip, sr=sr, n_mels=n_mels)
        features.append(librosa.power_to_db(mel, ref=np.max))
    # Trailing axis is the single "channel" Conv2D expects.
    return np.stack(features)[..., np.newaxis]


def music_genre_classification():
    """Run the raw-audio pipeline end to end and print test-set metrics.

    Steps: load the clips, encode genre names as integer class ids, make a
    stratified train/test split, save a spectrogram image per clip, compute
    log-mel spectrogram features in memory, train the CNN and print the test
    accuracy and confusion matrix.

    The model is trained on the in-memory spectrogram arrays; the saved PNG
    files are a by-product and are not read back.

    Returns
    -------
    None
    """
    sample_rate = 22050
    clip_samples = 30 * sample_rate  # clips are loaded as 30-second excerpts

    # Load original GTZAN dataset
    data_directory = '../Data/genres_original'
    audio_data, labels = load_audio_files(data_directory)

    # Keras needs integer class ids; class_names[i] is the genre name of id i.
    class_names, label_ids = np.unique(labels, return_inverse=True)
    num_classes = len(class_names)

    # Split data into training and testing sets
    audio_train, audio_test, y_train, y_test = split_train_test_data(audio_data, label_ids)

    # Create spectrogram images and save them to folders (named by genre)
    spectrogram_save_path_train = '../Data/spectograms_train'
    spectrogram_save_path_test = '../Data/spectograms_test'
    for folder in (spectrogram_save_path_train, spectrogram_save_path_test):
        os.makedirs(folder, exist_ok=True)
    create_spectrograms_30s(audio_train, class_names[y_train], spectrogram_save_path_train)
    create_spectrograms_30s(audio_test, class_names[y_test], spectrogram_save_path_test)

    # Preprocess data for CNN: 2-D log-mel spectrograms with a channel axis
    X_train = _log_mel_features(audio_train, clip_samples, sr=sample_rate)
    X_test = _log_mel_features(audio_test, clip_samples, sr=sample_rate)

    # Build CNN model from the actual feature shape (n_mels, n_frames, 1)
    model = build_cnn_model(X_train.shape[1:], num_classes)

    # categorical_crossentropy expects one-hot targets
    y_train_categorical = to_categorical(y_train, num_classes=num_classes)

    # Train the model
    model.fit(X_train, y_train_categorical, epochs=10, batch_size=32, validation_split=0.2)

    # Evaluate the model on the test set
    y_pred = np.argmax(model.predict(X_test), axis=1)

    # Print accuracy and confusion matrix
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.2%}")

    # Fixing `labels` keeps the matrix square even if a class is never predicted.
    conf_matrix = confusion_matrix(y_test, y_pred, labels=np.arange(num_classes))
    print("Confusion Matrix:")
    print(conf_matrix)

# Run the music genre classification pipeline defined above
# (loads the dataset, trains the CNN and prints the test metrics)
music_genre_classification()


  return np.array(audio_data), np.array(labels)


In [None]:

def load_data(data_path, target_shape=(128, 2500)):
    """Load every clip under ``data_path/<genre>/`` as a fixed-size mel spectrogram.

    Parameters
    ----------
    data_path : str
        Root folder holding one sub-folder per genre.
    target_shape : tuple of int, default (128, 2500)
        ``(n_mels, n_frames)`` of every returned spectrogram. Spectrograms
        are zero-padded or trimmed along the time axis to ``n_frames``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``mel_specs`` of shape ``(n_clips, n_mels, n_frames)`` and ``labels``
        of shape ``(n_clips,)`` holding the genre folder names.

    Notes
    -----
    Files that raise a librosa error or ``FileNotFoundError`` are reported
    with ``print`` and skipped; other exceptions propagate.
    """
    n_mels, n_frames = target_shape
    labels = []
    mel_specs = []

    # Sorted traversal keeps sample order (and any seeded split) reproducible.
    for genre in sorted(os.listdir(data_path)):
        genre_path = os.path.join(data_path, genre)
        if not os.path.isdir(genre_path):
            # Ignore stray files sitting next to the genre folders.
            continue
        for filename in sorted(os.listdir(genre_path)):
            file_path = os.path.join(genre_path, filename)
            try:
                audio, sr = librosa.load(file_path, res_type='kaiser_fast', duration=30)
                mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=n_mels)

                # Ensure all mel spectrograms have the same shape.
                # `size` is passed by keyword, and no `value=` is given because
                # fix_length forwards extra kwargs to np.pad, which rejects it;
                # the default padding is already constant zeros.
                mel_spec = librosa.util.fix_length(mel_spec, size=n_frames, axis=1)

                mel_specs.append(mel_spec)
                labels.append(genre)
            except (librosa.util.exceptions.LibrosaError, FileNotFoundError) as e:
                print(f"Error processing {file_path}: {e}")

    return np.array(mel_specs), np.array(labels)

# Build the mel-spectrogram dataset from the original genre folders
data_path = "../Data/genres_original"
mel_specs, labels = load_data(data_path=data_path)


In [None]:
# Step 2: Split the dataset into training and testing sets
# Stratify on the genre labels so every class keeps the same share in both
# subsets (same policy as split_train_test_data).
X_train, X_test, y_train, y_test = train_test_split(
    mel_specs, labels, test_size=0.2, random_state=42, stratify=labels
)

# Step 3: Build the CNN model
# Take the input size from the data instead of hard-coding the mel-band count.
n_mels, n_frames = X_train.shape[1], X_train.shape[2]
num_classes = len(np.unique(labels))

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(n_mels, n_frames, 1)),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# The sparse loss expects integer class ids as targets (not one-hot vectors).
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Step 4: Train the model
# The labels are genre-name strings, but sparse_categorical_crossentropy and
# the argmax predictions both work with integer class ids. np.unique returns
# the names sorted, so searchsorted maps each name to its index in that list.
class_names = np.unique(labels)
y_train_ids = np.searchsorted(class_names, y_train)
y_test_ids = np.searchsorted(class_names, y_test)

# Add channel dimension; the ndim guard makes re-running this cell harmless.
if X_train.ndim == 3:
    X_train = X_train[..., np.newaxis]
model.fit(X_train, y_train_ids, epochs=10, batch_size=32)

# Step 5: Evaluate the model
if X_test.ndim == 3:
    X_test = X_test[..., np.newaxis]
y_pred = np.argmax(model.predict(X_test), axis=1)

accuracy = accuracy_score(y_test_ids, y_pred)
# Fixing `labels` keeps the matrix square and aligned with class_names even
# when some class is never predicted.
conf_matrix = confusion_matrix(y_test_ids, y_pred, labels=np.arange(len(class_names)))

print(f"Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Step 6: Plot confusion matrix
# Drawn with scikit-learn's ConfusionMatrixDisplay: seaborn (`sns`) is never
# imported in this notebook, so the previous heatmap call raised NameError.
fig, ax = plt.subplots(figsize=(10, 8))
ConfusionMatrixDisplay(
    confusion_matrix=conf_matrix,
    display_labels=np.unique(labels),  # sorted genre names, one per class
).plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()
