# 🎧 Music Genre Classifier

This notebook loads audio files, extracts features using `librosa`, trains a classifier to predict music genres, and evaluates performance.

In [None]:
import os
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns

In [None]:
def extract_features(file_path):
    """Summarise one audio file as a fixed-length list of scalar features.

    The file is loaded with ``librosa.load`` (at most the first 30 seconds,
    per ``duration=30``), and each librosa feature matrix is reduced to a
    single number with ``np.mean``.

    Parameters
    ----------
    file_path : str
        Path to the audio file to analyse.

    Returns
    -------
    list
        Eight values, in this order: chroma_stft, rmse, spectral_centroid,
        spectral_bandwidth, rolloff, zero_crossing_rate, mfcc1, mfcc2.
    """
    y, sr = librosa.load(file_path, duration=30)

    # Compute the MFCC matrix once and index it twice; recomputing it per
    # coefficient doubles the cost of the most expensive feature for no gain.
    mfcc = librosa.feature.mfcc(y=y, sr=sr)

    features = {
        'chroma_stft': np.mean(librosa.feature.chroma_stft(y=y, sr=sr)),
        'rmse': np.mean(librosa.feature.rms(y=y)),
        'spectral_centroid': np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)),
        'spectral_bandwidth': np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr)),
        'rolloff': np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)),
        'zero_crossing_rate': np.mean(librosa.feature.zero_crossing_rate(y)),
        # 'mfcc1'/'mfcc2' are rows 0 and 1 of the MFCC matrix.
        'mfcc1': np.mean(mfcc[0]),
        'mfcc2': np.mean(mfcc[1])
    }
    # Dicts preserve insertion order, so the list follows the key order above.
    return list(features.values())

In [None]:
# Replace 'data_path' with your actual path to the GTZAN dataset
data_path = './data/genres_original/'
# os.listdir returns entries in arbitrary order. Sorting keeps the row order of
# X / y identical on every machine and run, so the seeded train/test split in
# the next cell selects the same samples each time.
genres = sorted(os.listdir(data_path))

X = []  # one feature list per audio file
y = []  # genre label (the sub-directory name) for the matching row of X

for genre in genres:
    genre_path = os.path.join(data_path, genre)
    # Skip stray files sitting next to the genre folders.
    if not os.path.isdir(genre_path):
        continue
    for file in sorted(os.listdir(genre_path)):
        if file.endswith('.wav'):
            file_path = os.path.join(genre_path, file)
            features = extract_features(file_path)
            X.append(features)
            y.append(genre)

# Fail here with a clear message rather than with an opaque error when the
# empty dataset reaches train_test_split.
if not X:
    raise FileNotFoundError(
        "No .wav files found under " + repr(data_path)
        + "; expected one sub-directory per genre containing .wav files."
    )

# Convert to DataFrame
X_df = pd.DataFrame(X)
y = pd.Series(y)


In [None]:
# Hold out 20% of the samples for evaluation. stratify=y keeps each genre's
# share the same in the train and test sets, so per-genre metrics are not
# distorted by a genre being over- or under-sampled in the test split.
X_train, X_test, y_train, y_test = train_test_split(
    X_df, y, test_size=0.2, random_state=42, stratify=y
)

# Fixed random_state makes the forest reproducible across runs.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out samples only.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))

In [None]:
# Pass labels explicitly so the matrix rows/columns follow model.classes_,
# the same sequence used for the tick labels. Without it, confusion_matrix
# derives its labels from y_test/y_pred and can drop a genre that appears in
# neither, which would misalign the ticks.
cm = confusion_matrix(y_test, y_pred, labels=model.classes_)

plt.figure(figsize=(10, 7))
# fmt='d' prints the counts as plain integers instead of the default '.2g'.
sns.heatmap(cm, annot=True, fmt='d', xticklabels=model.classes_, yticklabels=model.classes_)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()