<a href="https://www.kaggle.com/code/vijayadithyabk/music-genre-classification?scriptVersionId=217095084" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import os
import librosa
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Step 1: Function to Extract Audio Features
def extract_audio_features(file_path, duration=3, sr=22050, n_mfcc=13):
    """
    Summarise an audio file as a flat dictionary of scalar features.

    The file is loaded with ``librosa.load`` (limited to ``duration`` seconds),
    then the mean and variance of several frame-level descriptors are stored
    under ``<name>_mean`` / ``<name>_var`` keys.

    Args:
        file_path (str): Path to the audio file.
        duration (int): Maximum number of seconds of audio to load.
        sr (int): Sampling rate requested from ``librosa.load``.
        n_mfcc (int): Number of MFCC coefficients to summarise; the default
            of 13 yields the keys ``mfcc1_*`` .. ``mfcc13_*``.
    Returns:
        dict | None: Feature dictionary containing ``filename``, ``length``
        and the mean/variance entries, or ``None`` when anything goes wrong
        while loading or analysing the file (the error is printed).
    """
    try:
        signal, sample_rate = librosa.load(file_path, sr=sr, duration=duration)

        # Compute every frame-level descriptor exactly once; both the mean
        # and the variance below are derived from the same array.
        framewise = {
            "chroma_stft": librosa.feature.chroma_stft(y=signal, sr=sample_rate),
            "rms": librosa.feature.rms(y=signal),
            "spectral_centroid": librosa.feature.spectral_centroid(y=signal, sr=sample_rate),
            "spectral_bandwidth": librosa.feature.spectral_bandwidth(y=signal, sr=sample_rate),
        }

        features = {
            "filename": os.path.basename(file_path),
            "length": librosa.get_duration(y=signal, sr=sample_rate),
        }
        # Insertion order matters: it becomes the column order of the feature
        # table, so keep "<name>_mean" followed by "<name>_var" per descriptor.
        for name, values in framewise.items():
            features[f"{name}_mean"] = np.mean(values)
            features[f"{name}_var"] = np.var(values)

        # One mean/variance pair per MFCC coefficient, numbered from 1.
        mfccs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
        for index, coefficient in enumerate(mfccs, start=1):
            features[f"mfcc{index}_mean"] = np.mean(coefficient)
            features[f"mfcc{index}_var"] = np.var(coefficient)
        return features
    except Exception as e:
        # Deliberately best-effort: one unreadable file must not abort a
        # whole batch run, so report the problem and signal failure with None.
        print(f"Error processing {file_path}: {e}")
        return None

In [3]:
# Step 2: Generate Feature Files
def generate_feature_file(input_dir, output_csv, segment_duration=30):
    """
    Generate a CSV file containing audio features.

    Every sub-directory of ``input_dir`` is treated as a genre and every
    regular file inside it is passed to ``extract_audio_features``. Files for
    which extraction fails (the extractor returns ``None``) are left out.

    Args:
        input_dir (str): Directory containing genre folders.
        output_csv (str): Path to save the feature CSV file.
        segment_duration (int): Forwarded to ``extract_audio_features`` as its
            ``duration`` argument (seconds of audio analysed per file).
    """
    features_list = []

    # os.listdir() yields entries in arbitrary order. Sorting makes the row
    # order of the CSV (and therefore any seeded split of it) reproducible.
    for genre in sorted(os.listdir(input_dir)):
        genre_path = os.path.join(input_dir, genre)
        if not os.path.isdir(genre_path):
            continue

        for file in sorted(os.listdir(genre_path)):
            file_path = os.path.join(genre_path, file)
            # Nested folders are not audio clips; skip them up front.
            if not os.path.isfile(file_path):
                continue
            features = extract_audio_features(file_path, duration=segment_duration)
            if features:
                features["genre"] = genre
                features_list.append(features)

    if not features_list:
        # The CSV is still written for backward compatibility, but it will be
        # empty, so make the cause visible here instead of at read time.
        print(f"Warning: no features were extracted from {input_dir}")

    features_df = pd.DataFrame(features_list)
    features_df.to_csv(output_csv, index=False)
    print(f"Feature file saved to {output_csv}")

# Example: build two feature tables from the same dataset. Each run analyses
# at most the first N seconds of every file (N = 30, then N = 3), because the
# duration is passed straight through to librosa.load with no offset.
input_directory = "/kaggle/input/genres/genres"
for output_name, seconds in (("features_30_sec.csv", 30), ("features_3_sec.csv", 3)):
    generate_feature_file(input_directory, output_name, segment_duration=seconds)

Feature file saved to features_30_sec.csv
Feature file saved to features_3_sec.csv


In [4]:
# Step 3: Train a Classifier
# Read back the table of features computed from the first 3 seconds of each clip.
features_df = pd.read_csv("features_3_sec.csv")

# "filename" and "length" are bookkeeping columns and "genre" is the target,
# so none of them may be fed to the model as inputs.
non_feature_columns = ["filename", "genre", "length"]
X = features_df.drop(columns=non_feature_columns)
y = features_df["genre"]

# Hold out 20% of the rows; stratifying keeps the genre proportions equal in
# both partitions, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Fit a 100-tree random forest (seeded); fit() returns the estimator itself.
classifier = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the model on the held-out rows only.
y_pred = classifier.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
print("Accuracy:", test_accuracy)
print("Classification Report:\n", test_report)

Accuracy: 0.59
Classification Report:
               precision    recall  f1-score   support

       blues       0.56      0.50      0.53        20
   classical       0.72      0.90      0.80        20
     country       0.50      0.50      0.50        20
       disco       0.46      0.55      0.50        20
      hiphop       0.60      0.45      0.51        20
        jazz       0.57      0.60      0.59        20
       metal       0.72      0.90      0.80        20
         pop       0.78      0.70      0.74        20
      reggae       0.57      0.60      0.59        20
        rock       0.31      0.20      0.24        20

    accuracy                           0.59       200
   macro avg       0.58      0.59      0.58       200
weighted avg       0.58      0.59      0.58       200



In [5]:
# Step 4: Predict Genre for New Audio Files
def predict_genre(file_path, classifier, feature_extraction_fn, duration=3):
    """
    Predicts the genre of an audio file.

    Args:
        file_path (str): Path to the audio file.
        classifier: Trained model exposing ``predict``. If it also exposes
            ``feature_names_in_`` (set by scikit-learn estimators fitted on a
            DataFrame), the extracted features are aligned to those names.
        feature_extraction_fn: Callable ``(file_path, duration=...)`` returning
            a feature dict, or a falsy value when extraction fails.
        duration (int): Passed to ``feature_extraction_fn`` as ``duration``.
    Returns:
        The first element of ``classifier.predict(...)`` (the predicted
        genre), or the string ``"Error processing file"`` when no features
        could be extracted.
    """
    features = feature_extraction_fn(file_path, duration=duration)
    if not features:
        return "Error processing file"

    features_df = pd.DataFrame([features])
    # Bookkeeping columns are not model inputs; tolerate their absence.
    features_df = features_df.drop(columns=["filename", "length"], errors="ignore")

    # Feed the model exactly the columns it was fitted on, in the same order.
    # A pluggable extraction function may emit them in a different order or
    # add extra keys. If any expected name is missing, leave the frame as is
    # and let the classifier report the mismatch itself.
    expected = getattr(classifier, "feature_names_in_", None)
    if expected is not None and set(expected) <= set(features_df.columns):
        features_df = features_df[list(expected)]

    predicted_genre = classifier.predict(features_df)[0]
    return predicted_genre

# Example usage: classify a single clip with the trained model.
# NOTE(review): this clip lives in the same dataset folder the features were
# generated from, so it may have been part of the training split — treat the
# result as a smoke test rather than a held-out evaluation.
file_to_predict = "/kaggle/input/genres/genres/metal/metal.00005.au"
predicted_genre = predict_genre(
    file_path=file_to_predict,
    classifier=classifier,
    feature_extraction_fn=extract_audio_features,
)
print(f"Predicted Genre: {predicted_genre}")

Predicted Genre: metal
