In [1]:
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import warnings
import os
warnings.filterwarnings('ignore')

In [2]:
def extract_features(file_path):
    """Extract audio features from a file using librosa"""
    try:
        # Load audio file
        audio, sr = librosa.load(file_path, duration=30)
        
        # Extract features
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
        spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)
        chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
        
        # Calculate statistics
        features = []
        for feature in [mfccs, spectral_centroid, chroma]:
            features.extend([
                np.mean(feature),
                np.std(feature),
                np.max(feature),
                np.min(feature)
            ])
            
        return features
    except Exception as e:
        print(f"Error extracting features from {file_path}: {str(e)}")
        return None

In [3]:
def process_data(data_path):
    features = []
    labels = []
    
    # Process each genre folder
    for genre in os.listdir(data_path):
        genre_path = os.path.join(data_path, genre)
        if os.path.isdir(genre_path):
            print(f"Processing {genre} files...")
            
            # Process each audio file in the genre folder
            for file_name in os.listdir(genre_path):
                if file_name.endswith('.wav'):
                    file_path = os.path.join(genre_path, file_name)
                    extracted_features = extract_features(file_path)
                    
                    if extracted_features:
                        features.append(extracted_features)
                        labels.append(genre)
    
    return np.array(features), np.array(labels)

In [4]:
def train_model(X, y):
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    
    # Initialize and train the model
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)
    
    # Evaluate the model
    y_pred = clf.predict(X_test)
    print("\nModel Performance:")
    print(classification_report(y_test, y_pred))
    
    return clf, X_test, y_test

In [5]:
def predict_genre(model, file_path):
    """Predict the genre of a single audio file"""
    features = extract_features(file_path)
    if features:
        features = np.array(features).reshape(1, -1)
        prediction = model.predict(features)
        probabilities = model.predict_proba(features)
        
        # Get top 3 predictions with probabilities
        top_3 = np.argsort(probabilities[0])[-3:][::-1]
        genres = model.classes_[top_3]
        probs = probabilities[0][top_3]
        
        return list(zip(genres, probs))
    return None

In [7]:
if __name__ == "__main__":
    # Replace with your dataset path
    DATA_PATH = "./datasets/SoundSynth/Data/genres_original"
    
    # Process all files
    print("Extracting features from audio files...")
    X, y = process_data(DATA_PATH)
    
    # Train the model
    print("\nTraining the model...")
    model, X_test, y_test = train_model(X, y)
    
    # Example prediction
    test_file = "/kaggle/input/gtzan-dataset-music-genre-classification/Data/genres_original/metal/metal.00009.wav"
    print("\nPredicting genre for test file...")
    predictions = predict_genre(model, test_file)
    
    if predictions:
        print("\nTop 3 predicted genres:")
        for genre, prob in predictions:
            print(f"{genre}: {prob:.2%}")


Extracting features from audio files...
Processing blues files...
Processing classical files...
Processing country files...
Processing disco files...
Processing hiphop files...
Processing jazz files...
Error extracting features from ./datasets/SoundSynth/Data/genres_original\jazz\jazz.00054.wav: 
Processing metal files...
Processing pop files...
Processing reggae files...
Processing rock files...

Training the model...

Model Performance:
              precision    recall  f1-score   support

       blues       0.57      0.76      0.65        21
   classical       0.63      1.00      0.77        12
     country       0.53      0.33      0.41        24
       disco       0.58      0.50      0.54        22
      hiphop       0.41      0.60      0.49        15
        jazz       0.70      0.52      0.60        27
       metal       0.72      0.72      0.72        18
         pop       0.79      0.79      0.79        19
      reggae       0.68      0.68      0.68        22
        rock    