Questo codice implementa una demo per la classificazione automatica di brani musicali in base al genere. Per effettuare la classificazione è necessario che i file audio da analizzare siano disponibili localmente sul proprio dispositivo, in modo da poterli caricare nell'interfaccia.

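È inoltre necessario importare nell'ambiente di esecuzione (cartella `/content`) i modelli salvati su Google Drive, cioè i file `.pkl` dello scaler e dei classificatori.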

#LIBRERIE

In [None]:
!pip install Gradio

Collecting Gradio
  Downloading gradio-5.16.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from Gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from Gradio)
  Downloading fastapi-0.115.8-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from Gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.0 (from Gradio)
  Downloading gradio_client-1.7.0-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from Gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from Gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from Gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from Gradio)
  Downloading ruff-0.9.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.meta

In [None]:
import joblib
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import gradio as gr
import requests
import tensorflow as tf
from pydub import AudioSegment
import os

#DEMO

La logica consiste nel suddividere la canzone in segmenti di durata fissa (3 secondi), in modo che le medie e le varianze delle caratteristiche siano paragonabili tra i segmenti. Ogni segmento viene quindi classificato in un genere musicale. Successivamente, per determinare il genere finale, si calcola la moda tra i generi assegnati ai segmenti, al fine di evitare i problemi derivanti dall'uso di una probabilità media, che potrebbe non riflettere accuratamente il genere predominante nel brano.

In [None]:
# Scaler applied to every feature vector before it is handed to a classifier.
scaler = joblib.load("/content/scaler (3).pkl")

# (display name, pickle path) for each selectable classifier. The order is
# preserved in ``models`` and therefore in the UI dropdown built from its keys.
_MODEL_PATHS = (
    ("Ensemble", "/content/voting_classifier_model.pkl"),
    ("RandomForest", "/content/random_forest_model.pkl"),
    ("KNN", "/content/knn_model (3).pkl"),
    ("SVM", "/content/best_svm_model (2).pkl"),
    ("XGBoost", "/content/xgboost_best_model.pkl"),
)
models = {label: joblib.load(path) for label, path in _MODEL_PATHS}

# Genre names; the classifiers' integer predictions are used as indices here.
GENRES = [
    'blues',
    'classical',
    'country',
    'disco',
    'hiphop',
    'jazz',
    'metal',
    'pop',
    'reggae',
    'rock',
]

def mp3_to_wav(mp3_path, wav_path):
    """Decode the MP3 file at ``mp3_path`` and write it to ``wav_path`` as WAV."""
    AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")

def extract_features(y, sr, n_mfcc=13):
    """Build the feature vector describing one audio segment.

    The vector contains, in this order:

    1. mean and variance of chroma, RMS energy, spectral centroid, spectral
       bandwidth, spectral roll-off and zero-crossing rate (12 values);
    2. the estimated tempo (1 value);
    3. mean and variance of each of the ``n_mfcc`` MFCC coefficients
       (``2 * n_mfcc`` values).

    Args:
        y: Audio samples of the segment, as accepted by ``librosa.feature``.
        sr: Sampling rate of ``y``.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        A 1-D NumPy array with ``13 + 2 * n_mfcc`` entries (39 by default).
    """
    # Frame-wise descriptors, in the order their statistics appear in the
    # output vector.
    frame_descriptors = (
        librosa.feature.chroma_stft(y=y, sr=sr),
        librosa.feature.rms(y=y),
        librosa.feature.spectral_centroid(y=y, sr=sr),
        librosa.feature.spectral_bandwidth(y=y, sr=sr),
        librosa.feature.spectral_rolloff(y=y, sr=sr),
        librosa.feature.zero_crossing_rate(y),
    )

    features = []
    for values in frame_descriptors:
        features.append(np.mean(values))
        features.append(np.var(values))

    # librosa 0.10 moved the tempo estimator to ``librosa.feature.tempo`` and
    # deprecated ``librosa.beat.tempo``. Prefer the new location and fall back
    # to the old one so the function keeps working on older installations.
    estimate_tempo = getattr(librosa.feature, "tempo", None) or librosa.beat.tempo
    features.append(estimate_tempo(y=y, sr=sr)[0])

    # One (mean, variance) pair per MFCC coefficient (one row per coefficient).
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    for coefficient in mfccs:
        features.append(np.mean(coefficient))
        features.append(np.var(coefficient))

    return np.array(features)

def split_audio(audio_file, segment_duration=3):
    """Load an audio file and cut it into equal-length segments.

    MP3 input (extension matched case-insensitively) is first converted to WAV
    inside a private temporary directory, so concurrent calls never share a
    scratch file and nothing is left behind on disk.

    Args:
        audio_file: Path of the audio file to load.
        segment_duration: Length of each segment in seconds (int or float).

    Returns:
        A ``(segments, sr)`` tuple: ``segments`` is a list of sample arrays,
        each exactly ``int(segment_duration * sr)`` samples long (a trailing
        partial segment is dropped), and ``sr`` is the sampling rate.

    Raises:
        ValueError: If ``segment_duration`` yields a non-positive number of
            samples per segment.
    """
    import tempfile  # local import: not part of the file-level import block

    audio_path = os.fspath(audio_file)

    # sr=None keeps the file's native sampling rate (no resampling).
    # NOTE(review): features depend on the sampling rate; confirm this matches
    # the rate used when the models were trained.
    if audio_path.lower().endswith(".mp3"):
        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_path = os.path.join(tmp_dir, "temp_audio.wav")
            mp3_to_wav(audio_path, wav_path)
            # The samples are fully read here, so the temporary WAV can be
            # deleted as soon as the ``with`` block exits.
            y, sr = librosa.load(wav_path, sr=None)
    else:
        y, sr = librosa.load(audio_path, sr=None)

    # Slicing and range() need an integer sample count.
    segment_samples = int(segment_duration * sr)
    if segment_samples <= 0:
        raise ValueError("segment_duration must yield at least one sample")

    # Stop early enough that every slice is a full segment.
    last_start = len(y) - segment_samples
    segments = [
        y[start:start + segment_samples]
        for start in range(0, last_start + 1, segment_samples)
    ]
    return segments, sr

def _count_segment_genres(segments, sr, model):
    """Classify each segment with ``model`` and tally the predicted genres.

    Returns a dict mapping every entry of ``GENRES`` to the number of segments
    assigned to it, or ``None`` if a feature vector has an unexpected length.
    """
    # 13 summary features (6 mean/variance pairs + tempo) plus the mean and
    # variance of 13 MFCCs, as produced by extract_features' defaults.
    expected_feature_count = 39

    counts = dict.fromkeys(GENRES, 0)
    for segment in segments:
        features = extract_features(segment, sr)
        if len(features) != expected_feature_count:
            return None

        scaled = scaler.transform(np.asarray(features).reshape(1, -1))
        # The prediction is used as an index into GENRES.
        genre_index = model.predict(scaled)[0]
        counts[GENRES[genre_index]] += 1
    return counts


def _plot_top_genres(top_genres, total_segments):
    """Draw a horizontal bar chart of ``(genre, count)`` pairs and return it."""
    # Build the figure without pyplot: pyplot keeps every figure it creates
    # in a global registry, which leaks one figure per request in a
    # long-running app, and plt.tight_layout() acts on the global "current
    # figure" rather than on this one.
    from matplotlib.figure import Figure

    labels = [genre for genre, _ in top_genres]
    counts = [count for _, count in top_genres]

    fig = Figure(figsize=(8, 5))
    ax = fig.subplots()

    fig.patch.set_facecolor('#2E2E2E')
    ax.set_facecolor('#2E2E2E')
    ax.barh(labels, counts, color=['#FF6F61', '#56B4D3', '#68A89D'])

    ax.set_xlabel('Number of Segments', color='white', fontsize=12, fontweight='bold')
    ax.set_title('Top 3 Predicted Genres', color='white', fontsize=14, fontweight='bold')

    # Annotate each bar with "count/total (percentage)".
    for row, count in enumerate(counts):
        ax.text(count + 0.2, row,
                f'{count}/{total_segments} ({(count / total_segments) * 100:.2f}%)',
                va='center', color='white', fontsize=11, fontweight='normal')

    # Most frequent genre at the top.
    ax.invert_yaxis()

    ax.tick_params(axis='x', colors='white', labelsize=10)
    ax.tick_params(axis='y', colors='white', labelsize=10)

    fig.tight_layout()
    return fig


def predict_genre(audio_file, classifier_name):
    """Predict the genre of an audio file by majority vote over its segments.

    The file is split into segments, each segment is classified with the
    selected model, and the most frequently predicted genre is returned
    together with a bar chart of the three most frequent genres.

    Args:
        audio_file: Path of the audio file to classify.
        classifier_name: Key of ``models`` selecting the classifier.

    Returns:
        ``(genre, figure)`` on success. ``(None, None)`` if the file yields no
        full segment, a feature vector has an unexpected length, or any error
        occurs; errors are logged with their traceback instead of being
        silently discarded.
    """
    import logging  # local import: not part of the file-level import block

    try:
        # Looked up once: the model does not change between segments.
        model = models[classifier_name]

        segments, sr = split_audio(audio_file)
        if not segments:
            return None, None

        genre_counts = _count_segment_genres(segments, sr, model)
        if genre_counts is None:
            return None, None

        # Stable sort: genres with equal counts keep their GENRES order.
        ranked = sorted(genre_counts.items(), key=lambda item: item[1], reverse=True)
        top_genres = ranked[:3]

        fig = _plot_top_genres(top_genres, len(segments))
        return top_genres[0][0], fig

    except Exception:
        # UI boundary: keep the app responsive, but record what went wrong.
        logging.getLogger(__name__).exception(
            "Genre prediction failed for %r with classifier %r",
            audio_file, classifier_name,
        )
        return None, None

# Gradio app: an audio upload and a classifier dropdown feed predict_genre,
# whose two results are shown as a label and a plot.
audio_input = gr.Audio(type="filepath")
classifier_input = gr.Dropdown(choices=list(models), label="Classifier")
genre_output = gr.Label()
chart_output = gr.Plot()

demo = gr.Interface(
    fn=predict_genre,
    inputs=[audio_input, classifier_input],
    outputs=[genre_output, chart_output],
    title="Music Genre Classifier",
    description="Upload an audio file and select a classifier to get its genre prediction.",
)

demo.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://013026c42af809fa50.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


