# BeatBot
## Find the Beat, Discover the Genre


### Step 1: Setting Up Google Colab and Installing Libraries
Installing all libraries necessary for the project.

In [27]:
# Install the required libraries
!pip install librosa
!pip install scikit-learn
!pip install matplotlib



In [28]:
pip install gradio==3.16.2



### Step 2: Import Libraries and Load Dataset
We'll import the necessary libraries and prepare to use the GTZAN dataset for training. The GTZAN dataset can be downloaded from the UCI repository or Kaggle.

In [29]:
from google.colab import drive
drive.mount('/content/drive')

import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
import os

# Google Drive Path
DATASET_PATH = "/root/.cache/kagglehub/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/versions/1/Data/genres_original"

# Genres in the dataset
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [30]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("andradaolteanu/gtzan-dataset-music-genre-classification")

print("Path to dataset files:", path)

Path to dataset files: /root/.cache/kagglehub/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/versions/1


In [31]:
import os

if os.path.exists(DATASET_PATH):
    print("Dataset path found.")
    print("Genres:", os.listdir(DATASET_PATH))
else:
    print("Dataset path not found.")

Dataset path found.
Genres: ['metal', 'jazz', 'country', 'rock', 'classical', 'hiphop', 'disco', 'blues', 'pop', 'reggae']


### Step 3: Feature Extraction Using Librosa
Now we will extract audio features such as MFCCs, tempo, and spectral features from the songs.

In [32]:
def extract_features(file_path):
    # Load the audio file
    y, sr = librosa.load(file_path, duration=30)  # Load 30 seconds of the track

    # Extract features
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfccs_mean = np.mean(mfccs.T, axis=0)

    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_mean = np.mean(chroma.T, axis=0)

    mel = librosa.feature.melspectrogram(y=y, sr=sr)
    mel_mean = np.mean(mel.T, axis=0)

    return np.hstack((mfccs_mean, chroma_mean, mel_mean))

# Prepare the dataset
features = []
labels = []

for genre in genres:
    genre_path = os.path.join(DATASET_PATH, genre)

    if not os.path.exists(genre_path):
        print(f"Genre folder not found: {genre}")
        continue  # Skip this genre if folder is missing

    for file in os.listdir(genre_path):
        if file.endswith('.wav'):
            file_path = os.path.join(genre_path, file)
            print(f"Processing file: {file_path}")
            try:
                data = extract_features(file_path)
                features.append(data)
                labels.append(genre)
            except Exception as e:
                print(f"Error processing file {file_path}: {e}")

# Convert to numpy arrays
X = np.array(features)
y = np.array(labels)

Processing file: /root/.cache/kagglehub/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/versions/1/Data/genres_original/blues/blues.00080.wav
Processing file: /root/.cache/kagglehub/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/versions/1/Data/genres_original/blues/blues.00016.wav
Processing file: /root/.cache/kagglehub/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/versions/1/Data/genres_original/blues/blues.00048.wav
Processing file: /root/.cache/kagglehub/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/versions/1/Data/genres_original/blues/blues.00015.wav
Processing file: /root/.cache/kagglehub/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/versions/1/Data/genres_original/blues/blues.00044.wav
Processing file: /root/.cache/kagglehub/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/versions/1/Data/genres_original/blues/blues.00059.wav
Processing file: /root/.cache/kagglehub/datase

  y, sr = librosa.load(file_path, duration=30)  # Load 30 seconds of the track
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error processing file /root/.cache/kagglehub/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/versions/1/Data/genres_original/jazz/jazz.00054.wav: 
Processing file: /root/.cache/kagglehub/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/versions/1/Data/genres_original/jazz/jazz.00017.wav
Processing file: /root/.cache/kagglehub/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/versions/1/Data/genres_original/jazz/jazz.00079.wav
Processing file: /root/.cache/kagglehub/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/versions/1/Data/genres_original/jazz/jazz.00078.wav
Processing file: /root/.cache/kagglehub/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/versions/1/Data/genres_original/jazz/jazz.00010.wav
Processing file: /root/.cache/kagglehub/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/versions/1/Data/genres_original/jazz/jazz.00038.wav
Processing file: /root/.cache/kagglehub/datasets/an

## Step 4: Save the Trainned KNN Model


In [33]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the KNN model
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Evaluate the model
y_pred = knn.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
print(classification_report(y_test, y_pred))

Accuracy: 56.00%
              precision    recall  f1-score   support

       blues       0.65      0.62      0.63        21
   classical       0.67      0.83      0.74        12
     country       0.42      0.46      0.44        24
       disco       0.42      0.45      0.43        22
      hiphop       0.43      0.40      0.41        15
        jazz       0.57      0.63      0.60        27
       metal       0.73      0.61      0.67        18
         pop       0.60      0.79      0.68        19
      reggae       0.70      0.64      0.67        22
        rock       0.45      0.25      0.32        20

    accuracy                           0.56       200
   macro avg       0.56      0.57      0.56       200
weighted avg       0.56      0.56      0.55       200



In [34]:
import joblib

# Save the trained model to a file
joblib.dump(knn, 'knn_model.pkl')

print("Model saved successfully.")

Model saved successfully.


In [35]:
# Load the model
loaded_knn = joblib.load('knn_model.pkl')

# Test the loaded model
print("Loaded model successfully.")
print(loaded_knn)

Loaded model successfully.
KNeighborsClassifier()


## Step 5: Make the GUI


In [36]:
!pip install gradio



In [43]:
pip install gradio --upgrade

Collecting gradio
  Using cached gradio-5.9.0-py3-none-any.whl.metadata (16 kB)
Using cached gradio-5.9.0-py3-none-any.whl (57.2 MB)
Installing collected packages: gradio
  Attempting uninstall: gradio
    Found existing installation: gradio 3.16.2
    Uninstalling gradio-3.16.2:
      Successfully uninstalled gradio-3.16.2
Successfully installed gradio-5.9.0


In [None]:
import gradio as gr
import librosa
import numpy as np
import joblib
import os

# Load the trained KNN model
try:
    model = joblib.load('knn_model.pkl')
except FileNotFoundError:
    raise FileNotFoundError("Ensure the file 'knn_model.pkl' is in the working directory.")

# Feature extraction function
def extract_features(file_path):
    try:
        y, sr = librosa.load(file_path, duration=30)
        mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T, axis=0)
        chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)
        mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)
        return y, sr, np.hstack((mfccs, chroma, mel))
    except Exception as e:
        return None, None, f"Error in feature extraction: {str(e)}"

# Stats extraction function
def extract_song_stats(file_path):
    try:
        y, sr = librosa.load(file_path, duration=30)
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr).T, axis=0)
        spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr).T, axis=0)
        return float(tempo), float(spectral_centroid), float(spectral_rolloff)
    except Exception as e:
        return None, f"Error in extracting stats: {str(e)}", None

# Prediction function
def predict_genre(file_path):
    # Debug: Check if file_path is valid
    if not os.path.isfile(file_path):
        return "Error: File not found or uploaded incorrectly.", "No stats available."

    # Extract features and stats
    y, sr, features = extract_features(file_path)
    if features is None:
        return "Error in processing the audio file.", "No stats available."

    tempo, spectral_centroid, spectral_rolloff = extract_song_stats(file_path)
    if tempo is None or isinstance(tempo, str):  # If error occurred in stats extraction
        return "Error in extracting song statistics.", spectral_centroid

    # Predict the genre
    try:
        prediction = model.predict(features.reshape(1, -1))[0]
    except Exception as e:
        return f"Error in prediction: {str(e)}", "No stats available."

    # Ensure tempo and stats are formatted correctly
    stats = (
        f"Tempo: {tempo:.2f} BPM\n"
        f"Spectral Centroid: {spectral_centroid:.2f} Hz\n"
        f"Spectral Rolloff: {spectral_rolloff:.2f} Hz"
    )
    return f"The predicted genre is: {prediction}", stats


# Gradio Interface
def create_interface():
    return gr.Interface(
        fn=predict_genre,
        inputs=gr.Audio(type="filepath"),
        outputs=[
            gr.Textbox(label="Predicted Genre"),
            gr.Textbox(label="Song Statistics"),
        ],
        title="BeatBot",
        description="Upload a song (in .wav format) to classify its genre and discover its stats.",
        css="""
            body { background-color: #000000; color: white; }
            .gradio-container { background-color: #000000; }
            .gradio-button { background-color: #4260f5; border-color: #4260f5; }
            .gradio-button:hover { background-color: #2841c3; }
            .gradio-title { color: #4260f5; font-weight: bold; font-size: 24px; }
        """,
    )

# Launch the interface
interface = create_interface()
interface.launch(debug=True)


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://2e8a3b3fc4b22a8b27.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


  return float(tempo), float(spectral_centroid), float(spectral_rolloff)
  return float(tempo), float(spectral_centroid), float(spectral_rolloff)
