# 🎧 Music Recommendation Using Audio Features


This notebook demonstrates a content-based music recommendation system built on audio features extracted from `.wav` files.
It uses a pre-trained K-Nearest Neighbors (KNN) model to suggest songs similar to a user-provided track.


In [None]:

import os 
import pandas as pd
import numpy as np
import librosa
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
import joblib
import matplotlib.pyplot as plt
import seaborn as sns


## 🔧 Load Model and Feature Data

In [None]:

# Locations of the pre-computed artifacts, resolved relative to the
# notebook's current working directory.
FEATURE_CSV = "features.csv"
MODEL_PATH = "knn_model.joblib"
SCALER_PATH = "scaler.joblib"
FEATURE_COLS_PATH = "feature_cols.pkl"

# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code -- only point these paths at artifacts you produced or trust.
scaler, knn, feature_cols = (
    joblib.load(artifact_path)
    for artifact_path in (SCALER_PATH, MODEL_PATH, FEATURE_COLS_PATH)
)

# Feature table; `recommend` looks up "filename" and "genre" in it by the
# row positions returned from the KNN model.
df = pd.read_csv(FEATURE_CSV)


## 🎚 Feature Extraction

In [None]:

def extract_features(file_path):
    """Extract a flat dictionary of summary audio features from one file.

    The audio is loaded as mono at librosa's default sample rate. Every
    frame-wise feature is collapsed to a single scalar by averaging.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to analyse; passed straight to ``librosa.load``.

    Returns
    -------
    dict
        Maps feature name to scalar value, with keys ``chroma_stft``,
        ``rms``, ``spectral_centroid``, ``spectral_bandwidth``, ``rolloff``,
        ``zero_crossing_rate``, ``tempo`` and ``mfcc1`` .. ``mfcc20``.
    """
    y, sr = librosa.load(file_path, mono=True)

    # librosa 0.10 moved tempo estimation to `librosa.feature.tempo` and
    # deprecated `librosa.beat.tempo` (slated for removal in 1.0). Prefer the
    # new location and fall back so older installations keep working.
    estimate_tempo = getattr(librosa.feature, "tempo", None)
    if estimate_tempo is None:
        estimate_tempo = librosa.beat.tempo

    features = {
        "chroma_stft": np.mean(librosa.feature.chroma_stft(y=y, sr=sr)),
        "rms": np.mean(librosa.feature.rms(y=y)),
        "spectral_centroid": np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)),
        "spectral_bandwidth": np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr)),
        "rolloff": np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)),
        "zero_crossing_rate": np.mean(librosa.feature.zero_crossing_rate(y)),
        # The estimator returns an array; keep its first entry as the tempo.
        "tempo": estimate_tempo(y=y, sr=sr)[0],
    }

    # One averaged value per MFCC coefficient, named mfcc1 .. mfcc20.
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    for i, coefficient_frames in enumerate(mfccs, start=1):
        features[f"mfcc{i}"] = np.mean(coefficient_frames)

    return features


## 🤖 Recommend Similar Songs

In [None]:

def recommend(file_path):
    """Print and plot the songs most similar to the given audio file.

    Extracts features from ``file_path``, scales them with the loaded
    ``scaler``, queries the loaded ``knn`` model, and looks the neighbours up
    in ``df``. The matches are printed as a table and drawn as a horizontal
    bar chart of neighbour distance, coloured by genre.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to find recommendations for.

    Returns
    -------
    None
        Results are printed and plotted. If any column listed in
        ``feature_cols`` is missing from the extracted features, a
        diagnostic is printed and the function returns early.
    """
    print("\n📥 Extracting features from uploaded song...")
    input_features = extract_features(file_path)
    input_df = pd.DataFrame([input_features])

    print("\n📊 Scaling features...")
    missing_cols = [col for col in feature_cols if col not in input_df.columns]
    if missing_cols:
        print(f"\n🚨 Missing features: {missing_cols}")
        print("✅ Available features:", input_df.columns.tolist())
        return

    # Select columns in the order listed in `feature_cols` before scaling.
    input_scaled = scaler.transform(input_df[feature_cols])
    distances, indices = knn.kneighbors(input_scaled)

    print("\n🎵 Recommended Songs:")
    # NOTE(review): assumes the KNN model was fitted on the rows of `df` in
    # the same order, so neighbour indices are valid row positions -- confirm.
    recommended_songs = df.iloc[indices[0]][["filename", "genre"]].copy()
    print(recommended_songs.to_string(index=False))

    # A bar plot needs a numeric axis. The original mapped two string columns
    # (filename vs. genre), which seaborn cannot draw. Use the neighbour
    # distance as the bar length instead and keep genre as the colour.
    plot_data = recommended_songs.assign(distance=distances[0])

    fig, ax = plt.subplots()
    sns.barplot(
        y='filename',
        x='distance',
        data=plot_data,
        hue='genre',
        dodge=False,
        palette='coolwarm',
        ax=ax,
    )
    ax.set_title("Recommended Songs")
    ax.set_xlabel("Distance to uploaded song")
    ax.set_ylabel("Filename")
    fig.tight_layout()
    plt.show()


## 📂 Run the Recommendation

In [None]:

# Example: uncomment the two lines below and replace the path with your own .wav file
# path = "path/to/your/song.wav"
# recommend(path)
