<a href="https://colab.research.google.com/github/codermillat/Music-Recommender-System/blob/main/music_recommender_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Music Recommender System
A music recommendation system that demonstrates three approaches: popularity-based, collaborative-filtering, and content-based recommendations.

In [1]:
# Install required packages into the running kernel's environment.
# `-q` keeps pip's own output quiet.
# NOTE(review): no versions are pinned, so a fresh run may resolve different
# releases than the ones this notebook was developed with — consider pinning
# once a known-good set is confirmed.
%pip install -q numpy pandas scikit-learn scikit-surprise matplotlib seaborn

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m153.6/154.4 kB[0m [31m4.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone


In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from surprise import Dataset, Reader, KNNWithMeans
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Apply seaborn's "whitegrid" axes style and the "husl" color palette to
# every figure drawn later in the notebook.
sns.set_style("whitegrid")
sns.set_palette("husl")

# Optional alternative: use Matplotlib's bundled seaborn-like style sheet instead.
# NOTE(review): the 'seaborn-whitegrid' style name was deprecated in Matplotlib 3.6;
# newer releases expose it as 'seaborn-v0_8-whitegrid' — confirm against the
# installed version before enabling the line below.
# plt.style.use('seaborn-whitegrid')

In [5]:
# Switch to the Colab working directory so the relative 'data/...' paths below resolve.
# The explicit `%cd` magic is used instead of a bare `cd`, which only works while
# IPython's automagic is enabled and no variable named `cd` exists in the namespace.
%cd /content/

/content


In [None]:
# Load the listening history: one tab-separated row per (user, song) pair
# together with a play count.
triplets = pd.read_csv('data/kaggle_visible_evaluation_triplets.txt',
                       sep='\t', names=['user_id', 'song_id', 'freq'])

# Load the track metadata. '<SEP>' is a multi-character delimiter, which only
# pandas' Python parsing engine supports; selecting that engine explicitly
# avoids the ParserWarning pandas emits when it has to fall back on its own.
# NOTE(review): in the Million Song Dataset's unique_tracks.txt the fourth
# field appears to be the song title rather than a release/album name; the
# column keeps the name 'release' because later cells read it under that
# name — confirm against the data file.
tracks = pd.read_csv('data/unique_tracks.txt',
                     sep='<SEP>', engine='python',
                     names=['track_id', 'song_id', 'artist_name', 'release'])

# Keep exactly one metadata row per song. If a song_id is listed under more
# than one track_id, a plain left merge would repeat every listening row for
# that song and inflate its play counts in everything computed from df.
song_metadata = (
    tracks[['song_id', 'artist_name', 'release']]
    .drop_duplicates('song_id')
)

# Merge data: attach artist/title to every listening row. Songs without
# metadata keep NaN in the metadata columns (left join).
# validate='many_to_one' makes pandas raise if the right side is not unique.
df = pd.merge(triplets, song_metadata,
              on='song_id', how='left', validate='many_to_one')

# The merge must neither add nor drop listening rows.
assert len(df) == len(triplets), "merge changed the number of listening rows"

print("Dataset Statistics:")
print(f"Total Users: {df['user_id'].nunique():,}")
print(f"Total Songs: {df['song_id'].nunique():,}")
print(f"Total Artists: {df['artist_name'].nunique():,}")

## 1. Popularity-Based Recommendations

In [None]:
def get_popular_songs(df, n=10):
    """Return the ``n`` songs with the highest total play count.

    Args:
        df: DataFrame with ``song_id``, ``artist_name``, ``release`` and
            ``freq`` columns, one row per listening record.
        n: Number of songs to return (default 10).

    Returns:
        DataFrame with columns ``song_id``, ``artist_name``, ``release`` and
        ``freq`` (the summed play count), ordered from most to least played.
        Rows whose grouping columns contain NaN are excluded, because
        ``groupby`` drops NaN keys by default.
    """
    song_keys = ['song_id', 'artist_name', 'release']

    # Total plays per distinct (song, artist, release) combination.
    play_totals = df.groupby(song_keys)['freq'].sum()

    # Highest totals first, truncated to the requested number of songs.
    top_totals = play_totals.sort_values(ascending=False).head(n)

    # Turn the grouping keys back into ordinary columns.
    return top_totals.reset_index()

# Rank every song in df by total play count and show the default top 10.
popular_songs = get_popular_songs(df)
print("\nTop 10 Most Popular Songs:")
display(popular_songs)

## 2. Collaborative Filtering Recommendations

In [None]:
def get_collaborative_recommendations(df, user_id, n=5, max_candidates=100):
    """Recommend songs to a user with user-based KNN collaborative filtering.

    A ``KNNWithMeans`` model (cosine similarity, user-based, k=50) is fitted
    on every row of ``df``. A capped list of songs the user has not listened
    to is then scored, and the ``n`` highest estimates are returned.

    Args:
        df: DataFrame with ``user_id``, ``song_id``, ``freq``,
            ``artist_name`` and ``release`` columns.
        user_id: The user to recommend for.
        n: Number of recommendations to return (default 5).
        max_candidates: Upper bound on how many unheard songs are scored
            (default 100, the value previously hard-coded).

    Returns:
        DataFrame with columns ``Artist``, ``Song`` and ``Score`` (the
        estimate formatted to two decimals), best first. The frame is empty
        but keeps these columns when there is nothing to recommend.

    Notes:
        The model is refitted on every call, and a user-based similarity is
        computed over all users in ``df``.
        NOTE(review): this is likely to be slow and memory-hungry for large
        user counts — consider fitting once and reusing the model.
    """
    result_columns = ['Artist', 'Song', 'Score']
    if df.empty:
        return pd.DataFrame(columns=result_columns)

    # Surprise's Reader assumes ratings on a 1-5 scale unless told otherwise
    # and clips predictions to that range. Play counts are not bounded by 5,
    # so derive the scale from the data instead.
    reader = Reader(rating_scale=(df['freq'].min(), df['freq'].max()))
    data = Dataset.load_from_df(df[['user_id', 'song_id', 'freq']], reader)

    algo = KNNWithMeans(k=50, sim_options={'name': 'cosine', 'user_based': True})
    algo.fit(data.build_full_trainset())

    # Songs the user hasn't listened to, in order of first appearance in df.
    # Iterating a set here would make the capped candidate list differ from
    # one kernel session to the next (string hashing is randomized).
    user_songs = set(df.loc[df['user_id'] == user_id, 'song_id'])
    candidates = [
        song for song in df['song_id'].unique() if song not in user_songs
    ][:max_candidates]

    # Score the candidates and order them from highest to lowest estimate.
    predictions = sorted(
        (algo.predict(user_id, song) for song in candidates),
        key=lambda pred: pred.est,
        reverse=True,
    )

    # One metadata row per song (its first occurrence in df), indexed for
    # direct lookup instead of rescanning df for every recommendation.
    song_meta = (
        df.drop_duplicates('song_id')
        .set_index('song_id')[['artist_name', 'release']]
    )

    results = []
    for pred in predictions[:n]:
        song_info = song_meta.loc[pred.iid]
        results.append({
            'Artist': song_info['artist_name'],
            'Song': song_info['release'],
            'Score': f"{pred.est:.2f}"
        })

    return pd.DataFrame(results, columns=result_columns)

# Get recommendations for a sample user: the user on the first row of df.
sample_user = df['user_id'].iloc[0]
# Fits the KNN model on the whole of df, so this call can take a while.
collab_recommendations = get_collaborative_recommendations(df, sample_user)
print(f"\nRecommendations for user {sample_user}:")
display(collab_recommendations)

## 3. Content-Based Recommendations

In [None]:
def get_content_based_recommendations(df, song_id, n=5):
    """Recommend songs whose artist/title text is most similar to a given song.

    Each distinct song is described by the text "<artist_name> <release>".
    The texts are vectorized with TF-IDF (English stop words removed) and
    ranked by cosine similarity to the query song.

    Args:
        df: DataFrame with ``song_id``, ``artist_name`` and ``release``
            columns; a song may appear on many rows.
        song_id: The song to find neighbours for.
        n: Number of similar songs to look up (default 5).

    Returns:
        DataFrame with ``artist_name`` and ``release`` columns, most similar
        first. It may hold fewer than ``n`` rows, because songs sharing the
        same artist and title are collapsed into one row.

    Raises:
        IndexError: If ``song_id`` does not occur in ``df``.
    """
    # One row per song with a fresh 0..N-1 index, so a row's position here is
    # also its row in the TF-IDF matrix. Looking the song up in the full df
    # instead would point at the wrong matrix row (or past its end).
    songs = df.drop_duplicates('song_id').reset_index(drop=True)

    matches = songs.index[songs['song_id'] == song_id]
    if len(matches) == 0:
        raise IndexError(f"song_id {song_id!r} not found in df")
    query_pos = matches[0]

    # Create song features. Missing metadata becomes an empty string rather
    # than the literal text "nan", which would make all such songs look alike.
    song_features = (
        songs['artist_name'].fillna('').astype(str)
        + ' '
        + songs['release'].fillna('').astype(str)
    )

    # Calculate TF-IDF
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(song_features)

    # Calculate similarity of the query song to every song
    sim_scores = cosine_similarity(tfidf_matrix[query_pos], tfidf_matrix).flatten()

    # Rank best-first with a stable sort so ties resolve reproducibly, then
    # drop the query song by position. Simply discarding the top score is not
    # safe when another song ties with it.
    ranked = (-sim_scores).argsort(kind='stable')
    similar_positions = ranked[ranked != query_pos][:max(n, 0)]

    return songs.iloc[similar_positions][['artist_name', 'release']].drop_duplicates()

# Get recommendations for a sample song: the song on the first row of df.
sample_song = df['song_id'].iloc[0]
# First row of df for that song, used only to print its artist and title.
song_info = df[df['song_id'] == sample_song].iloc[0]
print(f"\nSimilar songs to {song_info['artist_name']} - {song_info['release']}:")
content_recommendations = get_content_based_recommendations(df, sample_song)
display(content_recommendations)