In [1]:
# !pip install spotipy
# !pip install pandas
# !pip install scikit-learn
# !pip install numpy
# !pip install IPython

In [2]:
# Import necessary libraries
import os

import numpy as np
import pandas as pd
import spotipy
from IPython.display import display
from sklearn.metrics.pairwise import cosine_similarity
from spotipy.oauth2 import SpotifyOAuth

In [3]:
# Spotify API credentials
# The values are read from environment variables so that no secret is stored in
# the notebook. Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET (and optionally
# SPOTIPY_REDIRECT_URI) before starting the kernel; a variable that is not set
# is left as None here.
# NOTE(review): the client ID/secret that used to be hardcoded in this cell
# should be treated as leaked and rotated.
SPOTIPY_CLIENT_ID = os.environ.get('SPOTIPY_CLIENT_ID')
SPOTIPY_CLIENT_SECRET = os.environ.get('SPOTIPY_CLIENT_SECRET')
SPOTIPY_REDIRECT_URI = os.environ.get('SPOTIPY_REDIRECT_URI', 'http://localhost:8080/callback')

In [4]:
# Build the Spotify client used by every cell below. The OAuth manager is
# given the app credentials and asks for read access to the user's top items
# and saved library.
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        scope='user-top-read user-library-read',
        redirect_uri=SPOTIPY_REDIRECT_URI,
        client_secret=SPOTIPY_CLIENT_SECRET,
        client_id=SPOTIPY_CLIENT_ID,
    )
)

# Method: 1 (Very High Similarity score)

## Advantages and Disadvantages of using this method:

**`Advantage`:** 
- <p> It has a higher similarity score, so the recommended songs are very likely to match the taste reflected in your regular favourites.</p>

**`Disadvantage`:** 
- It may recommend the exact same song (its remix, etc.).
- You may already know the recommended songs.

In [5]:
def fetch_top_tracks(sp, limit=10):
    """Return the current user's top tracks as a DataFrame.

    Parameters
    ----------
    sp : object
        Client exposing ``current_user_top_tracks(limit=..., time_range=...)``.
    limit : int, default 10
        Number of tracks requested from the client.

    Returns
    -------
    pandas.DataFrame
        One row per returned track with the columns ``track_id``,
        ``track_name``, ``artist`` (name of the first listed artist, or
        ``None`` when the track lists no artist) and ``popularity``. The
        columns are present even when no track is returned, so callers can
        always select ``track_id``.
    """
    columns = ['track_id', 'track_name', 'artist', 'popularity']
    top_tracks = sp.current_user_top_tracks(limit=limit, time_range='medium_term')
    track_data = [
        {
            'track_id': track['id'],
            'track_name': track['name'],
            # Guard against an empty artist list instead of raising IndexError.
            'artist': track['artists'][0]['name'] if track['artists'] else None,
            'popularity': track['popularity'],
        }
        for track in top_tracks['items']
    ]
    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(track_data, columns=columns)

In [6]:
def fetch_audio_features(sp, track_ids):
    """Fetch audio features for ``track_ids`` and return them as a DataFrame.

    Parameters
    ----------
    sp : object
        Client exposing ``audio_features(list_of_ids)``.
    track_ids : list
        Non-empty list of track IDs.

    Returns
    -------
    pandas.DataFrame
        One row per feature record that was returned, with rows containing
        missing values dropped. The result can have fewer rows than
        ``track_ids`` when a chunk fails or a track has no features.

    Raises
    ------
    ValueError
        If ``track_ids`` is not a list or is empty.
    """
    # Check the type first so non-list inputs get this message rather than an
    # unrelated truthiness error.
    if not isinstance(track_ids, list) or not track_ids:
        raise ValueError("track_ids must be a non-empty list of valid track IDs.")

    # IDs are sent in chunks of at most 50 per request.
    chunk_size = 50
    features = []
    for start in range(0, len(track_ids), chunk_size):
        chunk = track_ids[start:start + chunk_size]
        try:
            response = sp.audio_features(chunk)
        except Exception as e:
            # Best effort: report the failed chunk and continue with the rest.
            print(f"Error fetching features for chunk {chunk}: {e}")
            continue
        # Skip a None response and None entries (presumably returned for tracks
        # without features - confirm against the API docs); a None among the
        # records would make the DataFrame construction below fail.
        features.extend(entry for entry in (response or []) if entry is not None)

    return pd.DataFrame(features).dropna()

In [7]:
def recommend_similar_songs(top_tracks_data, num_recommendations=5):
    """Rank the given tracks by their mean cosine similarity to one another.

    Audio features are fetched with ``fetch_audio_features`` using the
    module-level ``sp`` client.

    Parameters
    ----------
    top_tracks_data : pandas.DataFrame
        Frame with a ``track_id`` column. It is not modified.
    num_recommendations : int, default 5
        Maximum number of rows returned.

    Returns
    -------
    pandas.DataFrame
        A copy of the input rows with an added ``similarity_score`` column,
        sorted by that score in descending order and truncated to
        ``num_recommendations``. Tracks for which no audio features were
        obtained are left out. An empty frame is returned when there is
        nothing to score.

    NOTE(review): the feature columns are compared unscaled; presumably the
    large-magnitude columns (e.g. tempo) dominate the cosine, which would
    explain scores that all sit close to 1. Consider standardising first.
    """
    feature_columns = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']
    no_scores = top_tracks_data.assign(similarity_score=float('nan')).iloc[0:0]

    if top_tracks_data.empty:
        return no_scores

    # Get audio features of top tracks
    top_features = fetch_audio_features(sp, top_tracks_data['track_id'].tolist())
    if top_features.empty:
        return no_scores

    # One feature row per track id so the scores can be looked up by id.
    top_features = top_features.drop_duplicates(subset='id')

    # Similarity of every track with every other track (and itself).
    similarity_matrix = cosine_similarity(top_features[feature_columns])

    # Average similarity per track, keyed by track id. The feature frame can
    # have fewer rows than the input, so scores are matched by id rather than
    # by position.
    scores_by_id = pd.Series(similarity_matrix.mean(axis=0),
                             index=top_features['id'].to_numpy())

    # assign() returns a new frame, leaving the caller's DataFrame untouched.
    scored = top_tracks_data.assign(
        similarity_score=top_tracks_data['track_id'].map(scores_by_id)
    )

    # Sort and get top recommendations
    recommendations = (
        scored.dropna(subset=['similarity_score'])
        .sort_values(by='similarity_score', ascending=False)
    )
    return recommendations.head(num_recommendations)

In [8]:
# Fetch the user's top 10 tracks (medium-term time range) as a DataFrame
top_tracks_data = fetch_top_tracks(sp, limit=10)

In [9]:
# Rank the top 10 tracks by their average cosine similarity to each other and
# keep the best ones (num_recommendations defaults to 5)
recommended_songs = recommend_similar_songs(top_tracks_data)

In [10]:
def display_top_tracks(top_tracks_data):
    """Render a DataFrame as a table in the notebook.

    Anything that is not a ``pandas.DataFrame`` is rejected with a message
    printed to stdout instead of being displayed.
    """
    if not isinstance(top_tracks_data, pd.DataFrame):
        # Nothing tabular to show; tell the user and stop.
        print("The provided data is not a DataFrame.")
        return
    # Rich (HTML) rendering of the frame.
    display(top_tracks_data)

In [11]:
# Display recommendations, showing only the track name, artist and score columns
display_top_tracks(recommended_songs[['track_name', 'artist', 'similarity_score']])

Unnamed: 0,track_name,artist,similarity_score
5,Robbers,The 1975,0.999192
6,FEEL NOTHING,The Plot In You,0.999112
9,"Love Me Like You Do - From ""Fifty Shades Of Grey""",Ellie Goulding,0.999042
0,Somebody Else,The 1975,0.998821
3,Aaoge Tum Kabhi,The Local Train,0.998781


# Method: 2 (Comparatively Lower Similarity Score)

## Advantages and Disadvantages of using this method:

**`Advantage`:** 
- <p> It has a lower similarity score, so the songs are similar, but not to the extent that the exact same songs (their remixes, etc.) are recommended; instead, it recommends a broader variety of songs. </p>

**`Disadvantage`:** 
- The recommended songs might differ from your taste to some extent.

In [12]:
def fetch_top_tracks(sp, limit=10):
    """Return the current user's top tracks as a DataFrame.

    Parameters
    ----------
    sp : object
        Client exposing ``current_user_top_tracks(limit=..., time_range=...)``.
    limit : int, default 10
        Number of tracks requested from the client.

    Returns
    -------
    pandas.DataFrame
        One row per returned track with the columns ``track_id``,
        ``track_name``, ``artist`` (name of the first listed artist, or
        ``None`` when the track lists no artist) and ``popularity``. The
        columns are present even when no track is returned, so callers can
        always select ``track_id``.
    """
    columns = ['track_id', 'track_name', 'artist', 'popularity']
    top_tracks = sp.current_user_top_tracks(limit=limit, time_range='medium_term')
    track_data = []
    for track in top_tracks['items']:
        artists = track['artists']
        track_data.append({
            'track_id': track['id'],
            'track_name': track['name'],
            # Guard against an empty artist list instead of raising IndexError.
            'artist': artists[0]['name'] if artists else None,
            'popularity': track['popularity'],
        })
    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(track_data, columns=columns)

In [13]:
def fetch_audio_features(sp, track_ids):
    """Fetch audio features for ``track_ids`` and return them as a DataFrame.

    Parameters
    ----------
    sp : object
        Client exposing ``audio_features(list_of_ids)``.
    track_ids : list
        Non-empty list of track IDs.

    Returns
    -------
    pandas.DataFrame
        One row per feature record that was returned, with rows containing
        missing values dropped. The result can have fewer rows than
        ``track_ids`` when a chunk fails or a track has no features.

    Raises
    ------
    ValueError
        If ``track_ids`` is not a list or is empty.
    """
    # Check the type first so non-list inputs get this message rather than an
    # unrelated truthiness error.
    if not isinstance(track_ids, list) or not track_ids:
        raise ValueError("track_ids must be a non-empty list of valid track IDs.")

    # IDs are sent in chunks of at most 50 per request.
    chunk_size = 50
    features = []
    for start in range(0, len(track_ids), chunk_size):
        chunk = track_ids[start:start + chunk_size]
        try:
            response = sp.audio_features(chunk)
        except Exception as e:
            # Best effort: report the failed chunk and continue with the rest.
            print(f"Error fetching features for chunk {chunk}: {e}")
            continue
        # Skip a None response and None entries (presumably returned for tracks
        # without features - confirm against the API docs); a None among the
        # records would make the DataFrame construction below fail.
        features.extend(entry for entry in (response or []) if entry is not None)

    return pd.DataFrame(features).dropna()

In [14]:
def fetch_all_songs_features(sp, limit=50):
    """Fetch audio features for up to ``limit`` of the user's saved tracks.

    Parameters
    ----------
    sp : object
        Client exposing ``current_user_saved_tracks(limit=..., offset=...)``;
        it is also passed on to ``fetch_audio_features``.
    limit : int, default 50
        Maximum number of saved tracks to sample. Values above 50 are served
        by requesting several pages.

    Returns
    -------
    pandas.DataFrame
        Whatever ``fetch_audio_features`` returns for the collected track IDs.
    """
    # Each request asks for at most 50 items (the documented page maximum for
    # the saved-tracks endpoint); larger limits are covered by paging.
    page_limit = 50
    track_ids = []
    offset = 0
    while True:
        page_size = min(limit - offset, page_limit)
        results = sp.current_user_saved_tracks(limit=page_size, offset=offset)
        items = results['items']
        # Skip entries that carry no track or no id rather than failing on them.
        track_ids.extend(
            item['track']['id']
            for item in items
            if item.get('track') and item['track'].get('id')
        )
        offset += len(items)
        # Stop on a short (or empty) page, or once enough items were collected.
        if not items or len(items) < page_size or offset >= limit:
            break
    return fetch_audio_features(sp, track_ids)

In [15]:
def recommend_similar_songs(top_tracks_data, num_recommendations=5):
    """Rank the given tracks by their mean cosine similarity to one another.

    Audio features are fetched with ``fetch_audio_features`` using the
    module-level ``sp`` client.

    NOTE(review): the next cell defines ``recommend_similar_songs`` again with
    an additional ``all_songs_features`` parameter, which replaces this
    definition once both cells have run.

    Parameters
    ----------
    top_tracks_data : pandas.DataFrame
        Frame with a ``track_id`` column. It is not modified.
    num_recommendations : int, default 5
        Maximum number of rows returned.

    Returns
    -------
    pandas.DataFrame
        A copy of the input rows with an added ``similarity_score`` column,
        sorted by that score in descending order and truncated to
        ``num_recommendations``. Tracks for which no audio features were
        obtained are left out. An empty frame is returned when there is
        nothing to score.
    """
    feature_columns = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']
    no_scores = top_tracks_data.assign(similarity_score=float('nan')).iloc[0:0]

    if top_tracks_data.empty:
        return no_scores

    # Get audio features of top tracks
    top_features = fetch_audio_features(sp, top_tracks_data['track_id'].tolist())
    if top_features.empty:
        return no_scores

    # One feature row per track id so the scores can be looked up by id.
    top_features = top_features.drop_duplicates(subset='id')

    # Similarity of every track with every other track (and itself).
    similarity_matrix = cosine_similarity(top_features[feature_columns])

    # Average similarity per track, keyed by track id. The feature frame can
    # have fewer rows than the input, so scores are matched by id rather than
    # by position.
    scores_by_id = pd.Series(similarity_matrix.mean(axis=0),
                             index=top_features['id'].to_numpy())

    # assign() returns a new frame, leaving the caller's DataFrame untouched.
    scored = top_tracks_data.assign(
        similarity_score=top_tracks_data['track_id'].map(scores_by_id)
    )

    # Sort and get top recommendations
    recommendations = (
        scored.dropna(subset=['similarity_score'])
        .sort_values(by='similarity_score', ascending=False)
    )
    return recommendations.head(num_recommendations)

In [16]:
def recommend_similar_songs(top_tracks_data, all_songs_features, num_recommendations=5):
    """Recommend candidate songs that are most similar to the user's top tracks.

    Audio features of the top tracks are fetched with
    ``fetch_audio_features`` using the module-level ``sp`` client and compared
    with every row of ``all_songs_features`` by cosine similarity.

    Parameters
    ----------
    top_tracks_data : pandas.DataFrame
        Frame with a ``track_id`` column identifying the reference tracks.
    all_songs_features : pandas.DataFrame
        Candidate songs with an ``id`` column and the audio-feature columns
        listed in ``feature_columns``. It is not modified.
    num_recommendations : int, default 5
        Maximum number of rows returned.

    Returns
    -------
    pandas.DataFrame
        Rows of ``all_songs_features`` (top tracks excluded) with an added
        ``similarity_score`` column holding the mean similarity to the top
        tracks, sorted in descending order and truncated to
        ``num_recommendations``. An empty frame is returned when either side
        has no feature rows.

    NOTE(review): the feature columns are compared unscaled; presumably the
    large-magnitude columns (e.g. tempo) dominate the cosine, which would
    explain scores that all sit close to 1. Consider standardising first.
    """
    feature_columns = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']

    top_features = fetch_audio_features(sp, top_tracks_data['track_id'].tolist())

    if top_features.empty or all_songs_features.empty:
        # Nothing to compare: hand back an empty result with the score column.
        return all_songs_features.assign(similarity_score=float('nan')).iloc[0:0]

    # Compute similarity matrix between top tracks (rows) and all songs (columns)
    similarity_matrix = cosine_similarity(top_features[feature_columns],
                                          all_songs_features[feature_columns])

    # Aggregate similarity scores (mean) for each song in the broader dataset
    similarity_scores = similarity_matrix.mean(axis=0)

    # assign() returns a new frame, so the caller's DataFrame is not given an
    # extra column as a side effect.
    scored = all_songs_features.assign(similarity_score=similarity_scores)

    # Exclude top tracks from recommendations
    recommendations = scored[~scored['id'].isin(top_tracks_data['track_id'])]
    return recommendations.sort_values(by='similarity_score', ascending=False).head(num_recommendations)





In [17]:
# Fetch the user's top 10 tracks (medium-term time range); these are the
# reference tracks that the candidate songs are compared against
top_tracks_data = fetch_top_tracks(sp, limit=10)

In [18]:
# Fetch audio features for up to 50 of the user's saved tracks; these form the
# candidate pool for the recommendations
all_songs_features = fetch_all_songs_features(sp, limit=50)

In [19]:
# Score every candidate song by its mean similarity to the top 10 tracks and
# keep the best ones (num_recommendations defaults to 5)
recommended_songs = recommend_similar_songs(top_tracks_data, all_songs_features)

In [20]:
def fetch_track_details(sp, track_ids):
    """Look up name and first artist for each track ID.

    Parameters
    ----------
    sp : object
        Client exposing ``tracks(list_of_ids)`` that returns a mapping with a
        ``'tracks'`` list.
    track_ids : sequence of str
        Track IDs to resolve. May be empty.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``name`` and ``artist`` (``None`` when a track lists
        no artist), one row per track that was resolved. IDs for which the
        client returns ``None`` are skipped. The columns are present even when
        the result is empty.
    """
    columns = ['id', 'name', 'artist']
    # IDs are requested in chunks of at most 50 per call (the documented
    # maximum for the several-tracks endpoint).
    chunk_size = 50
    track_ids = list(track_ids)

    track_details = []
    for start in range(0, len(track_ids), chunk_size):
        tracks = sp.tracks(track_ids[start:start + chunk_size])
        for track in tracks['tracks']:
            if track is None:
                # Unresolvable ID: nothing to report for it.
                continue
            artists = track['artists']
            track_details.append({
                'id': track['id'],
                'name': track['name'],
                'artist': artists[0]['name'] if artists else None
            })
    return pd.DataFrame(track_details, columns=columns)

In [21]:
# Fetch track details (name and first artist) for the recommended songs; the
# audio-feature rows in recommended_songs carry only the track id
recommended_songs_details = fetch_track_details(sp, recommended_songs['id'].tolist())

In [22]:
# Merge the track details with the scored feature rows, matching on the track id
recommended_songs_with_details = pd.merge(recommended_songs_details, recommended_songs, on='id')

In [23]:
def display_top_tracks(top_tracks_data):
    """Show ``top_tracks_data`` as a notebook table when it is a DataFrame.

    For any other kind of object a short notice is printed instead.
    """
    is_frame = isinstance(top_tracks_data, pd.DataFrame)
    if not is_frame:
        # Only DataFrames can be rendered as a table.
        print("The provided data is not a DataFrame.")
        return None
    # Hand the frame to IPython's rich display.
    display(top_tracks_data)
    return None

In [24]:
# Display recommendations, showing only the name, artist and score columns
display_top_tracks(recommended_songs_with_details[['name', 'artist', 'similarity_score']])

Unnamed: 0,name,artist,similarity_score
0,Story of My Life,One Direction,0.999269
1,Payphone,Maroon 5,0.999228
2,SUPERPOWER,VALORANT,0.999211
3,Wild Ones (feat. Sia),Flo Rida,0.99917
4,Cupid - Twin Ver.,FIFTY FIFTY,0.999164
