In [1]:
from dotenv import load_dotenv

load_dotenv()

import os

import pandas as pd
import numpy as np
import spotipy
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

pd.set_option("display.max_columns", 100)

In [2]:
# OAuth scopes requested for the token (comma-separated string passed to spotipy as-is).
# Presumably these cover the saved-tracks and top-tracks endpoints called later — verify
# against the Spotify scope documentation.
scope = "user-top-read, user-library-read"

# Username, client credentials and redirect URL are read from environment variables;
# a missing variable raises KeyError here.
# NOTE(review): prompt_for_user_token is reported as deprecated in recent spotipy
# releases in favour of SpotifyOAuth — confirm against the installed version.
token = spotipy.util.prompt_for_user_token(
    os.environ["SPOTIFY_USERNAME"],
    scope,
    client_id=os.environ["SPOTIFY_CLIENT_ID"],
    client_secret=os.environ["SPOTIFY_CLIENT_SECRET"],
    redirect_uri=os.environ["SPOTIFY_REDIRECT_URL"],
)

# Spotify client authenticated with the token above; used as a notebook-level
# global by the cells that follow.
sp = spotipy.Spotify(auth=token)

In [3]:
def download_tracks(download_func, items_per_page=25, pages=40):
    """Download paged tracks and join them with their Spotify audio features.

    Parameters
    ----------
    download_func : callable
        Paging function accepting ``limit`` and ``offset`` keyword arguments and
        returning a mapping with an ``'items'`` list (e.g.
        ``sp.current_user_saved_tracks`` or ``sp.current_user_top_tracks``).
    items_per_page : int, default 25
        Number of items requested per call. The ids of one page are sent to
        ``sp.audio_features`` in a single request.
    pages : int, default 40
        Maximum number of pages to request; paging stops early at the first
        empty page.

    Returns
    -------
    pandas.DataFrame
        One row per track that has audio features: ``song_id``, ``name``,
        ``artists`` (comma-joined names), ``popularity``, ``explicit``,
        ``duration_ms`` plus the audio-feature columns. If nothing could be
        downloaded, an empty frame with the six track columns is returned
        instead of raising.

    Notes
    -----
    Relies on the notebook-level ``sp`` client for the audio-features lookup.
    Tracks without an id or without audio features are dropped (inner join),
    so the feature columns never contain missing rows.
    """
    track_columns = ['song_id', 'name', 'artists', 'popularity', 'explicit', 'duration_ms']

    dfs = []
    for page in tqdm(range(pages)):
        items = download_func(limit=items_per_page, offset=page * items_per_page)['items']

        if len(items) == 0:
            break

        # Some endpoints wrap each track under a 'track' key, others return the
        # track objects directly; the first item decides for the whole page.
        if 'track' in items[0]:
            tracks = [item['track'] for item in items]
        else:
            tracks = items

        # Skip missing tracks and tracks without an id: they cannot be looked up
        # by the audio-features endpoint.
        tracks = [track for track in tracks if track and track.get('id')]
        if not tracks:
            continue

        tracks_data = [
            {
                'song_id': track['id'],
                'name': track['name'],
                'artists': ", ".join(artist['name'] for artist in track['artists']),
                'popularity': track['popularity'],
                'explicit': track['explicit'],
                'duration_ms': track['duration_ms'],
            }
            for track in tracks
        ]
        df_tracks = pd.DataFrame(tracks_data)

        # The endpoint yields None in place of tracks that have no audio
        # features; those entries would break the DataFrame construction.
        features = [
            feature
            for feature in (sp.audio_features(df_tracks.song_id.tolist()) or [])
            if feature
        ]
        if not features:
            continue

        df_features = (
            pd.DataFrame(features)
            .drop(columns=["type", "track_href", "analysis_url", "duration_ms", "uri"])
            .rename(columns={"id": "song_id"})
            # A repeated id within one page would otherwise multiply rows in the merge.
            .drop_duplicates(subset="song_id")
        )
        dfs.append(df_tracks.merge(df_features, on="song_id"))

    if not dfs:
        # pd.concat([]) raises ValueError; return an empty frame instead.
        return pd.DataFrame(columns=track_columns)

    return pd.concat(dfs, ignore_index=True)

In [4]:
# Download the current user's saved tracks with their audio features, using
# download_tracks' default paging (items_per_page=25, pages=40).
df = download_tracks(sp.current_user_saved_tracks)

  0%|          | 0/40 [00:00<?, ?it/s]

In [5]:
# Preview the first rows of the saved-tracks frame.
df.head()

Unnamed: 0,song_id,name,artists,popularity,explicit,duration_ms,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,2I4u1DU7rCijbtR2F3dgx6,So Far So Good,Great Good Fine Ok,50,False,171290,0.777,0.809,8,-4.611,1,0.0802,0.0253,7e-06,0.296,0.913,110.006,4
1,3p2wS6G159mBIU50xl7uvc,Future Starts Now,Kim Petras,54,False,279270,0.693,0.853,6,-4.241,1,0.0503,0.00329,0.000193,0.123,0.432,119.948,4
2,6Mb0OgMvbb7FYQejZ6rusz,Lonely Ones,LOVA,61,True,187552,0.643,0.407,2,-7.36,1,0.102,0.613,0.0,0.147,0.47,143.786,4
3,7cQ0LAxT5HHCLGbxcjaxac,"Arcángel: Bzrp Music Sessions, Vol. 54","Bizarrap, Arcángel",89,False,223019,0.716,0.78,11,-3.495,0,0.297,0.55,0.0,0.146,0.786,123.856,4
4,6BvLxVbjHqWgDqjhVRbTaZ,Prisionero,"Miranda!, Cristian Castro",63,False,235520,0.738,0.716,1,-5.705,1,0.0368,0.0863,0.000948,0.0705,0.964,116.001,4


In [6]:
# Download the current user's top tracks with their audio features, using
# download_tracks' default paging (items_per_page=25, pages=40).
df_top = download_tracks(sp.current_user_top_tracks)

  0%|          | 0/40 [00:00<?, ?it/s]

In [7]:
# Preview the first rows of the top-tracks frame.
df_top.head()

Unnamed: 0,song_id,name,artists,popularity,explicit,duration_ms,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,2I4QyfxbaYASskhchbxsDw,Tennis Club,Talltale,40,False,201930,0.735,0.619,0,-6.76,1,0.0278,0.00482,0.0081,0.371,0.754,119.985,4
1,5CZ40GBx1sQ9agT82CLQCT,traitor,Olivia Rodrigo,86,False,229226,0.38,0.339,3,-7.885,1,0.0338,0.691,0.0,0.12,0.0849,100.607,4
2,0b18g3G5spr4ZCkz7Y6Q0Q,Rasputin,"Majestic, Boney M.",80,False,186209,0.758,0.913,6,-2.926,0,0.113,0.00364,6.9e-05,0.193,0.658,128.04,4
3,0MQKiRIilttk9ZpMp98WWP,Mis Nochecitas en San Telmo,AllCARAZ,11,False,209473,0.623,0.812,2,-7.113,1,0.0275,0.0773,6e-06,0.202,0.671,113.994,4
4,5U1OEoEW4xnalSAToamwZL,Did you know that there's a tunnel under Ocean...,Lana Del Rey,71,True,285050,0.431,0.34,0,-9.384,1,0.0311,0.794,0.0108,0.12,0.0529,119.877,4


In [52]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [77]:
# Use every float-typed column of df as a similarity feature; non-float columns
# (e.g. key, mode, popularity, duration_ms) are excluded by the dtype filter.
feature_cols = df.select_dtypes(include=[float]).columns
feature_cols

Index(['danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
       'instrumentalness', 'liveness', 'valence', 'tempo'],
      dtype='object')

In [78]:
# Standardise the feature columns with StandardScaler, fitted on df itself.
# Despite its name, normalized_df is whatever fit_transform returns (a NumPy
# array by default), not a DataFrame; row i corresponds to row i of df.
normalized_df = StandardScaler().fit_transform(df[feature_cols])

In [79]:
# Lookup from song name to the index of its row in df. Song names are not
# guaranteed to be unique, so one name may map to several rows.
indices = pd.Series(df.index, index=df.name)

In [80]:
def generate_recommendation(song_title, model_type, n=10):
    """Return the songs in ``df`` that score highest against ``song_title``.

    Parameters
    ----------
    song_title : str
        Name of the seed song; it must appear in the notebook-level ``indices``
        lookup. If several songs share this name, the first one is used.
    model_type : array-like of shape (n_songs, n_songs)
        Pairwise similarity matrix whose rows and columns follow the row order
        of ``df`` (e.g. the output of ``cosine_similarity``).
    n : int, default 10
        Number of recommendations requested.

    Returns
    -------
    pandas.DataFrame
        The ``n + 1`` highest-scoring rows of ``df`` in descending score order,
        with an added ``score`` column. The seed song is not filtered out, so
        it normally appears as the first row with the top score.

    Raises
    ------
    KeyError
        If ``song_title`` is not present in ``indices``.
    """
    matches = indices[indices.index == song_title]
    if matches.empty:
        raise KeyError(song_title)

    # Duplicate titles would make a plain indices[song_title] lookup return a
    # Series (and the score row 2-D); pin the lookup to the first match.
    position = int(matches.iloc[0])

    scores = np.asarray(model_type[position])
    # Stable sort on the negated scores: descending order, ties kept in row order.
    top_positions = np.argsort(-scores, kind="stable")[: n + 1]
    return df.iloc[top_positions].assign(score=scores[top_positions])

In [81]:
# Example: songs most similar to 'Tennis Club', scored by cosine similarity
# between the standardised feature rows. The full pairwise similarity matrix
# is computed inline for this single call.
generate_recommendation('Tennis Club', model_type=cosine_similarity(normalized_df))

Unnamed: 0,song_id,name,artists,popularity,explicit,duration_ms,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,score
44,2I4QyfxbaYASskhchbxsDw,Tennis Club,Talltale,40,False,201930,0.735,0.619,0,-6.76,1,0.0278,0.00482,0.0081,0.371,0.754,119.985,4,1.0
451,57yL3161hUMuw06zzzUCHi,Doubt,Twenty One Pilots,69,False,191493,0.719,0.645,2,-6.141,0,0.0358,0.121,0.00316,0.362,0.69,123.02,4,0.978675
785,26K3adqoohq5BQh1K3qhNY,Raise Your Glass (feat. Darren Criss),"Glee Cast, Darren Criss",47,False,198466,0.756,0.659,4,-5.593,1,0.0709,0.135,0.0,0.355,0.861,122.022,4,0.908518
635,1SOClUWhOi8vHZYMz3GluK,Infinity,Jaymes Young,84,False,237720,0.671,0.673,11,-6.941,0,0.0444,0.148,5.3e-05,0.304,0.494,121.963,4,0.902142
907,3GpbwCm3YxiWDvy29Uo3vP,Right Round,Flo Rida,77,False,204640,0.72,0.672,7,-6.852,1,0.0551,0.009,0.0,0.232,0.705,124.986,4,0.900004
884,3MjUtNVVq3C8Fn0MP3zhXa,...Baby One More Time,Britney Spears,80,False,211066,0.759,0.699,0,-5.745,0,0.0307,0.202,0.000131,0.443,0.907,92.96,4,0.892944
603,0uUNN1nSoUx1A4fkDCWDQ7,So Am I,Ava Max,1,False,183026,0.682,0.656,6,-4.67,1,0.0435,0.0737,0.0,0.353,0.607,130.089,4,0.875664
543,7aar99etqYOCluEnA2aNhr,Waiting Tables,Fly By Midnight,15,False,186193,0.832,0.543,5,-9.06,1,0.0576,0.0539,5.3e-05,0.336,0.658,120.048,4,0.859535
271,4atS6abF89basSrPrOQ0hT,NORTE,Lucas & The Woods,7,False,267025,0.725,0.524,5,-7.791,1,0.0303,0.0385,0.0,0.332,0.547,102.96,4,0.85574
610,3nOMGGeQppHhfrkQ65VLVQ,White Noise,"Disclosure, AlunaGeorge",1,False,340268,0.707,0.881,10,-7.295,0,0.0525,0.00572,0.00151,0.353,0.854,120.0,4,0.844448
