In [1]:
import spotipy
import pandas as pd
from spotipy.oauth2 import SpotifyClientCredentials
from secrets import *

In [2]:
#Authentication
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

### Functions

In [3]:
def get_features(dataframe, 
                column, 
                links = [],
                danceability = [],
                energy = [],
                loudness = [],
                speechiness = [],
                acousticness = [],
                instrumentalness = [],
                liveness = [],
                valence = [],
                tempo = []):

    for link in dataframe[column]:  
            
             connection = sp.audio_features(link)[0]

             links.append(link)
             danceability.append(connection["danceability"])
             energy.append(connection["energy"])
             loudness.append(connection["loudness"])
             speechiness.append(connection["speechiness"])
             acousticness.append(connection["acousticness"])
             instrumentalness.append(connection["instrumentalness"])
             liveness.append(connection["liveness"])
             valence.append(connection["valence"])
             tempo.append(connection["tempo"])
            
  
    features_df = pd.DataFrame({"song_url": links,
                                "danceability": danceability,
                                "energy": energy,
                                "loudness": loudness,
                                "speechiness": speechiness,
                                "acousticness": acousticness,
                                "instrumentalness": instrumentalness,
                                "liveness": liveness,
                                "valence": valence,
                                "tempo": tempo})
    return features_df

### Some custom searching

In [6]:
name = "070 Shake"
results = sp.search(q='artist:' + "Rex Orange County"+ " track:" + "Loving Is Easy (feat. Benny Sings)", type='track')

In [7]:
playlist_link = "https://open.spotify.com/playlist/37i9dQZEVXbNG2KDcFcKOF?si=1333723a6eff4b7f"
playlist_URI = playlist_link.split("/")[-1].split("?")[0]
track_uris = [x["track"]["uri"] for x in sp.playlist_tracks(playlist_URI)["items"]]

In [8]:
sp.playlist_tracks(playlist_URI)['items'][1]

{'added_at': '2022-12-16T11:02:52Z',
 'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/'},
  'href': 'https://api.spotify.com/v1/users/',
  'id': '',
  'type': 'user',
  'uri': 'spotify:user:'},
 'is_local': False,
 'primary_color': None,
 'track': {'album': {'album_type': 'single',
   'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/5lpH0xAS4fVfLkACg9DAuM'},
     'href': 'https://api.spotify.com/v1/artists/5lpH0xAS4fVfLkACg9DAuM',
     'id': '5lpH0xAS4fVfLkACg9DAuM',
     'name': 'Wham!',
     'type': 'artist',
     'uri': 'spotify:artist:5lpH0xAS4fVfLkACg9DAuM'}],
   'available_markets': ['AD',
    'AE',
    'AG',
    'AL',
    'AM',
    'AO',
    'AR',
    'AT',
    'AU',
    'AZ',
    'BA',
    'BB',
    'BD',
    'BE',
    'BF',
    'BG',
    'BH',
    'BI',
    'BJ',
    'BN',
    'BO',
    'BR',
    'BS',
    'BT',
    'BW',
    'BY',
    'BZ',
    'CA',
    'CD',
    'CG',
    'CH',
    'CI',
    'CL',
    'CM',
    'CO',
    'C

### Downloading characteristics of songs from personal data

In [9]:
streaming_df = pd.read_csv('spotify_data/streaming_history_concat.csv', index_col=0)

In [10]:
plays_count = streaming_df.groupby("trackName")['artistName'].count().reset_index()

In [11]:
streaming_df = streaming_df.drop_duplicates(subset=['trackName'])
streaming_df.shape

(6432, 4)

In [12]:
streaming_df = streaming_df.merge(plays_count, on="trackName")

In [13]:
streaming_df = streaming_df.sort_values("artistName_y", ascending=False).iloc[1:200,]

In [14]:
streaming_df = streaming_df.rename(columns = {"artistName_x": "artistName", "artistName_y": "plays_count"})

In [15]:
songs_links = []
for index, song in streaming_df.iterrows():
    results = sp.search(q='artist:' + streaming_df["artistName"][index] + " track:" + streaming_df["trackName"][index], type='track')
    items = results['tracks']['items']

    if len(items) != 0:
        try:
            link = items[1]['href']
        except:
            link = items[0]['href']
    else:
        link = ""
    songs_links.append(link)

In [16]:
streaming_df['song_url'] = songs_links
streaming_df = streaming_df[streaming_df['song_url'] != ""] 

In [17]:
features = get_features(streaming_df, "song_url")

Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/70AYiGbc4mWZGEqiipBBDb
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/6slWOE0SO6HjBH0fNd13YB
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/4n1WwhKzKHUX598tvU1wMu
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/4Dvkj6JhhA12EX05fT7y2e
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/6sQ1IeoCqOF3RjpCitYDWq
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/6Ucrht7JfguIXoa4hF9Leo
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/4XBIzFEVvF4stC7E6IigLl
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/4k6Uh1HXdhtusDW5y8Gbvy
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/4ydmav4vl7hebadtfZtMrg
Expected id of type track but found type tracks https:/

In [18]:
features.shape

(198, 10)

In [19]:
streaming_df = streaming_df.merge(features, on="song_url")
streaming_df = streaming_df.drop(["endTime", "song_url", "sec_played", "plays_count"], axis=1)
streaming_df.to_csv("spotify_data/personal_data_to_recommend")


In [21]:
streaming_df.head()

Unnamed: 0,artistName,trackName,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,ROSALÍA,CANDY,0.638,0.49,-5.726,0.226,0.885,0.0116,0.156,0.403,179.905
1,070 Shake,Skin and Bones,0.613,0.633,-6.494,0.0292,0.0439,0.0,0.0839,0.334,115.98
2,ROSALÍA,SAOKO,0.835,0.772,-5.711,0.247,0.793,2e-05,0.515,0.683,100.034
3,Harry Styles,As It Was,0.52,0.731,-5.338,0.0557,0.342,0.00101,0.311,0.662,173.93
4,Yung Lean,Trip,0.531,0.906,-6.437,0.0376,0.000922,0.406,0.198,0.512,174.006


### Building recommendation database

In [23]:
playlist_URL = "spotify:playlist:54A6wGeGp7yAra5hwK6xHq"

In [28]:
def call_playlist(creator, playlist_id):

    playlist_features_list = ["artist",
                              "track_name",  
                              "danceability",
                              "energy",
                              "loudness",
                              "speechiness", 
                              "acousticness", 
                              "instrumentalness",
                              "liveness",
                              "valence",
                              "tempo"]
    
    playlist_df = pd.DataFrame(columns = playlist_features_list)

    playlist = sp.user_playlist_tracks(creator, playlist_id)["items"]
    for track in playlist:

        # Create empty dict
        playlist_features = {}

        # Get metadata
        playlist_features["artist"] = track["track"]["album"]["artists"][0]["name"]
        playlist_features["album"] = track["track"]["album"]["name"]
        playlist_features["track_name"] = track["track"]["name"]
        playlist_features["track_id"] = track["track"]["id"]
        
        # Get audio features
        audio_features = sp.audio_features(playlist_features["track_id"])[0]
        for feature in playlist_features_list[4:]:
            playlist_features[feature] = audio_features[feature]
        
        # Concat the dfs
        track_df = pd.DataFrame(playlist_features, index = [0])
        playlist_df = pd.concat([playlist_df, track_df], ignore_index = True)

    
    return playlist_df

In [29]:
first_df = call_playlist("spotify", "spotify:playlist:54A6wGeGp7yAra5hwK6xHq")
second_df = call_playlist("spotify", "spotify:playlist:3IsxzDS04BvejFJcQ0iVyW")
third_df = call_playlist("spotify", "spotify:playlist:37i9dQZEVXcEQgVh36QNFV")
fourth_df = call_playlist("spotify", "spotify:playlist:37i9dQZF1DX0YKekzl0blG")
fifth_df = call_playlist("spotify", "spotify:playlist:37i9dQZF1DX0YKekzl0blG")
sixth_df = call_playlist("spotify", "spotify:playlist:1coYrjao0tn6XY4HA6AXWV")
recommender_df = pd.concat([first_df, second_df, third_df, fourth_df, fifth_df, sixth_df], axis=0)

In [30]:
recommender_df.head()

Unnamed: 0,artist,track_name,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,album,track_id
0,Sufjan Stevens,Back To Oz,,,-10.461,0.0314,0.528,0.00118,0.131,0.441,170.068,A Beginner's Mind,6s0F09N7jp4wSqN8vVgMQR
1,black midi,Diamond Stuff,,,-14.987,0.0274,0.287,0.926,0.146,0.105,134.984,Cavalcade,1oqeQqO2AWG0uyPZbGlqXt
2,black midi,Dethroned,,,-8.605,0.0712,0.13,0.549,0.64,0.232,144.909,Cavalcade,2tk6DxeaWPqjidof2LtGcL
3,Sufjan Stevens,Lady Macbeth In Chains,,,-13.346,0.0526,0.912,0.000734,0.0872,0.134,204.157,A Beginner's Mind,0W2a7Ga9auLPDDPQp11iDO
4,Dean Blunt,WOOSAH,,,-13.929,0.0305,0.544,0.869,0.104,0.16,147.895,BLACK METAL 2,5eKOyf0i14QZHCD6mbwkej


In [32]:
recommender_df.to_csv("spotify_data/playlists_data_to_recommend.csv")