In [1]:
import os

import pandas as pd
import spotipy

In [2]:

from spotipy.oauth2 import SpotifyClientCredentials
# Spotify application credentials for the client-credentials flow.
# NOTE(review): both values are blank in the notebook as committed -
# presumably redacted; supply your own before running.
cid = ""
secret = ""

# Credentials manager plus the API client that the cells below call as `sp`.
client_credentials_manager = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

<h2>Extracting features from the playlist songs</h2>

In [22]:
# Playlist whose songs the features are extracted from.
playlist_link = "https://open.spotify.com/playlist/52go3BbUEoIhc6dpAtJOw8?si=a0638ca764644872"

# The playlist id is the last path segment of the link, minus the query string.
playlist_URI = playlist_link.rsplit("/", 1)[-1].partition("?")[0]

# Collect the URI of every track in the items returned for the playlist.
track_uris = []
for playlist_item in sp.playlist_tracks(playlist_URI)["items"]:
    track_uris.append(playlist_item["track"]["uri"])

In [23]:

# Parallel lists: position k of every list describes the k-th playlist track.
track_id = []
track_name = []
artist_uri = []
artist_name = []
artist_pop = []
artist_genres = []
album = []
track_pop = []

# NOTE(review): only the items returned by this single call are processed;
# a long playlist may need paging - confirm against the spotipy docs.
for track in sp.playlist_tracks(playlist_URI)["items"]:
    track_info = track["track"]

    # NOTE(review): the Web API can report a null track for entries that are
    # no longer available - confirm. Skipping the whole entry keeps every
    # list the same length instead of crashing on the first lookup.
    if track_info is None:
        continue

    # Track id and name
    track_id.append(track_info["id"])
    track_name.append(track_info["name"])

    # Main artist: the first artist listed on the track.
    main_artist = track_info["artists"][0]
    # Keep the URI in its own variable and append it, so the artist_uri list
    # declared above is filled rather than overwritten by a string.
    main_artist_uri = main_artist["uri"]
    artist_uri.append(main_artist_uri)
    artist_info = sp.artist(main_artist_uri)

    # Artist name, popularity and genres
    artist_name.append(main_artist["name"])
    artist_pop.append(artist_info["popularity"])
    artist_genres.append(artist_info["genres"])

    # Album name
    album.append(track_info["album"]["name"])

    # Popularity of the track
    track_pop.append(track_info["popularity"])


In [26]:
import pandas as pd
# Assemble the per-track lists into a single table, one row per playlist track.
playlist_columns = {
    'artist_name': artist_name,
    'track_name': track_name,
    'track_id': track_id,
    'popularity': track_pop,
    'artist_genre': artist_genres,
}
track_dataframe = pd.DataFrame(playlist_columns)

# Print the (rows, columns) shape, then preview the first rows.
print(track_dataframe.shape)
track_dataframe.head()

(100, 5)


Unnamed: 0,artist_name,track_name,track_id,popularity,artist_genre
0,Tiësto,Red Lights,5L2l7mI8J1USMzhsmdjat9,70,"[big room, brostep, dance pop, dutch edm, edm,..."
1,Mike Posner,I Took A Pill In Ibiza - Seeb Remix,0vbtURX4qv1l7besfwmnD8,79,"[dance pop, edm, pop, pop rap]"
2,Kid Cudi,Pursuit Of Happiness - Extended Steve Aoki Remix,5PX4uS1LqlWEPL69phPVQQ,78,"[hip hop, ohio hip hop, rap]"
3,Axwell /\ Ingrosso,More Than You Know,6h5PAsRni4IRlxWr6uDPTP,82,"[dance pop, edm, electro house, pop, pop dance..."
4,Avicii,I Could Be The One (Avicii Vs. Nicky Romero) -...,1sh6lL6cmlcwhqZKGiKBua,70,"[dance pop, edm, pop, pop dance]"


In [33]:
def create_data(track_dataframe):
    """Fetch Spotify audio features and attach the track metadata columns.

    Parameters
    ----------
    track_dataframe : pandas.DataFrame
        Must provide the columns ``track_id``, ``track_name``,
        ``artist_name``, ``artist_genre`` and ``popularity``. Any index is
        accepted; rows are matched to the API results by position.

    Returns
    -------
    pandas.DataFrame
        One row per input track, in input order: ``track_name``,
        ``artist_name``, ``artist_genre`` and ``popularity`` first, followed
        by the audio-feature columns with ``uri``, ``track_href``,
        ``analysis_url`` and ``type`` removed.
    """
    # Work on a positional (0..n-1) index: DataFrame.insert aligns a Series by
    # index label, so a filtered or re-indexed input would otherwise end up
    # with NaN metadata next to the features.
    metadata = track_dataframe.reset_index(drop=True)
    track_ids = metadata['track_id'].tolist()

    # The audio-features endpoint is documented to accept a limited number of
    # ids per request (100), so larger inputs are queried in batches.
    batch_size = 100
    features = []
    for start in range(0, len(track_ids), batch_size):
        batch = sp.audio_features(track_ids[start:start + batch_size])
        # A track without features comes back as None; keep an empty row for
        # it so that positions still match the metadata.
        features.extend(entry or {} for entry in batch)

    audio_features = pd.DataFrame(features)

    metadata_columns = ['track_name', 'artist_name', 'artist_genre', 'popularity']
    for position, column in enumerate(metadata_columns):
        audio_features.insert(position, column, metadata[column])

    # errors='ignore': these columns are absent when no track had features.
    return audio_features.drop(
        columns=['uri', 'track_href', 'analysis_url', 'type'],
        errors='ignore',
    )

In [34]:

# DataFrame.to_csv does not create missing parent folders, so make sure the
# target folder exists before writing (a fresh checkout may not have it).
os.makedirs('outputs', exist_ok=True)
create_data(track_dataframe).to_csv('outputs/playlist_features.csv', index = False)

<h2> Creating our search space </h2>

This database is where we are going to search for songs to recommend for our playlist. First, I search for songs whose albums were released between 2018 and 2021, because the API does not give information about the release date of the song itself.

In [14]:
# Parallel lists: position k of every list describes the k-th search result.
artist_name = []
track_name = []
popularity = []
track_id = []
artist_uri = []

# Walk the search results page by page: offsets 0, 50, ..., 950 with 50
# results requested per page.
for offset in range(0, 1000, 50):
    track_results = sp.search(q='year:2018-2021', type='track', limit=50, offset=offset)

    # The page offset and the per-track loop use distinct names, so the
    # inner loop no longer shadows the outer loop variable.
    for t in track_results['tracks']['items']:
        # Main artist: the first artist listed on the track.
        main_artist = t['artists'][0]
        artist_name.append(main_artist['name'])
        artist_uri.append(main_artist['uri'])

        track_name.append(t['name'])
        track_id.append(t['id'])
        popularity.append(t['popularity'])

Now I will use the artist_uri field to retrieve data about the genres of the artist, since the API does not give information about the genres of the track itself. I separated the steps because we have a bigger dataset than our playlist, and the API would limit my requests if I were to make them in less than a 30-second window.

In [15]:
# Genres are looked up per artist. Remember each answer so that an artist who
# appears on several tracks costs a single API request instead of one per track.
genres_by_artist = {}
artist_genre = []
for uri in artist_uri:
    if uri not in genres_by_artist:
        genres_by_artist[uri] = sp.artist(uri)['genres']
    # Append a copy so rows of the same artist do not share one list object.
    artist_genre.append(list(genres_by_artist[uri]))
    

Let's take a look at our dataset. I notice that for artist_genre we have some missing data; I will address this issue later on.

In [16]:
# Assemble the search results into a single table, one row per track found.
search_columns = {
    'artist_name': artist_name,
    'track_name': track_name,
    'track_id': track_id,
    'popularity': popularity,
    'artist_genre': artist_genre,
}
track_dataframe = pd.DataFrame(search_columns)

# Print the (rows, columns) shape, then preview the first rows.
print(track_dataframe.shape)
track_dataframe.head()

(1000, 5)


Unnamed: 0,artist_name,track_name,track_id,popularity,artist_genre
0,Carolina Gaitán - La Gaita,We Don't Talk About Bruno,52xJxFP6TqMuO4Yt0eOkMz,96,[]
1,Kodak Black,Super Gremlin,4A8cWXxKfIL3lAyUDzXbCF,91,"[florida rap, hip hop, melodic rap, miami hip ..."
2,Jessica Darrow,Surface Pressure,760jhRscwGbIIe1m1IIQpU,92,[]
3,GAYLE,abcdefu,4fouWK6XVHhzl78KzQ1UjL,100,[modern alternative pop]
4,Imagine Dragons,Enemy (with JID) - from the series Arcane Leag...,1r9xUipOqoNwggBpENDsvJ,98,"[modern rock, rock]"


Finally, I will request the track features, add them to the previous dataframe and drop columns that are useless for the recommendation system, such as uri, track_href, analysis_url and type.

In [17]:

# Request the audio features in batches of 100 ids.
# NOTE(review): 100 is the per-request limit documented for this endpoint -
# confirm against the spotipy / Web API reference.
batch_size = 100
search_track_ids = track_dataframe['track_id'].tolist()

# The number of batches follows the actual number of rows, so the cell also
# works when the search returned fewer (or more) than 1000 tracks.
features = []
for start in range(0, len(search_track_ids), batch_size):
    batch = sp.audio_features(search_track_ids[start:start + batch_size])
    # A track without features comes back as None; keep an empty row for it
    # so that row positions still match track_dataframe.
    features.extend(entry or {} for entry in batch)

# Build the frame once from all collected rows instead of concatenating a
# growing frame on every iteration.
audio_features = pd.DataFrame(features)
        




In [19]:
# Put the descriptive columns in front of the audio features, in this order.
leading_columns = ['track_name', 'artist_name', 'artist_genre', 'popularity']
for position, column in enumerate(leading_columns):
    audio_features.insert(position, column, track_dataframe[column])

# Remove the reference columns that are not used by the recommendation system.
audio_features = audio_features.drop(
    columns=['uri', 'track_href', 'analysis_url', 'type'],
)


In [21]:
# DataFrame.to_csv does not create missing parent folders, so make sure the
# target folder exists before writing (a fresh checkout may not have it).
os.makedirs('outputs', exist_ok=True)
audio_features.to_csv('outputs/searchspace.csv', index = False)

Some audio features -> https://developer.spotify.com/documentation/web-api/reference/#/operations/get-audio-analysis

#Espaço de busca

Para criar o sistema de recomendação, buscaremos