In [1]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth,SpotifyClientCredentials

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
from tqdm import tqdm
from time import sleep
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)

In [2]:
# load in user json
with open('/media/jesse/Number3/json/Jesse.p.tao.json') as f:
    data = json.load(f)
user_df = pd.DataFrame(data['items'])
user_df.head()

Unnamed: 0,track,played_at,context
0,"{'album': {'album_type': 'album', 'artists': [...",2021-02-15T05:19:45.999Z,
1,"{'album': {'album_type': 'album', 'artists': [...",2021-02-15T05:19:26.252Z,
2,"{'album': {'album_type': 'single', 'artists': ...",2021-02-15T05:18:06.671Z,{'external_urls': {'spotify': 'https://open.sp...
3,"{'album': {'album_type': 'album', 'artists': [...",2021-02-15T01:58:55.395Z,
4,"{'album': {'album_type': 'single', 'artists': ...",2021-02-15T01:45:07.511Z,


In [3]:
user_df.iloc[0]['track']['artists'][0]['id']

'45ft4DyTCEJfQwTBHXpdhM'

In [4]:
track_url = []
track_id = []
track_name = []
artist_uri = []
artist_name = []
track_uri = []
popularity = []

for i in range(len(user_df)):
    track_url.append(user_df.iloc[i]['track']['href'])
    track_name.append(user_df.iloc[i]['track']['name'])
    track_uri.append('spotify:track:' + user_df.iloc[i]['track']['id'])
    artist_uri.append('spotify:artist:' + user_df.iloc[i]['track']['artists'][0]['id'])
    artist_name.append(user_df.iloc[i]['track']['artists'][0]['name'])
user_df['track_url'] = track_url
user_df['track_uri'] = track_uri
user_df['track_name'] = track_name
user_df['artist_uri'] = artist_uri
user_df['artist'] = artist_name

# setup spotify with app credentials, use environment variables so GitHub scrapers don't get access to my spotify developer keys
cid = os.getenv('SPOTIPY_CLIENT_ID')
secret = os.getenv('SPOTIPY_CLIENT_SECRET')

#https://medium.com/@maxtingle/getting-started-with-spotifys-api-spotipy-197c3dc6353b

client_credentials_manager = SpotifyClientCredentials(client_id = cid, 
                                                      client_secret = secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

for i in tqdm(range(len(user_df))):
     popularity.append(sp.artist(user_df.iloc[i]['artist_uri'])['popularity'])
     sleep(0.02)
user_df['popularity'] = popularity

user_df.head()

100%|██████████| 50/50 [00:03<00:00, 13.41it/s]


Unnamed: 0,track,played_at,context,track_url,track_uri,track_name,artist_uri,artist,popularity
0,"{'album': {'album_type': 'album', 'artists': [...",2021-02-15T05:19:45.999Z,,https://api.spotify.com/v1/tracks/22LRgvP6LBtm...,spotify:track:22LRgvP6LBtmN5SqGrK3O7,Drown,spotify:artist:45ft4DyTCEJfQwTBHXpdhM,milet,63
1,"{'album': {'album_type': 'album', 'artists': [...",2021-02-15T05:19:26.252Z,,https://api.spotify.com/v1/tracks/7HAkrW8JUYoV...,spotify:track:7HAkrW8JUYoVR9dXwb6Uku,ホタルノヒカリ,spotify:artist:5YneEA2nLtAhkD5t2769lZ,Ikimonogakari,70
2,"{'album': {'album_type': 'single', 'artists': ...",2021-02-15T05:18:06.671Z,{'external_urls': {'spotify': 'https://open.sp...,https://api.spotify.com/v1/tracks/3wWUkfEmfO6q...,spotify:track:3wWUkfEmfO6qvT1iIrrusD,Too Good to Be True,spotify:artist:5UI7y2lrDYhqcM7sdbTbQ9,xerLK,54
3,"{'album': {'album_type': 'album', 'artists': [...",2021-02-15T01:58:55.395Z,,https://api.spotify.com/v1/tracks/7HAkrW8JUYoV...,spotify:track:7HAkrW8JUYoVR9dXwb6Uku,ホタルノヒカリ,spotify:artist:5YneEA2nLtAhkD5t2769lZ,Ikimonogakari,70
4,"{'album': {'album_type': 'single', 'artists': ...",2021-02-15T01:45:07.511Z,,https://api.spotify.com/v1/tracks/15TaZdhznRBa...,spotify:track:15TaZdhznRBaKzyHLHA9ad,unravel (acoustic version) - Acoustic Version,spotify:artist:3B9O5mYYw89fFXkwKh7jCS,TK from Ling tosite sigure,68


In [5]:
user_df.drop(['track', 'played_at', 'context'], axis = 1, inplace = True)

In [6]:
user_df.tail()

Unnamed: 0,track_url,track_uri,track_name,artist_uri,artist,popularity
45,https://api.spotify.com/v1/tracks/2y7f8qkrgIqY...,spotify:track:2y7f8qkrgIqY5KsfPNcuix,Black Catcher,spotify:artist:0PYPjvZaOa7bsCq26JOX8d,Vickeblanka,67
46,https://api.spotify.com/v1/tracks/5CyHyrAW1RPi...,spotify:track:5CyHyrAW1RPip3ruorIWyo,ワガママMIRROR HEART,spotify:artist:74VIJfMSLnKe5eU3yvv2RT,大橋彩香,40
47,https://api.spotify.com/v1/tracks/2MGzJVW4N1Su...,spotify:track:2MGzJVW4N1SuPGdTlH0fSr,Orange (Shigatsu Wa Kimi No Uso) [Ending],spotify:artist:47bagukDdx1Oqq6aL9JlwE,Berioska,57
48,https://api.spotify.com/v1/tracks/3qWtzuw4ma3I...,spotify:track:3qWtzuw4ma3I3NJUnqHjOs,"Centimeter (Rent a Girlfriend: Kanojo, Okarish...",spotify:artist:1MQ48GaF9nanTisRGu3Mtl,Shayne Orok,67
49,https://api.spotify.com/v1/tracks/65tu16zi8H7f...,spotify:track:65tu16zi8H7fGexuPINxJp,"Centimeter (From ""Rent a Girlfriend: Kanojo, O...",spotify:artist:5ZcXM1KFH6hhLoBIdgCnap,Miura Jam,67


In [7]:
# load in new tracks csv
new_tracks = pd.read_csv('../data/tracks_with_popularity.csv')
new_tracks.tail()

Unnamed: 0,artist_uri,artist,track_uri,track_name,track_url,popularity
12864,spotify:artist:2qmLRGo4Y18XiEprn1Wu78,RazorbAck,spotify:track:1W08G148Tn2SD7lBHGlN9x,Dream on,https://open.spotify.com/track/1W08G148Tn2SD7l...,2
12865,spotify:artist:3SIvdi6XFCP2Jvz1b2NyKg,Casper Carl,spotify:track:5qPdQniC351yNubD80kWEZ,Bliv ved,https://open.spotify.com/track/5qPdQniC351yNub...,6
12866,spotify:artist:2sh7XdDAPLnUil4q8yZueM,Janela Lateral,spotify:track:4F8GdSzajsFVXtPjKcRwVA,O Que Move as Palavras,https://open.spotify.com/track/4F8GdSzajsFVXtP...,2
12867,spotify:artist:1MD5vC0fF8lpoi7ZFmc5wg,Jimmy Loefgren,spotify:track:3oaljuOlbX9A5LBoHjCgER,Soldyrkaren (Radio Edit),https://open.spotify.com/track/3oaljuOlbX9A5LB...,0
12868,spotify:artist:77lzsMghtGHTfOJeEDqsuG,リーマンマイク,spotify:track:5pd95TrUOMuIkv2zCpeB29,恋愛神,https://open.spotify.com/track/5pd95TrUOMuIkv2...,13


In [8]:
new_tracks = new_tracks[new_tracks['popularity'] <= 70]

In [9]:
# only need to get user features after getting new_track features once
new_features_list = []
user_features_list = []
for i in tqdm(range(len(new_tracks))):
    new_features_list.append(sp.audio_features(new_tracks.iloc[i]['track_uri'])[0])
    sleep(0.02)
for i in tqdm(range(len(user_df))):
    user_features_list.append(sp.audio_features(user_df.iloc[i]['track_uri'])[0])
    sleep(0.02)

100%|██████████| 12749/12749 [20:55<00:00, 10.15it/s] 
100%|██████████| 50/50 [00:06<00:00,  7.96it/s]


In [10]:
def is_empty(any_structure):
    if any_structure:
        return False
    else:
        print('Structure is empty.')
        return True

In [11]:
user_features_list = [i for i in user_features_list if is_empty(i) == False]
new_features_list = [i for i in new_features_list if is_empty(i) == False]

Structure is empty.
Structure is empty.
Structure is empty.
Structure is empty.
Structure is empty.
Structure is empty.
Structure is empty.
Structure is empty.
Structure is empty.


In [12]:
user_features_df = pd.DataFrame(user_features_list)
user_features_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.437,0.61,9,-4.212,0,0.0329,0.0102,0.000132,0.396,0.351,179.921,audio_features,22LRgvP6LBtmN5SqGrK3O7,spotify:track:22LRgvP6LBtmN5SqGrK3O7,https://api.spotify.com/v1/tracks/22LRgvP6LBtm...,https://api.spotify.com/v1/audio-analysis/22LR...,211787,4
1,0.566,0.919,2,-1.901,0,0.0639,0.0666,0.0,0.34,0.55,146.99,audio_features,7HAkrW8JUYoVR9dXwb6Uku,spotify:track:7HAkrW8JUYoVR9dXwb6Uku,https://api.spotify.com/v1/tracks/7HAkrW8JUYoV...,https://api.spotify.com/v1/audio-analysis/7HAk...,242293,4
2,0.127,0.0838,0,-28.353,1,0.0417,0.946,0.933,0.0906,0.0298,146.325,audio_features,3wWUkfEmfO6qvT1iIrrusD,spotify:track:3wWUkfEmfO6qvT1iIrrusD,https://api.spotify.com/v1/tracks/3wWUkfEmfO6q...,https://api.spotify.com/v1/audio-analysis/3wWU...,165205,4
3,0.566,0.919,2,-1.901,0,0.0639,0.0666,0.0,0.34,0.55,146.99,audio_features,7HAkrW8JUYoVR9dXwb6Uku,spotify:track:7HAkrW8JUYoVR9dXwb6Uku,https://api.spotify.com/v1/tracks/7HAkrW8JUYoV...,https://api.spotify.com/v1/audio-analysis/7HAk...,242293,4
4,0.518,0.39,9,-6.083,1,0.0937,0.829,0.0,0.0729,0.311,84.092,audio_features,15TaZdhznRBaKzyHLHA9ad,spotify:track:15TaZdhznRBaKzyHLHA9ad,https://api.spotify.com/v1/tracks/15TaZdhznRBa...,https://api.spotify.com/v1/audio-analysis/15Ta...,228834,5


In [13]:
new_features_df = pd.DataFrame(new_features_list)
new_features_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.590,0.684,0,-6.088,1,0.3270,0.2530,0.000000,0.1380,0.674,79.940,audio_features,7502n73GiwIzGj60co3zyg,spotify:track:7502n73GiwIzGj60co3zyg,https://api.spotify.com/v1/tracks/7502n73GiwIz...,https://api.spotify.com/v1/audio-analysis/7502...,161236,1
1,0.743,0.540,4,-7.726,0,0.0355,0.7520,0.000608,0.1080,0.540,100.024,audio_features,5wdYNXwZUzBw2gkVpSkym5,spotify:track:5wdYNXwZUzBw2gkVpSkym5,https://api.spotify.com/v1/tracks/5wdYNXwZUzBw...,https://api.spotify.com/v1/audio-analysis/5wdY...,201679,4
2,0.603,0.726,8,-6.974,0,0.4330,0.1940,0.000000,0.1580,0.514,72.297,audio_features,78Sw37ULFaFajzAmyvKkTA,spotify:track:78Sw37ULFaFajzAmyvKkTA,https://api.spotify.com/v1/tracks/78Sw37ULFaFa...,https://api.spotify.com/v1/audio-analysis/78Sw...,334561,4
3,0.731,0.540,3,-5.975,0,0.0512,0.3610,0.000000,0.1600,0.313,141.955,audio_features,2vcLUuRT3sqXnPJaIX5w5c,spotify:track:2vcLUuRT3sqXnPJaIX5w5c,https://api.spotify.com/v1/tracks/2vcLUuRT3sqX...,https://api.spotify.com/v1/audio-analysis/2vcL...,236620,4
4,0.628,0.671,8,-9.186,1,0.1580,0.0130,0.000000,0.3330,0.240,142.777,audio_features,6nJLOMCzucqMhcXrBA3IDo,spotify:track:6nJLOMCzucqMhcXrBA3IDo,https://api.spotify.com/v1/tracks/6nJLOMCzucqM...,https://api.spotify.com/v1/audio-analysis/6nJL...,198970,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12735,0.565,0.739,9,-3.295,0,0.0656,0.5420,0.000000,0.1210,0.530,84.789,audio_features,1W08G148Tn2SD7lBHGlN9x,spotify:track:1W08G148Tn2SD7lBHGlN9x,https://api.spotify.com/v1/tracks/1W08G148Tn2S...,https://api.spotify.com/v1/audio-analysis/1W08...,218730,4
12736,0.698,0.316,6,-10.014,0,0.3850,0.5430,0.000000,0.0906,0.577,159.976,audio_features,5qPdQniC351yNubD80kWEZ,spotify:track:5qPdQniC351yNubD80kWEZ,https://api.spotify.com/v1/tracks/5qPdQniC351y...,https://api.spotify.com/v1/audio-analysis/5qPd...,228000,4
12737,0.387,0.463,2,-8.988,1,0.0311,0.6800,0.000002,0.1450,0.143,157.038,audio_features,4F8GdSzajsFVXtPjKcRwVA,spotify:track:4F8GdSzajsFVXtPjKcRwVA,https://api.spotify.com/v1/tracks/4F8GdSzajsFV...,https://api.spotify.com/v1/audio-analysis/4F8G...,272927,3
12738,0.543,0.571,9,-9.655,0,0.0307,0.1560,0.000013,0.3090,0.388,109.999,audio_features,3oaljuOlbX9A5LBoHjCgER,spotify:track:3oaljuOlbX9A5LBoHjCgER,https://api.spotify.com/v1/tracks/3oaljuOlbX9A...,https://api.spotify.com/v1/audio-analysis/3oal...,264000,4


In [14]:
# save features of new releases
new_features_df.to_csv('../data/new_track_features.csv', index = False)

In [15]:
new_features_df = pd.read_csv('../data/new_track_features.csv')

In [16]:
new_features_df.describe()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
count,12740.0,12740.0,12740.0,12740.0,12740.0,12740.0,12740.0,12740.0,12740.0,12740.0,12740.0,12740.0,12740.0
mean,0.64493,0.60112,5.311695,-8.904479,0.556593,0.133305,0.296394,0.163637,0.1772,0.469918,121.65869,211611.2,3.937834
std,0.1613,0.221371,3.599736,4.547515,0.496806,0.135398,0.296285,0.315918,0.138351,0.246379,29.570142,161755.7,0.393124
min,0.0,0.000251,0.0,-40.374,0.0,0.0,2e-06,0.0,0.0128,0.0,0.0,34307.0,0.0
25%,0.545,0.456,2.0,-10.79225,0.0,0.0417,0.037375,0.0,0.0967,0.27,97.085,156986.2,4.0
50%,0.666,0.613,5.0,-8.0985,1.0,0.0691,0.188,2.9e-05,0.12,0.456,122.014,192000.0,4.0
75%,0.764,0.77,8.0,-5.98,1.0,0.183,0.502,0.058975,0.21,0.662,140.08325,231976.8,4.0
max,0.983,1.0,11.0,3.509,1.0,0.961,0.996,0.992,0.984,0.983,216.077,4883448.0,5.0


In [17]:
combined_features_df = pd.concat([new_features_df, user_features_df])
combined_features_df.reset_index(drop = True, inplace = True)
combined_features_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.590,0.684,0,-6.088,1,0.3270,0.25300,0.000000,0.1380,0.674,79.940,audio_features,7502n73GiwIzGj60co3zyg,spotify:track:7502n73GiwIzGj60co3zyg,https://api.spotify.com/v1/tracks/7502n73GiwIz...,https://api.spotify.com/v1/audio-analysis/7502...,161236,1
1,0.743,0.540,4,-7.726,0,0.0355,0.75200,0.000608,0.1080,0.540,100.024,audio_features,5wdYNXwZUzBw2gkVpSkym5,spotify:track:5wdYNXwZUzBw2gkVpSkym5,https://api.spotify.com/v1/tracks/5wdYNXwZUzBw...,https://api.spotify.com/v1/audio-analysis/5wdY...,201679,4
2,0.603,0.726,8,-6.974,0,0.4330,0.19400,0.000000,0.1580,0.514,72.297,audio_features,78Sw37ULFaFajzAmyvKkTA,spotify:track:78Sw37ULFaFajzAmyvKkTA,https://api.spotify.com/v1/tracks/78Sw37ULFaFa...,https://api.spotify.com/v1/audio-analysis/78Sw...,334561,4
3,0.731,0.540,3,-5.975,0,0.0512,0.36100,0.000000,0.1600,0.313,141.955,audio_features,2vcLUuRT3sqXnPJaIX5w5c,spotify:track:2vcLUuRT3sqXnPJaIX5w5c,https://api.spotify.com/v1/tracks/2vcLUuRT3sqX...,https://api.spotify.com/v1/audio-analysis/2vcL...,236620,4
4,0.628,0.671,8,-9.186,1,0.1580,0.01300,0.000000,0.3330,0.240,142.777,audio_features,6nJLOMCzucqMhcXrBA3IDo,spotify:track:6nJLOMCzucqMhcXrBA3IDo,https://api.spotify.com/v1/tracks/6nJLOMCzucqM...,https://api.spotify.com/v1/audio-analysis/6nJL...,198970,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12785,0.592,0.894,11,-4.710,0,0.0420,0.00358,0.000005,0.0721,0.642,100.026,audio_features,2y7f8qkrgIqY5KsfPNcuix,spotify:track:2y7f8qkrgIqY5KsfPNcuix,https://api.spotify.com/v1/tracks/2y7f8qkrgIqY...,https://api.spotify.com/v1/audio-analysis/2y7f...,196947,4
12786,0.395,0.953,8,-3.845,1,0.0914,0.04530,0.000006,0.1250,0.631,90.072,audio_features,5CyHyrAW1RPip3ruorIWyo,spotify:track:5CyHyrAW1RPip3ruorIWyo,https://api.spotify.com/v1/tracks/5CyHyrAW1RPi...,https://api.spotify.com/v1/audio-analysis/5CyH...,276200,4
12787,0.393,0.413,6,-7.238,1,0.0304,0.64000,0.000000,0.0761,0.247,85.820,audio_features,2MGzJVW4N1SuPGdTlH0fSr,spotify:track:2MGzJVW4N1SuPGdTlH0fSr,https://api.spotify.com/v1/tracks/2MGzJVW4N1Su...,https://api.spotify.com/v1/audio-analysis/2MGz...,349960,4
12788,0.593,0.844,2,-6.716,1,0.0465,0.00671,0.000477,0.0739,0.794,139.936,audio_features,3qWtzuw4ma3I3NJUnqHjOs,spotify:track:3qWtzuw4ma3I3NJUnqHjOs,https://api.spotify.com/v1/tracks/3qWtzuw4ma3I...,https://api.spotify.com/v1/audio-analysis/3qWt...,89196,4


In [18]:
combined_features_df.drop(['type', 'id', 'duration_ms', 'time_signature', 'track_href', 
                  'analysis_url'], axis = 1, inplace = True)
combined_features_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,uri
0,0.59,0.684,0,-6.088,1,0.327,0.253,0.0,0.138,0.674,79.94,spotify:track:7502n73GiwIzGj60co3zyg
1,0.743,0.54,4,-7.726,0,0.0355,0.752,0.000608,0.108,0.54,100.024,spotify:track:5wdYNXwZUzBw2gkVpSkym5
2,0.603,0.726,8,-6.974,0,0.433,0.194,0.0,0.158,0.514,72.297,spotify:track:78Sw37ULFaFajzAmyvKkTA
3,0.731,0.54,3,-5.975,0,0.0512,0.361,0.0,0.16,0.313,141.955,spotify:track:2vcLUuRT3sqXnPJaIX5w5c
4,0.628,0.671,8,-9.186,1,0.158,0.013,0.0,0.333,0.24,142.777,spotify:track:6nJLOMCzucqMhcXrBA3IDo


In [19]:
compare_df = combined_features_df[['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'mode']]
mms = MinMaxScaler()
compare_df_sc = mms.fit_transform(compare_df)
compare_df_sc = pd.DataFrame(compare_df_sc, columns = compare_df.columns)
compare_df_sc.head()

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,mode
0,0.600203,0.683921,0.781305,0.340271,0.254015,0.0,0.128913,0.685656,0.369961,1.0
1,0.755849,0.539885,0.743978,0.036941,0.75502,0.000613,0.098023,0.549339,0.462909,0.0
2,0.613428,0.725931,0.761115,0.450572,0.194778,0.0,0.149506,0.522889,0.334589,0.0
3,0.743642,0.539885,0.78388,0.053278,0.362449,0.0,0.151565,0.318413,0.656965,0.0
4,0.638861,0.670917,0.710708,0.164412,0.01305,0.0,0.329695,0.244151,0.660769,1.0


In [20]:
def compute_distance(a, b):
    normalize_a = tf.nn.l2_normalize(a, 1)
    normalize_b = tf.nn.l2_normalize(b, 1)
    distance = 1 - tf.matmul(normalize_a, normalize_b, transpose_b = True)
    return distance

In [21]:
%%time
distances = compute_distance(compare_df_sc, compare_df_sc)

CPU times: user 882 ms, sys: 875 ms, total: 1.76 s
Wall time: 1.74 s


In [22]:
%%time
cosine_similarity(compare_df_sc)

CPU times: user 5.87 s, sys: 18.8 s, total: 24.7 s
Wall time: 1.13 s


array([[1.        , 0.75983239, 0.8230966 , ..., 0.91092956, 0.9640331 ,
        0.96148194],
       [0.75983239, 1.        , 0.88242845, ..., 0.75263493, 0.72995186,
        0.73338118],
       [0.8230966 , 0.88242845, 1.        , ..., 0.65899406, 0.79243338,
        0.79999897],
       ...,
       [0.91092956, 0.75263493, 0.65899406, ..., 1.        , 0.85597416,
        0.84987196],
       [0.9640331 , 0.72995186, 0.79243338, ..., 0.85597416, 1.        ,
        0.99293134],
       [0.96148194, 0.73338118, 0.79999897, ..., 0.84987196, 0.99293134,
        1.        ]])

In [23]:
distances_df = pd.DataFrame(distances.numpy(), index = combined_features_df['uri'], columns = combined_features_df['uri'])

In [24]:
distances_df.tail()

uri,spotify:track:7502n73GiwIzGj60co3zyg,spotify:track:5wdYNXwZUzBw2gkVpSkym5,spotify:track:78Sw37ULFaFajzAmyvKkTA,spotify:track:2vcLUuRT3sqXnPJaIX5w5c,spotify:track:6nJLOMCzucqMhcXrBA3IDo,spotify:track:30g0Ka64eZ3PzTWqUtJ62M,spotify:track:5ZTsje1HfASFxiffjLoITe,spotify:track:0fS1gL1DkZQWn789DZDHoY,spotify:track:5YUTDeynvOp9Ig8EjPmlS5,spotify:track:1n2fs63LPxGUVQa27Y2Ddo,...,spotify:track:4YXlnwnK3yt5xLfF5Q8wKG,spotify:track:5hbg2YisSRgoGG85pl0g1F,spotify:track:3mScGCzxiXA9OaHdBeuk7O,spotify:track:21z1GxeHjfD3qkaF5CgCTO,spotify:track:2XpV9sHBexcNrz0Gyf3l18,spotify:track:2y7f8qkrgIqY5KsfPNcuix,spotify:track:5CyHyrAW1RPip3ruorIWyo,spotify:track:2MGzJVW4N1SuPGdTlH0fSr,spotify:track:3qWtzuw4ma3I3NJUnqHjOs,spotify:track:65tu16zi8H7fGexuPINxJp
uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
spotify:track:2y7f8qkrgIqY5KsfPNcuix,0.204549,0.146591,0.0571,0.09157,0.2444,0.347444,0.262021,0.180926,0.381404,0.10251,...,0.188029,0.138925,0.197188,0.199078,0.013738,-2.220446e-16,0.170372,0.369325,0.1530785,0.1528069
spotify:track:5CyHyrAW1RPip3ruorIWyo,0.033408,0.301993,0.210785,0.263408,0.064306,0.183123,0.106158,0.045991,0.493019,0.239038,...,0.038035,0.014201,0.044082,0.019439,0.176695,0.1703722,0.0,0.128947,0.01890349,0.02237501
spotify:track:2MGzJVW4N1SuPGdTlH0fSr,0.08907,0.247365,0.341006,0.273379,0.119708,0.148842,0.023842,0.084009,0.270145,0.285739,...,0.140825,0.149789,0.141833,0.131175,0.356792,0.3693254,0.128947,0.0,0.1440258,0.150128
spotify:track:3qWtzuw4ma3I3NJUnqHjOs,0.035967,0.270048,0.207567,0.223335,0.05903,0.151692,0.092928,0.041549,0.463157,0.222473,...,0.043547,0.00368,0.054575,0.047802,0.147796,0.1530785,0.018903,0.144026,-2.220446e-16,0.007068661
spotify:track:65tu16zi8H7fGexuPINxJp,0.038518,0.266619,0.200001,0.214642,0.046184,0.154995,0.095687,0.041498,0.45977,0.20471,...,0.050056,0.003006,0.048934,0.036783,0.151688,0.1528069,0.022375,0.150128,0.007068661,1.110223e-16


In [25]:
distances_df.shape

(12790, 12790)

In [26]:
distances_df.loc['score'] = distances_df.tail(len(user_features_list)).sum()
distances_df.loc['score'][:-len(user_features_list)]

uri
spotify:track:7502n73GiwIzGj60co3zyg     7.835362
spotify:track:5wdYNXwZUzBw2gkVpSkym5    11.717342
spotify:track:78Sw37ULFaFajzAmyvKkTA     9.277060
spotify:track:2vcLUuRT3sqXnPJaIX5w5c     8.965936
spotify:track:6nJLOMCzucqMhcXrBA3IDo     7.817437
                                          ...    
spotify:track:1W08G148Tn2SD7lBHGlN9x     9.121804
spotify:track:5qPdQniC351yNubD80kWEZ    12.179532
spotify:track:4F8GdSzajsFVXtPjKcRwVA    11.602095
spotify:track:3oaljuOlbX9A5LBoHjCgER     7.485490
spotify:track:5pd95TrUOMuIkv2zCpeB29     6.199288
Name: score, Length: 12740, dtype: float64

In [27]:
distances_df.loc['score'][:-len(user_features_list)].sort_values()[0:5]

uri
spotify:track:7zY35cpncy3UYs1ZBfASGW    5.656621
spotify:track:0DT1nb0hytfdzS1J9piX1W    5.723181
spotify:track:4IvzoLnAzFneKNpikShYMu    5.769391
spotify:track:0J71CQRti8BcKzdwlssDDG    5.776900
spotify:track:43HTmB9DuOU7DCzhK9bIqz    5.786268
Name: score, dtype: float64

In [28]:
sp.track('spotify:track:7zY35cpncy3UYs1ZBfASGW')

{'album': {'album_type': 'single',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/3nbusTdXH6cs0IDPwpRmnI'},
    'href': 'https://api.spotify.com/v1/artists/3nbusTdXH6cs0IDPwpRmnI',
    'id': '3nbusTdXH6cs0IDPwpRmnI',
    'name': 'Kevin do recife',
    'type': 'artist',
    'uri': 'spotify:artist:3nbusTdXH6cs0IDPwpRmnI'},
   {'external_urls': {'spotify': 'https://open.spotify.com/artist/3VU1prPpWqYhRlKS6FQNMR'},
    'href': 'https://api.spotify.com/v1/artists/3VU1prPpWqYhRlKS6FQNMR',
    'id': '3VU1prPpWqYhRlKS6FQNMR',
    'name': 'MC Vitin LC',
    'type': 'artist',
    'uri': 'spotify:artist:3VU1prPpWqYhRlKS6FQNMR'}],
  'available_markets': ['AD',
   'AE',
   'AL',
   'AR',
   'AT',
   'AU',
   'BA',
   'BE',
   'BG',
   'BH',
   'BO',
   'BR',
   'BY',
   'CA',
   'CH',
   'CL',
   'CO',
   'CR',
   'CY',
   'CZ',
   'DE',
   'DK',
   'DO',
   'DZ',
   'EC',
   'EE',
   'EG',
   'ES',
   'FI',
   'FR',
   'GB',
   'GR',
   'GT',
   'HK',
   'HN',
   'HR'

In [29]:
distances_df.loc['score'][:-len(user_features_list)].sort_values()[0:5].index

Index(['spotify:track:7zY35cpncy3UYs1ZBfASGW',
       'spotify:track:0DT1nb0hytfdzS1J9piX1W',
       'spotify:track:4IvzoLnAzFneKNpikShYMu',
       'spotify:track:0J71CQRti8BcKzdwlssDDG',
       'spotify:track:43HTmB9DuOU7DCzhK9bIqz'],
      dtype='object', name='uri')

In [30]:
distances_df.loc['score'][:-len(user_features_list)].sort_values()[0:5]

uri
spotify:track:7zY35cpncy3UYs1ZBfASGW    5.656621
spotify:track:0DT1nb0hytfdzS1J9piX1W    5.723181
spotify:track:4IvzoLnAzFneKNpikShYMu    5.769391
spotify:track:0J71CQRti8BcKzdwlssDDG    5.776900
spotify:track:43HTmB9DuOU7DCzhK9bIqz    5.786268
Name: score, dtype: float64