In [1]:
import spotipy

In [2]:
from tqdm import tqdm

In [3]:
from pathlib import Path

In [4]:
project_dir = Path().absolute().parent
data_dir = Path(project_dir, "data")

In [5]:
from spotipy.oauth2 import SpotifyOAuth
from dotenv import load_dotenv, find_dotenv
import os
import math
from itertools import chain
import time
load_dotenv(find_dotenv())
scope = "user-top-read%20user-read-currently-playing%20user-read-playback-state%20playlist-read-collaborative%20playlist-read-private%20user-library-read%20user-read-recently-played%20user-follow-read"

In [6]:
import flatdict
import pandas as pd
tqdm.pandas()

  from pandas import Panel


In [7]:
def chunk(data, n):
    return [data[x:x+n] for x in range(0, len(data), n)]

In [8]:
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))

In [9]:
def get_saved_tracks(page):
    #Force 1 second sleep
    time.sleep(1)
    return sp.current_user_saved_tracks(limit=50, offset=page*50)['items']

In [11]:
all_results = []
results = sp.current_user_saved_tracks(limit=50)
total_results = results['total']
total_pages = math.ceil(total_results/50)
all_tracks = list(chain.from_iterable([get_saved_tracks(page) for page in tqdm(list(range(total_pages)))]))

100%|██████████████████████████████████████████████████████████████████████████████████| 48/48 [00:05<00:00,  8.41it/s]


In [34]:
all_track_ids = [track['track']['id'] for track in all_tracks]
len(all_track_ids)

2352

In [13]:
chunked_track_ids = chunk(all_track_ids, 100)

In [10]:
def get_track_features(track_ids):
    #Force 2 second sleep
    time.sleep(2)
    if len(track_ids)>100:
        print("Too many tracks")
    else:
        return sp.audio_features(track_ids)

In [14]:
all_audio_features = [val for val in list(chain.from_iterable([get_track_features(chunked_tracks) for chunked_tracks in tqdm(chunked_track_ids)])) if val]

100%|██████████████████████████████████████████████████████████████████████████████████| 24/24 [02:33<00:00,  6.40s/it]


In [15]:
flattened_tracks = [dict(flatdict.FlatterDict(track)) for track in tqdm(all_tracks)]

100%|████████████████████████████████████████████████████████████████████████████| 2352/2352 [00:01<00:00, 1510.62it/s]


In [16]:
tracks_df = pd.DataFrame(flattened_tracks)
market_cols = [col for col in tracks_df.columns if "market" in col]

In [16]:
#Drop columns with more than 60% nulls and market cols
subset_tracks_df = tracks_df.drop(market_cols, axis=1).dropna(axis=1, thresh=int(0.6*len(tracks_df)))

In [17]:
subset_tracks_df.head(1)

Unnamed: 0,added_at,track:album:album_type,track:album:artists:0:external_urls:spotify,track:album:artists:0:href,track:album:artists:0:id,track:album:artists:0:name,track:album:artists:0:type,track:album:artists:0:uri,track:album:external_urls:spotify,track:album:href,...,track:external_urls:spotify,track:href,track:id,track:is_local,track:name,track:popularity,track:preview_url,track:track_number,track:type,track:uri
0,2021-01-12T04:04:50Z,single,https://open.spotify.com/artist/1A8KsZFc0BTaqD...,https://api.spotify.com/v1/artists/1A8KsZFc0BT...,1A8KsZFc0BTaqD9mWD6ei0,Matik,artist,spotify:artist:1A8KsZFc0BTaqD9mWD6ei0,https://open.spotify.com/album/08Hgy2A7UVh7FBv...,https://api.spotify.com/v1/albums/08Hgy2A7UVh7...,...,https://open.spotify.com/track/3U3qBo7AcWFoxvG...,https://api.spotify.com/v1/tracks/3U3qBo7AcWFo...,3U3qBo7AcWFoxvGeChoJ92,False,Light,36,https://p.scdn.co/mp3-preview/fd02b6e6e9dd55fa...,1,track,spotify:track:3U3qBo7AcWFoxvGeChoJ92


In [18]:
track_features_df = pd.DataFrame(all_audio_features)

In [19]:
track_features_df.head(1)

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.494,0.582,2,-6.62,1,0.0359,0.686,0.0,0.121,0.22,140.134,audio_features,3U3qBo7AcWFoxvGeChoJ92,spotify:track:3U3qBo7AcWFoxvGeChoJ92,https://api.spotify.com/v1/tracks/3U3qBo7AcWFo...,https://api.spotify.com/v1/audio-analysis/3U3q...,222857,4


In [20]:
tracks_and_features_df = pd.merge(subset_tracks_df, track_features_df, left_on="track:id", right_on="id")

In [21]:
tracks_and_features_df.head(1)

Unnamed: 0,added_at,track:album:album_type,track:album:artists:0:external_urls:spotify,track:album:artists:0:href,track:album:artists:0:id,track:album:artists:0:name,track:album:artists:0:type,track:album:artists:0:uri,track:album:external_urls:spotify,track:album:href,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,2021-01-12T04:04:50Z,single,https://open.spotify.com/artist/1A8KsZFc0BTaqD...,https://api.spotify.com/v1/artists/1A8KsZFc0BT...,1A8KsZFc0BTaqD9mWD6ei0,Matik,artist,spotify:artist:1A8KsZFc0BTaqD9mWD6ei0,https://open.spotify.com/album/08Hgy2A7UVh7FBv...,https://api.spotify.com/v1/albums/08Hgy2A7UVh7...,...,0.121,0.22,140.134,audio_features,3U3qBo7AcWFoxvGeChoJ92,spotify:track:3U3qBo7AcWFoxvGeChoJ92,https://api.spotify.com/v1/tracks/3U3qBo7AcWFo...,https://api.spotify.com/v1/audio-analysis/3U3q...,222857,4


In [27]:
artist_cols = [col for col in tracks_and_features_df.columns if "artists" in col and "id" in col]

In [31]:
all_artist_ids = set() 
for col in artist_cols:
   all_artist_ids.update(tracks_and_features_df[col].tolist())
all_artist_ids = list(all_artist_ids)
len(all_artist_ids)

In [54]:
def get_artist_features(artist_ids):
    #Force 2 second sleep
    time.sleep(2)
    if len(artist_ids)>50:
        print("Too many tracks")
    else:
        return sp.artists(artist_ids)

In [55]:
chunked_artist_ids = chunk(all_artist_ids, 50)

In [56]:
intermediate_list = [get_artist_features(chunked_artists) for chunked_artists in tqdm(chunked_artist_ids)]




  0%|                                                                                           | 0/22 [00:00<?, ?it/s][A[A[A


  5%|███▊                                                                               | 1/22 [00:02<00:43,  2.08s/it][A[A[A


  9%|███████▌                                                                           | 2/22 [00:04<00:41,  2.08s/it][A[A[A


 14%|███████████▎                                                                       | 3/22 [00:06<00:39,  2.08s/it][A[A[A


 18%|███████████████                                                                    | 4/22 [00:08<00:37,  2.08s/it][A[A[A


  9%|███████▌                                                                           | 2/22 [02:11<21:56, 65.84s/it][A[A[A



 27%|██████████████████████▋                                                            | 6/22 [00:40<02:09,  8.08s/it][A[A[A


 32%|██████████████████████████▍                                               

In [57]:
intermediate_list

[{'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2qDIR2WlcW3llkGqJWg9VJ'},
    'followers': {'href': None, 'total': 134539},
    'genres': ['alternative r&b', 'electropop', 'pop'],
    'href': 'https://api.spotify.com/v1/artists/2qDIR2WlcW3llkGqJWg9VJ',
    'id': '2qDIR2WlcW3llkGqJWg9VJ',
    'images': [{'height': 640,
      'url': 'https://i.scdn.co/image/b2618179ccf4729d0d4eec624f7e2d9b5ba719d0',
      'width': 640},
     {'height': 320,
      'url': 'https://i.scdn.co/image/966da8942c4792e5eeecd9bd8a7c34787958e9e5',
      'width': 320},
     {'height': 160,
      'url': 'https://i.scdn.co/image/9ad70f1be082df306f250c4ec8776c66a11ce9d1',
      'width': 160}],
    'name': 'Lolo Zouaï',
    'popularity': 55,
    'type': 'artist',
    'uri': 'spotify:artist:2qDIR2WlcW3llkGqJWg9VJ'},
   {'external_urls': {'spotify': 'https://open.spotify.com/artist/5k7Y2t4cB5dG219QgFCvXW'},
    'followers': {'href': None, 'total': 21989},
    'genres': ['electropop', 'indie el

In [51]:
all_artist_features = [val for val in list(chain.from_iterable(intermediate_list)) if val]



  0%|                                                                                           | 0/22 [00:00<?, ?it/s][A[A

  5%|███▊                                                                               | 1/22 [00:02<00:44,  2.12s/it][A[A

{'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2qDIR2WlcW3llkGqJWg9VJ'}, 'followers': {'href': None, 'total': 134539}, 'genres': ['alternative r&b', 'electropop', 'pop'], 'href': 'https://api.spotify.com/v1/artists/2qDIR2WlcW3llkGqJWg9VJ', 'id': '2qDIR2WlcW3llkGqJWg9VJ', 'images': [{'height': 640, 'url': 'https://i.scdn.co/image/b2618179ccf4729d0d4eec624f7e2d9b5ba719d0', 'width': 640}, {'height': 320, 'url': 'https://i.scdn.co/image/966da8942c4792e5eeecd9bd8a7c34787958e9e5', 'width': 320}, {'height': 160, 'url': 'https://i.scdn.co/image/9ad70f1be082df306f250c4ec8776c66a11ce9d1', 'width': 160}], 'name': 'Lolo Zouaï', 'popularity': 55, 'type': 'artist', 'uri': 'spotify:artist:2qDIR2WlcW3llkGqJWg9VJ'}, {'external_urls': {'spotify': 'https://open.spotify.com/artist/5k7Y2t4cB5dG219QgFCvXW'}, 'followers': {'href': None, 'total': 21989}, 'genres': ['electropop', 'indie electropop'], 'href': 'https://api.spotify.com/v1/artists/5k7Y2t4cB5dG219QgFCvXW', 'id': '5k7Y2t



  9%|███████▌                                                                           | 2/22 [00:04<00:42,  2.12s/it][A[A

{'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/5pokGZ1K9Hr6etaKPDxSG8'}, 'followers': {'href': None, 'total': 785990}, 'genres': ['thai indie rock', 'thai pop'], 'href': 'https://api.spotify.com/v1/artists/5pokGZ1K9Hr6etaKPDxSG8', 'id': '5pokGZ1K9Hr6etaKPDxSG8', 'images': [{'height': 640, 'url': 'https://i.scdn.co/image/4f0570457ba3d443eef817ec20563a5a655b78b0', 'width': 640}, {'height': 320, 'url': 'https://i.scdn.co/image/4a6d779e348b00c561828ac19839818488fcdbc3', 'width': 320}, {'height': 160, 'url': 'https://i.scdn.co/image/5ef3748b9f3301c86ed5642d8408b4a6b8afad8d', 'width': 160}], 'name': 'The Toys', 'popularity': 52, 'type': 'artist', 'uri': 'spotify:artist:5pokGZ1K9Hr6etaKPDxSG8'}, {'external_urls': {'spotify': 'https://open.spotify.com/artist/7MPGCB854Qo4alYMOPkBka'}, 'followers': {'href': None, 'total': 76609}, 'genres': ['bedroom pop'], 'href': 'https://api.spotify.com/v1/artists/7MPGCB854Qo4alYMOPkBka', 'id': '7MPGCB854Qo4alYMOPkBka', 'images': [

KeyboardInterrupt: 

In [42]:
all_artist_features[0]

'artists'

In [22]:
spotify_path = Path(data_dir, "SpotifyLikedFeaturesAndArtists.csv")
#FeaturesAndArtiststracks_and_features_df.to_csv(spotify_path, index=False)

In [23]:
#TODO: Also include songs in playlists that arent in liked songs?