# Download track details from Spotify playlists

Spotipy documentation: https://spotipy.readthedocs.io/en/latest/

Spotify API documentation: https://developer.spotify.com/web-api/

In [1]:
import math
import os
import time

import pandas as pd
import spotipy
import spotipy.util as util

from keys import spotify_username, spotify_client_id, spotify_client_secret, spotify_redirect_uri

In [2]:
# playlists to download, identified by their playlist ids
playlist_ids = [
    '2UfBZ8aRbMniK0052b5uYb',
    '25suCiMhFOWCWLG2JLfhot',
]

# number of tracks to download per request (max=100)
limit = 100

# how long to pause between requests (handed to time.sleep, so in seconds)
pause = 0.5

## Authorize spotipy and define functions for the download

In [3]:
# scopes requested for working with playlists, as one space-separated string
# NOTE(review): only read calls are visible in this notebook, so the two
# playlist-modify scopes may be broader than needed - confirm before trimming
scope = 'playlist-read-private playlist-modify-private playlist-modify-public'

# prompt for (or reuse) user authorization and get back an auth token
token = util.prompt_for_user_token(
    username=spotify_username,
    scope=scope,
    client_id=spotify_client_id,
    client_secret=spotify_client_secret,
    redirect_uri=spotify_redirect_uri,
)

In [4]:
# function to determine how many tracks total are in a playlist
def get_playlist_track_count(playlist_id):
    """Return the total number of tracks in a playlist.

    Relies on two notebook-level names rather than parameters: `sp` (the
    spotipy client) and `user_id` (the playlist owner's user id). Both are
    defined in later cells, so those cells must run before this is called.

    Parameters
    ----------
    playlist_id : str
        id of the playlist to inspect

    Returns
    -------
    the value of `tracks.total` from the API response
    """
    # only the total is needed, so ask the API to filter the response down to
    # that one field instead of sending the playlist's first page of tracks too
    playlist = sp.user_playlist(user=user_id, playlist_id=playlist_id, fields='tracks.total')
    return playlist['tracks']['total']

In [5]:
# function to iteratively get all the tracks from a playlist
def get_playlist_tracks_details(playlist_id, user_id, track_count, limit):
    """Download every track item of a playlist, one page of `limit` items at a time.

    Uses the notebook-level spotipy client `sp` and sleeps for the
    notebook-level `pause` before each request.

    Parameters
    ----------
    playlist_id : str
        id of the playlist to download
    user_id : str
        user id passed to the API along with the playlist id
    track_count : int
        total number of tracks in the playlist; determines how many requests are made
    limit : int
        number of tracks to ask for per request

    Returns
    -------
    list of the item dicts returned by the API, each with an added
    'playlist_id' key recording which playlist it came from
    """
    collected = []

    # one request per page, rounding up so a partial last page is still fetched
    pages_needed = math.ceil(track_count / limit)

    for page_index in range(pages_needed):
        time.sleep(pause)
        page = sp.user_playlist_tracks(user=user_id,
                                       playlist_id=playlist_id,
                                       fields=None,
                                       limit=limit,
                                       offset=page_index * limit)
        page_items = page['items']

        # tag each item with its source playlist before collecting it
        for entry in page_items:
            entry['playlist_id'] = playlist_id
        collected += page_items

    return collected

## Begin the download process

In [6]:
# open a connection to spotify: create the spotipy client, authenticated with
# the token obtained above; every API call in this notebook goes through `sp`
sp = spotipy.Spotify(auth=token)

In [7]:
# look up the profile for the configured username and keep its id,
# which the playlist requests below are made with
user = sp.user(user=spotify_username)
user_id = user['id']

In [8]:
# walk through the configured playlists and collect the details of every track in each
tracks = []
for n, playlist_id in enumerate(playlist_ids):

    print('playlist {} of {}: {}'.format(n+1, len(playlist_ids), playlist_id))

    # ask for the playlist's size first: it determines how many requests are needed
    track_count = get_playlist_track_count(playlist_id)
    print('  {:,} tracks in playlist {}'.format(track_count, playlist_id))

    # download all of this playlist's tracks and add them to the running list
    new_tracks = get_playlist_tracks_details(playlist_id, user_id, track_count, limit)
    print('  {:,} tracks retrieved from API'.format(len(new_tracks)))
    tracks += new_tracks

playlist 1 of 2: 2UfBZ8aRbMniK0052b5uYb
  9,872 tracks in playlist 2UfBZ8aRbMniK0052b5uYb
  9,872 tracks retrieved from API
playlist 2 of 2: 25suCiMhFOWCWLG2JLfhot
  9,352 tracks in playlist 25suCiMhFOWCWLG2JLfhot
  9,352 tracks retrieved from API


## Extract and save the details

In [9]:
# for each downloaded track, extract the details to an object and append to list
track_objects = []
for track in tracks:
    track_details = track['track']

    # the API can return a null track for playlist entries that are no longer
    # available; there is nothing to extract from those, so skip them
    # (any skipped entries show up as a lower row count in the dataframe)
    if track_details is None:
        continue

    album = track_details['album']
    # only the first listed artist is recorded for each track
    first_artist = track_details['artists'][0]

    track_obj = {}
    track_obj['album_name'] = album['name']
    track_obj['album_id'] = album['id']
    track_obj['artist_name'] = first_artist['name']
    track_obj['artist_id'] = first_artist['id']
    track_obj['track_name'] = track_details['name']
    track_obj['track_id'] = track_details['id']
    track_obj['duration_ms'] = track_details['duration_ms']
    # playlist_id was attached to each item when it was downloaded
    track_obj['playlist_id'] = track['playlist_id']
    track_objects.append(track_obj)

In [10]:
# build a dataframe with one row per extracted track object,
# report how many rows it has, then preview the first few
df = pd.DataFrame(data=track_objects)
print(df.shape[0])
df.head(n=5)

19224


Unnamed: 0,album_id,album_name,artist_id,artist_name,duration_ms,playlist_id,track_id,track_name
0,6p5qrjJ5vdtO8eS0K2zyCs,The Moon Rising - Pipa & Ruan,7cq68kwWhVASWHBr5OPlkV,Min Xiao-Fen,152800,2UfBZ8aRbMniK0052b5uYb,1KvrS59nB8zvnOdSnpn7LA,Meandering Stream from High Mountains
1,1WzRXeE4N7Ktzvhmvr4hew,金曲情不變新曲+精選珍藏版,4XgiICuVKb8ARTXG0kuDB1,陳浩德,153133,2UfBZ8aRbMniK0052b5uYb,0AJnuKVk8NFCjTLxQXmFq8,悲秋風
2,1hE6dawN13GR5EPHdBgTNf,京劇大典 9 紅生篇 (Masterpieces of Beijing Opera Vol. 9),6mqzILewAiMgFvD2q49XRS,王鴻壽 (Wang Hongshou),154480,2UfBZ8aRbMniK0052b5uYb,5k0cQHBIfgNAZtv8aKdPXJ,關公挑袍 (Guan Gong in Command)
3,0A8nzYNMn1MCzpieK6lgSX,Ancient Chinese Music: Shadows Of Apricot Blos...,32Em8TFlcUJmKrOq38ieJK,Shan Xiurong,154346,2UfBZ8aRbMniK0052b5uYb,6Cwc1YzmuN4qKWwoyCfV0U,Melancholy Over Lotus
4,4oKSTzh7Gg9py7rOJ3o2dS,"Authentic Japan, Vol. 2: Descriptive Moods & G...",3hTwdAdxr0CfpBlds2NbyT,Tadao Sawai,154000,2UfBZ8aRbMniK0052b5uYb,1Qb03s39BGIMOsPgcnG2dM,Early Spring


In [11]:
# save the dataframe to csv
filename = 'data/playlist_tracks.csv'

# to_csv does not create missing folders, so make sure the output folder
# exists first (it will not on a fresh checkout)
output_folder = os.path.dirname(filename)
if output_folder:
    os.makedirs(output_folder, exist_ok=True)

df.to_csv(filename, index=False, encoding='utf-8')