## Generate access token + pull raw playlist data

In [None]:
import requests
import base64
import html
import time
import pandas as pd

def get_access_token(client_id, client_secret):
    """Request a Spotify access token via the client-credentials flow.

    Args:
        client_id: Client ID of the Spotify application.
        client_secret: Client secret of the Spotify application.

    Returns:
        The ``access_token`` value from the token endpoint's JSON response.

    Raises:
        requests.HTTPError: If the token endpoint answers with a 4xx/5xx
            status (for example when the credentials are empty or wrong).
        requests.Timeout: If the endpoint does not answer within the timeout.
    """
    auth_url = 'https://accounts.spotify.com/api/token'
    # HTTP Basic credentials: base64 of "<client_id>:<client_secret>".
    auth_header = base64.b64encode(
        f"{client_id}:{client_secret}".encode()
    ).decode()

    headers = {
        'Authorization': f'Basic {auth_header}',
        'Content-Type': 'application/x-www-form-urlencoded'
    }

    data = {'grant_type': 'client_credentials'}
    # A timeout keeps the notebook cell from hanging on a stalled connection.
    auth_response = requests.post(
        auth_url, headers=headers, data=data, timeout=30
    )
    # Surface the HTTP status instead of an opaque KeyError on the lookup
    # below when the response carries an error body.
    auth_response.raise_for_status()
    return auth_response.json()['access_token']

def GET(url, params=None):
    """Send an authenticated GET request and return the decoded JSON body.

    The bearer token is read from the module-level ``access_token`` variable,
    so that variable must be assigned before the first call.

    Args:
        url: Full URL of the endpoint to query.
        params: Optional dict of query-string parameters. ``None`` (the
            default) sends the request without any.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    headers = {
        'Authorization': f'Bearer {access_token}'
    }

    # requests treats params=None the same as omitting the argument, so a
    # single call covers both the "with params" and "without params" cases.
    res = requests.get(url, headers=headers, params=params, timeout=30)
    # Raise on error statuses so callers that page through results do not
    # mistake an error payload for "no more items" and stop early.
    res.raise_for_status()
    return res.json()

def get_user_playlists(user_id, access_token):
    """Collect every playlist listed for a user, one page at a time.

    Pages are requested through ``GET`` until a page comes back with fewer
    entries than the page size (or without an ``items`` key at all).

    Args:
        user_id: Spotify user ID whose playlists are listed.
        access_token: Accepted for the caller's convenience; this function
            does not read it (requests are issued through ``GET``).

    Returns:
        A list with the raw playlist objects from all fetched pages.
    """
    page_size = 50  # Maximum allowed by Spotify API
    endpoint = f'https://api.spotify.com/v1/users/{user_id}/playlists'
    collected = []
    start = 0

    while True:
        page = GET(endpoint, {'limit': page_size, 'offset': start})
        batch = page.get('items', [])
        collected.extend(batch)

        # A short (or missing) page means there is nothing left to fetch.
        if len(batch) < page_size:
            return collected

        start += page_size

# Credentials (client id/secret are from your spotify dev dashboard)
# Both values are blank here and must be filled in before running this cell.
CLIENT_ID = ''
CLIENT_SECRET = ''
USER_ID = '1261690341' # konrad's spotify user id

# Generate Access Token
# NOTE: GET() reads `access_token` as a module-level global, so this
# assignment must run before any API request is made.
access_token = get_access_token(CLIENT_ID, CLIENT_SECRET)
# Fetch every page of playlists listed for USER_ID.
raw_playlists = get_user_playlists(USER_ID, access_token)
print(f'Found {len(raw_playlists)} public playlists.')

## Format + compile all playlist data

In [None]:
def getTracksFromPlaylist(url):
    """Fetch every raw track entry behind a playlist's track-listing URL.

    Pages are requested through ``GET`` until a page comes back with fewer
    entries than the page size (or without an ``items`` key at all).

    Args:
        url: URL of the playlist's track listing.

    Returns:
        A list with the raw entries from all fetched pages.
    """
    pageSize = 100  # maximum amount of tracks at one time allowed by Spotify API
    collected = []
    start = 0

    while True:
        page = GET(url, {'limit': pageSize, 'offset': start})
        batch = page.get('items', [])
        collected.extend(batch)

        # A short (or missing) page means there is nothing left to fetch.
        if len(batch) < pageSize:
            return collected

        start += pageSize

def compileTracks(playlist):
    """Flatten a playlist's raw track entries into simple dicts.

    Args:
        playlist: Playlist object whose ``['tracks']['href']`` holds the URL
            of its track listing.

    Returns:
        A list of dicts with the keys ``name``, ``artists`` (list of artist
        names), ``album``, ``url`` (``None`` when no Spotify URL is present)
        and ``dateAdded``. Entries whose ``track`` is ``None`` are skipped.
    """
    compiledTracks = []

    for entry in getTracksFromPlaylist(playlist['tracks']['href']):
        track = entry['track']
        if track is None:
            continue

        # Treat a missing, None, or empty external_urls mapping -- or one
        # without a 'spotify' entry -- as "no URL" instead of raising.
        externalUrls = track.get('external_urls') or {}

        compiledTracks.append({
            'name': track['name'],
            'artists': [artist['name'] for artist in track['artists']],
            'album': track['album']['name'],
            'url': externalUrls.get('spotify'),
            'dateAdded': entry['added_at'],
        })

    return compiledTracks

def compilePlaylistInfo(playlist):
    """Build the final, flattened record for a single playlist.

    Issues one extra request through ``GET`` for the follower count and
    delegates to ``compileTracks`` for the track list.

    Args:
        playlist: Raw playlist object; the keys ``id``, ``name``, ``images``,
            ``tracks``, ``description`` and ``external_urls`` are read.

    Returns:
        A dict with the keys ``name``, ``id``, ``followers``, ``dateCreated``,
        ``description``, ``url``, ``image``, ``trackCount`` and ``tracks``.
        ``dateCreated`` is the earliest ``dateAdded`` among the tracks,
        formatted as ``MM-DD-YYYY``, or ``None`` when no track has a date.
        ``image`` is the first image URL, or ``None`` when there is none.
    """
    playlistId = playlist['id']  # avoid shadowing the builtin `id`
    followers = GET(
        f'https://api.spotify.com/v1/playlists/{playlistId}', None
    )['followers']['total']

    # `images` may be None or an empty list; both mean "no cover image".
    images = playlist['images']
    image = images[0]['url'] if images else None

    tracks = None
    dateCreated = None
    # With no track data at all, report 0 tracks rather than failing on the
    # ['total'] lookup.
    trackCount = 0

    if playlist['tracks'] is not None:
        trackCount = playlist['tracks']['total']
        tracks = compileTracks(playlist)
        # Skip tracks without a date so the comparison never sees None.
        addedDates = [
            pd.to_datetime(track['dateAdded'])
            for track in tracks
            if track['dateAdded'] is not None
        ]
        if addedDates:
            dateCreated = min(addedDates).strftime('%m-%d-%Y')

    return {
        'name': playlist['name'],
        'id': playlistId,
        'followers': followers,
        'dateCreated': dateCreated,
        'description': playlist['description'],
        'url': playlist['external_urls']['spotify'],
        'image': image,
        'trackCount': trackCount,
        'tracks': tracks,
    }

# Build the compiled list, keeping only playlists whose owner is displayed
# as 'Konrad Rauscher'; `i` is the position within raw_playlists.
allFormattedPlaylists = []
for i, playlist in enumerate(raw_playlists):
    if playlist['owner']['display_name'] == 'Konrad Rauscher':
        pl = compilePlaylistInfo(playlist)
        allFormattedPlaylists.append(pl)
        print(f'\rProcessed Playlist {i}...', end='', flush=True)
        # Brief pause before the next playlist (presumably to stay under the
        # API rate limit -- confirm).
        time.sleep(0.25)

print(f'\nSuccessfully compiled {len(allFormattedPlaylists)} playlists.')

## Export to CSV file

In [89]:
# Tabulate the compiled playlists and keep only those that contain songs.
df = pd.DataFrame(allFormattedPlaylists)
df = df[df['trackCount'] != 0]
# HTML-escape every string cell; non-string cells pass through unchanged.
df = df.map(lambda cell: html.escape(str(cell)) if isinstance(cell, str) else cell)
df.to_csv('playlists.csv', index=False, lineterminator='\r\n', encoding='utf-8')

## Export to JSON file

In [87]:
df = pd.DataFrame(allFormattedPlaylists)
df.drop(df[df['trackCount'] == 0].index, inplace = True) # filters out playlists with 0 songs
# HTML-escape every string cell; non-string cells pass through unchanged.
df = df.map(lambda x: html.escape(str(x)) if isinstance(x, str) else x)
# Write the file once, through a handle opened explicitly as UTF-8.
# force_ascii=False keeps non-ASCII characters as-is instead of \u escapes.
with open('playlists-utf8.json', 'w', encoding='utf-8') as file:
    df.to_json(file, force_ascii=False, orient='records', indent=2)
# use a formatter like Prettier in a separate IDE to finish formatting correctly

## Clean for Web App

In [88]:
# Drop the columns the web app does not use, then overwrite the JSON export.
df = df.drop(columns=['id', 'description'])
with open('playlists-utf8.json', mode='w', encoding='utf-8') as file:
    df.to_json(file, orient='records', indent=2, force_ascii=False)