### Get Spotify credentials

In [1]:
# Get required info for token request.
# Credentials are read from the environment; load_dotenv() first merges in any
# values found in a local .env file so secrets never live in the notebook itself.
import os
from dotenv import load_dotenv
load_dotenv()
client_id = os.getenv("SPOTIFY_CLIENT_ID")
client_secret = os.getenv("SPOTIFY_CLIENT_SECRET")
token_url = "https://accounts.spotify.com/api/token"

# Fail early with an actionable message: os.getenv returns None for unset variables,
# and sending (None, None) as basic-auth credentials only yields an opaque HTTP error.
if not client_id or not client_secret:
    raise RuntimeError(
        "SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET must be set "
        "(in the environment or in a .env file)"
    )

# Perform OAuth client credentials authorization
import requests
r = requests.post(
    token_url,
    data={ "grant_type": "client_credentials" },
    auth=(client_id, client_secret),
    # requests waits forever by default; bound the wait so a stalled connection
    # cannot hang the kernel.
    timeout=30,
)
r.raise_for_status()
response = r.json()
token = response["access_token"]
# Bearer header reused by every Spotify Web API call later in the notebook
headers = { "Authorization": f"Bearer {token}" }

### Get Song a Day playlist data

In [2]:
# Spotify playlist ID for each year's "Song a Day" playlist
playlists = { 2019: "6xJjyO5AGrZkyGkWYXfUX8", 2020: "3iNn4yKx9PxpSVYfRXkK8i" }
years = sorted(playlists.keys())
# year -> list of reshaped track dicts, in playlist order
tracks = { year: [] for year in years }

# Album image pixel width -> suffix of the flattened "album_image_<size>" key.
# Defined once here instead of once per playlist item.
img_sizes = { 640: "large", 300: "medium", 64: "small" }

# For each year's playlist, grab tracks
for year in years:
    # Page through track list for the playlist
    next_url = f"https://api.spotify.com/v1/playlists/{playlists[year]}/tracks"
    while next_url:
        # Fetch the current page (bounded wait so a stalled connection cannot hang the kernel)
        r = requests.get(next_url, headers=headers, timeout=30)
        r.raise_for_status()
        response = r.json()
        # Reshape each playlist item and add to our "tracks" list for the relevant year
        for playlist_item in response["items"]:
            track_info = playlist_item["track"]
            album_info = track_info["album"]
            # NOTE(review): assumes every track lists at least one artist - confirm for local files
            track_artists = track_info["artists"]
            tracks[year].append({
                "track_name": track_info["name"],
                "track_id": track_info["id"],
                "album_id": album_info["id"],
                "album_name": album_info["name"],
                # One "album_image_<size>" entry per recognised image width. Images with a
                # missing or unexpected width are skipped rather than raising KeyError.
                **{
                    f"album_image_{img_sizes[image['width']]}": image["url"]
                    for image in album_info["images"]
                    if image.get("width") in img_sizes
                },
                "artists": [
                    { "name": artist["name"], "id": artist["id"] }
                    for artist in track_artists
                ],
                "primary_artist_name": track_artists[0]["name"],
                "primary_artist_id": track_artists[0]["id"],
                "is_local": playlist_item["is_local"]
            })
        # Take note of the URL of the next page of playlist items (None on the last page
        # ends the while loop)
        next_url = response["next"]

In [3]:
from datetime import datetime, timedelta

# Flat list of every track across all years, in playlist order
all_tracks = []

# Stamp each track with a date: the n-th track of a year's playlist gets the
# n-th day of that year, starting from January 1st.
for year in years:
    jan_first = datetime(year, 1, 1)
    for day_offset, track in enumerate(tracks[year]):
        track["date"] = jan_first + timedelta(days=day_offset)
    # The same (now dated) dicts are shared between tracks[year] and all_tracks
    all_tracks.extend(tracks[year])

# Sanity check: show the last track of the 2019 playlist
print(tracks[2019][-1])

{'track_name': 'Northstar (feat. Vic Mensa & Maceo Haymes)', 'track_id': '2rQo6riaUQ9KRzDgPLqeWx', 'album_id': '1H2Y4lomFedlGP6yEd15cT', 'album_name': 'Opia', 'album_image_large': 'https://i.scdn.co/image/ab67616d0000b2732fe16c52fdb5c238f66586c1', 'album_image_medium': 'https://i.scdn.co/image/ab67616d00001e022fe16c52fdb5c238f66586c1', 'album_image_small': 'https://i.scdn.co/image/ab67616d000048512fe16c52fdb5c238f66586c1', 'artists': [{'name': 'Malcolm London', 'id': '1ikfT4dtC0eCVk4TZdfHGO'}], 'primary_artist_name': 'Malcolm London', 'primary_artist_id': '1ikfT4dtC0eCVk4TZdfHGO', 'is_local': False, 'date': datetime.datetime(2019, 12, 31, 0, 0)}


### Load tracks into a pandas DataFrame

In [4]:
import pandas as pd

# Lift the column cap so wide frames are displayed without hiding columns
pd.set_option("display.max_columns", None)

# One row per dated track; columns come from the keys of the track dicts
track_data_df = pd.DataFrame(data=all_tracks)
track_data_df

Unnamed: 0,track_name,track_id,album_id,album_name,album_image_large,album_image_medium,album_image_small,artists,primary_artist_name,primary_artist_id,is_local,date
0,So It Goes,0EA2RhRHL4KWeNa7JfD1Yw,5wtE5aLX5r7jOosmPhJhhk,Swimming,https://i.scdn.co/image/ab67616d0000b273175c57...,https://i.scdn.co/image/ab67616d00001e02175c57...,https://i.scdn.co/image/ab67616d00004851175c57...,"[{'name': 'Mac Miller', 'id': '4LLpKhyESsyAXpc...",Mac Miller,4LLpKhyESsyAXpc4laK94U,False,2019-01-01
1,Fertilizer,6jypaMkKsoc5npsVzxhksl,392p3shh2jkxUxY2VHvlH8,channel ORANGE,https://i.scdn.co/image/ab67616d0000b2737aede4...,https://i.scdn.co/image/ab67616d00001e027aede4...,https://i.scdn.co/image/ab67616d000048517aede4...,"[{'name': 'Frank Ocean', 'id': '2h93pZq0e7k5yf...",Frank Ocean,2h93pZq0e7k5yf4dywlkpM,False,2019-01-02
2,Brain Damage,05uGBKRCuePsf43Hfm0JwX,4LH4d3cOWNNsVw41Gqt2kv,The Dark Side of the Moon,https://i.scdn.co/image/ab67616d0000b273f05e5a...,https://i.scdn.co/image/ab67616d00001e02f05e5a...,https://i.scdn.co/image/ab67616d00004851f05e5a...,"[{'name': 'Pink Floyd', 'id': '0k17h0D3J5Vfsdm...",Pink Floyd,0k17h0D3J5VfsdmQ1iZtE9,False,2019-01-03
3,Jet Fuel,2JmFRXaJrkAUD2cs6U20KG,5wtE5aLX5r7jOosmPhJhhk,Swimming,https://i.scdn.co/image/ab67616d0000b273175c57...,https://i.scdn.co/image/ab67616d00001e02175c57...,https://i.scdn.co/image/ab67616d00004851175c57...,"[{'name': 'Mac Miller', 'id': '4LLpKhyESsyAXpc...",Mac Miller,4LLpKhyESsyAXpc4laK94U,False,2019-01-04
4,Come Back to Earth,01z2fBGB8Hl3Jd3zXe4IXR,5wtE5aLX5r7jOosmPhJhhk,Swimming,https://i.scdn.co/image/ab67616d0000b273175c57...,https://i.scdn.co/image/ab67616d00001e02175c57...,https://i.scdn.co/image/ab67616d00004851175c57...,"[{'name': 'Mac Miller', 'id': '4LLpKhyESsyAXpc...",Mac Miller,4LLpKhyESsyAXpc4laK94U,False,2019-01-05
...,...,...,...,...,...,...,...,...,...,...,...,...
657,Sober,0PXCdhnm9fRgrIgoMcteZa,4oCGmYsAQOWt2ACWTpNUU6,Melodrama,https://i.scdn.co/image/ab67616d0000b27375083e...,https://i.scdn.co/image/ab67616d00001e0275083e...,https://i.scdn.co/image/ab67616d0000485175083e...,"[{'name': 'Lorde', 'id': '163tK9Wjr9P9DmM0AVK7...",Lorde,163tK9Wjr9P9DmM0AVK7lm,False,2020-10-19
658,Liability (Reprise),6lFDB0BFDRV58qaQDX85jV,2B87zXm9bOWvAJdkJBTpzF,Melodrama,https://i.scdn.co/image/ab67616d0000b273f8553e...,https://i.scdn.co/image/ab67616d00001e02f8553e...,https://i.scdn.co/image/ab67616d00004851f8553e...,"[{'name': 'Lorde', 'id': '163tK9Wjr9P9DmM0AVK7...",Lorde,163tK9Wjr9P9DmM0AVK7lm,False,2020-10-20
659,What You Don't Do,0gorODYIUfyg83lBPnPbZs,0OyUgwL97FT5MWpBLqL6br,Blood,https://i.scdn.co/image/ab67616d0000b273f5a142...,https://i.scdn.co/image/ab67616d00001e02f5a142...,https://i.scdn.co/image/ab67616d00004851f5a142...,"[{'name': 'Lianne La Havas', 'id': '2RP4pPHTXl...",Lianne La Havas,2RP4pPHTXlQpDnO9LvR7Yt,False,2020-10-21
660,Shake Me Down,1Hb1IJ9bBCa6wo3fRtexnJ,0WizSRN8LuMWhliou9PFlg,Thank You Happy Birthday,https://i.scdn.co/image/ab67616d0000b2735f1590...,https://i.scdn.co/image/ab67616d00001e025f1590...,https://i.scdn.co/image/ab67616d000048515f1590...,"[{'name': 'Cage The Elephant', 'id': '26T3Ltbu...",Cage The Elephant,26T3LtbuGT1Fu9m0eRq5X3,False,2020-10-22


### Get audio features for each track

In [5]:
audio_features_data = []

# Fields kept from each audio-features object returned by Spotify
keys = [
    "id",
    # Concrete musical properties
    "mode", "key", "tempo", "time_signature", "duration_ms",
    # Mood
    "danceability", "energy", "speechiness", "acousticness", "instrumentalness", "liveness", "valence"
]

# deduplicated ID of every track.
# This ensures that our audio_features dataframe only contains each track once and we can
# perform a many-to-one merge.
# Local files are excluded, as are tracks without an ID (which would break the
# comma-joined "ids" parameter below).
track_ids = list({
    track["track_id"]
    for track in all_tracks
    if not track["is_local"] and track["track_id"]
})

# Fetch data from spotify API, 100 IDs per request
endpoint = "https://api.spotify.com/v1/audio-features"
for idx in range(0, len(track_ids), 100):
    ids_batch = track_ids[idx:idx+100]
    # Bounded wait so a stalled connection cannot hang the kernel
    r = requests.get(endpoint, headers=headers, params={ "ids": ",".join(ids_batch) }, timeout=30)
    r.raise_for_status()
    # Spotify puts null in the list for any track it has no audio features for;
    # skip those so they simply end up with NaN features after the left merge.
    audio_features_data.extend(
        { k: track_data[k] for k in keys }
        for track_data in r.json()["audio_features"]
        if track_data is not None
    )

# Merge this new data into basic track data.
# Passing columns=keys keeps the "id" column present even when no features were
# fetched, so the rename and merge below still work on an empty result.
audio_features_df = pd.DataFrame(audio_features_data, columns=keys).rename(columns={ "id" : "track_id" })
# Left merge keeps every dated track; validate guards against duplicate feature rows
tracks_df = pd.merge(track_data_df, audio_features_df, how="left", on="track_id", validate="many_to_one")
tracks_df

Unnamed: 0,track_name,track_id,album_id,album_name,album_image_large,album_image_medium,album_image_small,artists,primary_artist_name,primary_artist_id,is_local,date,mode,key,tempo,time_signature,duration_ms,danceability,energy,speechiness,acousticness,instrumentalness,liveness,valence
0,So It Goes,0EA2RhRHL4KWeNa7JfD1Yw,5wtE5aLX5r7jOosmPhJhhk,Swimming,https://i.scdn.co/image/ab67616d0000b273175c57...,https://i.scdn.co/image/ab67616d00001e02175c57...,https://i.scdn.co/image/ab67616d00004851175c57...,"[{'name': 'Mac Miller', 'id': '4LLpKhyESsyAXpc...",Mac Miller,4LLpKhyESsyAXpc4laK94U,False,2019-01-01,1.0,0.0,148.173,4.0,312960.0,0.576,0.556,0.2310,0.81200,0.125000,0.0943,0.294
1,Fertilizer,6jypaMkKsoc5npsVzxhksl,392p3shh2jkxUxY2VHvlH8,channel ORANGE,https://i.scdn.co/image/ab67616d0000b2737aede4...,https://i.scdn.co/image/ab67616d00001e027aede4...,https://i.scdn.co/image/ab67616d000048517aede4...,"[{'name': 'Frank Ocean', 'id': '2h93pZq0e7k5yf...",Frank Ocean,2h93pZq0e7k5yf4dywlkpM,False,2019-01-02,1.0,11.0,115.864,4.0,39640.0,0.706,0.570,0.0841,0.68900,0.069800,0.2880,0.960
2,Brain Damage,05uGBKRCuePsf43Hfm0JwX,4LH4d3cOWNNsVw41Gqt2kv,The Dark Side of the Moon,https://i.scdn.co/image/ab67616d0000b273f05e5a...,https://i.scdn.co/image/ab67616d00001e02f05e5a...,https://i.scdn.co/image/ab67616d00004851f05e5a...,"[{'name': 'Pink Floyd', 'id': '0k17h0D3J5Vfsdm...",Pink Floyd,0k17h0D3J5VfsdmQ1iZtE9,False,2019-01-03,1.0,2.0,133.577,4.0,226667.0,0.322,0.265,0.0302,0.07260,0.340000,0.3660,0.207
3,Jet Fuel,2JmFRXaJrkAUD2cs6U20KG,5wtE5aLX5r7jOosmPhJhhk,Swimming,https://i.scdn.co/image/ab67616d0000b273175c57...,https://i.scdn.co/image/ab67616d00001e02175c57...,https://i.scdn.co/image/ab67616d00004851175c57...,"[{'name': 'Mac Miller', 'id': '4LLpKhyESsyAXpc...",Mac Miller,4LLpKhyESsyAXpc4laK94U,False,2019-01-04,0.0,7.0,119.973,4.0,345213.0,0.791,0.557,0.0698,0.54000,0.000000,0.1030,0.284
4,Come Back to Earth,01z2fBGB8Hl3Jd3zXe4IXR,5wtE5aLX5r7jOosmPhJhhk,Swimming,https://i.scdn.co/image/ab67616d0000b273175c57...,https://i.scdn.co/image/ab67616d00001e02175c57...,https://i.scdn.co/image/ab67616d00004851175c57...,"[{'name': 'Mac Miller', 'id': '4LLpKhyESsyAXpc...",Mac Miller,4LLpKhyESsyAXpc4laK94U,False,2019-01-05,0.0,2.0,83.507,4.0,161840.0,0.272,0.238,0.0349,0.89000,0.009250,0.2750,0.119
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
657,Sober,0PXCdhnm9fRgrIgoMcteZa,4oCGmYsAQOWt2ACWTpNUU6,Melodrama,https://i.scdn.co/image/ab67616d0000b27375083e...,https://i.scdn.co/image/ab67616d00001e0275083e...,https://i.scdn.co/image/ab67616d0000485175083e...,"[{'name': 'Lorde', 'id': '163tK9Wjr9P9DmM0AVK7...",Lorde,163tK9Wjr9P9DmM0AVK7lm,False,2020-10-19,1.0,6.0,107.901,4.0,197236.0,0.796,0.467,0.1810,0.17300,0.004110,0.1260,0.516
658,Liability (Reprise),6lFDB0BFDRV58qaQDX85jV,2B87zXm9bOWvAJdkJBTpzF,Melodrama,https://i.scdn.co/image/ab67616d0000b273f8553e...,https://i.scdn.co/image/ab67616d00001e02f8553e...,https://i.scdn.co/image/ab67616d00004851f8553e...,"[{'name': 'Lorde', 'id': '163tK9Wjr9P9DmM0AVK7...",Lorde,163tK9Wjr9P9DmM0AVK7lm,False,2020-10-20,1.0,0.0,77.759,4.0,136020.0,0.552,0.277,0.0635,0.90400,0.000000,0.1070,0.260
659,What You Don't Do,0gorODYIUfyg83lBPnPbZs,0OyUgwL97FT5MWpBLqL6br,Blood,https://i.scdn.co/image/ab67616d0000b273f5a142...,https://i.scdn.co/image/ab67616d00001e02f5a142...,https://i.scdn.co/image/ab67616d00004851f5a142...,"[{'name': 'Lianne La Havas', 'id': '2RP4pPHTXl...",Lianne La Havas,2RP4pPHTXlQpDnO9LvR7Yt,False,2020-10-21,1.0,2.0,144.005,3.0,220587.0,0.656,0.731,0.0876,0.45100,0.000002,0.0811,0.649
660,Shake Me Down,1Hb1IJ9bBCa6wo3fRtexnJ,0WizSRN8LuMWhliou9PFlg,Thank You Happy Birthday,https://i.scdn.co/image/ab67616d0000b2735f1590...,https://i.scdn.co/image/ab67616d00001e025f1590...,https://i.scdn.co/image/ab67616d000048515f1590...,"[{'name': 'Cage The Elephant', 'id': '26T3Ltbu...",Cage The Elephant,26T3LtbuGT1Fu9m0eRq5X3,False,2020-10-22,1.0,9.0,105.110,4.0,211373.0,0.457,0.613,0.0295,0.00463,0.000441,0.0711,0.132


### Export data

In [6]:
# Abbreviate: leave out the medium-size image URL and the nested artists list
export_df = tracks_df.drop(["album_image_medium", "artists"], axis="columns")

# Shorten image URLs to save space
def discard_cdn_prefix(url):
    """Strip the Spotify image CDN prefix from ``url``.

    Returns the part of ``url`` after ``https://i.scdn.co/image/`` when ``url``
    is a string starting with that prefix. Any other value (a non-string such
    as NaN/None, or a string with a different prefix) is returned unchanged.
    """
    cdn_root = "https://i.scdn.co/image/"
    if not isinstance(url, str):
        return url
    if not url.startswith(cdn_root):
        return url
    return url[len(cdn_root):]
# Only the large and small image columns remain in export_df; shorten both
for image_column in ("album_image_large", "album_image_small"):
    export_df[image_column] = export_df[image_column].apply(discard_cdn_prefix)

# Date index makes JSON export nicer
export_df = export_df.set_index("date")

# Export data

# Formatted JSON for easy reading
export_df.to_json(
    "./tracks.json",
    orient="index",
    date_format="iso",
    indent=2,
)
# CSV is the "clean" distributable data export; CSV takes up a lot less space than even non-indented JSON does
export_df.to_csv("./tracks.csv")