In [24]:
import os

from dotenv import dotenv_values, load_dotenv

# Load variables from a local .env file into the process environment.
# load_dotenv() keeps any variable that is already set in the environment.
load_dotenv()

SPOTIPY_CLIENT_ID = os.getenv("SPOTIPY_CLIENT_ID")
SPOTIPY_CLIENT_SECRET = os.getenv("SPOTIPY_CLIENT_SECRET")

# "USER" is usually pre-set by Unix-like shells to the OS login name, so the
# value written in .env would be ignored by load_dotenv() above. Prefer the
# value from the .env file itself and only fall back to the environment.
USER = dotenv_values().get("USER") or os.getenv("USER")

In [25]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Authenticate the app with its client id / secret and build the API client.
auth_manager = SpotifyClientCredentials(
    client_id=SPOTIPY_CLIENT_ID,
    client_secret=SPOTIPY_CLIENT_SECRET,
)
sp = spotipy.Spotify(auth_manager=auth_manager)

# Fetch *all* of the user's playlists, not just the first page.
# NOTE(review): the Web API documents a maximum page size of 50 for this
# endpoint, so 50 is requested per page - confirm against the API reference.
playlists = sp.user_playlists(user=USER, limit=50)

# Follow the "next" links and merge every page into playlists["items"], which
# is the key the rest of the notebook reads. The other paging fields
# (e.g. "next", "offset") still describe the first page only.
page = playlists
while page and page.get("next"):
    page = sp.next(page)
    if page:
        playlists["items"].extend(page["items"])

In [26]:
import pandas as pd
import statistics as stat

In [27]:
# define playlists to pull
## more work needed for automation
year_starting = 2024
years = 2
months = 12


def build_playlist_names(year_starting, years, months=12):
    """Return playlist names in ``mm.yyyy`` format, ordered by year then month.

    Parameters
    ----------
    year_starting : int
        First calendar year to generate names for.
    years : int
        Number of consecutive years to cover.
    months : int, default 12
        Number of months generated per year, counted from month 01.

    Returns
    -------
    list[str]
        ``years * months`` names, e.g. ``["01.2024", "02.2024", ...]``.
    """
    return [
        # zero-pad the month so single digits read 01, 02, ...
        f"{str(month).zfill(2)}.{year_starting + year_offset}"
        for year_offset in range(years)
        for month in range(1, months + 1)
    ]


# auto generate list of playlist names based on range provided (format mm.yyyy)
# Works for any number of years / months; the list is built in order, so no
# slot is ever left as None or overwritten.
pl_names = build_playlist_names(year_starting, years, months)

# Tabulate the playlist entries returned by the initial API load.
pl_info = pd.DataFrame(playlists["items"])

# Boolean mask: True where the playlist name is one of the generated mm.yyyy names.
pl_filter = pl_info["name"].isin(pl_names)

# Keep only the in-scope playlists and the columns used by the analysis,
# then renumber the rows from 0.
pl_scope = (
    pl_info
    .loc[pl_filter, ["images", "name", "tracks", "id"]]
    .reset_index(drop=True)
)

### get playlist covers
num_pl = len(pl_scope)
# Placeholder list kept from the original cell; nothing here fills it.
pl_covers = [None] * num_pl

# Fetch the cover image data for every in-scope playlist and assign the whole
# column in one step. Writing through pl_scope["images"].iloc[i] is chained
# assignment: it warns on recent pandas and does not update pl_scope when
# Copy-on-Write is enabled.
pl_scope["images"] = [
    sp.playlist_cover_image(playlist_id) for playlist_id in pl_scope["id"]
]

In [None]:
### get playlist track information
# Only the track details needed for the analysis are requested.
track_fields = "items.track.id, items.track.name, items.track.duration_ms, items.track.popularity"

# pl_tracks[i]  -> raw item dicts for the i-th playlist in pl_scope
# flattened[i]  -> the same items normalised into a DataFrame
pl_tracks = []
flattened = []
for pl_id in pl_scope["id"]:
    # A single playlist_items call returns one page only, so keep requesting
    # from the current offset until the API returns an empty page. This avoids
    # silently truncating long playlists and makes no assumption about the
    # page size the API actually applies.
    items = []
    while True:
        page = sp.playlist_items(
            playlist_id=pl_id,
            fields=track_fields,
            offset=len(items),
        )["items"]
        if not page:
            break
        items.extend(page)
    pl_tracks.append(items)

    # returns json lists, normalise: nested keys become "track_<field>" columns
    tracks_df = pd.json_normalize(items, sep="_")
    # clean headers
    tracks_df.columns = tracks_df.columns.str.replace("track_", "")
    # tag every row with the playlist it came from
    tracks_df["playlist_id"] = pl_id
    flattened.append(tracks_df)

In [23]:
# Preview the normalised track table built for the first playlist in pl_scope.
flattened[0]

Unnamed: 0,duration_ms,name,popularity,id,playlist_id
0,200045,POP POP POP (feat. Danny Brown),38,5gGHRgEQ2wfgjKRyPaOG2P,5NDD0eLJwABWYVzyB0fIF6
1,180724,IS THIS LOVE,50,4SrhXzAN9P1LJlkvs91PKe,5NDD0eLJwABWYVzyB0fIF6
2,229373,Everybody Wins,32,6YZTTcD1W9vGNGw8Yzicgt,5NDD0eLJwABWYVzyB0fIF6
3,180807,I See Myself,59,4gFFHAj5iwUEHwPpjZTdi5,5NDD0eLJwABWYVzyB0fIF6
4,145506,GIVE IT TO ME,49,7xvWCrjw6cO2oyGX8J1A0V,5NDD0eLJwABWYVzyB0fIF6
5,205866,Legend Has It,59,7satW8tFLasyZbftvrWFBP,5NDD0eLJwABWYVzyB0fIF6
6,345796,movies for guys,49,1urgZoAjz91vFqPEokA1OR,5NDD0eLJwABWYVzyB0fIF6
7,279693,Moonage Daydream - 2012 Remaster,69,6mib3N4E8PZHAGQ3xy7bho,5NDD0eLJwABWYVzyB0fIF6
8,168146,おちゃんせんすぅす,36,7pVJtMyNMXlYBh4MevJ692,5NDD0eLJwABWYVzyB0fIF6
9,212000,Stop Pretending you Like Salad,52,6fTH4b7b5Mvp9xxOSoDiZl,5NDD0eLJwABWYVzyB0fIF6
