In [1]:
import os
from dotenv import load_dotenv
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Load variables from a local .env file (if any) before reading the credentials below.
load_dotenv()

# App-level (client credentials) auth: both values are read from the environment,
# so no secret is ever written into the notebook itself.
client_credentials = SpotifyClientCredentials(
    client_id=os.getenv("SPOTIFY_CLIENT_ID"),
    client_secret=os.getenv("SPOTIFY_CLIENT_SECRET"),
)

# Shared Spotify client used by every cell below.
sp = spotipy.Spotify(auth_manager=client_credentials, requests_timeout=20)

In [17]:
BILLBOARD_PLAYLIST_NAME_PHRASE = "Billboard Year End Hot 100"

def get_billboard_playlists(
    start_year: int = 2000,
    end_year: int = 2024,
    user: str = "wickeddreamer96"
):
    """Return ``{playlist_id: playlist_name}`` for a user's Billboard year-end playlists.

    A playlist is kept when its name has the form
    ``"<YYYY> Billboard Year End Hot 100..."``: a four-character year, one
    separator character, then ``BILLBOARD_PLAYLIST_NAME_PHRASE`` starting at
    index 5. Anything after the phrase (e.g. ``" (No Duplicates)"``) is allowed.

    Args:
        start_year: First year to include (inclusive).
        end_year: Last year to include (inclusive).
        user: Spotify user id whose public playlists are scanned.

    Returns:
        Dict mapping playlist id to playlist name, in the order the API
        returned the playlists.
    """
    # Index at which the phrase must begin: 4 year characters + 1 separator.
    phrase_start = 5

    billboard_playlists_by_id = {}

    # Request pages of 50 and follow the `next` links so that playlists beyond
    # the first page are not silently dropped.
    page = sp.user_playlists(user=user, limit=50)

    while page:
        for playlist in page.get("items") or []:
            name = playlist["name"]

            if name.find(BILLBOARD_PLAYLIST_NAME_PHRASE) != phrase_start:
                continue

            playlist_year = name[:4]

            # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
            if not playlist_year.isdecimal():
                continue

            if not (start_year <= int(playlist_year) <= end_year):
                continue

            billboard_playlists_by_id[playlist["id"]] = name

        page = sp.next(page) if page.get("next") else None

    return billboard_playlists_by_id

# Playlists discovered by name, plus the 2025 playlist, which is registered by its
# explicit id instead of being picked up by the name-based lookup.
# NOTE(review): this label reads "Year Hot 100" (no "End"), unlike the fetched names —
# confirm that is intentional.
billboard_playlists_by_id = {
    **get_billboard_playlists(),
    "6UeSakyzhiEt4NB3UAd6NQ": "2025 Billboard Year Hot 100",
}

billboard_playlists_by_id

{'5C41vcHVDKBCmh0kSFIkjY': '2024 Billboard Year End Hot 100 (No Duplicates)',
 '2olmwY1gfIl9hWIqG2pw3X': '2023 Billboard Year End Hot 100',
 '3xBeF2bX6RRt7HTxiQmgQj': '2022 Billboard Year End Hot 100',
 '4XbUTUESZDcddgPuSaOlrT': '2021 Billboard Year End Hot 100',
 '2jDNKyd7Fs8Zf3pLVkCasY': '2020 Billboard Year End Hot 100',
 '7tJ2JkOIYF2BTFGY3fumU0': '2019 Billboard Year End Hot 100',
 '4MCwbKJPbmfKY08aTyur9e': '2018 Billboard Year End Hot 100',
 '2XPEN88QyrPQ9zGqS8uS2x': '2017 Billboard Year End Hot 100',
 '3JbWD8OGutoTKUbR3RvR8u': '2016 Billboard Year End Hot 100',
 '6LYxiUgw87zsDPqU0sdalZ': '2015 Billboard Year End Hot 100',
 '2trgZsxRpWX7sq28yHC40u': '2014 Billboard Year End Hot 100',
 '1KK0RvFmgsUkZ8zELRZgjS': '2013 Billboard Year End Hot 100',
 '0W42jv9ZJTwCL0cLuLbTfX': '2012 Billboard Year End Hot 100',
 '2z3eLip2NlV9quzTEm37cW': '2011 Billboard Year End Hot 100',
 '4aUY170nZ3mhkzMpTAXDv2': '2010 Billboard Year End Hot 100',
 '7FnOEoPb0biPkxYYAoXO8Q': '2009 Billboard Year End Ho

In [None]:
import time
import pandas as pd

def get_artists_from_playlists(playlists_by_id: dict[str, str]):
    """Collect the unique artists credited on the tracks of the given playlists.

    Every playlist is read page by page through ``sp.playlist_items``. The
    resulting id/name pairs are also written to
    ``data/billboard_artists_2000_2025.tsv`` (tab-separated, UTF-8); the
    ``data`` directory is created when missing.

    Args:
        playlists_by_id: Mapping of playlist id to playlist name. The name is
            only used for the progress message.

    Returns:
        Dict mapping artist id to artist name. When an id occurs more than
        once, the first name seen is kept.
    """
    artists_by_id = {}
    limit = 100

    for playlist_id, playlist_name in playlists_by_id.items():
        print(f"Getting artists from {playlist_name}")

        offset = 0

        while True:
            res = sp.playlist_items(
                playlist_id,
                limit=limit,
                offset=offset,
                fields="items(track(artists(id,name))),next"
            )

            items = res.get("items")

            # An empty page means this playlist is exhausted. Leaving the loop is
            # required here: retrying without changing the offset would request
            # the same empty page forever.
            if not items:
                break

            for item in items:
                # The API can return an entry whose track is null (e.g. a track
                # that is no longer available); there is nothing to read from it.
                track = (item or {}).get("track")
                if not track:
                    continue

                for artist in track.get("artists") or []:
                    artist_id = artist.get("id")

                    # Artists without an id (presumably local files — confirm)
                    # cannot be looked up later, so they are left out.
                    if not artist_id:
                        continue

                    artists_by_id.setdefault(artist_id, artist.get("name"))

            if not res.get("next"):
                break

            offset += limit
            time.sleep(0.05)

    # to_csv does not create missing parent directories.
    os.makedirs("data", exist_ok=True)

    pd.DataFrame(
        list(artists_by_id.items()),
        columns=["id", "name"]
    ).to_csv("data/billboard_artists_2000_2025.tsv", sep="\t", index=False, encoding="utf-8")

    return artists_by_id

# Collect the artists of every Billboard playlist gathered above. Besides returning
# the id -> name mapping, this call also writes data/billboard_artists_2000_2025.tsv.
artists_by_id = get_artists_from_playlists(billboard_playlists_by_id)

Getting artists from 2024 Billboard Year End Hot 100 (No Duplicates)
Getting artists from 2023 Billboard Year End Hot 100
Getting artists from 2022 Billboard Year End Hot 100
Getting artists from 2021 Billboard Year End Hot 100
Getting artists from 2020 Billboard Year End Hot 100
Getting artists from 2019 Billboard Year End Hot 100
Getting artists from 2018 Billboard Year End Hot 100
Getting artists from 2017 Billboard Year End Hot 100
Getting artists from 2016 Billboard Year End Hot 100
Getting artists from 2015 Billboard Year End Hot 100
Getting artists from 2014 Billboard Year End Hot 100
Getting artists from 2013 Billboard Year End Hot 100
Getting artists from 2012 Billboard Year End Hot 100
Getting artists from 2011 Billboard Year End Hot 100
Getting artists from 2010 Billboard Year End Hot 100
Getting artists from 2009 Billboard Year End Hot 100
Getting artists from 2008 Billboard Year End Hot 100
Getting artists from 2007 Billboard Year End Hot 100
Getting artists from 2006 Bill

In [None]:
def get_artists_popularities_and_most_popular_song(artist_id: str):
    limit = 50
    offset = 0

    album_ids: set[str] = set()
    while True:
        res = sp.artist_albums(
            artist_id,
            include_groups="album,single,appears_on,compilation",
            market="US",
            limit=limit,
            offset=offset
        )
        items = res.get("items", [])
        album_ids.update(album["id"] for album in items if album.get("id"))

        if len(items) < limit:
            break

        offset += limit
        time.sleep(0.02)
