In [1]:
import os
from dotenv import load_dotenv
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import pandas as pd

load_dotenv()  # load the .env file

CLIENT_ID = os.getenv("SPOTIPY_CLIENT_ID")
CLIENT_SECRET = os.getenv("SPOTIPY_CLIENT_SECRET")
REDIRECT_URI = os.getenv("SPOTIPY_REDIRECT_URI")

# Quick check
CLIENT_ID, REDIRECT_URI


('9b0c00a5ea344ac29e2f5861cb242c1c', 'http://127.0.0.1:3000/callback')

In [2]:
scope = (
    "user-read-private "
    "user-read-email "
    "user-top-read "
    "user-read-recently-played "
    "playlist-read-private"
)

sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
    redirect_uri=REDIRECT_URI,
    scope=scope
))

sp.current_user()


{'country': 'US',
 'display_name': 'Daniâ™¡',
 'email': 'daniela.binns@gmail.com',
 'explicit_content': {'filter_enabled': False, 'filter_locked': False},
 'external_urls': {'spotify': 'https://open.spotify.com/user/e32ht3uw976lewxrj812m3onb'},
 'followers': {'href': None, 'total': 13},
 'href': 'https://api.spotify.com/v1/users/e32ht3uw976lewxrj812m3onb',
 'id': 'e32ht3uw976lewxrj812m3onb',
 'images': [{'height': 300,
   'url': 'https://i.scdn.co/image/ab6775700000ee85d03388930481e3078272afe1',
   'width': 300},
  {'height': 64,
   'url': 'https://i.scdn.co/image/ab67757000003b82d03388930481e3078272afe1',
   'width': 64}],
 'product': 'premium',
 'type': 'user',
 'uri': 'spotify:user:e32ht3uw976lewxrj812m3onb'}

In [3]:
results = sp.current_user_recently_played(limit=50)

recent_tracks = []
for item in results["items"]:
    track = item["track"]

    recent_tracks.append({
        "played_at": item["played_at"],
        "track_name": track["name"],
        "track_id": track["id"],
        "artist_name": track["artists"][0]["name"],
        "artist_id": track["artists"][0]["id"],
        "album": track["album"]["name"],
        "isrc": track["external_ids"].get("isrc"),
        "explicit": track["explicit"],
        "release_date": track["album"]["release_date"],
        "popularity": track["popularity"]
    })

df_recent = pd.DataFrame(recent_tracks)
df_recent.head()


Unnamed: 0,played_at,track_name,track_id,artist_name,artist_id,album,isrc,explicit,release_date,popularity
0,2025-11-24T23:13:18.841Z,Mr. Jones,5DiXcVovI0FcY2s0icWWUu,Counting Crows,0vEsuISMWAKNctLlUAhSZC,August And Everything After,USIR10000287,False,1993-01-01,80
1,2025-11-24T23:08:51.695Z,Wrong Way,2PdIo7ewQPuAsP99LVg9uy,Sublime,0EdvGhlC1FkGItLOWQzG4J,Sublime,USGA19649249,True,1996-07-30,71
2,2025-11-24T23:06:39.391Z,Rockstar - 2020 Remaster,05tlUh0E6IFks9sCQguQKN,Nickelback,6deZN1bslXzeGvOLaLMOIF,All The Right Reasons (15th Anniversary Expand...,USRH12000116,False,2005,75
3,2025-11-24T23:02:28.690Z,One Last Breath,42T2QQv3xgBlpQxaSP7lnK,Creed,43sZBwHjahUvgbx1WNIkIz,Weathered,USWU30107505,False,2001-01-01,83
4,2025-11-24T22:57:33.322Z,You Shook Me All Night Long,2SiXAy7TuUkycRVbbWDEpo,AC/DC,711MCceyCBcFnzjGY4Q7Un,Back In Black,AUAP08000047,False,1980-07-25,83


In [4]:
unique_artist_ids = df_recent["artist_id"].dropna().unique().tolist()

artist_data = []
for a_id in unique_artist_ids:
    artist = sp.artist(a_id)
    artist_data.append({
        "artist_id": a_id,
        "artist_name": artist["name"],
        "artist_popularity": artist["popularity"],
        "followers": artist["followers"]["total"],
        "genres": artist["genres"]
    })

df_artists = pd.DataFrame(artist_data)
df_artists.head()


Unnamed: 0,artist_id,artist_name,artist_popularity,followers,genres
0,0vEsuISMWAKNctLlUAhSZC,Counting Crows,69,1967656,[]
1,0EdvGhlC1FkGItLOWQzG4J,Sublime,72,3415787,"[reggae rock, ska punk, ska]"
2,6deZN1bslXzeGvOLaLMOIF,Nickelback,79,7896901,"[post-grunge, rock]"
3,43sZBwHjahUvgbx1WNIkIz,Creed,74,4353570,[post-grunge]
4,711MCceyCBcFnzjGY4Q7Un,AC/DC,82,32488489,"[rock, hard rock, classic rock, rock and roll]"


In [5]:
df_user_full = df_recent.merge(
    df_artists,
    on="artist_id",
    how="left"
)

df_user_full.head()


Unnamed: 0,played_at,track_name,track_id,artist_name_x,artist_id,album,isrc,explicit,release_date,popularity,artist_name_y,artist_popularity,followers,genres
0,2025-11-24T23:13:18.841Z,Mr. Jones,5DiXcVovI0FcY2s0icWWUu,Counting Crows,0vEsuISMWAKNctLlUAhSZC,August And Everything After,USIR10000287,False,1993-01-01,80,Counting Crows,69,1967656,[]
1,2025-11-24T23:08:51.695Z,Wrong Way,2PdIo7ewQPuAsP99LVg9uy,Sublime,0EdvGhlC1FkGItLOWQzG4J,Sublime,USGA19649249,True,1996-07-30,71,Sublime,72,3415787,"[reggae rock, ska punk, ska]"
2,2025-11-24T23:06:39.391Z,Rockstar - 2020 Remaster,05tlUh0E6IFks9sCQguQKN,Nickelback,6deZN1bslXzeGvOLaLMOIF,All The Right Reasons (15th Anniversary Expand...,USRH12000116,False,2005,75,Nickelback,79,7896901,"[post-grunge, rock]"
3,2025-11-24T23:02:28.690Z,One Last Breath,42T2QQv3xgBlpQxaSP7lnK,Creed,43sZBwHjahUvgbx1WNIkIz,Weathered,USWU30107505,False,2001-01-01,83,Creed,74,4353570,[post-grunge]
4,2025-11-24T22:57:33.322Z,You Shook Me All Night Long,2SiXAy7TuUkycRVbbWDEpo,AC/DC,711MCceyCBcFnzjGY4Q7Un,Back In Black,AUAP08000047,False,1980-07-25,83,AC/DC,82,32488489,"[rock, hard rock, classic rock, rock and roll]"


In [None]:
def get_playlist_tracks(playlist_id):

    playlist = sp.playlist_tracks(playlist_id, limit=100)

    records = []

    for item in playlist["items"]:
        track = item["track"]
        if track is None:
            continue

        records.append({
            "track_id": track["id"],
            "track_name": track["name"],
            "artist_id": track["artists"][0]["id"],
            "artist_name": track["artists"][0]["name"],
            "popularity": track["popularity"],
            "isrc": track["external_ids"].get("isrc"),
            "explicit": track["explicit"],
            "release_date": track["album"]["release_date"]
        })

    return pd.DataFrame(records)


In [None]:
PLAYLISTS = {
    "todays_top_hits": "37i9dQZF1DXcBWIGoYBM5M",
    "global_top_50": "37i9dQZEVXbMDoHDwVN2tF",
    "viral_50_global": "37i9dQZEVXbLiRSasKsNU9"
}

df_global_list = []

for name, pid in PLAYLISTS.items():
    df = get_playlist_tracks(pid)
    df["source_playlist"] = name
    df_global_list.append(df)

df_global = pd.concat(df_global_list, ignore_index=True)
df_global.head()


In [None]:
global_artist_ids = df_global["artist_id"].dropna().unique().tolist()

global_artists = []
for a_id in global_artist_ids:
    artist = sp.artist(a_id)
    global_artists.append({
        "artist_id": a_id,
        "artist_name": artist["name"],
        "artist_popularity": artist["popularity"],
        "followers": artist["followers"]["total"],
        "genres": artist["genres"]
    })

df_global_artists = pd.DataFrame(global_artists)


In [None]:
df_global_full = df_global.merge(
    df_global_artists,
    on="artist_id",
    how="left"
)

df_global_full.head()


In [None]:
# Save to CSV

df_user_full.to_csv("user_listening_data.csv", index=False)
df_global_full.to_csv("global_trends_data.csv", index=False)
