In [1]:
import pandas as pd
import json
import os

# Input CSVs are read from DATA_DIR; the JSON lists produced below are written
# to OUTPUT_DIR.
DATA_DIR = "../data"
OUTPUT_DIR = "../data/lists"

# exist_ok=True makes re-running the cell safe once the folder already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Name the input paths first so the two read_csv calls stay short.
spotify_csv_path = os.path.join(DATA_DIR, "spotify_clean.csv")
lastfm_csv_path = os.path.join(DATA_DIR, "lastfm_clean.csv")

spotify = pd.read_csv(spotify_csv_path)
lastfm = pd.read_csv(lastfm_csv_path)

# Label rows that have no genre as "Unknown" so they form their own group;
# groupby skips NaN keys by default and would otherwise drop those rows.
spotify['track_genre'] = spotify['track_genre'].fillna("Unknown")

# Map every genre to its distinct track names, in order of first appearance.
# Missing track names are removed before de-duplicating: Series.unique() keeps
# NaN as a value, and json.dump would serialise it as a bare NaN token, which
# is not valid JSON. This also matches how the user list is built (dropna
# before unique).
tracks_by_genre = {
    genre: group['track_name'].dropna().unique().tolist()
    for genre, group in spotify.groupby('track_genre')
}

# Write the genre -> track-names mapping as pretty-printed UTF-8 JSON.
# ensure_ascii=False keeps non-ASCII titles as real characters instead of
# \uXXXX escapes.
genre_tracks_path = os.path.join(OUTPUT_DIR, "available_tracks_by_genre.json")
with open(genre_tracks_path, "w", encoding="utf-8") as genre_tracks_file:
    json.dump(
        tracks_by_genre,
        genre_tracks_file,
        ensure_ascii=False,
        indent=2,
    )

print(f"Saved {len(tracks_by_genre)} genres with tracks.")

# Distinct usernames from the Last.fm frame, ignoring rows with no username.
usernames = lastfm['Username'].dropna()
unique_users = usernames.unique().tolist()

# Store the list as pretty-printed UTF-8 JSON next to the genre mapping.
users_path = os.path.join(OUTPUT_DIR, "available_users.json")
with open(users_path, "w", encoding="utf-8") as users_file:
    json.dump(
        unique_users,
        users_file,
        ensure_ascii=False,
        indent=2,
    )

print(f"Saved {len(unique_users)} unique users.")

# Sanity check: show the first five tracks of the first three genres.
print("\nExample genres and tracks:")
sample_genres = list(tracks_by_genre)[:3]
for genre in sample_genres:
    tracks = tracks_by_genre[genre]
    print(f"{genre}: {tracks[:5]}...")

# ...and the first ten usernames.
sample_users = unique_users[:10]
print("\nExample users:", sample_users)

Saved 113 genres with tracks.
Saved 11 unique users.

Example genres and tracks:
acoustic: ['Comedy', 'Ghost - Acoustic', 'To Begin Again', "Can't Help Falling In Love", 'Hold On']...
afrobeat: ['Jireh (My Provider)', 'Ainda Há Tempo', 'Fellini', 'Lion Man', 'Espiral de Ilusão']...
alt-rock: ['Daddy Issues', 'Little Dark Age', 'Softcore', 'Sweater Weather', 'You Get Me So High']...

Example users: ['Babs_05', 'franhale', 'eartle', 'massdosage', 'Knapster01', 'jonocole', 'isaac', 'lobsterclaw', 'jajo', 'mremond']
