In [45]:
import pandas as pd
import csv
from spotipy import Spotify
from spotipy.oauth2 import SpotifyClientCredentials
from config import *

In [46]:
# The following code uses the Kaggle Spotify dataset from https://www.kaggle.com/datasets/mrmorj/dataset-of-songs-in-spotify/
# The csv file downloaded from Kaggle has track information, including track ids and song titles.
# The artist is missing from the csv.
# We can use the track ids from the csv file to query the Spotify API and get the latest track name and artist info for each song.
# The dataset is collected for 3000 songs.
# Note: this will take a couple of minutes, so build only when needed.
# The results are stored in a csv file, not_so_hot.csv.
# Module-level Spotify API client; get_track_info() below calls it as a global.
# NOTE(review): client_id / client_secret are presumably supplied by
# `from config import *` -- confirm that config defines both names.
sp = Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=client_id, client_secret=client_secret
    )
)

def get_track_ids_from_csv(path: str) -> list[str]:
    """Collect the ``uri`` column of a comma-separated file.

    Args:
        path: Location of a CSV file whose header row contains a ``uri``
            column.

    Returns:
        The ``uri`` value of every data row, in file order. An input with a
        header but no data rows yields an empty list.

    Raises:
        KeyError: If a data row is read from a file that has no ``uri`` column.
    """
    # newline="" lets the csv module do its own line-ending handling (needed
    # for quoted fields that contain line breaks); the explicit UTF-8 encoding
    # keeps non-ASCII song titles readable regardless of the platform default.
    with open(path, newline="", encoding="utf-8") as csv_file:
        return [row["uri"] for row in csv.DictReader(csv_file, delimiter=',')]

def get_track_info(track_ids: list[str], batch_size: int = 50) -> list[dict[str, str]]:
    """Look up the title and first-listed artist of each track on Spotify.

    Tracks are requested in batches through the module-level ``sp`` client
    instead of one HTTP round trip per track, which is what made the lookup
    of the full dataset slow.

    Args:
        track_ids: Track identifiers to look up (whatever ``sp.tracks``
            accepts).
        batch_size: Number of tracks requested per API call. Spotify's
            several-tracks endpoint accepts at most 50 ids per request.

    Returns:
        One ``{"Artist": ..., "Title": ...}`` dict per track that came back
        with both a non-empty name and a non-empty first artist name, in
        input order. Tracks that are missing from the response, or that lack
        either value, are skipped.

    Raises:
        ValueError: If ``batch_size`` is not between 1 and 50.
    """
    if not 1 <= batch_size <= 50:
        raise ValueError("batch_size must be between 1 and 50")

    tracks = []
    for start in range(0, len(track_ids), batch_size):
        batch = track_ids[start:start + batch_size]
        response = sp.tracks(batch) or {}
        # Entries in the response can be None for tracks Spotify does not
        # return; they are skipped just like any other empty result.
        for track in response.get("tracks") or []:
            if not track:
                continue
            title = track.get("name", "")
            artists = track.get("artists", []) or []
            # Only the first listed artist is kept.
            artist = artists[0].get("name", "") if artists else ""
            if title and artist:
                tracks.append({"Artist": artist, "Title": title})
    return tracks

def write_csv(path: str, content: list[dict[str, str]]) -> None:
    """Write artist/title records to a CSV file with an ``Artist,Title`` header.

    Args:
        path: Destination file; an existing file is overwritten.
        content: Records to write, each a dict keyed by ``Artist`` and
            ``Title``.

    Raises:
        ValueError: If a record contains a key other than ``Artist`` or
            ``Title`` (``csv.DictWriter``'s default behaviour).
    """
    # newline="" stops the text layer from rewriting the "\r\n" emitted by the
    # csv writer (which otherwise shows up as blank rows on Windows); explicit
    # UTF-8 keeps non-ASCII artist names writable on every platform.
    with open(path, mode='w', newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=['Artist', 'Title'])
        writer.writeheader()
        writer.writerows(content)

# track_ids = get_track_ids_from_csv("kaggle_spotify_v2_reduced.csv")
# tracks = get_track_info(track_ids)
# write_csv("not_so_hot.csv", tracks)

In [47]:
# Load the artist/title list from not_so_hot.csv (the file the Spotify lookup
# cell above is documented to produce) and render it.
not_hot100 = pd.read_csv("./not_so_hot.csv")
display(not_hot100)

Unnamed: 0,Artist,Title
0,Ghostemane,Mercury: Retrograde
1,Don Kenobi,Pathology
2,gizmo,Symbiote
3,Kamiyada+,ProductOfDrugs (Prod. The Virus and Antidote)
4,$uicideboy$,Venom
...,...,...
2995,Tomkillsjerry,Focus
2996,Tomkillsjerry,The Cure
2997,Tomkillsjerry,Blocks
2998,Tomkillsjerry,Liu Kang


In [48]:
# Down-sample the non-hot songs to a fixed number of rows. The seed is pinned
# so that Restart Kernel -> Run All selects the same rows every time.
NOT_HOT_SAMPLE_SIZE = 2500
NOT_HOT_SAMPLE_SEED = 42
not_hot100 = not_hot100.sample(n=NOT_HOT_SAMPLE_SIZE, random_state=NOT_HOT_SAMPLE_SEED)
display(not_hot100)

Unnamed: 0,Artist,Title
2215,Ghostemane,Avatar
2014,Lil Tracy,Pictures
1800,Germ,We Outside
2101,Rich Brian,Gospel
2284,BONES,WhereTheTreesMeetTheFreeway
...,...,...
556,ONI INC.,Bad Blood 2
1959,Wicca Phase Springs Eternal,Dead Star
2402,Pop Smoke,Gangstas
1965,Lil Peep,shiver


In [49]:
# Load the hot-100 songs from hot100.csv; the trailing bare expression renders
# the frame as the cell output.
hot100 = pd.read_csv('hot100.csv')
hot100

Unnamed: 0,Artist,Title
0,Doja Cat,Paint The Town Red
1,SZA,Snooze
2,Taylor Swift,Cruel Summer
3,Luke Combs,Fast Car
4,Jung Kook & Jack Harlow,3D
...,...,...
95,Rod Wave,Long Journey
96,Luke Bryan,But I Got A Beer In My Hand
97,Peso Pluma,Rubicon
98,Zach Bryan,East Side Of Sorrow


In [50]:
# Sanity check: rows whose (Artist, Title) pair appears in both frames.
# An empty result means the two song sets do not overlap on exact matches.
int_df = pd.merge(
    left=not_hot100,
    right=hot100,
    on=['Artist', 'Title'],
    how='inner',
)
print(int_df)

Empty DataFrame
Columns: [Artist, Title]
Index: []
