In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from dotenv import load_dotenv
import os

# Load variables from a .env file (if one is present) into the process
# environment so the credentials never have to be hardcoded in the notebook.
load_dotenv()

CLIENT_ID = os.environ.get("CLIENT_ID")
CLIENT_SECRET = os.environ.get("CLIENT_SECRET")

# Build the Spotify client on top of a client-credentials auth manager.
auth_manager = SpotifyClientCredentials(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
)
spotify = spotipy.Spotify(auth_manager=auth_manager)

In [None]:
"""
Get playlist ids
"""

# Featured playlists are returned one page at a time. Fetch the first page,
# then keep following the paging object's "next" link so that every page is
# collected instead of only the first one.
playlists = spotify.featured_playlists(locale="en_GB", country="GB", timestamp=None, limit=50, offset=0)

playlist_ids = []
page = playlists.get("playlists")
while page:
    # Guard against null entries in "items"; calling .get on them would raise.
    playlist_ids.extend(
        item.get("id") for item in (page.get("items") or []) if item
    )
    # spotify.next returns the following response, or None on the last page.
    next_response = spotify.next(page)
    page = next_response.get("playlists") if next_response else None

In [None]:
"""
create data dictionary
"""
data_list = []  # not used in this cell; kept in case other cells rely on it
playlist_dictionary = dict()

# Build {playlist_id: [{"artist": ..., "song_id": ..., "song_name": ...}, ...]}.
for playlist_id in playlist_ids:

    playlist = []

    # playlist_tracks returns a single page of items; follow the "next" links
    # so that long playlists are not silently truncated to their first page.
    page = spotify.playlist_tracks(playlist_id)
    while page:
        for playlist_track in page.get("items") or []:
            track = playlist_track.get('track')
            if track:
                # get artist names and ids
                artist = [(a.get('name'), a.get('id')) for a in track.get("artists")]
                # get song name and ids
                song_name = track.get("name")
                song_id = track.get("id")
            else:
                # Entries without a track payload are kept as all-None rows.
                artist = None
                song_name, song_id = None, None
            playlist.append({"artist": artist, "song_id": song_id, "song_name": song_name})
        # spotify.next returns None once there is no further page.
        page = spotify.next(page)

    playlist_dictionary[playlist_id] = playlist
    

In [None]:
"""
create data df
"""
# Flatten {playlist_id: [song_info, ...]} into one record per (playlist, song).
# The frame is built once from a list of records: DataFrame.append was removed
# in pandas 2.0, and growing a frame row by row is quadratic anyway.
song_records = [
    {
        'song_name': song_info['song_name'],
        'song_id': song_info['song_id'],
        # Stored as a string so the artist list can be used as a groupby key.
        'artist': str(song_info['artist']),
        'playlist': playlist_id,
    }
    for playlist_id, songs in playlist_dictionary.items()
    for song_info in songs
]

# Passing `columns` pins the column order and keeps the frame well-formed
# even when there are no records at all.
df = pd.DataFrame(song_records, columns=['song_name', 'song_id', 'artist', 'playlist'])

In [None]:
"""
ensure song uniqueness
"""

# Collapse to one row per distinct (song_name, song_id, artist) combination,
# gathering the ids of every playlist that song appears in into a list.
song_key_columns = ['song_name', 'song_id', 'artist']
playlists_per_song = df.groupby(song_key_columns)['playlist'].apply(list)
grouped_df = playlists_per_song.reset_index()

In [None]:
# Number of distinct song ids left after grouping.
grouped_df["song_id"].nunique()

In [None]:
# Show up to 10 random rows. The size is capped at the frame length because
# DataFrame.sample raises ValueError when asked for more rows than exist.
grouped_df.sample(min(10, len(grouped_df)))