# Generating playlist recomendations for Spotify

## Pipeline:
#### Data Preperation --> Data Pre-Processing --> Spotify API Integration --> Generate Playlist Vector --> Generate Recommendations

In [1]:
# Dependencies
import pandas as pd
import re 
import numpy as np
import itertools


In [2]:
!pip install scikit-learn
%pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


## Data Preperation:

In [3]:
# Load the dataset
# https://www.kaggle.com/datasets/yamaerenay/spotify-dataset-19212020-600k-tracks/data
tracks_df = pd.read_csv('archive/tracks.csv')

In [4]:
tracks_df.head()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,id_artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,35iwgR4jXetI318WEWsa1Q,Carve,6,126903,0,['Uli'],['45tIt06XoI0Iio4LBEVpls'],1922-02-22,0.645,0.445,0,-13.338,1,0.451,0.674,0.744,0.151,0.127,104.851,3
1,021ht4sdgPcrDgSk7JTbKY,Capítulo 2.16 - Banquero Anarquista,0,98200,0,['Fernando Pessoa'],['14jtPCOoNZwquk5wd9DxrY'],1922-06-01,0.695,0.263,0,-22.136,1,0.957,0.797,0.0,0.148,0.655,102.009,1
2,07A5yehtSnoedViJAZkNnc,Vivo para Quererte - Remasterizado,0,181640,0,['Ignacio Corsini'],['5LiOoJbxVSAMkBS2fUm3X2'],1922-03-21,0.434,0.177,1,-21.18,1,0.0512,0.994,0.0218,0.212,0.457,130.418,5
3,08FmqUhxtyLTn6pAh6bk45,El Prisionero - Remasterizado,0,176907,0,['Ignacio Corsini'],['5LiOoJbxVSAMkBS2fUm3X2'],1922-03-21,0.321,0.0946,7,-27.961,1,0.0504,0.995,0.918,0.104,0.397,169.98,3
4,08y9GfoqCWfOGsKdwojr5e,Lady of the Evening,0,163080,0,['Dick Haymes'],['3BiJGZsyX9sJchTqcSA7Su'],1922,0.402,0.158,3,-16.9,0,0.039,0.989,0.13,0.311,0.196,103.22,4


In [5]:
# To find even more relevant content, artists.csv contains genres
# Genres should be way more useful especially in a playlist generator
artists_df = pd.read_csv('archive/artists.csv')
artists_df.head()

Unnamed: 0,id,followers,genres,name,popularity
0,0DheY5irMjBUeLybbCUEZ2,0.0,[],Armid & Amir Zare Pashai feat. Sara Rouzbehani,0
1,0DlhY15l3wsrnlfGio2bjU,5.0,[],ปูนา ภาวิณี,0
2,0DmRESX2JknGPQyO15yxg7,0.0,[],Sadaa,0
3,0DmhnbHjm1qw6NCYPeZNgJ,0.0,[],Tra'gruda,0
4,0Dn11fWM7vHQ3rinvWEl4E,2.0,[],Ioannis Panoutsopoulos,0


In [6]:
# We need genres to be in a list format, to check..
artists_df['genres'].values[283100] # random values

"['swedish alternative rock']"

In [7]:
# when indexing this we see its a string that looks like a list
# this has caused issues further in the process so we should fix it now
artists_df['genres'].values[283100][0]


'['

In [8]:
# Use regex to extract the string inside the brackets and convert to a list
artists_df['genres_updated'] = artists_df['genres'].str.extractall(r"'([^']*)'").groupby(level=0).agg(list)


In [9]:
# so now to check
artists_df['genres_updated'].values[283100][0]

'swedish alternative rock'

now that thats sorted, we should merge this genre colomn with the main dataset containing all the other data

In [10]:
# using regex again to split each artist within a song to their own row
tracks_df['artists_updated'] = tracks_df['artists'].apply(lambda x: re.findall(r"'([^']*)'", x))


In [11]:
tracks_df['artists'].values[1245]

"['Louis Armstrong & His Hot Five']"

In [12]:
tracks_df['artists_updated'].values[1245][0]

'Louis Armstrong & His Hot Five'

In [13]:
tracks_df[tracks_df['artists_updated'].apply(lambda x: not x)]

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,id_artists,release_date,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,artists_updated
164,1xEEYhWxT4WhDQdxfPCT8D,Snake Rag,20,194533,0,"[""King Oliver's Creole Jazz Band""]",['08Zk65toyJllap1MnzljxZ'],1923,0.708,0.3610,...,-11.764,0,0.0441,0.9940,0.883,0.1030,0.902,105.695,4,[]
170,3rauXVLOOM5BlxWqUcDpkg,Chimes Blues,14,170827,0,"[""King Oliver's Creole Jazz Band""]",['08Zk65toyJllap1MnzljxZ'],1923,0.546,0.1890,...,-15.984,1,0.0581,0.9960,0.908,0.3390,0.554,80.318,4,[]
172,1UdqHVRFYMZKU2Q7xkLtYc,Pickin' On Your Baby,11,197493,0,"[""Clarence Williams' Blue Five""]",['6RuQvIr0t0otZHnAxXTGkm'],1923,0.520,0.1530,...,-14.042,1,0.0440,0.9950,0.131,0.3530,0.319,102.937,4,[]
174,0Vl2DO5U6FjgBpzCtBN3OA,Everybody Loves My Baby,10,152507,0,"[""Clarence Williams' Blue Five""]",['6RuQvIr0t0otZHnAxXTGkm'],1923,0.514,0.1930,...,-13.920,0,0.2380,0.9960,0.199,0.2480,0.665,180.674,4,[]
180,5SvyP1ZeJX1jA7AOZD08NA,Tears,10,187227,0,"[""King Oliver's Creole Jazz Band""]",['08Zk65toyJllap1MnzljxZ'],1923,0.359,0.3570,...,-11.810,1,0.0511,0.9940,0.819,0.2900,0.753,205.053,4,[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
585721,3LrdNQeSEWbHcO06iLv2h1,I'll Sing a Hymn to Mary,11,123995,0,"[""Patrick O'Hagen""]",['1lyT8sdJKWAO5WNX3Cl6SC'],1969-05-01,0.198,0.0766,...,-19.754,1,0.0431,0.9850,0.505,0.1040,0.085,158.869,3,[]
585904,6KzpZArHa1ZOpZRxQhYNoY,Bunga Mawar,5,172440,0,"[""The Mercy's""]",['2QrTiM5ZMIwqn5DQrq9tXc'],1980-01-01,0.545,0.4060,...,-12.144,1,0.0295,0.1520,0.740,0.2220,0.482,123.635,4,[]
586597,6rsm9NTgl9kKPatf7S1yCS,Talk,61,190347,0,"[""Why Don't We""]",['2jnIB6XdLvnJUeNTy5A0J2'],2018-08-31,0.795,0.7000,...,-5.221,0,0.1070,0.0588,0.000,0.0977,0.798,120.002,4,[]
586599,54j7EaJPDmSZYcNYvLSJ78,Trust Fund Baby,66,184460,0,"[""Why Don't We""]",['2jnIB6XdLvnJUeNTy5A0J2'],2018-02-01,0.624,0.7850,...,-3.337,1,0.0498,0.0629,0.000,0.1350,0.593,163.929,4,[]


So it seems like we didn't split all of them because some of the artists have an apostrophe in their title.
They are also are enclosed in a full quotes. We can use another regex to handle this and then combine the two


In [14]:
# extract artists handling both single and double quotes
tracks_df['artists_updated'] = tracks_df['artists'].apply(lambda x: re.findall(r"\'([^']+)\'|\"([^\"]+)\"", x))

# extract tuples where the unmatched group is None, and each artist is a group
# then flatten the list and filter out None entries
tracks_df['artists_updated'] = tracks_df['artists_updated'].apply(lambda x: [i for sub in x for i in sub if i])

In [15]:
# now we create a unique identifier for each track since the dataset apparently has duplicate songs with different ids
# this will be helpful for indexing, searching, or displaying data later

# combine the artist name and song name for the identifer
tracks_df['artists_song'] = tracks_df.apply(
    lambda row: (str(row['artists_updated'][0]) + str(row['name'])) if row['artists_updated'] else "UnknownArtist" + str(row['name']),
    axis=1
)


In [16]:
# just a quick sort
tracks_df.sort_values(['artists_song', 'release_date'], ascending=False, inplace=True)

In [17]:
# and a quick check
tracks_df[tracks_df['name'] == 'Montreal']

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,id_artists,release_date,danceability,energy,...,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,artists_updated,artists_song
107684,3sdUnQiM4xjHsDcS8uSKg3,Montreal,13,265627,0,"['Tony Schilder', 'Gary Granger', 'Richard Pic...","['2ZsAXhbgbc9w8qbsrcrnpJ', '4QThIGdpL7eARyO8Eb...",1995,0.566,0.641,...,1,0.0508,0.00864,0.000443,0.0739,0.692,115.392,4,"[Tony Schilder, Gary Granger, Richard Pickett,...",Tony SchilderMontreal
294056,6fRgjqJptZry7fipP2Beyh,Montreal,56,250520,0,['The Weeknd'],['1Xyo4u8uXC1ZmMpatF05PJ'],2012-11-13,0.497,0.803,...,0,0.132,0.0447,1.8e-05,0.424,0.155,183.883,4,[The Weeknd],The WeekndMontreal
199981,5maosvba9QkQP14u6vtxog,Montreal,59,236336,0,['Roosevelt'],['4AQrqVz6BYwy29iMxcGtx7'],2013-08-19,0.669,0.642,...,1,0.0515,0.00221,0.834,0.203,0.838,115.973,4,[Roosevelt],RooseveltMontreal


In [18]:
# now we can remove duplicates
tracks_df.drop_duplicates('artists_song',inplace = True)


In [19]:
# and check again

tracks_df[tracks_df['name'] == 'Adore You']

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,id_artists,release_date,danceability,energy,...,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,artists_updated,artists_song
86217,5AnCLGg35ziFOloEnXK4uu,Adore You,71,278747,0,['Miley Cyrus'],['5YGY8feqx7naU7z4HrwZM6'],2013-10-04,0.583,0.655,...,1,0.0315,0.111,4e-06,0.113,0.201,119.759,4,[Miley Cyrus],Miley CyrusAdore You
91884,3jjujdWJ72nww5eGnfs2E7,Adore You,88,207133,0,['Harry Styles'],['6KImCVD70vtIoJWnq6nGn3'],2019-12-13,0.676,0.771,...,1,0.0483,0.0237,7e-06,0.102,0.569,99.048,4,[Harry Styles],Harry StylesAdore You


In [20]:
# now to finally merge
artists_df.rename(columns={'id': 'artist_id'}, inplace=True)

tracks_exploded = tracks_df[['artists_updated', 'id', 'acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'valence', 'popularity', 'key', 'mode']].explode('artists_updated').copy()


In [21]:
artists_df.head()

Unnamed: 0,artist_id,followers,genres,name,popularity,genres_updated
0,0DheY5irMjBUeLybbCUEZ2,0.0,[],Armid & Amir Zare Pashai feat. Sara Rouzbehani,0,
1,0DlhY15l3wsrnlfGio2bjU,5.0,[],ปูนา ภาวิณี,0,
2,0DmRESX2JknGPQyO15yxg7,0.0,[],Sadaa,0,
3,0DmhnbHjm1qw6NCYPeZNgJ,0.0,[],Tra'gruda,0,
4,0Dn11fWM7vHQ3rinvWEl4E,2.0,[],Ioannis Panoutsopoulos,0,


In [22]:
artists_exploded_enriched = tracks_exploded.merge(artists_df, how = 'left', left_on = 'artists_updated',right_on = 'name')
artists_exploded_enriched_nonnull = artists_exploded_enriched[~artists_exploded_enriched.genres_updated.isnull()]

In [23]:
# lets check
artists_exploded_enriched_nonnull.head()
# artists_exploded_enriched_nonnull[artists_exploded_enriched_nonnull['id'] =='6fRgjqJptZry7fipP2Beyh']

Unnamed: 0,artists_updated,id,acousticness,danceability,duration_ms,energy,instrumentalness,liveness,loudness,speechiness,...,valence,popularity_x,key,mode,artist_id,followers,genres,name,popularity_y,genres_updated
0,최진희,3u1C6nWVRoP5F0w8gGrDL3,0.617,0.367,222380,0.194,6e-06,0.162,-19.057,0.04,...,0.367,25,7,1,1NSrAf8XJYJVgAXKoxaMet,788.0,['trot'],최진희,17.0,[trot]
16,김수철,4FG1uHQAVPsYiXIn3tcGRs,0.509,0.456,189245,0.321,1e-06,0.156,-16.682,0.0314,...,0.21,27,4,0,0EmfHeitNJW1MjPBVvy8uR,570.0,['classic korean pop'],김수철,18.0,[classic korean pop]
18,강수지,6p0ecVFjeSu09Ncb1bUEGR,0.382,0.669,235093,0.471,0.0,0.0963,-6.515,0.033,...,0.47,1,7,0,6yvfQfQ8CS9aXvGlyHEdoi,922.0,['classic korean pop'],강수지,22.0,[classic korean pop]
19,龔詩嘉,5RtLa6YhqP7VuKc6vHAdmm,0.94,0.362,259467,0.282,6e-06,0.125,-9.696,0.0326,...,0.263,40,11,1,3YJXJABOwuadyp7MPN9wbi,533.0,"['singaporean mandopop', 'singaporean pop']",龔詩嘉,33.0,"[singaporean mandopop, singaporean pop]"
20,龔詩嘉,2w5L5kQVkSRdduNewYPDA5,0.134,0.45,184120,0.782,0.0,0.0632,-5.905,0.0306,...,0.475,20,0,1,3YJXJABOwuadyp7MPN9wbi,533.0,"['singaporean mandopop', 'singaporean pop']",龔詩嘉,33.0,"[singaporean mandopop, singaporean pop]"


In [24]:
# all thats left is to
# - group by song id
# - combine these lists and output the unique values

artists_genres_consolidated = artists_exploded_enriched_nonnull.groupby('id')['genres_updated'].apply(list).reset_index()


In [25]:
artists_genres_consolidated['consolidates_genre_lists'] = artists_genres_consolidated['genres_updated'].apply(lambda x: list(set(list(itertools.chain.from_iterable(x)))))


In [26]:
artists_genres_consolidated.head()

Unnamed: 0,id,genres_updated,consolidates_genre_lists
0,0004Uy71ku11n3LMpuyf59,[[polish rock]],[polish rock]
1,000CSYu4rvd8cQ7JilfxhZ,"[[country quebecois, rock quebecois]]","[country quebecois, rock quebecois]"
2,000DsoWJKHdaUmhgcnpr8j,[[barnmusik]],[barnmusik]
3,000G1xMMuwxNHmwVsBdtj1,"[[candy pop, new wave, new wave pop, permanent...","[new wave, candy pop, permanent wave, new wave..."
4,000KblXP5csWFFFsD6smOy,"[[chamame, folclore salteno, folklore argentino]]","[chamame, folclore salteno, folklore argentino]"


In [27]:
tracks_df = tracks_df.merge(artists_genres_consolidated[['id','consolidates_genre_lists']], on = 'id',how = 'left')

In [28]:
tracks_df.head()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,id_artists,release_date,danceability,energy,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,artists_updated,artists_song,consolidates_genre_lists
0,3u1C6nWVRoP5F0w8gGrDL3,사랑의 미로,25,222380,0,['최진희'],['1NSrAf8XJYJVgAXKoxaMet'],1987-06-01,0.367,0.194,...,0.04,0.617,6e-06,0.162,0.367,144.316,4,[최진희],최진희사랑의 미로,[trot]
1,1Mv4u308L16NZDZiD6HZCy,사랑은 힘든가봐,28,213440,0,['지수'],['4c9QIMfEbIIynuaswyxGx9'],2005-12-23,0.675,0.785,...,0.028,0.379,0.0,0.353,0.623,103.008,4,[지수],지수사랑은 힘든가봐,
2,1jvoY322nxyKXq8OBhgmSY,어떡하죠,44,244360,0,['지선'],['2Mo9NQaNCFCWSR5CnlfmbN'],2011-10-13,0.606,0.341,...,0.0513,0.779,0.0,0.144,0.294,135.667,4,[지선],지선어떡하죠,
3,2ghebdwe2pNXT4eL34T7pW,그아픔까지사랑한거야,32,237688,0,['조정현'],['2WTpsPucygbYRnCnoEUkJQ'],1989-06-15,0.447,0.215,...,0.0272,0.568,1e-06,0.0649,0.177,71.979,4,[조정현],조정현그아픔까지사랑한거야,
4,7rxpWwcXNgDUXl0wN0gUvp,천국의 기억 장정우 Version,31,280372,0,['장정우'],['5L7zKs2ftwENWOMI7LFaN1'],2003-12-24,0.494,0.656,...,0.0262,0.659,7e-06,0.111,0.42,82.003,4,[장정우],장정우천국의 기억 장정우 Version,


## Data Pre-Processing:

In [29]:
# starting by cleaning up dates
tracks_df['year'] = tracks_df['release_date'].apply(lambda x: x.split('-')[0])


In [30]:
# using one hot encoding
# converting each categorical value into a new categorical column and 
# assigning a binary value of 1 or 0

# we set up the columns
float_cols = tracks_df.dtypes[tracks_df.dtypes == 'float64'].index.values
ohe_cols = 'popularity'


In [31]:
tracks_df['popularity'].describe()


count    523466.000000
mean         27.235834
std          18.030364
min           0.000000
25%          13.000000
50%          27.000000
75%          40.000000
max          99.000000
Name: popularity, dtype: float64

In [32]:
# 5 point buckets for popularity 
tracks_df['popularity_red'] = tracks_df['popularity'].apply(lambda x: int(x/5))

# to handle nulls, fill any null values with an empty list
tracks_df['consolidates_genre_lists'] = tracks_df['consolidates_genre_lists'].apply(lambda d: d if isinstance(d, list) else [])

In [33]:
tracks_df.head()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,id_artists,release_date,danceability,energy,...,instrumentalness,liveness,valence,tempo,time_signature,artists_updated,artists_song,consolidates_genre_lists,year,popularity_red
0,3u1C6nWVRoP5F0w8gGrDL3,사랑의 미로,25,222380,0,['최진희'],['1NSrAf8XJYJVgAXKoxaMet'],1987-06-01,0.367,0.194,...,6e-06,0.162,0.367,144.316,4,[최진희],최진희사랑의 미로,[trot],1987,5
1,1Mv4u308L16NZDZiD6HZCy,사랑은 힘든가봐,28,213440,0,['지수'],['4c9QIMfEbIIynuaswyxGx9'],2005-12-23,0.675,0.785,...,0.0,0.353,0.623,103.008,4,[지수],지수사랑은 힘든가봐,[],2005,5
2,1jvoY322nxyKXq8OBhgmSY,어떡하죠,44,244360,0,['지선'],['2Mo9NQaNCFCWSR5CnlfmbN'],2011-10-13,0.606,0.341,...,0.0,0.144,0.294,135.667,4,[지선],지선어떡하죠,[],2011,8
3,2ghebdwe2pNXT4eL34T7pW,그아픔까지사랑한거야,32,237688,0,['조정현'],['2WTpsPucygbYRnCnoEUkJQ'],1989-06-15,0.447,0.215,...,1e-06,0.0649,0.177,71.979,4,[조정현],조정현그아픔까지사랑한거야,[],1989,6
4,7rxpWwcXNgDUXl0wN0gUvp,천국의 기억 장정우 Version,31,280372,0,['장정우'],['5L7zKs2ftwENWOMI7LFaN1'],2003-12-24,0.494,0.656,...,7e-06,0.111,0.42,82.003,4,[장정우],장정우천국의 기억 장정우 Version,[],2003,6


In [34]:
# create a one-hot encoded DataFrame from a specified column of an existing DataFrame.
def ohe_prep(df, column, new_name):
    # Generate a DataFrame with one-hot encoded features for the specified column
    tf_df = pd.get_dummies(df[column])
    
    # Get the list of column names created by one-hot encoding
    feature_names = tf_df.columns
    
    # Modify the column names to include the new_name prefix
    tf_df.columns = [new_name + "|" + str(i) for i in feature_names]
    
    # Reset the index of the DataFrame to ensure it starts from 0 and drop the old index
    tf_df.reset_index(drop=True, inplace=True)
    
    return tf_df


In [35]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from scipy import sparse
import pandas as pd

#function to build entire feature set
def create_feature_set(df, float_cols):
    #tfidf genre lists
    tfidf = TfidfVectorizer()
    tfidf_matrix =  tfidf.fit_transform(df['consolidates_genre_lists'].apply(lambda x: " ".join(x)))
    genre_df = pd.DataFrame(tfidf_matrix.toarray())
    genre_df.columns = ['genre' + "|" + i for i in tfidf.get_feature_names_out()]
    genre_df.reset_index(drop = True, inplace=True)

    #explicity_ohe = ohe_prep(df, 'explicit','exp')    
    year_ohe = ohe_prep(df, 'year','year') * 0.5
    popularity_ohe = ohe_prep(df, 'popularity_red','pop') * 0.15

    #scale float columns
    floats = df[float_cols].reset_index(drop = True)
    scaler = MinMaxScaler()
    floats_scaled = pd.DataFrame(scaler.fit_transform(floats), columns = floats.columns) * 0.2

    #concanenate all features
    final = pd.concat([genre_df, floats_scaled, popularity_ohe, year_ohe], axis = 1)
     
    #add song id
    final['id']=df['id'].values
    
    return final

In [36]:
# complete_feature_set = create_feature_set(tracks_df, float_cols=float_cols)

In [37]:
# check it
# complete_feature_set.head()

## Spotify API:

In [38]:
import os
import sys
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
import spotipy.util as util

# set details
os.environ['client_id'] = '63302bd258834779a755d20a01adb049'
os.environ['client_secret']= '1270c66f93ef4751ba8b3d5af9e2afa8'

In [39]:

client_id = os.getenv('client_id')
client_secret= os.getenv('client_secret')

In [40]:
scope = 'playlist-read-private'
username = 'm3runbpu8cnxud4eivx8knnif'

In [41]:
auth_manager = SpotifyOAuth(scope=scope, client_id=os.getenv('client_id'),
                            client_secret=os.getenv('client_secret'),
                            redirect_uri='http://localhost:8888/')
sp = spotipy.Spotify(auth_manager=auth_manager)

In [42]:
#gather playlist names and images. 
id_name = {}
list_photo = {}
for i in sp.current_user_playlists()['items']:

    id_name[i['name']] = i['uri'].split(':')[2]
    list_photo[i['uri'].split(':')[2]] = i['images'][0]['url']

In [43]:
id_name

{'karims white girl bangers': '0xrMydH7erGRvks0xUHQMA',
 'qusaiy + karim': '37i9dQZF1EJDOlZV0z6jEQ',
 'jibrann + karim': '37i9dQZF1EJIlyNh43yG5u',
 'My playlist #15': '3n0fuTtztD0dAHk994QRUS',
 'My playlist #14': '3SZDOZsLkNQ9LsvzJP1bOo',
 'lcossiop + karim': '37i9dQZF1EJAtRrDTWU2xv',
 '🐢': '2I6ZEgnWKlA037mriAX8ly',
 'My playlist #12': '53nBm5ActLySHyjNFXxF7T',
 '🦫': '2RdFv1t3FrdWlUqKmAXrs2',
 '☀️': '54ivzWwfS4Kb5RD04z79bg',
 'هاوس ميوزك ': '56LaLf4wvmJnNcPgYyzn9k',
 'My playlist #11': '2jGJ4Zx3S0f77JYqsvMlLk',
 'nour’s 19th': '576Fu3d4xKOEYSuj2ZC9G6',
 'karim + 8 others': '37i9dQZF1EJLe0MSgnxLLa',
 'My Playlist #8': '0Dth3RMU7Fl6s6RGylF3kr',
 'Abdulelah + karim': '37i9dQZF1EJxsaPJXeEOlf',
 '#10': '28F5jEiTqvVTnvUfQjsoqK',
 'L@N': '64bMZuaGoEK4lglmj0ylGb',
 'fictional anarchy': '0Z4xoTc1cIf8tG2sAbaiiv',
 'COSMIC GEOMETRY': '7ku8Vsk3diwr43XZPjEPXv',
 '#229': '35I2xtl7rRWxWbfZMHkGkV',
 'bad decrypt': '1z510r6kvjekhDmvY0RMhy',
 '.mp3': '5lrhyDdlvPP2gSEgNuh5tY'}

In [44]:
# Pull all songs from a playlist
def create_necessary_outputs(playlist_name,id_dic, df):
    #generate playlist dataframe
    playlist = pd.DataFrame()
    playlist_name = playlist_name

    for ix, i in enumerate(sp.playlist(id_dic[playlist_name])['tracks']['items']):
        #print(i['track']['artists'][0]['name'])
        playlist.loc[ix, 'artist'] = i['track']['artists'][0]['name']
        playlist.loc[ix, 'name'] = i['track']['name']
        playlist.loc[ix, 'id'] = i['track']['id'] # ['uri'].split(':')[2]
        playlist.loc[ix, 'url'] = i['track']['album']['images'][1]['url']
        playlist.loc[ix, 'date_added'] = i['added_at']

    playlist['date_added'] = pd.to_datetime(playlist['date_added'])  
    
    playlist = playlist[playlist['id'].isin(df['id'].values)].sort_values('date_added',ascending = False)
    
    return playlist

In [55]:
playlist_15 = create_necessary_outputs('playlist_15', id_name,tracks_df)

In [56]:
playlist_15

Unnamed: 0,artist,name,id,url,date_added
24,Clean Bandit,Rather Be (feat. Jess Glynne),61dCUoMCg28qEBzrW6htYM,https://i.scdn.co/image/ab67616d00001e025ef315...,2024-04-10 13:14:46+00:00
22,Flo Rida,Whistle,3bC1ahPIYt1btJzSSEyyrF,https://i.scdn.co/image/ab67616d00001e02871d85...,2024-04-06 13:39:57+00:00
21,Jessie J,Domino,2fQxE0jVrjNMT9oJAXtSJR,https://i.scdn.co/image/ab67616d00001e027805ae...,2024-04-06 13:39:37+00:00
20,Soulja Boy,Kiss Me Thru The Phone,2q4rjDy9WhaN3o9MvDbO21,https://i.scdn.co/image/ab67616d00001e026aef62...,2024-04-06 13:39:16+00:00
18,Rihanna,Umbrella,49FYlytm3dAAraYgpoJZux,https://i.scdn.co/image/ab67616d00001e02f9f271...,2024-04-06 13:39:10+00:00
12,Kesha,Die Young,7EQGXaVSyEDsCWKmUcfpLk,https://i.scdn.co/image/ab67616d00001e0276c5c6...,2024-04-06 13:38:07+00:00
11,Flo Rida,Club Can't Handle Me (feat. David Guetta),6ebkx7Q5tTxrCxKq4GYj0Y,https://i.scdn.co/image/ab67616d00001e02c0ddb3...,2024-04-06 13:37:48+00:00
5,Taylor Swift,I Knew You Were Trouble.,6FB3v4YcR57y4tXFcdxI1E,https://i.scdn.co/image/ab67616d00001e021c248e...,2024-04-06 13:37:17+00:00
