In [2]:
import pandas as pd
import numpy as np
import json
import re
import sys
import itertools

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt


import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
import spotipy.util as util

import warnings
warnings.filterwarnings("ignore")

In [3]:
%matplotlib inline

## **DATA** **EXPLORATION**

In [4]:
spotify_df = pd.read_csv('data.csv')

In [5]:
spotify_df.head()

Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo
0,0.0594,1921,0.982,"['Sergei Rachmaninoff', 'James Levine', 'Berli...",0.279,831667,0.211,0,4BJqT0PrAfrxzMOxytFOIz,0.878,10,0.665,-20.096,1,"Piano Concerto No. 3 in D Minor, Op. 30: III. ...",4,1921,0.0366,80.954
1,0.963,1921,0.732,['Dennis Day'],0.819,180533,0.341,0,7xPhfUan2yNtyFG0cUWkt8,0.0,7,0.16,-12.441,1,Clancy Lowered the Boom,5,1921,0.415,60.936
2,0.0394,1921,0.961,['KHP Kridhamardawa Karaton Ngayogyakarta Hadi...,0.328,500062,0.166,0,1o6I8BglA6ylDMrIELygv1,0.913,3,0.101,-14.85,1,Gati Bali,5,1921,0.0339,110.339
3,0.165,1921,0.967,['Frank Parker'],0.275,210000,0.309,0,3ftBPsC5vPBKxYSee08FDH,2.8e-05,5,0.381,-9.316,1,Danny Boy,3,1921,0.0354,100.109
4,0.253,1921,0.957,['Phil Regan'],0.418,166693,0.193,0,4d6HGyGT8e121BsdKmw9v6,2e-06,3,0.229,-10.096,1,When Irish Eyes Are Smiling,2,1921,0.038,101.665


In [6]:
data_w_genre = pd.read_csv('data_w_genres.csv')
data_w_genre.head()

Unnamed: 0,genres,artists,acousticness,danceability,duration_ms,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,key,mode,count
0,['show tunes'],"""Cats"" 1981 Original London Cast",0.590111,0.467222,250318.555556,0.394003,0.0114,0.290833,-14.448,0.210389,117.518111,0.3895,38.333333,5,1,9
1,[],"""Cats"" 1983 Broadway Cast",0.862538,0.441731,287280.0,0.406808,0.081158,0.315215,-10.69,0.176212,103.044154,0.268865,30.576923,5,1,26
2,[],"""Fiddler On The Roof” Motion Picture Chorus",0.856571,0.348286,328920.0,0.286571,0.024593,0.325786,-15.230714,0.118514,77.375857,0.354857,34.857143,0,1,7
3,[],"""Fiddler On The Roof” Motion Picture Orchestra",0.884926,0.425074,262890.962963,0.24577,0.073587,0.275481,-15.63937,0.1232,88.66763,0.37203,34.851852,0,1,27
4,[],"""Joseph And The Amazing Technicolor Dreamcoat""...",0.510714,0.467143,270436.142857,0.488286,0.0094,0.195,-10.236714,0.098543,122.835857,0.482286,43.0,5,1,7


In [7]:
data_w_genre.dtypes

genres               object
artists              object
acousticness        float64
danceability        float64
duration_ms         float64
energy              float64
instrumentalness    float64
liveness            float64
loudness            float64
speechiness         float64
tempo               float64
valence             float64
popularity          float64
key                   int64
mode                  int64
count                 int64
dtype: object

In [8]:
data_w_genre['genres'].values[0]

"['show tunes']"

In [9]:
data_w_genre['genres'].values[0][0]

'['

In [10]:
data_w_genre['genres_upd'] = data_w_genre['genres'].apply(lambda x: [re.sub(' ','_',i) for i in re.findall(r"'([^']*)'", x)])

In [11]:
data_w_genre['genres_upd'].values[0][0]

'show_tunes'

In [12]:
spotify_df['artists_upd_v1'] = spotify_df['artists'].apply(lambda x: re.findall(r"'([^']*)'", x))

In [13]:
spotify_df['artists'].values[0]

"['Sergei Rachmaninoff', 'James Levine', 'Berliner Philharmoniker']"

In [14]:
spotify_df['artists_upd_v1'].values[0][0]

'Sergei Rachmaninoff'

In [15]:
spotify_df[spotify_df['artists_upd_v1'].apply(lambda x: not x)].head(5)

Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo,artists_upd_v1
143,0.3,1921,0.772,"[""Scarlet D'Carpio""]",0.56,249370,0.313,0,7b4eHImKQ51DYaQvNTdtEp,5e-06,6,0.115,-8.346,0,Himno Nacional del Perú,0,1921-09-23,0.0376,107.501,[]
234,0.902,1923,0.994,"[""King Oliver's Creole Jazz Band""]",0.708,194533,0.361,0,1xEEYhWxT4WhDQdxfPCT8D,0.883,0,0.103,-11.764,0,Snake Rag,20,1923,0.0441,105.695,[]
238,0.554,1923,0.996,"[""King Oliver's Creole Jazz Band""]",0.546,170827,0.189,0,3rauXVLOOM5BlxWqUcDpkg,0.908,0,0.339,-15.984,1,Chimes Blues,13,1923,0.0581,80.318,[]
244,0.319,1923,0.995,"[""Clarence Williams' Blue Five""]",0.52,197493,0.153,0,1UdqHVRFYMZKU2Q7xkLtYc,0.131,0,0.353,-14.042,1,Pickin' On Your Baby,11,1923,0.044,102.937,[]
249,0.753,1923,0.994,"[""King Oliver's Creole Jazz Band""]",0.359,187227,0.357,0,5SvyP1ZeJX1jA7AOZD08NA,0.819,3,0.29,-11.81,1,Tears,10,1923,0.0511,205.053,[]


In [16]:
spotify_df['artists_upd_v2'] = spotify_df['artists'].apply(lambda x: re.findall('\"(.*?)\"',x))
spotify_df['artists_upd'] = np.where(spotify_df['artists_upd_v1'].apply(lambda x: not x), spotify_df['artists_upd_v2'], spotify_df['artists_upd_v1'] )

In [17]:
spotify_df['artists_song'] = spotify_df.apply(lambda row: row['artists_upd'][0]+row['name'],axis = 1)

In [18]:
spotify_df.sort_values(['artists_song','release_date'], ascending = False, inplace = True)

In [19]:
spotify_df[spotify_df['name']=='Blank Space']

Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,...,mode,name,popularity,release_date,speechiness,tempo,artists_upd_v1,artists_upd_v2,artists_upd,artists_song
18424,0.583,2014,0.085,['Taylor Swift'],0.752,231827,0.678,0,1p80LdxRV74UKvL8gnD7ky,2e-06,...,1,Blank Space,71,2014-10-27,0.0646,96.009,[Taylor Swift],[],[Taylor Swift],Taylor SwiftBlank Space
37458,0.57,2014,0.103,['Taylor Swift'],0.76,231827,0.703,0,2sC2P3BN0IXujNaaSyDmtP,0.0,...,1,Blank Space,58,2014-10-27,0.054,95.997,[Taylor Swift],[],[Taylor Swift],Taylor SwiftBlank Space
73916,0.57,2014,0.103,['Taylor Swift'],0.76,231827,0.703,0,1kHEuJRasudLhjvnbfc4yS,0.0,...,1,Blank Space,59,2014-10-27,0.054,95.997,[Taylor Swift],[],[Taylor Swift],Taylor SwiftBlank Space
18602,0.233,2014,0.00302,['I Prevail'],0.437,240928,0.863,0,2ZiJidFdQ30nVJEP4u44l3,0.0,...,0,Blank Space,65,2014-12-16,0.0941,194.073,[I Prevail],[],[I Prevail],I PrevailBlank Space
169339,0.233,2014,0.00302,['I Prevail'],0.437,240928,0.863,0,076jgZgkx4YiJ4q0dN3Xsl,0.0,...,0,Blank Space,48,2014-12-16,0.0941,194.073,[I Prevail],[],[I Prevail],I PrevailBlank Space


In [20]:
spotify_df.drop_duplicates('artists_song',inplace = True)

In [21]:
spotify_df[spotify_df['name']=='Blank Space']


Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,...,mode,name,popularity,release_date,speechiness,tempo,artists_upd_v1,artists_upd_v2,artists_upd,artists_song
18424,0.583,2014,0.085,['Taylor Swift'],0.752,231827,0.678,0,1p80LdxRV74UKvL8gnD7ky,2e-06,...,1,Blank Space,71,2014-10-27,0.0646,96.009,[Taylor Swift],[],[Taylor Swift],Taylor SwiftBlank Space
18602,0.233,2014,0.00302,['I Prevail'],0.437,240928,0.863,0,2ZiJidFdQ30nVJEP4u44l3,0.0,...,0,Blank Space,65,2014-12-16,0.0941,194.073,[I Prevail],[],[I Prevail],I PrevailBlank Space


In [22]:
artists_exploded = spotify_df[['artists_upd','id']].explode('artists_upd')

In [23]:
artists_exploded_enriched = artists_exploded.merge(data_w_genre, how = 'left', left_on = 'artists_upd',right_on = 'artists')
artists_exploded_enriched_nonnull = artists_exploded_enriched[~artists_exploded_enriched.genres_upd.isnull()]

In [24]:
artists_exploded_enriched_nonnull[artists_exploded_enriched_nonnull['id'] =='2ZiJidFdQ30nVJEP4u44l3']

Unnamed: 0,artists_upd,id,genres,artists,acousticness,danceability,duration_ms,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,key,mode,count,genres_upd
124349,I Prevail,2ZiJidFdQ30nVJEP4u44l3,['nu-metalcore'],I Prevail,0.080242,0.482333,211502.416667,0.844306,0.002255,0.309964,-5.442139,0.078786,144.24425,0.329742,58.944444,11.0,0.0,36.0,[nu-metalcore]


In [25]:
artists_genres_consolidated = artists_exploded_enriched_nonnull.groupby('id')['genres_upd'].apply(list).reset_index()

In [26]:
artists_genres_consolidated['consolidates_genre_lists'] = artists_genres_consolidated['genres_upd'].apply(lambda x: list(set(list(itertools.chain.from_iterable(x)))))

In [27]:
artists_genres_consolidated.head()

Unnamed: 0,id,genres_upd,consolidates_genre_lists
0,000G1xMMuwxNHmwVsBdtj1,"[[candy_pop, dance_rock, new_wave, new_wave_po...","[new_wave, power_pop, new_wave_pop, dance_rock..."
1,000GyYHG4uWmlXieKLij8u,"[[alternative_hip_hop, conscious_hip_hop, minn...","[conscious_hip_hop, pop_rap, alternative_hip_h..."
2,000Npgk5e2SgwGaIsN3ztv,"[[classic_bollywood, classic_pakistani_pop, fi...","[sufi, classic_bollywood, classic_pakistani_po..."
3,000ZxLGm7jDlWCHtcXSeBe,"[[boogie-woogie, piano_blues, ragtime, stride]]","[ragtime, boogie-woogie, piano_blues, stride]"
4,000jBcNljWTnyjB4YO7ojf,[[]],[]


In [28]:
spotify_df = spotify_df.merge(artists_genres_consolidated[['id','consolidates_genre_lists']], on = 'id',how = 'left')

##**FEATURE ENGINEERING**



*  *Normalizing Float Variables*
*   *One Hot Encoding Variables - Year & Popularity*
*   *Creating TF-IDF features off of artist genres*






In [29]:
spotify_df.tail()

Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,...,name,popularity,release_date,speechiness,tempo,artists_upd_v1,artists_upd_v2,artists_upd,artists_song,consolidates_genre_lists
156602,0.768,1997,0.282,"[""Lil' Kim"", ""Lil' Cease""]",0.748,275947,0.693,0,2LP2uDQQ7eLMcUVE4aOpAV,0.0,...,Crush on You (feat. Lil' Cease) - Remix,56,1997-06-30,0.278,88.802,"[ Kim"", ""Lil]","[Lil' Kim, Lil' Cease]","[ Kim"", ""Lil]","Kim"", ""LilCrush on You (feat. Lil' Cease) - R...",
156603,0.792,2004,0.0248,"[""Lil' Flip"", 'Lea']",0.814,225173,0.387,1,4s0o8TJHfX9LLHa0umnOzT,0.0,...,Sunshine (feat. Lea),62,2004-03-30,0.0945,93.961,"[ Flip"", ]",[Lil' Flip],"[ Flip"", ]","Flip"", Sunshine (feat. Lea)",
156604,0.697,1999,0.0516,"[""Ol' Dirty Bastard"", 'Kelis', 'Rich Travali']",0.934,239547,0.459,1,6YYd5MLpu45J0uLrMdivF7,0.0,...,Got Your Money (feat. Kelis),66,1999,0.189,103.04,"[ Dirty Bastard"", , , ]",[Ol' Dirty Bastard],"[ Dirty Bastard"", , , ]","Dirty Bastard"", Got Your Money (feat. Kelis)",
156605,0.429,1994,0.0249,"[""World Class Wreckin' Cru"", ""Michel 'Le""]",0.715,351040,0.49,0,3hoiinUc5VA9xUEJID7R8V,0.00017,...,Turn Off The Lights - Rap,36,1994-04-06,0.0479,129.309,"[ Cru"", ""Michel ]","[World Class Wreckin' Cru, Michel 'Le]","[ Cru"", ""Michel ]","Cru"", ""Michel Turn Off The Lights - Rap",
156606,0.273,1996,0.0113,"[""Rappin' 4-Tay"", 'MC Breed', 'Too $hort']",0.897,337973,0.414,1,78859Af0fmA9VTlgnOHTAP,0.00011,...,Never Talk Down,35,1996,0.246,96.039,"[ 4-Tay"", , , ]",[Rappin' 4-Tay],"[ 4-Tay"", , , ]","4-Tay"", Never Talk Down",


In [30]:

spotify_df['year'] = spotify_df['release_date'].apply(lambda x: x.split('-')[0])

In [31]:
float_cols = spotify_df.dtypes[spotify_df.dtypes == 'float64'].index.values

In [32]:
ohe_cols = 'popularity'

In [34]:
spotify_df['popularity'].describe()

count    156607.000000
mean         31.307215
std          21.712234
min           0.000000
25%          11.000000
50%          33.000000
75%          48.000000
max         100.000000
Name: popularity, dtype: float64

In [33]:
#creating 5 point buckets for popularity
spotify_df['popularity_red'] = spotify_df['popularity'].apply(lambda x: int(x/5))

In [34]:
spotify_df['consolidates_genre_lists'] = spotify_df['consolidates_genre_lists'].apply(lambda d: d if isinstance(d, list) else [])

In [37]:
spotify_df.head()

Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,...,popularity,release_date,speechiness,tempo,artists_upd_v1,artists_upd_v2,artists_upd,artists_song,consolidates_genre_lists,popularity_red
0,0.177,1989,0.568,['조정현'],0.447,237688,0.215,0,2ghebdwe2pNXT4eL34T7pW,1e-06,...,31,1989-06-15,0.0272,71.979,[조정현],[],[조정현],조정현그아픔까지사랑한거야,[classic_korean_pop],6
1,0.352,1992,0.381,['黑豹'],0.353,316160,0.686,0,3KIuCzckjdeeVuswPo20mC,0.0,...,35,1992-12-22,0.0395,200.341,[黑豹],[],[黑豹],黑豹DON'T BREAK MY HEART,"[chinese_indie_rock, chinese_indie]",7
2,0.458,1963,0.987,['黃國隆'],0.241,193480,0.0437,0,4prhqrLXYMjHJ6vpRAlasx,0.000453,...,23,1963-05-28,0.0443,85.936,[黃國隆],[],[黃國隆],黃國隆藝旦調,[],4
3,0.796,1963,0.852,"['黃國隆', '王秋玉']",0.711,145720,0.111,0,5xFXTvnEe03SyvFpo6pEaE,0.0,...,23,1963-05-28,0.0697,124.273,"[黃國隆, 王秋玉]",[],"[黃國隆, 王秋玉]",黃國隆草螟弄雞公,[],4
4,0.704,1963,0.771,['黃國隆'],0.61,208760,0.175,0,6Pqs2suXEqCGx7Lxg5dlrB,0.0,...,23,1963-05-28,0.0419,124.662,[黃國隆],[],[黃國隆],黃國隆思想起,[],4


In [35]:
def ohe_prep(df, column, new_name):
    """
    Create One Hot Encoded features of a specific column

    Parameters:
        df (pandas dataframe): Spotify Dataframe
        column (str): Column to be processed
        new_name (str): new column name to be used

    Returns:
        tf_df: One hot encoded features
    """

    tf_df = pd.get_dummies(df[column])
    feature_names = tf_df.columns
    tf_df.columns = [new_name + "|" + str(i) for i in feature_names]
    tf_df.reset_index(drop = True, inplace = True)
    return tf_df

# function to build entire feature set
def create_feature_set(df, float_cols):
    """
    Process spotify df to create a final set of features that will be used to generate recommendations

    Parameters:
        df (pandas dataframe): Spotify Dataframe
        float_cols (list(str)): List of float columns that will be scaled

    Returns:
        final: final set of features
    """

    # tfidf genre lists
    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform(df['consolidates_genre_lists'].apply(lambda x: " ".join(x)))
    genre_df = pd.DataFrame(tfidf_matrix.toarray())
    genre_df.columns = ['genre' + "|" + i for i in tfidf.get_feature_names_out()]
    genre_df.reset_index(drop = True, inplace=True)

    # explicity_ohe = ohe_prep(df, 'explicit','exp')
    year_ohe = ohe_prep(df, 'year', 'year') * 0.5
    popularity_ohe = ohe_prep(df, 'popularity_red', 'pop') * 0.15

    # scale float columns
    floats = df[float_cols].reset_index(drop = True)
    scaler = MinMaxScaler()
    floats_scaled = pd.DataFrame(scaler.fit_transform(floats), columns=floats.columns) * 0.2

    # concatenate all features
    final = pd.concat([genre_df, floats_scaled, popularity_ohe, year_ohe], axis=1)
    # add song id
    final['id'] = df['id'].values

    return final

complete_feature_set = create_feature_set(spotify_df, float_cols=float_cols)

In [39]:
complete_feature_set.head()

Unnamed: 0,genre|21st_century_classical,genre|432hz,genre|_hip_hop,genre|a_cappella,genre|abstract,genre|abstract_beats,genre|abstract_hip_hop,genre|accordeon,genre|accordion,genre|acid_house,...,year|2012,year|2013,year|2014,year|2015,year|2016,year|2017,year|2018,year|2019,year|2020,id
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2ghebdwe2pNXT4eL34T7pW
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3KIuCzckjdeeVuswPo20mC
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4prhqrLXYMjHJ6vpRAlasx
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5xFXTvnEe03SyvFpo6pEaE
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6Pqs2suXEqCGx7Lxg5dlrB


##**Connecting to Spotify API**

In [36]:
import dotenv
from dotenv import load_dotenv
import os
load_dotenv()

False

In [37]:
os.environ['SPOTIPY_CLIENT_ID'] = '38f6468c27f7440c9d49685b59215c92'
os.environ['SPOTIPY_CLIENT_SECRET'] = '90e613bba6a64c11bf2ea057a4345a70'
os.environ['SPOTIPY_REDIRECT_URI'] = 'http://localhost:8888/callback'

# Initialize SpotifyOAuth object
sp_oauth = SpotifyOAuth(scope='user-library-read playlist-read-private')

# Get the authorization URL
auth_url = sp_oauth.get_authorize_url()
print(auth_url)


https://accounts.spotify.com/authorize?client_id=38f6468c27f7440c9d49685b59215c92&response_type=code&redirect_uri=http%3A%2F%2Flocalhost%3A8888%2Fcallback&scope=user-library-read+playlist-read-private


In [38]:
import os
from spotipy import Spotify
from spotipy.oauth2 import SpotifyOAuth

In [40]:
# Paste the authorization code here
auth_code = 'AQBwoLfk4s8BszUFW_mhJEpOVW6Wz0wuD1ZvfBp82vz95eCO_GmtIiUaZCe1cAT-In6ELeAbgxbm2LZMZRrzJWR1feFNZOibgQR0UscVcpfccIUpbGNXjImbUS3jKDtLRPytDSqQgDlrh1b-hJe4qK0SMFhMlOqltXebxLNj1GsEuQwPj3Y_UzKTMo5HACi_RRjc3v1_jVEcyt96V8KSF32fLKWVId1FpZCZng'

# Get the access token
token_info = sp_oauth.get_access_token(auth_code)
token = token_info['access_token']
sp = Spotify(auth=token)


In [41]:
# Fetch user playlists
playlists = sp.current_user_playlists()

# Extract playlist names and images
id_name = {}
list_photo = {}

for i in playlists['items']:
    playlist_id = i['uri'].split(':')[2]
    id_name[i['name']] = playlist_id
    if i['images']:
        list_photo[playlist_id] = i['images'][0]['url']
    else:
        list_photo[playlist_id] = None

print("Playlist Names and IDs:", id_name)
print("Playlist Photos:", list_photo)


Playlist Names and IDs: {'My recommendation playlist': '731DxGAOLjDyUVrnatDKNH', 'This Is Taylor Swift': '37i9dQZF1DX5KpP2LN299J', 'All Out 10s Telugu': '37i9dQZF1DX0alDVD4MY0X', 'Pov: yeto vellipoindi manasu ': '37i9dQZF1DWYDZPxJIRDxJ', 'Pop Mix': '37i9dQZF1EQncLwOalG3K7', 'WhyBhanshu Music': '036SD1KdRpgyQ5hHcf7Hxv', 'twilight vibes': '37i9dQZF1DX7rFF5HEchjS', "Twilight 'Forever' Love Songs From the Twilight Saga": '0xxcXwehRYVQ4PjjrU5Z9w', 'Your Top Songs 2022': '37i9dQZF1F0sijgNaJdgit', '2010s Mix': '37i9dQZF1EQqedj0y9Uwvu', "All Out 00's Hindi": '37i9dQZF1DWZNJXX2UeBij', "00's Love Hits": '37i9dQZF1DWVq1SXCH6uFn', 'Mix Pritam': '37i9dQZF1EIWQ7iXYvVC5w', 'This Is Mohit Chauhan': '37i9dQZF1DZ06evO3lynVo', 'TELUGU BEST MELODY \U0001faf6': '3p2tAkOvJqOPYjttRxEQD5', 'Telugu songs': '42OEmH47P5WzXoZ4MvbNtJ'}
Playlist Photos: {'731DxGAOLjDyUVrnatDKNH': 'https://mosaic.scdn.co/640/ab67616d00001e020cc8944a7cc2700e43cfbdd0ab67616d00001e0244781fed7555fc6764c3ee26ab67616d00001e027653c310bcee9

In [42]:
id_name

{'My recommendation playlist': '731DxGAOLjDyUVrnatDKNH',
 'This Is Taylor Swift': '37i9dQZF1DX5KpP2LN299J',
 'All Out 10s Telugu': '37i9dQZF1DX0alDVD4MY0X',
 'Pov: yeto vellipoindi manasu ': '37i9dQZF1DWYDZPxJIRDxJ',
 'Pop Mix': '37i9dQZF1EQncLwOalG3K7',
 'WhyBhanshu Music': '036SD1KdRpgyQ5hHcf7Hxv',
 'twilight vibes': '37i9dQZF1DX7rFF5HEchjS',
 "Twilight 'Forever' Love Songs From the Twilight Saga": '0xxcXwehRYVQ4PjjrU5Z9w',
 'Your Top Songs 2022': '37i9dQZF1F0sijgNaJdgit',
 '2010s Mix': '37i9dQZF1EQqedj0y9Uwvu',
 "All Out 00's Hindi": '37i9dQZF1DWZNJXX2UeBij',
 "00's Love Hits": '37i9dQZF1DWVq1SXCH6uFn',
 'Mix Pritam': '37i9dQZF1EIWQ7iXYvVC5w',
 'This Is Mohit Chauhan': '37i9dQZF1DZ06evO3lynVo',
 'TELUGU BEST MELODY \U0001faf6': '3p2tAkOvJqOPYjttRxEQD5',
 'Telugu songs': '42OEmH47P5WzXoZ4MvbNtJ'}

In [43]:
def create_necessary_outputs(playlist_name,id_dic, df):
    """
    Pull songs from a specific playlist.

    Parameters:
        playlist_name (str): name of the playlist you'd like to pull from the spotify API
        id_dic (dic): dictionary that maps playlist_name to playlist_id
        df (pandas dataframe): spotify datafram

    Returns:
        playlist: all songs in the playlist THAT ARE AVAILABLE IN THE KAGGLE DATASET
    """

    #generate playlist dataframe
    playlist = pd.DataFrame()
    playlist_name = playlist_name

    for ix, i in enumerate(sp.playlist(id_dic[playlist_name])['tracks']['items']):
        #print(i['track']['artists'][0]['name'])
        playlist.loc[ix, 'artist'] = i['track']['artists'][0]['name']
        playlist.loc[ix, 'name'] = i['track']['name']
        playlist.loc[ix, 'id'] = i['track']['id'] # ['uri'].split(':')[2]
        playlist.loc[ix, 'url'] = i['track']['album']['images'][1]['url']
        playlist.loc[ix, 'date_added'] = i['added_at']

    playlist['date_added'] = pd.to_datetime(playlist['date_added'])

    playlist = playlist[playlist['id'].isin(df['id'].values)].sort_values('date_added',ascending = False)

    return playlist

In [44]:
id_name

{'My recommendation playlist': '731DxGAOLjDyUVrnatDKNH',
 'This Is Taylor Swift': '37i9dQZF1DX5KpP2LN299J',
 'All Out 10s Telugu': '37i9dQZF1DX0alDVD4MY0X',
 'Pov: yeto vellipoindi manasu ': '37i9dQZF1DWYDZPxJIRDxJ',
 'Pop Mix': '37i9dQZF1EQncLwOalG3K7',
 'WhyBhanshu Music': '036SD1KdRpgyQ5hHcf7Hxv',
 'twilight vibes': '37i9dQZF1DX7rFF5HEchjS',
 "Twilight 'Forever' Love Songs From the Twilight Saga": '0xxcXwehRYVQ4PjjrU5Z9w',
 'Your Top Songs 2022': '37i9dQZF1F0sijgNaJdgit',
 '2010s Mix': '37i9dQZF1EQqedj0y9Uwvu',
 "All Out 00's Hindi": '37i9dQZF1DWZNJXX2UeBij',
 "00's Love Hits": '37i9dQZF1DWVq1SXCH6uFn',
 'Mix Pritam': '37i9dQZF1EIWQ7iXYvVC5w',
 'This Is Mohit Chauhan': '37i9dQZF1DZ06evO3lynVo',
 'TELUGU BEST MELODY \U0001faf6': '3p2tAkOvJqOPYjttRxEQD5',
 'Telugu songs': '42OEmH47P5WzXoZ4MvbNtJ'}

In [45]:
playlist_Mine = create_necessary_outputs('Your Top Songs 2022', id_name,spotify_df)
#playlist_chill = create_necessary_outputs('chill',id_name, spotify_df)
#playlist_classical = create_necessary_outputs('Epic Classical',id_name, spotify_df)

In [46]:
from skimage import io
import matplotlib.pyplot as plt

def visualize_songs(df):
    """
    Visualize cover art of the songs in the inputted dataframe

    Parameters:
        df (pandas dataframe): Playlist Dataframe
    """

    temp = df['url'].values
    plt.figure(figsize=(15,int(0.625 * len(temp))))
    columns = 5

    for i, url in enumerate(temp):
        plt.subplot(len(temp) / columns + 1, columns, i + 1)

        image = io.imread(url)
        plt.imshow(image)
        plt.xticks(color = 'w', fontsize = 0.1)
        plt.yticks(color = 'w', fontsize = 0.1)
        plt.xlabel(df['name'].values[i], fontsize = 12)
        plt.tight_layout(h_pad=0.4, w_pad=0)
        plt.subplots_adjust(wspace=None, hspace=None)

    plt.show()

In [47]:
playlist_Mine

Unnamed: 0,artist,name,id,url,date_added
28,Taylor Swift,Delicate,6NFyWDv5CjfwuzoCkw47Xf,https://i.scdn.co/image/ab67616d00001e02da5d5a...,1970-01-01 00:00:00+00:00
32,Sabrina Carpenter,"Let Me Move You - From the Netflix film ""Work It""",0roOLcll6SSTYZwsQFmXqP,https://i.scdn.co/image/ab67616d00001e026c9d6c...,1970-01-01 00:00:00+00:00
41,Sukhwinder Singh,Chaiyya Chaiyya,5H4rKylLnO8KrmdXTRhj5s,https://i.scdn.co/image/ab67616d00001e024e2aa9...,1970-01-01 00:00:00+00:00
44,Sachin-Jigar,Saibo,6udC4b4jOSnHb9ItnXgKLR,https://i.scdn.co/image/ab67616d00001e021aac20...,1970-01-01 00:00:00+00:00
61,Rashid Ali,Kabhi Kabhi Aditi,3APdIdF8H0jsxSuGOqXedS,https://i.scdn.co/image/ab67616d00001e02abf918...,1970-01-01 00:00:00+00:00
68,Taylor Swift,august,3hUxzQpSfdDqwM3ZTFQY0K,https://i.scdn.co/image/ab67616d00001e0295f754...,1970-01-01 00:00:00+00:00
77,Ruth B.,Dandelions,2eAvDnpXP5W0cVtiI0PUxV,https://i.scdn.co/image/ab67616d00001e0297e971...,1970-01-01 00:00:00+00:00
78,Ed Sheeran,Shape of You,7qiZfU4dY1lWllzX7mPBI3,https://i.scdn.co/image/ab67616d00001e02ba5db4...,1970-01-01 00:00:00+00:00
80,Taylor Swift,Dancing With Our Hands Tied,7I7JbDv63ZJJsSi24DyJrz,https://i.scdn.co/image/ab67616d00001e02da5d5a...,1970-01-01 00:00:00+00:00
85,Taylor Swift,...Ready For It?,2yLa0QULdQr0qAIvVwN6B5,https://i.scdn.co/image/ab67616d00001e02da5d5a...,1970-01-01 00:00:00+00:00


##**CREATING PLAYLIST VECTOR**

In [48]:
def generate_playlist_feature(complete_feature_set, playlist_df, weight_factor):
    """
    Summarize a user's playlist into a single vector

    Parameters:
        complete_feature_set (pandas dataframe): Dataframe which includes all of the features for the spotify songs
        playlist_df (pandas dataframe): playlist dataframe
        weight_factor (float): float value that represents the recency bias. The larger the recency bias, the most priority recent songs get. Value should be close to 1.

    Returns:
        playlist_feature_set_weighted_final (pandas series): single feature that summarizes the playlist
        complete_feature_set_nonplaylist (pandas dataframe):
    """

    complete_feature_set_playlist = complete_feature_set[complete_feature_set['id'].isin(playlist_df['id'].values)]#.drop('id', axis = 1).mean(axis =0)
    complete_feature_set_playlist = complete_feature_set_playlist.merge(playlist_df[['id','date_added']], on = 'id', how = 'inner')
    complete_feature_set_nonplaylist = complete_feature_set[~complete_feature_set['id'].isin(playlist_df['id'].values)]#.drop('id', axis = 1)

    playlist_feature_set = complete_feature_set_playlist.sort_values('date_added',ascending=False)

    most_recent_date = playlist_feature_set.iloc[0,-1]

    for ix, row in playlist_feature_set.iterrows():
        playlist_feature_set.loc[ix,'months_from_recent'] = int((most_recent_date.to_pydatetime() - row.iloc[-1].to_pydatetime()).days / 30)

    playlist_feature_set['weight'] = playlist_feature_set['months_from_recent'].apply(lambda x: weight_factor ** (-x))

    playlist_feature_set_weighted = playlist_feature_set.copy()
    #print(playlist_feature_set_weighted.iloc[:,:-4].columns)
    playlist_feature_set_weighted.update(playlist_feature_set_weighted.iloc[:,:-4].mul(playlist_feature_set_weighted.weight,0))
    playlist_feature_set_weighted_final = playlist_feature_set_weighted.iloc[:, :-4]
    #playlist_feature_set_weighted_final['id'] = playlist_feature_set['id']

    return playlist_feature_set_weighted_final.sum(axis = 0), complete_feature_set_nonplaylist

In [49]:
complete_feature_set_playlist_vector_Mine, complete_feature_set_nonplaylist_Mine = generate_playlist_feature(complete_feature_set, playlist_Mine, 1.09)
#complete_feature_set_playlist_vector_chill, complete_feature_set_nonplaylist_chill = generate_playlist_feature(complete_feature_set, playlist_chill, 1.09)

In [50]:
complete_feature_set_playlist_vector_Mine.shape

(3070,)

##**GENERATING RECOMMENDATIONS**

In [51]:
def generate_playlist_recos(df, features, nonplaylist_features):
    """
    Pull songs from a specific playlist.

    Parameters:
        df (pandas dataframe): spotify dataframe
        features (pandas series): summarized playlist feature
        nonplaylist_features (pandas dataframe): feature set of songs that are not in the selected playlist

    Returns:
        non_playlist_df_top_40: Top 40 recommendations for that playlist
    """

    non_playlist_df = df[df['id'].isin(nonplaylist_features['id'].values)]
    non_playlist_df['sim'] = cosine_similarity(nonplaylist_features.drop('id', axis = 1).values, features.values.reshape(1, -1))[:,0]
    non_playlist_df_top_40 = non_playlist_df.sort_values('sim',ascending = False).head(40)
    non_playlist_df_top_40['url'] = non_playlist_df_top_40['id'].apply(lambda x: sp.track(x)['album']['images'][1]['url'])

    return non_playlist_df_top_40

In [1]:
Mine_top40 = generate_playlist_recos(spotify_df, complete_feature_set_playlist_vector_Mine, complete_feature_set_nonplaylist_Mine)

NameError: name 'generate_playlist_recos' is not defined

In [None]:
visualize_songs(Mine_top40)