In [1]:
import pandas as pd
import numpy as np
import json
import re 
import sys
import itertools

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt


import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
import spotipy.util as util

import warnings
warnings.filterwarnings("ignore")

## 3. Connect to Spotify API

Useful links:
1. https://developer.spotify.com/dashboard/
2. https://spotipy.readthedocs.io/en/2.16.1/

In [2]:
# spotify_df = pd.read_feather('spotify_df.feather')
# complete_feature_set = pd.read_feather('complete_feature_set.feather')

In [3]:
# Load the two tables the rest of the notebook works on:
#   spotify_df           - one row per song (id, name, artists, audio attributes, ...)
#   complete_feature_set - per-song feature table keyed by 'id', used for similarity scoring
spotify_df = pd.read_feather('spotify_df.feather')
complete_feature_set = pd.read_feather('cfs_final.feather')

In [4]:
# Spot-check: look up a single song by its Spotify track id.
spotify_df[spotify_df['id']=='0TK2YIli7K1leLovkQiNik']

Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,...,name,popularity,release_date,speechiness,tempo,artists_upd_v1,artists_upd_v2,artists_upd,artists_song,popularity_red
38650,0.75,2019,0.037,"['Shawn Mendes', 'Camila Cabello']",0.759,190960,0.54,0,0TK2YIli7K1leLovkQiNik,0.0,...,Señorita,85,2019-06-21,0.0287,116.947,"[Shawn Mendes, Camila Cabello]",[],"[Shawn Mendes, Camila Cabello]",Shawn MendesSeñorita,17


In [15]:
# Spotify track ids that make up the seed playlist. Every id is 22 characters long;
# the last two entries were previously fused into a single 44-character string
# because the separating `", "` was missing.
id_list = [
    "7qiZfU4dY1lWllzX7mPBI3",
    "1i1fxkWeaMmKEB4T7zqbzK",
    "0e7ipj03S05BNilyu5bRzt",
    "0VjIjW4GlUZAMYd2vXMi3b",
    "2Fxmhks0bxGSBdJ92vM42m",
    "0TK2YIli7K1leLovkQiNik",
    "3KkXRkHbMCARz0aVfEt68P",
    "1rfofaqEpACxVEHIZBJe6W",
    "0pqnGHJpmpxLKifKRmU6WP",
    "0tgVpDi06FyKpA1z0VMD4v",
]

# Placeholder values given to every playlist row built from `id_list`.
default_date = pd.to_datetime('2021-04-27 08:09:52+00:00')
default_url = 'https://i.scdn.co/image/ab67616d00001e025675e83f707f1d7271e5cf8a'
# Misspelled legacy name, kept because createPlaylist reads it.
defailt_url = default_url

In [16]:
def createPlaylist(id_list):
    """
    Build a playlist dataframe from a list of Spotify track ids.

    Each id is looked up in the module-level `spotify_df`; the first entry of its
    'artists_upd_v1' value and its 'name' are copied into the playlist. Every row
    gets the module-level placeholders `defailt_url` and `default_date`.

    Parameters:
        id_list (iterable of str): Spotify track ids, in the desired playlist order

    Returns:
        pandas dataframe with columns 'artist', 'name', 'id', 'url', 'date_added',
        one row per entry of `id_list` (empty, with the same columns, for an empty list)

    Raises:
        IndexError: if an id is not present in `spotify_df`
    """
    columns = ['artist', 'name', 'id', 'url', 'date_added']

    # One filtered lookup table instead of two full scans of spotify_df per id.
    # Only the first row per id is kept, which is the row the per-id lookup used.
    wanted = (
        spotify_df[spotify_df['id'].isin(list(id_list))]
        .drop_duplicates(subset='id')
        .set_index('id')
    )

    # Collect plain records and build the frame once: DataFrame.append was removed
    # in pandas 2.0 and copied the whole frame on every call.
    records = []
    for track_id in id_list:
        if track_id not in wanted.index:
            raise IndexError(f"track id {track_id!r} not found in spotify_df")
        track = wanted.loc[track_id]
        records.append({
            'artist': track['artists_upd_v1'][0],
            'name': track['name'],
            'id': track_id,
            'url': defailt_url,
            'date_added': default_date,
        })

    return pd.DataFrame(records, columns=columns)

In [17]:
# Keep the playlist instead of discarding it: the playlist-vector cell further down
# reads `playlist_eve`, which no other visible cell defines on a fresh kernel.
playlist_eve = createPlaylist(id_list)
playlist_eve

Unnamed: 0,artist,name,id,url,date_added
0,Ed Sheeran,Shape of You,7qiZfU4dY1lWllzX7mPBI3,https://i.scdn.co/image/ab67616d00001e025675e8...,2021-04-27 08:09:52+00:00
1,The Chainsmokers,Don't Let Me Down,1i1fxkWeaMmKEB4T7zqbzK,https://i.scdn.co/image/ab67616d00001e025675e8...,2021-04-27 08:09:52+00:00
2,Post Malone,rockstar,0e7ipj03S05BNilyu5bRzt,https://i.scdn.co/image/ab67616d00001e025675e8...,2021-04-27 08:09:52+00:00
3,The Weeknd,Blinding Lights,0VjIjW4GlUZAMYd2vXMi3b,https://i.scdn.co/image/ab67616d00001e025675e8...,2021-04-27 08:09:52+00:00


In [7]:
# Best-effort debug dump: print every value of every playlist row (index first).
# Still never fails the notebook, but no longer swallows KeyboardInterrupt/SystemExit
# and reports why nothing was printed (e.g. `playlist_eve1` is not defined).
try:
    for row in playlist_eve1.itertuples():
        for el in row:
            print(el)
except Exception as exc:
    print(f"Skipping playlist dump: {exc!r}")

In [8]:
from skimage import io
import matplotlib.pyplot as plt

def visualize_songs(df):
    """ 
    Visualize cover art of the songs in the inputted dataframe

    Images are downloaded from the 'url' column and drawn on a grid that is five
    covers wide, each labelled with the matching 'name' value. Nothing is drawn
    for an empty dataframe.

    Parameters: 
        df (pandas dataframe): Playlist Dataframe with 'url' and 'name' columns
    """

    urls = df['url'].values
    names = df['name'].values
    if len(urls) == 0:
        return

    columns = 5
    # subplot() needs integer grid dimensions; `len / columns + 1` produced a float,
    # which current matplotlib rejects.
    rows = len(urls) // columns + 1
    # Guard against a zero-height figure for very short playlists.
    plt.figure(figsize=(15, max(1, int(0.625 * len(urls)))))

    for i, url in enumerate(urls):
        plt.subplot(rows, columns, i + 1)

        image = io.imread(url)
        plt.imshow(image)
        # Ticks are hidden by drawing them white and tiny, so only the cover shows.
        plt.xticks(color = 'w', fontsize = 0.1)
        plt.yticks(color = 'w', fontsize = 0.1)
        plt.xlabel(names[i], fontsize = 12)

    # Lay the grid out once, after all axes exist, instead of on every iteration.
    plt.tight_layout(h_pad=0.4, w_pad=0)
    plt.subplots_adjust(wspace=None, hspace=None)

    plt.show()

In [9]:
# playlist_eve

## 4. Create Playlist Vector

In [10]:
def generate_playlist_feature(complete_feature_set, playlist_df, weight_factor):
    """ 
    Summarize a user's playlist into a single vector

    Every playlist song found in `complete_feature_set` is weighted by
    `weight_factor ** (-months)`, where `months` is the number of whole 30-day
    periods between the song's 'date_added' and the most recent 'date_added' in
    the playlist. The weighted feature rows are then summed.

    Parameters: 
        complete_feature_set (pandas dataframe): Dataframe which includes all of the features for the spotify songs, plus an 'id' column
        playlist_df (pandas dataframe): playlist dataframe with 'id' and 'date_added' columns
        weight_factor (float): float value that represents the recency bias. The larger the recency bias, the more priority recent songs get. Value should be close to 1. 

    Returns: 
        playlist_feature_set_weighted_final (pandas series): single vector that summarizes the playlist, indexed by every column of complete_feature_set except 'id'
        complete_feature_set_nonplaylist (pandas dataframe): rows of complete_feature_set whose 'id' is not in the playlist

    Raises:
        IndexError: if none of the playlist ids are present in complete_feature_set
    """

    in_playlist = complete_feature_set['id'].isin(playlist_df['id'].values)
    complete_feature_set_nonplaylist = complete_feature_set[~in_playlist]

    playlist_feature_set = complete_feature_set[in_playlist].merge(
        playlist_df[['id', 'date_added']], on='id', how='inner'
    )
    if playlist_feature_set.empty:
        raise IndexError("none of the playlist ids are present in complete_feature_set")

    # Pick the feature columns by name. Slicing off "the last four columns" only
    # worked when 'id' happened to be the last column of complete_feature_set.
    feature_columns = [col for col in complete_feature_set.columns if col != 'id']

    # Vectorised replacement of the per-row loop: whole 30-day periods since the
    # most recently added song (0 for that song itself).
    date_added = pd.to_datetime(playlist_feature_set['date_added'], utc=True)
    months_from_recent = (date_added.max() - date_added).dt.days // 30

    # Cast to float so an integer weight_factor can be raised to a negative power.
    weights = weight_factor ** (-months_from_recent.astype(float))

    playlist_feature_set_weighted_final = playlist_feature_set[feature_columns].mul(weights, axis=0)

    return playlist_feature_set_weighted_final.sum(axis = 0), complete_feature_set_nonplaylist

In [11]:
# Summarise the playlist into one weighted feature vector (weight factor 1.09) and
# keep the feature rows of all songs that are not in the playlist as candidates.
# NOTE(review): `playlist_eve` must already exist when this cell runs.
complete_feature_set_playlist_vector_EDM, complete_feature_set_nonplaylist_EDM = generate_playlist_feature(complete_feature_set, playlist_eve, 1.09)

In [12]:
# complete_feature_set_playlist_vector_EDM.shape

## 5. Generate Recommendations

In [13]:
def generate_playlist_recos(df, features, nonplaylist_features):
    """ 
    Recommend the songs that are most similar to a playlist vector.

    Each candidate row of `nonplaylist_features` is scored by cosine similarity
    against `features`; the 40 best-scoring songs are returned with their
    similarity and a cover-art URL fetched from the Spotify Web API.

    Spotify credentials are no longer embedded in the notebook. They are read by
    spotipy from the SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET environment
    variables, and spotipy raises if they are missing.
    NOTE(review): the client secret that used to be hard-coded here should be
    treated as leaked and rotated in the Spotify developer dashboard.

    Parameters: 
        df (pandas dataframe): spotify dataframe
        features (pandas series): summarized playlist feature
        nonplaylist_features (pandas dataframe): feature set of songs that are not in the selected playlist
        
    Returns: 
        non_playlist_df_top_40: Top 40 recommendations for that playlist, with added 'sim' and 'url' columns
    """
    auth_manager = SpotifyClientCredentials()
    sp = spotipy.Spotify(auth_manager=auth_manager)

    similarity = cosine_similarity(
        nonplaylist_features.drop('id', axis = 1).values,
        features.values.reshape(1, -1),
    )[:,0]

    # Key the scores by track id so they are attached to the right song even when
    # `df` and `nonplaylist_features` are ordered differently or differ in length.
    sim_by_id = pd.Series(similarity, index=nonplaylist_features['id'].values)
    sim_by_id = sim_by_id[~sim_by_id.index.duplicated(keep='first')]

    # Explicit copies: the new columns are written to these frames, not to views of `df`.
    non_playlist_df = df[df['id'].isin(sim_by_id.index)].copy()
    non_playlist_df['sim'] = non_playlist_df['id'].map(sim_by_id)
    non_playlist_df_top_40 = non_playlist_df.sort_values('sim',ascending = False).head(40).copy()

    def _cover_url(track_id):
        # Use the second album image as before; fall back to the only image,
        # or None, when the album has fewer than two.
        images = sp.track(track_id)['album']['images']
        if not images:
            return None
        return images[min(1, len(images) - 1)]['url']

    non_playlist_df_top_40['url'] = non_playlist_df_top_40['id'].apply(_cover_url)

    return non_playlist_df_top_40

In [14]:
# Score every non-playlist song against the playlist vector and keep the 40 best.
# This calls the Spotify Web API once per returned song to fetch its cover URL.
edm_top40 = generate_playlist_recos(spotify_df, complete_feature_set_playlist_vector_EDM, complete_feature_set_nonplaylist_EDM)

In [15]:
# Inspect the recommendation frame's columns; 'sim' and 'url' are added by generate_playlist_recos.
edm_top40.columns

Index(['index', 'valence', 'year', 'acousticness', 'artists', 'danceability',
       'duration_ms', 'energy', 'explicit', 'id', 'instrumentalness', 'key',
       'liveness', 'loudness', 'mode', 'name', 'popularity', 'release_date',
       'speechiness', 'tempo', 'artists_upd_v1', 'artists_upd_v2',
       'artists_upd', 'artists_song', 'popularity_red', 'sim', 'url'],
      dtype='object')

In [60]:
# Small scratch frame for trying out row removal. The data is no longer bound to
# the name `dict`, which shadowed the builtin for every later cell.
sample_data = {'A':[1,2,3,4],'B':[16,2,36,46],'C':[14,24,4636,4],'D':[41,2,34,644]}
df = pd.DataFrame(sample_data,index=[1,2,3,4])

In [63]:
# Remove every row whose 'A' value appears in `l`, using one boolean mask instead
# of deleting rows from `df` while iterating over it.
l = [1,2,3,4]
df = df[~df['A'].isin(l)]

In [64]:
# Every 'A' value (1-4) is contained in `l`, so no rows remain.
df

Unnamed: 0,A,B,C,D


In [243]:
# Keep only the first (highest-ranked) recommendation for each distinct 'artists'
# value. drop_duplicates works for any number of rows and is safe to re-run,
# whereas the hard-coded range(40) raised IndexError once rows had been dropped.
# As before, `edm_top40` and `tmp` both end up referring to the de-duplicated frame.
edm_top40 = edm_top40.drop_duplicates(subset='artists', keep='first')
tmp = edm_top40

In [244]:
# Recommendations after removing repeated 'artists' values.
tmp

Unnamed: 0,index,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,...,release_date,speechiness,tempo,artists_upd_v1,artists_upd_v2,artists_upd,artists_song,popularity_red,sim,url
34066,154379,0.466,2011,0.00382,['Adele'],0.61,241693,0.683,0,3CKCZ9pfwAfoMZlMncA1Nc,...,2011-01-19,0.0253,108.003,[Adele],[],[Adele],AdeleSet Fire to the Rain,15,0.999782,https://i.scdn.co/image/ab67616d00001e026d4056...
8047,38494,0.326,2015,0.919,"['Sam Smith', 'John Legend']",0.468,219536,0.19,0,64GRDrL1efgXclrhVCeuA0,...,2015-11-06,0.0373,125.319,"[Sam Smith, John Legend]",[],"[Sam Smith, John Legend]",Sam SmithLay Me Down,15,0.8715,https://i.scdn.co/image/ab67616d00001e02de48dc...
8037,38484,0.152,2017,0.343,"['Sam Smith', 'Yebba']",0.66,283295,0.542,0,75TlwJ8oBgdq8P9kO7uVML,...,2017-11-03,0.077,135.828,"[Sam Smith, Yebba]",[],"[Sam Smith, Yebba]",Sam SmithNo Peace,11,0.856851,https://i.scdn.co/image/ab67616d00001e02f3ea71...
20544,90165,0.249,2014,0.162,['Jessie Ware'],0.387,257484,0.413,0,71AATBHZGo82EnjZnG53Zx,...,2014-08-04,0.0559,173.73,[Jessie Ware],[],[Jessie Ware],Jessie WareSay You Love Me,11,0.854143,https://i.scdn.co/image/ab67616d00001e021c021b...
26072,120458,0.239,2019,0.758,"['Ed Sheeran', 'Yebba']",0.565,243267,0.243,0,0VsGaRXR5WAzpu51unJTis,...,2019-07-12,0.0317,81.655,"[Ed Sheeran, Yebba]",[],"[Ed Sheeran, Yebba]",Ed SheeranBest Part of Me (feat. YEBBA),13,0.853983,https://i.scdn.co/image/ab67616d00001e0273304c...
28308,130276,0.329,2011,0.81,['Corinne Bailey Rae'],0.683,208636,0.333,0,5FvapYqWK6TEuh4csYwQ9O,...,2011-01-01,0.0434,123.804,[Corinne Bailey Rae],[],[Corinne Bailey Rae],Corinne Bailey RaeIs This Love,11,0.83749,https://i.scdn.co/image/ab67616d00001e02e99107...
33516,152009,0.488,2014,0.0331,['Aloe Blacc'],0.308,254880,0.769,0,2stPxcgjdSImK7Gizl8ZUN,...,2014-01-01,0.065,81.853,[Aloe Blacc],[],[Aloe Blacc],Aloe BlaccThe Man,12,0.836387,https://i.scdn.co/image/ab67616d00001e02363fc5...
26649,122602,0.554,2015,0.53,['Donnie Trumpet & The Social Experiment'],0.511,226014,0.596,0,6fTdcGsjxlAD9PSkoPaLMX,...,2015-06-30,0.224,158.063,[Donnie Trumpet & The Social Experiment],[],[Donnie Trumpet & The Social Experiment],Donnie Trumpet & The Social ExperimentSunday C...,13,0.835235,https://i.scdn.co/image/ab67616d00001e0242bf3f...
21133,92426,0.161,2011,0.229,['James Morrison'],0.537,229303,0.611,0,2TUzU4IkfH8kcvY2MUlsd2,...,2011-01-01,0.0304,105.955,[James Morrison],[],[James Morrison],James MorrisonI Won't Let You Go,12,0.826811,https://i.scdn.co/image/ab67616d00001e029accc4...
25551,117845,0.212,2011,0.58,['Emily King'],0.747,243533,0.224,0,4Aj1eHsMtEmjpJUWdUWiVa,...,2011-07-12,0.0649,105.014,[Emily King],[],[Emily King],Emily KingGeorgia,10,0.826299,https://i.scdn.co/image/ab67616d00001e02f381df...
