## Python Playlist Recommender Demo
### Using Spotify API

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis

import datetime

from datetime import datetime,date #for cleaning the dates

import matplotlib.pyplot as plt

import json
import os
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import base64
from urllib.parse import urlencode
import requests

import re

## Spotify Set Up

In [2]:
# Read the Spotify app credentials from the environment.
# SECURITY: the client id/secret were previously hard-coded in this cell. Secrets
# must not live in a notebook; any value that was committed should be treated as
# leaked and rotated in the Spotify developer dashboard.
# Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before starting the kernel.
USER = os.getenv('SPOTIPY_CLIENT_ID')
PASSWORD = os.environ.get('SPOTIPY_CLIENT_SECRET')

if not USER or not PASSWORD:
    raise RuntimeError(
        "Spotify credentials not found: set the SPOTIPY_CLIENT_ID and "
        "SPOTIPY_CLIENT_SECRET environment variables before running this notebook."
    )

# Payload for the HTTP Basic Authorization header built in the next cell:
# base64 of "<client id>:<client secret>".
client_creds = f"{USER}:{PASSWORD}"
client_creds_b64 = base64.b64encode(client_creds.encode())


In [3]:
# Set up token
# Exchange the client credentials for a bearer token (grant_type=client_credentials).
token_url = "https://accounts.spotify.com/api/token"
method = "POST"
token_data = {
    "grant_type": "client_credentials"
}
token_header = {
    "Authorization": f"Basic {client_creds_b64.decode()}" #base64 encoded
}

# The timeout keeps the cell from hanging forever on a stalled connection.
r = requests.request(method, token_url, data=token_data, headers=token_header, timeout=30)
# Fail here with the HTTP status (e.g. bad credentials) instead of an opaque
# KeyError on 'access_token' below.
r.raise_for_status()
token_response_data = r.json()
access_token = token_response_data['access_token']
# Authorization header sent with every request made by GetIDs / GetData.
# NOTE(review): the token presumably expires after a while; very long runs may
# need to re-run this cell - confirm against the token response.
header = {
    "Authorization": f"Bearer {access_token}"
}

## Function Definitions

In [4]:
#GetVariation
#Pass in the target feature values and the library dataframe
#Populate the variation between each library row and the target in a new column

def GetVariation(target, library):
    """Score every library row by its deviation from the target feature values.

    Parameters
    ----------
    target : pandas.Series or mapping
        Target value for each feature, indexable by the feature names listed
        in ``feature_cols`` below.
    library : pandas.DataFrame
        Must contain every column listed in ``feature_cols``.

    Returns
    -------
    pandas.DataFrame
        The same ``library`` object, with a 'Variation' column added in place.

    Notes
    -----
    The score is ``abs(sum(library[f] - target[f] for f in features))``.
    The signed differences are summed *before* taking the absolute value, so
    opposite deviations can cancel each other, and features are not rescaled.
    This formula is kept unchanged so existing rankings stay the same.
    """
    feature_cols = [
        'popularity', 'Danceability', 'Energy', 'Loudness', 'Speechiness',
        'Acousticness', 'Instrumentalness', 'Liveness', 'Tempo',
    ]

    # Column-wise arithmetic works for any index and for the frame actually
    # passed in; the previous loop sized itself from the global `library_df`
    # and looked rows up by label 0..n-1.
    signed_total = sum(library[col] - target[col] for col in feature_cols)

    library['Variation'] = signed_total.abs()
    return library

In [5]:
#GetIDs
#Pre-conditions: pass in dataframe of songs (with at least 'Name')
#Post-conditions: Populate dataframes with Spotify Data = {id, popularity}

def GetIDs(df):
    """Search Spotify for each song name and record the first hit's id and popularity.

    One search request is sent per row, authorised with the module-level
    ``header``. The first item of the search result is used.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Name' column.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` with 'id' and 'popularity' columns added in place.
        Rows whose search returns no track get ``None`` / NaN instead of
        aborting the whole run.

    Raises
    ------
    requests.HTTPError
        If Spotify answers a search with an error status.
    """
    endpoint = "https://api.spotify.com/v1/search"

    id_list = []
    pop_list = []

    # Iterate over the values, not the labels: df['Name'][i] is a label lookup
    # and fails once rows have been dropped without resetting the index.
    for song_name in df['Name'].tolist():
        # TODO: search for song name + artist name; a name-only query can
        # return a different artist's track.
        data = urlencode({"q": song_name, "type": "track"})
        lookup_url = f"{endpoint}?{data}"
        r = requests.get(lookup_url, headers=header, timeout=30)
        # Surface HTTP errors directly instead of a KeyError on 'tracks'.
        r.raise_for_status()
        items = r.json()['tracks']['items']

        if items:
            id_list.append(items[0]['id'])
            pop_list.append(items[0]['popularity'])
        else:
            # No match on Spotify: keep the row, mark the values as missing.
            id_list.append(None)
            pop_list.append(float('nan'))

    df['id'] = id_list
    df['popularity'] = pop_list
    return df

In [6]:
#GetData
#Pre-conditions: pass in dataframe with spotify id's
#Post-conditions: Populate dataframe with data = {Danceability, Energy, Loudness, etc...}

def GetData(df):
    """Fetch Spotify audio features for every track id in ``df``.

    One audio-features request is sent per row, authorised with the
    module-level ``header``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'id' column of Spotify track ids.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` with the columns Danceability, Energy, Loudness,
        Speechiness, Acousticness, Instrumentalness, Liveness and Tempo added
        in place. Rows with a missing id get NaN in all of them.

    Raises
    ------
    requests.HTTPError
        If Spotify answers a request with an error status.
    """
    endpoint = "https://api.spotify.com/v1/audio-features/"

    # Spotify response key -> dataframe column, in the order the columns are added.
    feature_columns = {
        'danceability': 'Danceability',
        'energy': 'Energy',
        'loudness': 'Loudness',
        'speechiness': 'Speechiness',
        'acousticness': 'Acousticness',
        'instrumentalness': 'Instrumentalness',
        'liveness': 'Liveness',
        'tempo': 'Tempo',
    }
    collected = {column: [] for column in feature_columns.values()}

    # Iterate over the values, not the labels: df['id'][i] is a label lookup
    # and fails once rows have been dropped without resetting the index.
    for s_id in df['id'].tolist():
        if pd.isna(s_id):
            # No track id for this row: nothing to request.
            for column in collected:
                collected[column].append(float('nan'))
            continue

        lookup_url = f"{endpoint}{s_id}"
        r2 = requests.get(lookup_url, headers=header, timeout=30)
        # Surface HTTP errors directly instead of a KeyError on 'danceability'.
        r2.raise_for_status()
        song_info = r2.json()
        for key, column in feature_columns.items():
            collected[column].append(song_info[key])

    for column, values in collected.items():
        df[column] = values
    return df

In [7]:
def CleanPar(df):
    """Strip parenthesised segments such as "(Live)" from every song name.

    Each "(...)" group with at least one character inside is removed, together
    with a single space directly in front of it if there is one. The 'Name'
    column of ``df`` is overwritten and the same dataframe is returned.
    """
    paren_group = re.compile(r" ?\([^)]+\)")
    df['Name'] = [paren_group.sub("", title) for title in df['Name'].tolist()]
    return df

In [8]:
def CleanBrack(df):
    """Strip square-bracketed segments such as "[Remastered]" from every song name.

    Each "[...]" group with at least one character inside is removed, together
    with a single space directly in front of it if there is one. The 'Name'
    column of ``df`` is overwritten and the same dataframe is returned.
    """
    # The inner class must exclude ']' (not ')'): with [^)] a name holding two
    # bracket groups lost everything between them, and a group containing ')'
    # was never removed.
    bracket_group = re.compile(r" ?\[[^\]]+\]")
    df['Name'] = [bracket_group.sub("", title) for title in df['Name'].tolist()]
    return df

In [9]:
def CleanSlash(df):
    """Keep only the part of each song name that precedes the first '/'.

    Names without a slash are left as they are. The 'Name' column of ``df``
    is overwritten and the same dataframe is returned.
    """
    df['Name'] = [title.partition('/')[0] for title in df['Name'].tolist()]
    return df

In [10]:
def CleanDupes(df):
    """Drop rows whose 'Name' repeats an earlier row, keeping the first one.

    The dataframe is modified in place and afterwards re-indexed 0..n-1 so it
    can be looped over by position; the same object is returned.
    """
    # keep="first" is the pandas default, so the first occurrence survives.
    df.drop_duplicates(subset=["Name"], inplace=True)
    # Renumber the rows; the old index is discarded rather than kept as a column.
    df.reset_index(inplace=True, drop=True)
    return df

In [11]:
#Removes instances of playlist songs in library 
# so you don't get recommended what you already have

def CrossRef(playlist, library):
    """Remove from ``library`` every song whose name already appears in ``playlist``.

    Parameters
    ----------
    playlist, library : pandas.DataFrame
        Both must contain a 'Name' column.

    Returns
    -------
    pandas.DataFrame
        The same ``library`` object, filtered in place and re-indexed 0..n-1
        (as CleanDupes does) so it can still be looped over by position.
    """
    # The mask must be computed over the *library* rows; building it from the
    # playlist rows and applying it to the library selects the wrong rows or
    # fails outright when the two frames have different indexes.
    already_in_playlist = library['Name'].isin(playlist['Name'])
    library.drop(library.index[already_in_playlist], inplace=True)
    library.reset_index(drop=True, inplace=True)
    return library

## Main:

In [12]:
# Setting up data: load the full music library (read as latin1) and the
# playlist to get recommendations for.
# TODO: in the future, prompt the user to browse files for the playlist.
library_df = pd.read_csv('MusicLibrary3.csv', encoding='latin1', low_memory=False)
playlist_df = pd.read_csv('STCD_data.csv', low_memory=False)

In [13]:
# Cleaning calls: strip "(...)" and "[...]" segments and anything after a '/'
# from the song names, then drop rows whose name is repeated.
library_df = (
    library_df
    .pipe(CleanPar)
    .pipe(CleanBrack)
    .pipe(CleanSlash)
    .pipe(CleanDupes)
)
playlist_df = (
    playlist_df
    .pipe(CleanPar)
    .pipe(CleanBrack)
    .pipe(CleanSlash)
    .pipe(CleanDupes)
)

# Replace every remaining missing value with 0 in both frames.
library_df.fillna(0, inplace=True)
playlist_df.fillna(0, inplace=True)

In [14]:
# Preview the first rows of the cleaned playlist.
playlist_df.head()

Unnamed: 0,Name,Artist,Genre,Year,Date Added,Last Played,Plays
0,Santeria,Sublime,Alternative,1996.0,"5/11/12, 8:56 PM","7/28/20, 12:05 PM",314.0
1,Take On Me,a-ha,Pop,1985.0,"9/10/18, 12:40 PM","7/9/20, 2:43 PM",220.0
2,Cliffs of Dover,Eric Johnson,Rock,1990.0,"8/16/17, 2:52 PM","8/2/20, 5:21 PM",184.0
3,What I'm Here 4,Gang Starr,Hip-Hop/Rap,1998.0,"12/2/16, 2:17 AM","8/2/20, 5:11 PM",172.0
4,Heaven,Los Lonely Boys,Rock,2004.0,"5/25/18, 8:48 PM","8/2/20, 4:46 PM",163.0


In [15]:
# Preview the first rows of the cleaned library.
library_df.head()

Unnamed: 0,Name,Date Added,Year,Last Played,Artist,Genre,Plays
0,A.D.H.D.,"7/6/15, 10:13 PM",2011.0,"6/27/17, 9:58 AM",Kendrick Lamar,Hip-Hop/Rap,21.0
1,Ab-Souls Outro,"8/12/19, 11:16 AM",2011.0,0,Kendrick Lamar,Hip-Hop/Rap,0.0
2,ABC,"6/27/18, 7:50 PM",1970.0,"11/3/19, 11:24 AM",Jackson 5,R&B/Soul,11.0
3,About a Girl,"8/8/18, 1:53 PM",1989.0,"2/28/19, 4:47 PM",Nirvana,Alternative,3.0
4,About Me,"10/25/16, 4:57 PM",2009.0,"6/8/19, 5:50 PM",Raekwon,Hip-Hop/Rap,1.0


In [None]:
# Check the previews above to make sure the data is clean (e.g. "About a Girl" should appear only once)

In [16]:
# Search Spotify for every playlist song (one request per row); adds 'id' and 'popularity'.
playlist_df = GetIDs(playlist_df)

In [17]:
# Same lookup for the whole library (one request per row); adds 'id' and 'popularity'.
library_df = GetIDs(library_df)

In [18]:
# Fetch the audio-feature columns (Danceability ... Tempo) for each playlist track id.
playlist_df = GetData(playlist_df)

In [19]:
# Fetch the audio-feature columns (Danceability ... Tempo) for each library track id.
library_df = GetData(library_df)

In [20]:
# Confirm the playlist now carries the id, popularity and audio-feature columns.
playlist_df.head()

Unnamed: 0,Name,Artist,Genre,Year,Date Added,Last Played,Plays,id,popularity,Danceability,Energy,Loudness,Speechiness,Acousticness,Instrumentalness,Liveness,Tempo
0,Santeria,Sublime,Alternative,1996.0,"5/11/12, 8:56 PM","7/28/20, 12:05 PM",314.0,2hnMS47jN0etwvFPzYk11f,77,0.682,0.765,-5.021,0.0395,0.0268,3.4e-05,0.188,90.807
1,Take On Me,a-ha,Pop,1985.0,"9/10/18, 12:40 PM","7/9/20, 2:43 PM",220.0,2WfaOiMkCvy7F5fcp2zZ8L,84,0.573,0.902,-7.638,0.054,0.018,0.00125,0.0928,84.412
2,Cliffs of Dover,Eric Johnson,Rock,1990.0,"8/16/17, 2:52 PM","8/2/20, 5:21 PM",184.0,5qm0KiVKMXW1kq6VrnIhz5,61,0.449,0.775,-12.029,0.0405,0.162,0.149,0.248,94.907
3,What I'm Here 4,Gang Starr,Hip-Hop/Rap,1998.0,"12/2/16, 2:17 AM","8/2/20, 5:11 PM",172.0,6NjXLhejj5dVjTyeU8SbVM,46,0.823,0.809,-3.871,0.208,0.105,0.0,0.356,91.007
4,Heaven,Los Lonely Boys,Rock,2004.0,"5/25/18, 8:48 PM","8/2/20, 4:46 PM",163.0,1HXy5I3HTWq8OvxCn0z7G7,78,0.677,0.556,-6.544,0.0306,0.263,0.0,0.105,80.009


In [21]:
# Confirm the library now carries the id, popularity and audio-feature columns.
library_df.head()

Unnamed: 0,Name,Date Added,Year,Last Played,Artist,Genre,Plays,id,popularity,Danceability,Energy,Loudness,Speechiness,Acousticness,Instrumentalness,Liveness,Tempo
0,A.D.H.D.,"7/6/15, 10:13 PM",2011.0,"6/27/17, 9:58 AM",Kendrick Lamar,Hip-Hop/Rap,21.0,2Fw5S2gaOSZzdN5dFoC2dj,71,0.609,0.799,-6.883,0.24,0.33,0.0,0.108,157.74
1,Ab-Souls Outro,"8/12/19, 11:16 AM",2011.0,0,Kendrick Lamar,Hip-Hop/Rap,0.0,4VaL54I6TlzghRTzOhfr7L,47,0.471,0.839,-6.852,0.379,0.116,0.0,0.658,115.79
2,ABC,"6/27/18, 7:50 PM",1970.0,"11/3/19, 11:24 AM",Jackson 5,R&B/Soul,11.0,6wDviYDtmSDZ0S6TVMM9Vc,66,0.715,0.678,-11.842,0.0475,0.486,0.000192,0.15,94.769
3,About a Girl,"8/8/18, 1:53 PM",1989.0,"2/28/19, 4:47 PM",Nirvana,Alternative,3.0,55yvzYuvJYG2RUEnMK78tr,70,0.409,0.932,-4.538,0.0339,1.9e-05,2e-06,0.31,131.761
4,About Me,"10/25/16, 4:57 PM",2009.0,"6/8/19, 5:50 PM",Raekwon,Hip-Hop/Rap,1.0,5hiatfPmopoExZRZ2O1g5S,58,0.518,0.943,-2.044,0.17,0.00216,0.0,0.08,180.166


### Dataframes should be populated with Spotify data - Now for the recommendation:

In [22]:
# Target profile = per-column mean of the playlist's numeric columns.
# numeric_only=True is required on pandas >= 2.0, where DataFrame.mean() no
# longer silently drops text columns (Name, Artist, id, ...) and raises instead.
target = playlist_df.mean(numeric_only=True)
target

Year                1952.470238
Plays                 43.595238
popularity            69.565476
Danceability           0.615274
Energy                 0.655989
Loudness              -8.143571
Speechiness            0.068436
Acousticness           0.237319
Instrumentalness       0.032191
Liveness               0.191896
Tempo                121.582399
dtype: float64

In [23]:
# Add a 'Variation' column to the library: each song's deviation from the target values.
library_df = GetVariation(target, library_df)

In [24]:
# Lowest Variation => most similar to target, so the first 15 rows are the recommendations.
library_df.sort_values(by='Variation').head(15)

Unnamed: 0,Name,Date Added,Year,Last Played,Artist,Genre,Plays,id,popularity,Danceability,Energy,Loudness,Speechiness,Acousticness,Instrumentalness,Liveness,Tempo,Variation
1039,Minnesota,"11/23/16, 5:10 PM",2016.0,"10/13/18, 7:35 AM",Lil Yachty,Hip-Hop/Rap,2.0,4XkOcWt0C2JX1s2RXybosk,58,0.812,0.648,-4.994,0.0887,0.0951,0.0,0.11,130.022,0.023608
60,Apologize,"1/19/16, 1:38 PM",0.0,"1/20/16, 12:23 AM",One Republic,0,1.0,1NrJYpdAi7uosDRPmSYrsG,71,0.593,0.74,-6.12,0.0339,0.363,2.2e-05,0.102,118.008,0.085486
568,Gimme All Your Lovin',"5/1/20, 3:51 PM",1983.0,"6/19/20, 11:04 AM",ZZ Top,Hard Rock,8.0,0OBwxFLu6Yj61s2OagYbgY,70,0.63,0.788,-6.7,0.0298,0.00322,0.0237,0.0883,120.059,0.116612
1674,Type of Way,"11/20/16, 2:44 PM",2013.0,"6/4/18, 12:04 PM",Rich Homie Quan,Hip-Hop/Rap,22.0,07sOl7WsmCaUqaJaRn9Dss,58,0.846,0.497,-10.174,0.419,0.146,0.0,0.0733,134.875,0.123108
1670,Two Tickets to Paradise,"3/26/20, 4:56 PM",1977.0,0,Eddie Money,Rock,0.0,22CIOfLZB9z8He7WgHYAgH,67,0.623,0.68,-13.255,0.0282,0.0056,0.000425,0.148,129.713,0.137817
1625,Think About You,"4/17/17, 1:59 PM",2018.0,"4/29/17, 9:20 PM",Guns N' Roses,Rock,3.0,3Kuu5vASpXK8oRsxOvau6P,67,0.673,0.596,-7.891,0.0354,0.166,0.0,0.11,123.969,0.147008
612,The Greatest,"10/14/19, 10:05 PM",2015.0,0,Alabama Shakes,Alternative,0.0,4tnLapnaoDS46c9CBRUqpE,72,0.732,0.41,-14.41,0.127,0.0915,0.0,0.0697,125.947,0.161792
795,In the End,"3/14/17, 11:52 PM",1994.0,0,Green Day,Alternative,0.0,60a0Rd6pjrkxjPbaKzXjfq,84,0.556,0.864,-5.87,0.0584,0.00958,0.0,0.209,105.143,0.164572
1859,The Zephyr Song,"3/16/19, 11:39 AM",2002.0,0,Red Hot Chili Peppers,Alternative,0.0,1ndGB6rvxKYN9seCYO1dTF,70,0.725,0.803,-4.083,0.0337,0.013,3.7e-05,0.0891,117.391,0.166428
670,High Demand,"3/10/17, 4:18 PM",2017.0,"2/10/18, 1:44 PM",Future,Hip-Hop/Rap,6.0,4ibj6HRTdzkfvlDgxSXxCp,48,0.907,0.465,-7.123,0.288,0.0175,1.5e-05,0.117,141.957,0.176893


## This represents the Top 15 Songs I should add to my STCD playlist
### In the future, I plan on making this into an application