In [1]:
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import pickle
import numpy as np


In [2]:
# Load the previously saved model object; later cells call `model.predict`.
# NOTE: unpickling can execute arbitrary code, so only load a pickle file
# that comes from a trusted source.
with open('finalized_model_pca.pkl', 'rb') as f:
    model = pickle.load(f)

In [3]:
def get_keys(path):
    """Load API credentials from a JSON file.

    Parameters
    ----------
    path : str
        Location of the JSON credentials file. This argument is now honoured;
        previously it was ignored and a hard-coded location was always read.

    Returns
    -------
    object
        The parsed JSON content (later cells read the 'client' and 'api_key'
        entries from it).

    Raises
    ------
    FileNotFoundError
        If neither ``path`` nor the legacy location exists.
    """
    import os

    # Legacy location that earlier versions of this function always opened.
    # Kept only as a fallback so existing calls that passed a different
    # (previously unused) path keep working.
    legacy_path = "/Users/Jonathan/Documents/Flatiron/phase_5/P5_spotify_recommendations/.secret/spotify_api.json"

    existing = [p for p in (path, legacy_path) if p and os.path.isfile(p)]
    # If nothing exists, open the caller's path so the error names that file.
    target = existing[0] if existing else (path or legacy_path)

    with open(target, encoding="utf-8") as f:
        return json.load(f)

In [4]:
# Read the Spotify API credentials from a local JSON file kept outside the
# notebook so that secrets are not embedded in the code.
# NOTE(review): this is an absolute, user-specific path; it will need
# adjusting on any other machine.
keys = get_keys("/Users/Jonathan/.secret/spotify_api.json")

# The credentials file is expected to provide 'client' and 'api_key' entries.
client = keys['client']
api_key = keys['api_key']

In [5]:
# Build the Spotify API client from the app's client id / secret.
# `sp` is used as a module-level client by the lookup code further down.
auth_manager = SpotifyClientCredentials(client_id = client, client_secret = api_key)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [6]:
# Load the playlist dataset (track metadata plus audio features) and preview
# the first rows.
spot_df = pd.read_csv('data/spotify_playlist.csv')
spot_df.head()

Unnamed: 0,artist,artist_id,popularity,album,track_name,track_id,danceability,energy,key,loudness,mode,speechiness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Bon Jovi,58lV9VcRSjABbAbfWS6skp,8,Slippery When Wet,Livin' On A Prayer,0J6mQxEZnlRt9ymzFntA6z,0.534,0.887,0,-3.777,1,0.0345,9.9e-05,0.325,0.72,122.494,249293,4
1,Bon Jovi,58lV9VcRSjABbAbfWS6skp,0,Bon Jovi Greatest Hits,Wanted Dead Or Alive,4Zj9VM4fO1KwkU3lFaECsW,0.257,0.819,7,-3.562,1,0.0422,0.015,0.324,0.267,150.354,308560,4
2,Queen,1dfeR4HaWDbWqFHLkxsg1d,64,News Of The World (Deluxe Remastered Version),We Are The Champions - Remastered 2011,7ccI9cStQbQdystvc6TvxD,0.268,0.459,7,-6.948,0,0.0346,0.0,0.119,0.172,64.223,179200,4
3,Queen,1dfeR4HaWDbWqFHLkxsg1d,69,News Of The World (Deluxe Remastered Version),We Will Rock You - Remastered 2011,54flyrjcdnQdco7300avMJ,0.693,0.497,2,-7.316,1,0.119,0.0,0.258,0.473,81.308,122067,4
4,Bon Jovi,58lV9VcRSjABbAbfWS6skp,0,Cross Road,You Give Love A Bad Name,7LBJui5MdjfCd8YZr4xaqA,0.556,0.945,0,-4.347,0,0.0496,4e-06,0.394,0.781,122.875,224307,4


In [7]:
def get_song(song, artist):
    """Look up one track on Spotify and return its metadata and audio features.

    Runs a single-result search through the module-level ``sp`` client and
    then requests the audio features of the top hit.

    Parameters
    ----------
    song : str
        Track title to search for.
    artist : str
        Artist name to search for.

    Returns
    -------
    pandas.DataFrame
        A one-row frame (index 0) with the columns artist, artist_id,
        popularity, album, track_name, track_id followed by the twelve audio
        feature columns, in that order.

    Raises
    ------
    IndexError
        If the search returns no tracks.
    ValueError
        If Spotify returns no audio features for the matched track.
    """
    playlist_features_list = ["artist", "artist_id", "popularity", "album", "track_name", "track_id",
                              "danceability", "energy", "key", "loudness", "mode", "speechiness",
                              "instrumentalness", "liveness", "valence", "tempo", "duration_ms", "time_signature"]
    # The first six columns come from the search hit; the rest from audio features.
    audio_feature_names = playlist_features_list[6:]

    # NOTE(review): Spotify's documented field-filter syntax is
    # `track:<name> artist:<name>`; the query text is left unchanged here so
    # search results stay the same - confirm before altering it.
    results = sp.search(q = 'track: {},  artist: {}'.format(song, artist), limit=1)

    items = results['tracks']['items']
    if not items:
        # Same exception type as indexing the empty list, but with a useful message.
        raise IndexError("No Spotify track found for song={!r}, artist={!r}".format(song, artist))

    track = items[0]
    lead_artist = track['artists'][0]

    playlist_features = {
        "artist": lead_artist['name'],
        "artist_id": lead_artist['id'],
        "popularity": track['popularity'],
        "album": track['album']['name'],
        "track_name": track['name'],
        "track_id": track['id'],
    }

    feature_rows = sp.audio_features(playlist_features["track_id"])
    audio_features = feature_rows[0] if feature_rows else None
    if audio_features is None:
        raise ValueError("Spotify returned no audio features for track id {!r}".format(playlist_features["track_id"]))

    for feature in audio_feature_names:
        playlist_features[feature] = audio_features[feature]

    # Build the single row directly instead of concatenating onto an empty
    # frame, which relies on deprecated dtype handling in pandas.
    return pd.DataFrame([playlist_features], columns=playlist_features_list)

In [8]:
def predict(song_title, artist, df):
    """Return the tracks of ``df`` that fall in the same cluster as a query song.

    The query track is fetched with ``get_song``, placed in front of ``df``,
    and every row is assigned a cluster by the module-level ``model``. A
    scaler + 2-component PCA pipeline is fitted on the combined numeric
    columns to give each row 2-D coordinates.

    Parameters
    ----------
    song_title : str
        Title of the query track.
    artist : str
        Artist of the query track.
    df : pandas.DataFrame
        Library of candidate tracks. This argument is now actually used;
        previously the global ``spot_df`` was read regardless of ``df``.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the rows (query track at index 0 included)
        whose cluster matches the query track's, with added ``cluster``,
        ``x`` and ``y`` columns.
    """
    song = get_song(song_title, artist)

    # The query track goes first so it is always row 0 of the combined frame.
    new_df = pd.concat([song, df], ignore_index=True)
    new_df = new_df.convert_dtypes()
    new_df_feat = new_df.select_dtypes(np.number)

    preds = model.predict(new_df_feat)

    pca_pipeline = Pipeline([('scaler', StandardScaler()), ('PCA', PCA(n_components=2))])
    coords = np.asarray(pca_pipeline.fit_transform(new_df_feat))

    new_df['cluster'] = preds
    new_df['x'] = coords[:, 0]
    new_df['y'] = coords[:, 1]

    # Row 0 is the query track, so its label identifies the target cluster.
    cluster = new_df.loc[0, 'cluster']

    # Return a copy so callers can add columns without touching a slice.
    return new_df.loc[new_df['cluster'] == cluster].copy()

In [9]:
def dist(row, origin_x=None, origin_y=None):
    """Euclidean distance between a row's (x, y) point and a reference point.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'x' and 'y' entries.
    origin_x, origin_y : scalar or pandas.Series, optional
        Coordinates of the reference point. When omitted, the module-level
        ``xt`` / ``yt`` variables are used, matching the original behaviour
        of ``DataFrame.apply(dist, axis=1)``. If a Series is supplied, its
        first element is taken.

    Returns
    -------
    float
        The straight-line distance.
    """
    if origin_x is None:
        origin_x = xt
    if origin_y is None:
        origin_y = yt

    # Mask-based selections yield Series; reduce them to one scalar so the
    # result is a single number per row rather than a Series.
    if hasattr(origin_x, 'iloc'):
        origin_x = origin_x.iloc[0]
    if hasattr(origin_y, 'iloc'):
        origin_y = origin_y.iloc[0]

    dx = float(origin_x) - float(row['x'])
    dy = float(origin_y) - float(row['y'])
    return float(np.sqrt(dx**2 + dy**2))
    

In [10]:
# Fetch the query track ('brb' by 'mahalia') and keep the tracks that share
# its cluster, together with their 2-D coordinates.
new_df = predict('brb', 'mahalia', spot_df)

In [11]:
# Coordinates of the query track (the row labelled 0). These are mask-based
# selections, so `xt` and `yt` are Series rather than plain floats; `dist`
# picks them up as module-level variables.
xt = new_df.loc[new_df['track_name'] == new_df['track_name'][0],'x']
yt = new_df.loc[new_df['track_name'] == new_df['track_name'][0],'y']

# Distance of every same-cluster track to the query track, then the ten
# closest rows. The query track itself is included and comes first at 0.0.
new_df['distance'] = new_df.apply(dist, axis =1)
new_df.sort_values('distance')[0:10]
    

Unnamed: 0,artist,artist_id,popularity,album,track_name,track_id,danceability,energy,key,loudness,...,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,cluster,x,y,distance
0,Mahalia,16rCzZOMQX7P8Kmn5YKexI,44,BRB (feat. Pink Sweat$),BRB (feat. Pink Sweat$),3cIYfp1jvO622pUW5zaeEF,0.729,0.514,6,-7.815,...,8.5e-05,0.093,0.54,79.998,218520,4,2,1.13717,-1.628493,0.0
4160,Wynonie Harris,4Imc3wiT22cuynvQNpXcVn,0,More Greatest Hits - Good Rockin' Tonight,Grandma Plays The Numbers,628ZHvFEZSdHAbp99ghlPr,0.718,0.665,8,-10.369,...,0.000675,0.11,0.751,79.725,159000,4,2,1.118535,-1.697996,0.071958
6335,Norlie & KKV,2u8P7EawurNYoIzRtr5Knk,27,seven eleven,seven eleven,1XcYiK8GMEe18whTA3h1cw,0.684,0.472,7,-8.512,...,0.0,0.0889,0.655,86.984,215827,4,2,1.134607,-1.521619,0.106905
6043,A R I Z O N A,7hOGhpa8RMSuDOWntGIAJt,44,GALLERY,I Was Wrong,3a2MbZSw3NGRWHqsCJ4iQj,0.824,0.502,6,-9.38,...,0.0559,0.105,0.563,118.006,217293,4,2,1.207025,-1.732565,0.125342
4847,Ritchie Valens,5Y9xEAGW4GwGJgbiI6W85P,57,Ritchie Valens,"Come On, Let's Go",4cRfSR0QxDlXRHTKyEOu93,0.616,0.724,9,-8.101,...,3e-06,0.128,0.809,81.402,124587,3,2,1.08378,-1.510789,0.129247
5716,Elvis Presley,43ZHCT0cAZBISjO8DG9PnE,0,Elvis Movies,Kissin' Cousins - 2003 Sony Remaster,0gcV8JdCeWcsU82hED7Wfy,0.626,0.587,6,-9.98,...,0.0,0.46,0.961,84.973,131800,4,2,1.041731,-1.722992,0.134308
5505,Willie Nelson,5W5bDNCqJ1jbCgTxDD0Cb3,57,Shotgun Willie,Whiskey River,6gS6XQ4OqMddkgzHNz40E5,0.581,0.574,7,-9.298,...,0.000902,0.107,0.853,76.915,247240,4,2,1.027522,-1.722807,0.14463
5371,Louis Jordan,2nRbxpnBMMbtMBWH5QdqH2,40,"Jack, You're Dead: The Essential Blue Archive",Is You Is or Is You Ain't My Baby?,482FSFSNKIekver2drxHdv,0.808,0.304,8,-7.629,...,0.0,0.113,0.51,137.796,164587,4,2,1.207286,-1.756766,0.146186
5667,Elvis Presley,43ZHCT0cAZBISjO8DG9PnE,32,Elvis Is Back,It Feels So Right,5RsdY1b0JSQTGeONEYFcwm,0.56,0.552,10,-11.033,...,3e-06,0.0509,0.875,84.222,128760,4,2,1.260599,-1.732702,0.161537
2422,TOTO,0PFtn5NtBbbUNbU9EAmIWF,47,Greatest Hits: 40 Trips Around The Sun,Africa,6QZo2TgclkUMwJgggi8QSQ,0.682,0.584,9,-9.507,...,0.000107,0.0468,0.709,92.793,295347,4,2,1.090797,-1.471292,0.163898
