In [1]:
#Importing necessary libraries
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import pickle
import numpy as np


In [2]:
# Load the pickled model object from the working directory.
# NOTE(review): pickle.load can execute arbitrary code stored in the file, so
# only load pickles that come from a trusted source.
with open('finalized_model_pca.pkl', mode='rb') as model_file:
    model = pickle.load(model_file)

In [3]:
#Gets API information
def get_keys(path):
    """
    Function
    ------------------------------------
    This function reads the JSON file located at `path` and returns its decoded contents.

    Parameters
    ------------------------------------
    path : location of the JSON file holding the API credentials

    Returns
    ------------------------------------
    Whatever json.load decodes from the file (the notebook reads the 'client' and 'api_key'
    entries from the result)

    Raises
    ------------------------------------
    FileNotFoundError when neither `path` nor the legacy location below can be opened

    """
    # Earlier versions of this function ignored `path` and always opened this
    # location. It is kept only as a fallback so existing runs keep working.
    legacy_path = "/Users/Jonathan/Documents/Flatiron/phase_5/P5_music_recommendations/.secret/spotify_api.json"

    try:
        with open(path) as f:
            return json.load(f)
    except FileNotFoundError:
        if path == legacy_path:
            raise
        import warnings
        warnings.warn(
            "Credentials file {!r} was not found; falling back to {!r}".format(path, legacy_path)
        )
        with open(legacy_path) as f:
            return json.load(f)

In [4]:
# Read the Spotify credentials and unpack the two values used for authentication
keys = get_keys("/Users/Jonathan/.secret/spotify_api.json")
client, api_key = keys['client'], keys['api_key']

In [5]:
# Authenticate with the Spotify API using the client credentials loaded above
auth_manager = SpotifyClientCredentials(
    client_id=client,
    client_secret=api_key,
)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [6]:
# Read the playlist tracks from CSV and preview the first five rows
spot_df = pd.read_csv('data/spotify_playlist.csv')
spot_df.head(n=5)

Unnamed: 0,artist,artist_id,popularity,album,track_name,track_id,danceability,energy,key,loudness,mode,speechiness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Bon Jovi,58lV9VcRSjABbAbfWS6skp,8,Slippery When Wet,Livin' On A Prayer,0J6mQxEZnlRt9ymzFntA6z,0.534,0.887,0,-3.777,1,0.0345,9.9e-05,0.325,0.72,122.494,249293,4
1,Bon Jovi,58lV9VcRSjABbAbfWS6skp,0,Bon Jovi Greatest Hits,Wanted Dead Or Alive,4Zj9VM4fO1KwkU3lFaECsW,0.257,0.819,7,-3.562,1,0.0422,0.015,0.324,0.267,150.354,308560,4
2,Queen,1dfeR4HaWDbWqFHLkxsg1d,64,News Of The World (Deluxe Remastered Version),We Are The Champions - Remastered 2011,7ccI9cStQbQdystvc6TvxD,0.268,0.459,7,-6.948,0,0.0346,0.0,0.119,0.172,64.223,179200,4
3,Queen,1dfeR4HaWDbWqFHLkxsg1d,69,News Of The World (Deluxe Remastered Version),We Will Rock You - Remastered 2011,54flyrjcdnQdco7300avMJ,0.693,0.497,2,-7.316,1,0.119,0.0,0.258,0.473,81.308,122067,4
4,Bon Jovi,58lV9VcRSjABbAbfWS6skp,0,Cross Road,You Give Love A Bad Name,7LBJui5MdjfCd8YZr4xaqA,0.556,0.945,0,-4.347,0,0.0496,4e-06,0.394,0.781,122.875,224307,4


In [7]:
def get_song(song, artist):
    """
    Function
    ------------------------------------
    This function searches Spotify for a song title and artist name and returns a one-row DataFrame
    describing the first match, so that it can be concatenated with an existing DataFrame of tracks.

    Parameters
    ------------------------------------
    song : title of the song to search for
    artist : name of the artist to search for

    Returns
    ------------------------------------
    DataFrame with a single row (index 0) and the columns listed in `playlist_features_list`:
    track metadata taken from the search result followed by the track's audio features

    Raises
    ------------------------------------
    ValueError when the search returns no track, or when Spotify has no audio features for the track

    """

    playlist_features_list = ["artist", "artist_id", "popularity", "album", "track_name", "track_id",
                             "danceability", "energy", "key", "loudness", "mode", "speechiness",
                             "instrumentalness", "liveness", "valence", "tempo", "duration_ms", "time_signature"]

    # NOTE(review): the query text is unchanged from the original. Confirm against the
    # Spotify search documentation whether the field filters should instead be
    # written as `track:<title> artist:<name>`.
    results = sp.search(q = 'track: {},  artist: {}'.format(song, artist), limit=1)

    # limit=1 means at most one item comes back; an empty list means no match.
    items = results['tracks']['items']
    if not items:
        raise ValueError("No Spotify track found for song {!r} by artist {!r}".format(song, artist))

    track = items[0]
    lead_artist = track['artists'][0]

    playlist_features = {
        "artist": lead_artist['name'],
        "artist_id": lead_artist['id'],
        "popularity": track['popularity'],
        "album": track['album']['name'],
        "track_name": track['name'],
        "track_id": track['id'],
    }

    # audio_features returns one entry per requested id; the entry is None when
    # no audio features are available for that id.
    audio_features = sp.audio_features(playlist_features["track_id"])[0]
    if audio_features is None:
        raise ValueError("No audio features available for track id {!r}".format(playlist_features["track_id"]))

    # Everything after the six metadata columns is an audio feature.
    for feature in playlist_features_list[6:]:
        playlist_features[feature] = audio_features[feature]

    # Build the row directly instead of concatenating onto an empty frame; the
    # column order still follows playlist_features_list.
    return pd.DataFrame([playlist_features], columns = playlist_features_list)

In [8]:
def predict(song_title, artist, df):

    """
    Function
    ------------------------------------
    This function looks up a song with get_song, places it as the first row (index 0) of `df`,
    assigns every row a cluster with the loaded model, and adds 2-D coordinates ('x', 'y') obtained
    from a StandardScaler + PCA pipeline fitted on the numeric columns of the combined data.
    Only the rows that share the looked-up song's cluster are returned.

    Parameters
    ------------------------------------
    song_title : title of the song to look up
    artist : name of the artist to look up
    df : DataFrame of tracks to search for similar songs
         (assumed to have the same columns that get_song returns - TODO confirm)

    Returns
    ------------------------------------
    A copy of the rows in the looked-up song's cluster, with 'cluster', 'x' and 'y' columns added.
    The looked-up song is the row with index label 0.

    """
    song = get_song(song_title, artist)

    # Use the frame passed in by the caller rather than the notebook-level spot_df.
    new_df = pd.concat([song, df], ignore_index=True)
    new_df = new_df.convert_dtypes()
    new_df_feat = new_df.select_dtypes(np.number)

    # NOTE(review): assumes the model was trained on these numeric columns in this order - confirm.
    preds = model.predict(new_df_feat)

    # The projection is refitted on every call, so coordinates depend on the contents of `df`.
    pca_pipeline = Pipeline([('scaler', StandardScaler()), ('PCA', PCA(n_components=2))])
    coords = pca_pipeline.fit_transform(new_df_feat)

    new_df['cluster'] = preds
    new_df['x'], new_df['y'] = coords[:, 0], coords[:, 1]

    # The looked-up song was concatenated first, so its cluster is the first prediction.
    cluster = new_df['cluster'].iloc[0]

    # Return a copy so callers can add columns without writing into a filtered slice.
    return new_df.loc[new_df['cluster'] == cluster].copy()

In [9]:
def dist(row, target_x=None, target_y=None):
    """
    Function
    ------------------------------------
    This function calculates the straight-line distance between a row's ('x', 'y') point and a
    reference point.

    Parameters
    ------------------------------------
    row : mapping or Series with 'x' and 'y' entries
    target_x : x of the reference point; when omitted, the notebook-level variable xt is used
    target_y : y of the reference point; when omitted, the notebook-level variable yt is used

    Returns
    ------------------------------------
    The distance sqrt((target_x - x)**2 + (target_y - y)**2)

    Usage with an explicit reference point:
        df.apply(dist, axis=1, args=(x0, y0))

    """
    # Fall back to the notebook globals so existing `df.apply(dist, axis=1)` calls keep working.
    if target_x is None:
        target_x = xt
    if target_y is None:
        target_y = yt

    x = row['x']
    y = row['y']
    distance = np.sqrt((target_x - x)**2 + (target_y - y)**2)
    return distance
    

In [10]:
# Look up the song and keep the tracks that fall in the same cluster;
# the result carries the x/y columns needed for the distance calculation
new_df = predict(song_title='dear maria count me in', artist='all time low', df=spot_df)

In [11]:
# Display the frame returned by predict (long frames are shown truncated)
new_df

Unnamed: 0,artist,artist_id,popularity,album,track_name,track_id,danceability,energy,key,loudness,...,speechiness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,cluster,x,y
0,All Time Low,46gyXjRIvN1NL1eCB8GBxo,76,"So Wrong, It's Right","Dear Maria, Count Me In",0JJP0IS4w0fJx01EcrfkDe,0.459,0.895,2,-3.126,...,0.0805,0.0,0.206,0.572,181.04,182827,4,6,-1.713716,-0.394922
8,Various Artists,3Y3xIwWyq5wnNHPp5gPjOW,73,Footloose (15th Anniversary Collectors' Edition),"Footloose - From ""Footloose"" Soundtrack",2vz1CsL5WBsbpBcwgboTAw,0.583,0.905,2,-6.132,...,0.0733,0.00019,0.0593,0.646,173.982,226827,4,6,-1.050297,-0.920512
11,Journey,0rvjqX7ttXeg3mTy8Xscbt,61,The Essential Journey,Any Way You Want It,2DyHhPyCZgZzNXn1IrtsTu,0.519,0.934,7,-7.137,...,0.0471,0.00457,0.137,0.586,138.162,201867,4,6,-0.689359,-0.623323
15,Europe,7Js6Lde8thlIHXggv2SCBz,30,The Final Countdown (Expanded Edition),On the Loose,2lhHjoMq7L4wMuI1NrNuK2,0.418,0.902,9,-6.374,...,0.0669,0.0,0.0549,0.578,164.302,188093,4,6,-1.027790,0.179830
22,Amon Amarth,3pulcT2wt7FEG10lQlqDJL,62,Twilight Of The Thunder God,Twilight Of The Thunder God,5u3l2TONYacJgmRPQVaF9y,0.374,0.92,6,-4.653,...,0.0501,0.026,0.0802,0.278,191.014,248747,4,6,-1.356560,0.963684
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6486,Plain White T's,1g1yxsNVPhMUl9GrMjEb2o,16,Stop,Your Fault,1H4YzFJH7IK3jkxnECevLT,0.501,0.88,9,-5.165,...,0.043,0.0,0.286,0.573,129.519,228333,4,6,-0.675579,-0.021362
6487,Plain White T's,1g1yxsNVPhMUl9GrMjEb2o,11,Stop,Can't Turn Away,7EytKg9puf2KtkE7VoFJom,0.485,0.805,11,-4.738,...,0.0508,0.000015,0.587,0.571,142.595,270160,4,6,-0.992442,0.426839
6490,AFI,19I4tYiChJoxEO5EuviXpz,70,DECEMBERUNDERGROUND,Miss Murder,0Ti2dlF2xLjXblvdU5fCxM,0.308,0.873,8,-4.542,...,0.0533,0.000729,0.0974,0.686,143.519,206587,4,6,-1.086277,-0.156966
6492,American Hi-Fi,4XlcLj6bxCnpBIOHmBpIWP,0,American Hi-Fi,Flavor Of The Weak,5hz3YCeenldfaaFucR2ONt,0.377,0.969,10,-1.863,...,0.0682,0.000016,0.339,0.538,152.002,188840,4,6,-1.625499,0.701888


In [12]:
#Calculates the distance of every song from the given song and shows the ten closest songs

# The given song is the row with index label 0. Take its coordinates as scalars:
# a boolean-mask selection would give a Series, and dist would then return a
# Series per row instead of a single number.
xt = new_df.loc[0, 'x']
yt = new_df.loc[0, 'y']

# assign builds a new frame, so re-running this cell gives the same result.
new_df = new_df.assign(distance=new_df.apply(dist, axis=1))
new_df.sort_values('distance').head(10)
    

Unnamed: 0,artist,artist_id,popularity,album,track_name,track_id,danceability,energy,key,loudness,...,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,cluster,x,y,distance
0,All Time Low,46gyXjRIvN1NL1eCB8GBxo,76,"So Wrong, It's Right","Dear Maria, Count Me In",0JJP0IS4w0fJx01EcrfkDe,0.459,0.895,2,-3.126,...,0.0,0.206,0.572,181.04,182827,4,6,-1.713716,-0.394922,0.0
1298,Bad Religion,2yJwXpWAQOOl5XFzbCxLs9,47,Suffer,Suffer,7AoWDXgfVQO8pK4hb0Kswt,0.39,0.974,9,-3.243,...,0.0,0.119,0.67,144.312,107323,4,6,-1.701844,-0.409828,0.019056
5596,Jordan Davis,77kULmXAQ6vWer7IIHdGzI,68,Home State,Singles You Up,4LiMQ6G9n84rDUBs9wtwz6,0.491,0.907,7,-3.594,...,0.0,0.242,0.631,199.943,182667,4,6,-1.738452,-0.400625,0.025384
488,Skillet,49bzE5vRBRIota4qeHtQM8,50,Rise (Deluxe Edition),What I Believe,5aUl2cPg4O7yXXdzWQVTYO,0.466,0.928,9,-2.59,...,1e-06,0.0629,0.711,168.026,199480,4,6,-1.777836,-0.428823,0.07253
2853,Halestorm,6om12Ev5ppgoMy3OYSoech,59,Halestorm (Deluxe),I Get Off,0JKY13K1Io2aqXJb96UyzX,0.456,0.891,11,-4.062,...,5e-06,0.0727,0.647,170.019,183707,4,6,-1.790666,-0.374594,0.079589
2118,Blitzkid,1u8qqOF2w0kgbg1QLXEXSJ,37,Terrifying Tales,Pretty in a Casket,0Y79YuvzHHIiKmD5CmddKT,0.361,0.952,1,-3.119,...,0.0,0.323,0.933,200.979,129916,4,6,-1.808185,-0.395781,0.094472
2264,Death Tour,08ET100WDKcE5HzPSWBJYg,16,P.I.S.S./U.S.,P.I.S.S./U.S.,5nyLsuDNAKt9Xtz27cnxDp,0.36,0.989,8,-2.454,...,0.00858,0.207,0.873,194.789,61865,4,6,-1.776785,-0.322612,0.09595
695,Saving Abel,2AVVj8ezW2mJ0v8u7XydiF,48,Saving Abel,New Tattoo,6cXxbf1eyxNDkjbGEawVFV,0.467,0.947,5,-3.393,...,0.0,0.212,0.763,157.987,262613,4,6,-1.808719,-0.379468,0.096251
3277,System Of A Down,5eAWCfyUhZtHHtBdNk56l1,69,Mezmerize,Violent Pornography,249Z7XT6mf8B2zuI0RaeS0,0.459,0.958,6,-2.333,...,0.0,0.108,0.586,142.707,211333,4,6,-1.64516,-0.481248,0.110237
1021,Panic! At The Disco,20JZFwl6HVl6yg8a4H3ZqK,58,A Fever You Can't Sweat Out,But It's Better If You Do,2ZqTtndqAZDRAWw2vgZwQK,0.467,0.848,9,-3.063,...,0.0,0.29,0.674,170.038,205752,4,6,-1.820552,-0.315472,0.133139
