In [106]:
import itertools
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

from spotify import SpotifyService
from record_collection import RecordCollection

# Show up to 50 columns when a DataFrame is rendered (the feature frames are wide).
pd.options.display.max_columns = 50

In [2]:
# Spotify client wrapper, created with the 'user-read-recently-played' scope
# (presumably the OAuth scope for reading listening history — confirm in spotify.py).
spotifyService = SpotifyService(scope='user-read-recently-played')
# The collection is built on top of the service; its `tracks` attribute is read later on.
recordCollection = RecordCollection(spotifyService)

In [133]:
# Track records exposed by the collection. The sample shown by this cell is a
# Track carrying ids/URIs, track and artist names, popularity and duration_ms.
tracks = recordCollection.tracks
tracks[1]  # display the second track as a sample

Track(track_id='2hUHm19fmQnJ40LZXqjhZt', track_uri='spotify:track:2hUHm19fmQnJ40LZXqjhZt', track_name='To Lose My Life', artist_name='White Lies', artist_id='6ssXMmc5EOUrauZxirM910', artist_uri='spotify:artist:6ssXMmc5EOUrauZxirM910', popularity=60, duration_ms=191040)

In [5]:
# Fetch audio features for every track. The result is later passed to
# pd.DataFrame.from_dict(..., orient='index'), so it is expected to be a mapping
# with one entry per track (the rendered frame shows track URIs as its keys).
_audio_features = spotifyService.get_audio_features(tracks)

In [156]:
def get_track_name_and_artist(track_uri):
    """Look up a track on Spotify and return its title and primary artist.

    Parameters
    ----------
    track_uri : str
        Track identifier passed unchanged to ``spotifyService.spotipyClient.track``.

    Returns
    -------
    tuple
        ``(track_name, artist_name)`` where ``artist_name`` is the first artist
        credited on the track itself.
    """
    r = spotifyService.spotipyClient.track(track_uri)
    # Prefer the track's own artist list: album['artists'] names the album-level
    # artist, which for compilations can differ from the performer of the track
    # (e.g. "Various Artists"). Fall back to the album artists only when the
    # track lists none.
    artists = r.get('artists') or r['album']['artists']
    return r['name'], artists[0]['name']

In [171]:
# One row per track, indexed by the keys of `_audio_features`.
audio_features_df = pd.DataFrame.from_dict(_audio_features, orient='index')

# Resolve (track name, artist) for every index entry, then split the pairs
# into two parallel sequences that become the new columns.
track_names, artists = zip(
    *(get_track_name_and_artist(track_uri) for track_uri in audio_features_df.index)
)
audio_features_df['track_name'] = track_names
audio_features_df['artist'] = artists
audio_features_df.head(2)

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,track_name,artist
spotify:track:4vYauy3ABO65vTXggGYaZg,0.472,0.71,0,-8.852,1,0.0599,0.278,1e-06,0.103,0.527,120.052,audio_features,4vYauy3ABO65vTXggGYaZg,spotify:track:4vYauy3ABO65vTXggGYaZg,https://api.spotify.com/v1/tracks/4vYauy3ABO65...,https://api.spotify.com/v1/audio-analysis/4vYa...,363973,4,Don't Leave Me This Way (feat. Teddy Pendergrass),Harold Melvin & The Blue Notes
spotify:track:2hUHm19fmQnJ40LZXqjhZt,0.542,0.818,10,-4.653,0,0.0318,0.000558,0.0141,0.126,0.29,132.048,audio_features,2hUHm19fmQnJ40LZXqjhZt,spotify:track:2hUHm19fmQnJ40LZXqjhZt,https://api.spotify.com/v1/tracks/2hUHm19fmQnJ...,https://api.spotify.com/v1/audio-analysis/2hUH...,191040,4,To Lose My Life,White Lies


In [172]:
# One-hot encode the two categorical columns. `key` is stringified first, so its
# dummy columns sort lexicographically (key_1, key_10, key_11, key_2, ...).
# drop_first=True omits the first category of each variable.
key_categories = pd.get_dummies(
    audio_features_df['key'].map(str),
    prefix='key',
    drop_first=True,
)
time_signature_categories = pd.get_dummies(
    audio_features_df['time_signature'],
    prefix='time_signature',
    drop_first=True,
)

# Append the dummy columns to the right of the original feature frame.
encoded_parts = [audio_features_df, key_categories, time_signature_categories]
audio_features_df_encoded = pd.concat(encoded_parts, axis=1)
audio_features_df_encoded.head(2)

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,track_name,artist,key_1,key_10,key_11,key_2,key_3,key_4,key_5,key_6,key_7,key_8,key_9,time_signature_3,time_signature_4,time_signature_5
spotify:track:4vYauy3ABO65vTXggGYaZg,0.472,0.71,0,-8.852,1,0.0599,0.278,1e-06,0.103,0.527,120.052,audio_features,4vYauy3ABO65vTXggGYaZg,spotify:track:4vYauy3ABO65vTXggGYaZg,https://api.spotify.com/v1/tracks/4vYauy3ABO65...,https://api.spotify.com/v1/audio-analysis/4vYa...,363973,4,Don't Leave Me This Way (feat. Teddy Pendergrass),Harold Melvin & The Blue Notes,0,0,0,0,0,0,0,0,0,0,0,0,1,0
spotify:track:2hUHm19fmQnJ40LZXqjhZt,0.542,0.818,10,-4.653,0,0.0318,0.000558,0.0141,0.126,0.29,132.048,audio_features,2hUHm19fmQnJ40LZXqjhZt,spotify:track:2hUHm19fmQnJ40LZXqjhZt,https://api.spotify.com/v1/tracks/2hUHm19fmQnJ...,https://api.spotify.com/v1/audio-analysis/2hUH...,191040,4,To Lose My Life,White Lies,0,1,0,0,0,0,0,0,0,0,0,0,1,0


In [173]:
# Model inputs: the numeric audio descriptors followed by the one-hot key and
# time-signature columns, in that order.
AUDIO_FEATURES = [
    'danceability',
    'energy',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    *key_categories.columns,
    *time_signature_categories.columns,
]

# X holds the feature columns; y holds the track names used to look rows up.
X = audio_features_df_encoded[AUDIO_FEATURES]
y = audio_features_df_encoded['track_name']

In [174]:
# Scaler with default settings; it is fitted on X when fit_transform is called.
standardScaler = StandardScaler()

In [175]:
# Fit the scaler on X and transform it in one step, so every column of X
# (continuous features and one-hot dummies alike) is standardized.
# NOTE(review): standardizing the one-hot columns may give rare keys / time
# signatures outsized weight in the similarity — confirm this is intended.
X_scaled = standardScaler.fit_transform(X)

In [176]:
# Pairwise cosine similarity between all rows of X_scaled; row i is later read
# as the similarity of track i to every track.
similarity_matrix = cosine_similarity(X_scaled)

#### Get similar songs

As a first run, what are the 5 songs most similar to Don't Leave Me This Way (feat. Teddy Pendergrass)?

In [177]:
def get_top_n_similar(track, num, track_names=None, similarities=None):
    """Return the row positions of the ``num`` tracks most similar to ``track``.

    Parameters
    ----------
    track : str
        Track name to look up; the first row whose name equals it is used.
    num : int
        Number of neighbours to return (values below 0 are treated as 0).
    track_names : array-like, optional
        Track name per row. Defaults to the notebook-level ``y``.
    similarities : array-like, optional
        Square similarity matrix aligned with ``track_names``. Defaults to the
        notebook-level ``similarity_matrix``.

    Returns
    -------
    numpy.ndarray
        Positional indices ordered from most to least similar, never including
        the query track itself.

    Raises
    ------
    IndexError
        If ``track`` does not occur in ``track_names``.
    """
    # Fall back to the notebook globals so existing two-argument calls keep working.
    if track_names is None:
        track_names = y
    if similarities is None:
        similarities = similarity_matrix

    matches = np.flatnonzero(np.asarray(track_names) == track)
    if matches.size == 0:
        raise IndexError(f"track {track!r} not found in track names")
    track_idx = matches[0]

    scores = np.asarray(similarities)[track_idx]
    # Stable sort on the negated scores: descending similarity, ties kept in row order.
    ranked = np.argsort(-scores, kind='stable')
    # Drop the query row explicitly. Assuming it always ranks first breaks when
    # another row has an identical score (duplicate track or identical features).
    ranked = ranked[ranked != track_idx]
    return ranked[:max(num, 0)]

In [179]:
# Positions of the five nearest neighbours, then the matching feature rows.
query_track = "Don't Leave Me This Way (feat. Teddy Pendergrass)"
similar_positions = get_top_n_similar(query_track, 5)
audio_features_df.iloc[similar_positions]

0


Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,track_name,artist
spotify:track:5GqFHxvAVUmZPdAFj6THjD,0.437,0.902,0,-4.915,1,0.0372,0.0374,0.0,0.089,0.564,150.017,audio_features,5GqFHxvAVUmZPdAFj6THjD,spotify:track:5GqFHxvAVUmZPdAFj6THjD,https://api.spotify.com/v1/tracks/5GqFHxvAVUmZ...,https://api.spotify.com/v1/audio-analysis/5GqF...,188255,4,Change,Bailey Zimmerman
spotify:track:4FdPnT2cFrpWCmWZd7GXc3,0.471,0.846,0,-5.269,1,0.0389,0.00279,3e-06,0.145,0.539,100.089,audio_features,4FdPnT2cFrpWCmWZd7GXc3,spotify:track:4FdPnT2cFrpWCmWZd7GXc3,https://api.spotify.com/v1/tracks/4FdPnT2cFrpW...,https://api.spotify.com/v1/audio-analysis/4FdP...,213719,4,One Mississippi,Kane Brown
spotify:track:2PaFqV4KWXT1KflmwLtJlX,0.47,0.715,0,-7.234,1,0.0506,0.0306,0.0,0.165,0.313,151.977,audio_features,2PaFqV4KWXT1KflmwLtJlX,spotify:track:2PaFqV4KWXT1KflmwLtJlX,https://api.spotify.com/v1/tracks/2PaFqV4KWXT1...,https://api.spotify.com/v1/audio-analysis/2PaF...,200987,4,Weight In Gold,Sea Girls
spotify:track:2hKdd3qO7cWr2Jo0Bcs0MA,0.481,0.638,0,-5.862,1,0.0276,0.153,0.0,0.154,0.497,79.064,audio_features,2hKdd3qO7cWr2Jo0Bcs0MA,spotify:track:2hKdd3qO7cWr2Jo0Bcs0MA,https://api.spotify.com/v1/tracks/2hKdd3qO7cWr...,https://api.spotify.com/v1/audio-analysis/2hKd...,259933,4,Drops of Jupiter (Tell Me),Train
spotify:track:2a7Qtqop47xQK42JgqHllc,0.588,0.849,0,-5.222,1,0.0295,0.0387,0.0,0.0757,0.688,132.007,audio_features,2a7Qtqop47xQK42JgqHllc,spotify:track:2a7Qtqop47xQK42JgqHllc,https://api.spotify.com/v1/tracks/2a7Qtqop47xQ...,https://api.spotify.com/v1/audio-analysis/2a7Q...,225213,4,Saviour,Picture This


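I don't think the two songs most similar to Don't Leave Me This Way should be country songs... All five results share key 0, mode 1 and time signature 4 with the query track, so the one-hot encoded key and time-signature columns may be dominating the cosine similarity.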