In [18]:
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from collections import defaultdict
import pandas as pd
import api #gitignored, private


# Load the local track dataset; later cells read it through `data`.
data = pd.read_csv('input/data.csv')

# Spotify client authenticated with the credentials kept in the private
# (gitignored) `api` module.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=api.CLIENT_ID,
        client_secret=api.CLIENT_SECRET,
    )
)

In [19]:

# Columns that make up each song's feature vector.
relevant_features = [
    'popularity',
    'valence',
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'key',
    'liveness',
    'loudness',
    'mode',
    'speechiness',
    'tempo',
]

# Feature matrix: every row of the dataset restricted to those columns.
X = data[relevant_features]

In [20]:
def find_song(name, artist):
    """Look up a single track through the Spotify API and return its features.

    Parameters
    ----------
    name : str
        Track title to search for.
    artist : str
        Artist name to search for.

    Returns
    -------
    pandas.DataFrame or None
        A one-row frame holding the caller-supplied ``name`` and ``artist``,
        the track's ``explicit`` flag (as an int), ``duration_ms`` and
        ``popularity``, plus every key of the track's audio-features record.
        ``None`` when the search yields no track or when no audio features
        are returned for the track.
    """
    # NOTE(review): Spotify documents field filters as `track:<name>` with no
    # space after the colon — confirm this query is interpreted as intended.
    response = sp.search(q='track: {} artist: {}'.format(name, artist), limit=1)
    items = response['tracks']['items']
    if not items:
        return None

    track = items[0]
    features = sp.audio_features(track['id'])
    print('API call')

    # The audio-features lookup returns one entry per requested id; guard
    # against a missing (None) entry instead of failing on `.items()`.
    if not features or features[0] is None:
        return None

    record = {
        'name': name,
        'artist': artist,
        'explicit': int(track['explicit']),
        'duration_ms': track['duration_ms'],
        'popularity': track['popularity'],
    }
    # Audio-feature keys are merged last, so a key they share with the track
    # metadata takes the audio-features value.
    record.update(features[0])

    return pd.DataFrame([record])


In [21]:
def get_song_data(song):
    """Return the data for a song, preferring the local dataset over the API.

    Parameters
    ----------
    song : dict
        Must provide the keys ``'name'`` and ``'artist'``.

    Returns
    -------
    pandas.DataFrame or None
        Every row of ``data`` whose ``name`` contains ``song['name']`` and
        whose ``artists`` contains ``song['artist']`` (case-insensitive).
        When no row matches, the result of ``find_song`` for the same name
        and artist, which may be ``None``.
    """
    # regex=False: treat the user's text literally, so titles containing
    # characters such as '?', '(' or '+' neither mis-match nor raise re.error.
    # na=False: rows with a missing name/artist count as non-matches instead
    # of putting NA into the boolean mask.
    name_match = data['name'].str.contains(
        song['name'], case=False, regex=False, na=False)
    artist_match = data['artists'].str.contains(
        song['artist'], case=False, regex=False, na=False)

    song_data = data[name_match & artist_match]

    if len(song_data) == 0:
        song_data = find_song(song['name'], song['artist'])
    return song_data





In [22]:
# Preview the first rows of the feature columns.
data.loc[:, relevant_features].head()

Unnamed: 0,popularity,valence,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo
0,4,0.0594,0.982,0.279,0.211,0.878,10,0.665,-20.096,1,0.0366,80.954
1,5,0.963,0.732,0.819,0.341,0.0,7,0.16,-12.441,1,0.415,60.936
2,5,0.0394,0.961,0.328,0.166,0.913,3,0.101,-14.85,1,0.0339,110.339
3,3,0.165,0.967,0.275,0.309,2.8e-05,5,0.381,-9.316,1,0.0354,100.109
4,2,0.253,0.957,0.418,0.193,2e-06,3,0.229,-10.096,1,0.038,101.665


In [23]:
# from sklearn.compose import ColumnTransformer
# from sklearn.preprocessing import StandardScaler, MinMaxScaler
#
# cols_to_transform = [0,1,2,3,4,5,6,7,8,9,10]
# transformer = ColumnTransformer(transformers=[('scaler', StandardScaler(), cols_to_transform)], remainder='passthrough')
#
#
# scaled_data = pd.DataFrame(transformer.fit_transform(data[relevant_features]), columns=transformer.get_feature_names_out())

In [24]:
# scaler = StandardScaler()
# scaled_data = scaler.fit_transform(data[relevant_features])

In [25]:
# Strip surrounding whitespace so stray spaces do not break the substring
# match against the dataset.
song_name = input('Enter song name: ').strip()
artist = input('Enter artist name: ').strip()

# An empty string is a substring of every value, so it would silently match
# the entire dataset; fail early instead.
if not song_name or not artist:
    raise ValueError('Both a song name and an artist name are required.')


Show the data for the song that was found, taken from the local dataset or, if it is not there, from the Spotify API.

In [26]:
song = {'name': song_name, 'artist': artist}

song_data = get_song_data(song)

# get_song_data can return None when the song is neither in the dataset nor
# found through the API; stop here with a clear message rather than failing
# later when the result is indexed.
if song_data is None:
    raise LookupError(
        'No song found for name={!r}, artist={!r}'.format(song_name, artist))

song_data

API call


Unnamed: 0,name,artist,explicit,duration_ms,popularity,danceability,energy,key,loudness,mode,...,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,time_signature
0,flowers,miley cyrus,0,200455,93,0.707,0.681,0,-4.325,1,...,5e-06,0.0322,0.646,117.999,audio_features,4DHcnVTT87F0zZhRPYmZ3B,spotify:track:4DHcnVTT87F0zZhRPYmZ3B,https://api.spotify.com/v1/tracks/4DHcnVTT87F0...,https://api.spotify.com/v1/audio-analysis/4DHc...,4


In [27]:

# Narrow the looked-up song to the feature columns, in the order given by
# relevant_features.
song_data = song_data.loc[:, relevant_features]
song_data

Unnamed: 0,popularity,valence,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo
0,93,0.646,0.0632,0.707,0.681,5e-06,0,0.0322,-4.325,1,0.0668,117.999


In [28]:
# Keep only the first row and shape it as a (1, n_features) array.
song_data = song_data.iloc[0].to_numpy().reshape(1, -1)

In [29]:
# scaled_song_data = pd.DataFrame(transformer.transform(song_data), columns=transformer.get_feature_names_out())
# scaled_song_data

In [30]:
# scaled_song_data = scaler.transform(song_data)
# scaled_song_data

Compute the cosine distance (1 − cosine similarity) between the chosen song and every song in the dataset; smaller values mean more similar songs.

In [31]:
from scipy.spatial.distance import cdist

# Cosine distance from the query song (one row) to every row of X; the result
# has one row and one column per dataset song.
# NOTE(review): the features are used unscaled (the scaling cells are
# commented out), so large-magnitude columns such as tempo and popularity
# likely dominate the distance — confirm this is intended.
distances = cdist(song_data, X, metric='cosine')

Get the row positions in `data` of the 5 closest (most similar) songs.

In [32]:
# Order dataset positions by ascending distance and keep the first five.
# NOTE(review): if the query song came from the dataset, its own row is
# presumably among these (distance 0) — confirm whether it should be excluded.
index = list(np.argsort(distances, axis=-1)[0, :5])
index

[16269, 37892, 91808, 18856, 18008]

Build a pandas DataFrame of the recommended songs.

In [33]:

# Fetch the recommended rows by position and keep only the identifying columns.
rec_songs = data.iloc[index][['name', 'artists', 'release_date', 'id']]

Save the recommendations to a CSV file.

In [34]:
# Write the recommendations without the DataFrame's row index.
rec_songs.to_csv(path_or_buf='recommendations.csv', index=False)