In [8]:
import pandas as pd
import os
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import spotipy
import spotipy.util as util

In [9]:
# importing dataset 10k with lyrics
data_10k = pd.read_csv('../processed_data/data_lyrics_10k.csv')

In [10]:
data_10k.head()

Unnamed: 0.1,Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,...,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo,lyrics
0,0,0.817,2013,0.0158,['Parmalee'],0.551,214933,0.863,0,3Bdqlr7jQLNhITAgcBGQBG,...,11,0.0843,-3.506,1,Close Your Eyes,50,2013-12-10,0.0322,143.952,Well take a look at what's left in that sunset...
1,1,0.548,2003,0.00661,['JAY-Z'],0.494,234627,0.887,1,7sLpSWxQazJzDVG6YGzlVs,...,6,0.103,-4.297,0,99 Problems,61,2003-11-14,0.398,89.554,"If you're havin' girl problems, I feel bad for..."
2,2,0.732,2014,0.0477,['Sam Hunt'],0.59,235507,0.94,0,3BuPop8SzLG2Q88TJcFAjp,...,9,0.379,-4.124,1,Raised On It,54,2014-10-27,0.0409,94.02,Snapbacks and Levi jeans PBR and burnt CDs Run...
3,3,0.475,1981,0.000473,['Iron Maiden'],0.34,288947,0.974,0,7EvjTEzuv7TWaIaWY63sWV,...,0,0.373,-5.114,1,Drifter - 2015 Remaster,29,1981-02-02,0.106,101.276,
4,4,0.55,1930,0.994,"['Markos Vamvakaris', 'Apostolos Xatzixristos']",0.41,197653,0.169,0,38PozVGXXoeO8dTEVzy74Y,...,2,0.113,-18.862,1,Soultana maurofora,0,1930-01-01,0.0391,93.89,


In [11]:
# data cleanup and arrangements
data_10k = data_10k.drop(columns=['Unnamed: 0', 'id', 'release_date', 'lyrics'])
data_10k['duration_s'] = data_10k['duration_ms'].apply(lambda x: x/1000)
data_10k = data_10k.drop(columns=['duration_ms'])
data_10k = data_10k[(data_10k['duration_s'] > 60) & (data_10k['duration_s'] < 600)]
data_10k['duration_m'] = data_10k['duration_s']/60
data_10k = data_10k.drop(columns=['duration_s'])
data_10k['artists'] = data_10k['artists'].apply(lambda x: x.replace("['", '').replace("'", '').replace("]", ''))
data_10k['index'] = data_10k['artists'] + ' - "' + data_10k['name'] + '"'
data_10k.head()

Unnamed: 0,valence,year,acousticness,artists,danceability,energy,explicit,instrumentalness,key,liveness,loudness,mode,name,popularity,speechiness,tempo,duration_m,index
0,0.817,2013,0.0158,Parmalee,0.551,0.863,0,0.0,11,0.0843,-3.506,1,Close Your Eyes,50,0.0322,143.952,3.582217,"Parmalee - ""Close Your Eyes"""
1,0.548,2003,0.00661,JAY-Z,0.494,0.887,1,0.0,6,0.103,-4.297,0,99 Problems,61,0.398,89.554,3.91045,"JAY-Z - ""99 Problems"""
2,0.732,2014,0.0477,Sam Hunt,0.59,0.94,0,0.0,9,0.379,-4.124,1,Raised On It,54,0.0409,94.02,3.925117,"Sam Hunt - ""Raised On It"""
3,0.475,1981,0.000473,Iron Maiden,0.34,0.974,0,0.0928,0,0.373,-5.114,1,Drifter - 2015 Remaster,29,0.106,101.276,4.815783,"Iron Maiden - ""Drifter - 2015 Remaster"""
4,0.55,1930,0.994,"Markos Vamvakaris, Apostolos Xatzixristos",0.41,0.169,0,0.901,2,0.113,-18.862,1,Soultana maurofora,0,0.0391,93.89,3.294217,"Markos Vamvakaris, Apostolos Xatzixristos - ""S..."


In [12]:
# creating data matrix
data_10k_matrix = data_10k.drop(columns=['artists', 'name', 'index'])
data_10k_matrix = data_10k_matrix.set_index(data_10k['index']).sort_index(axis=1)
data_10k_matrix.head()

Unnamed: 0_level_0,acousticness,danceability,duration_m,energy,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo,valence,year
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
"Parmalee - ""Close Your Eyes""",0.0158,0.551,3.582217,0.863,0,0.0,11,0.0843,-3.506,1,50,0.0322,143.952,0.817,2013
"JAY-Z - ""99 Problems""",0.00661,0.494,3.91045,0.887,1,0.0,6,0.103,-4.297,0,61,0.398,89.554,0.548,2003
"Sam Hunt - ""Raised On It""",0.0477,0.59,3.925117,0.94,0,0.0,9,0.379,-4.124,1,54,0.0409,94.02,0.732,2014
"Iron Maiden - ""Drifter - 2015 Remaster""",0.000473,0.34,4.815783,0.974,0,0.0928,0,0.373,-5.114,1,29,0.106,101.276,0.475,1981
"Markos Vamvakaris, Apostolos Xatzixristos - ""Soultana maurofora""",0.994,0.41,3.294217,0.169,0,0.901,2,0.113,-18.862,1,0,0.0391,93.89,0.55,1930


In [14]:
# testing recommendation with known index from dataset
v1 = np.array(data_10k_matrix.loc['Parmalee - "Close Your Eyes"']).reshape(1, -1)

sim1 = cosine_similarity(data_10k_matrix, v1).reshape(-1)

recommendation_df = pd.DataFrame(sim1, index = data_10k_matrix.index)
recommendation_df = recommendation_df.rename(columns={0:'cosine_similarity'})
recommendation_df.sort_values('cosine_similarity', ascending=False, inplace=True)

recommendation_df.head(10)

array([[ 1.58000000e-02,  5.51000000e-01,  3.58221667e+00,
         8.63000000e-01,  0.00000000e+00,  0.00000000e+00,
         1.10000000e+01,  8.43000000e-02, -3.50600000e+00,
         1.00000000e+00,  5.00000000e+01,  3.22000000e-02,
         1.43952000e+02,  8.17000000e-01,  2.01300000e+03]])

In [18]:
# needed credentials and permissions (scope)
cid = os.environ.get('SPOTIFY_CLIENT_ID')
secret = os.environ.get('SPOTIFY_CLIENT_SECRET')
redirect_uri = os.environ.get('SPOTIPY_REDIRECT_URI')
scope = 'playlist-modify-private'
username = None

In [76]:
# testing locally for user authentication, later development is needed
spotify_token = util.prompt_for_user_token(username,
                                           scope,
                                           cid,
                                           secret,
                                           redirect_uri)

sp = spotipy.Spotify(auth=spotify_token)

In [60]:
def get_song_features(input):
    """
    Gets index, id, all audio_features sorted.
    This works for songs that may be missing from our dataset.
    """
    search_result = sp.search(input, limit=1)
    
    id = search_result['tracks']['items'][0]['id']
    
    # get audio features
    audio_features = sp.audio_features(id)[0]
    audio_features['year'] = search_result['tracks']['items'][0]['album']['release_date'][:4]
    audio_features['explicit'] = search_result['tracks']['items'][0]['explicit']
    audio_features['popularity'] = search_result['tracks']['items'][0]['popularity']
    
    # convert dict to series
    audio_features = pd.Series(audio_features)
    
    # cleanup
    audio_features['explicit'] = audio_features['explicit'] * 1
    audio_features['duration_m'] = (audio_features['duration_ms'] /1000)/60
    audio_features = audio_features.drop(['id', 'uri', 'track_href', 'analysis_url', 'type', 'duration_ms', 'time_signature']).sort_index()
    
    # get artist name
    artist_name = search_result['tracks']['items'][0]['artists'][0]['name']
    
    # get track name
    track_name = search_result['tracks']['items'][0]['name']
    
    # create index
    track_index = artist_name + ' - "' + track_name + '"'
    
    return track_index, id, audio_features

In [71]:
song_features = get_song_features('blink 182 all the small things')

In [67]:
def get_recommendation(song_features, amount):
    """
    Compares cosine similarity between our requested song features and our dataset.
    Results sorted
    """
    v1 = np.array(song_features).reshape(1, -1)
    sim1 = cosine_similarity(data_10k_matrix, v1).reshape(-1)
    
    recommendation_df = pd.DataFrame(sim1, index = data_10k_matrix.index)
    recommendation_df = recommendation_df.rename(columns={0:'cosine_similarity'})
    recommendation_df.sort_values('cosine_similarity', ascending=False, inplace=True)
    
    return recommendation_df.head(amount)

In [80]:
recommendation = get_recommendation(song_features[2], 10)
recommendation

Unnamed: 0_level_0,cosine_similarity
index,Unnamed: 1_level_1
"Jimmie Rodgers - ""Ben Dewberry's Final Run""",0.999999
"Xavier Cugat & His Orchestra - ""(The Chi Chi) Cha Cha Cha""",0.999999
"Lefty Frizzell - ""You're Humbuggin' Me""",0.999998
"Francisco Canaro, Roberto Maida - ""Mal de Amores - Remasterizado""",0.999998
"Jimmie Rodgers - ""Daddy and Home""",0.999997
"Kanak Das - ""Aalok Andhar Jetha""",0.999997
"Guandulito - ""El Negro Lindo""",0.999997
"Vasilis Skaliotis - ""Agiovasileiatikos Kozanis / Nizamikos Naousis / Partalo""",0.999997
"Lola Flores - ""Válgame la Magdalena (Zambra)""",0.999996
"Sister Rosetta Tharpe, The Rosettes - ""Jesus Remembers""",0.999996


In [83]:
def get_song_id(song):
    
    search_result = sp.search(song, limit=1)
    
    id = search_result['tracks']['items'][0]['id']
    
    return id

In [94]:
recommendation_id_list = list(recommendation.reset_index()['index'].apply(lambda x: get_song_id(x)))

In [None]:
# getting user id
user_id = sp.me()['id']

# creating a private playlist
playlist = sp.user_playlist_create(user_id, 
                                   'test', 
                                   public=False, 
                                   collaborative=False, 
                                   description='Testing')

# obtaining playlist id
playlist_id = playlist['id']
playlist_id

# add songs to playlist
sp.user_playlist_add_tracks(username, 
                            playlist_id, 
                            recommendation_id_list, 
                            position=None)