# Euclidean Distances to Determine Song Recs

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle

from sklearn.metrics.pairwise import euclidean_distances

%matplotlib inline

##### Loading in Main Song List

In [29]:
# Song metadata table. index_col='song_id' makes the song id the row index, which the
# lookup dicts and the `.loc[song_id, ...]` call further down rely on.
main_song_list = pd.read_csv('../data/main_wfeats.csv', index_col='song_id')

In [3]:
main_song_list.shape

(22891, 81)

##### Loading in Matrices for Evaluation

In [4]:
# NOTE(review): pickle.load can execute arbitrary code from the file -- only load pickles
# that were produced by this project.
with open('../pickle/main_songs_scaled_svd.pkl', 'rb') as f:
    main_songs_scaled_svd = pickle.load(f)
# Feature table indexed by song id; the order of this index is what later maps song ids
# to row positions in the distance matrix.
main_songs_svdg = pd.read_csv('../data/main_songs_svdg.csv', index_col='song_id')

In [5]:
main_songs_svdg.shape

(22891, 124)

## Precomputing Euclidean Distance 

In [6]:
# All-pairs Euclidean distances between the rows of main_songs_scaled_svd.
# NOTE(review): row positions in this matrix are translated to song ids through
# main_songs_svdg.index below, so this assumes main_songs_scaled_svd has the same row
# order as main_songs_svdg -- TODO confirm.
# NOTE(review): the result is a dense n x n array; presumably n is the 22,891 songs
# reported above, which would be roughly 4 GB as float64 -- verify before re-running.
euc_dist_svdg = euclidean_distances(main_songs_scaled_svd, main_songs_scaled_svd)

### Creating a Series of `dicts` for Lookup

In [7]:
# Lookup table: song id -> song title.
song_id_name = dict(main_song_list['song_title'].items())

In [8]:
# Lookup table: song id -> artist name.
song_id_artist = dict(main_song_list['artist_name'].items())

In [9]:
song_id_name['6SluaPiV04KOaRTOIScoff']

'Show Me Love - Radio Version'

In [10]:
# Lookup table: song id -> row position in the distance matrix.
song_euc_id = {sid: pos for pos, sid in enumerate(main_songs_svdg.index)}

In [11]:
# Reverse lookup table: row position in the distance matrix -> song id.
song_euc_idr = dict(enumerate(main_songs_svdg.index))

## Searching for Closest Songs (Regardless of Artist)

In [12]:
def feat_dist(song_id, k=10, euc_mat=euc_dist_svdg):
    '''
    Find the k songs closest to a given song in a precomputed distance matrix.

    Parameters
    ----------
    song_id : hashable
        Key of the query song in the module-level `song_euc_id` mapping
        (song id -> row position in `euc_mat`).
    k : int, default 10
        Number of neighbours to return. Fewer are returned if the matrix row
        holds fewer than k other songs.
    euc_mat : 2-D array
        Pairwise distance matrix; row i is indexed to get the distances from
        song i to every song.

    Returns
    -------
    top_songs_feat : np.ndarray
        Row positions of the k nearest songs, nearest first, query excluded.
    top_songs_feat_dist : np.ndarray
        Distances matching `top_songs_feat`, in the same order.

    Raises
    ------
    KeyError
        If `song_id` is not a key of `song_euc_id`.
    '''
    query_row = song_euc_id[song_id]
    distances = euc_mat[query_row]

    # Sort once and reuse the ordering for both the positions and the distances.
    order = np.argsort(distances)
    # Remove the query song explicitly instead of assuming it sorts into slot 0:
    # another song at distance 0 (identical features) could take that slot.
    order = order[order != query_row]

    # Honour k (the previous slice was hard-coded to ten results).
    top_songs_feat = order[:k]
    top_songs_feat_dist = distances[top_songs_feat]

    return top_songs_feat, top_songs_feat_dist

## Function to Retrieve Closest Titles Using Euclidean Distance

### For Recs Regardless of Artist

In [14]:
def get_recs(song_id, k=10, euc_mat=euc_dist_svdg):
    '''
    Build a table of the songs closest to `song_id`, regardless of artist.

    Parameters
    ----------
    song_id : hashable
        Id of the query song; must be a key of `song_euc_id`.
    k : int, default 10
        Number of neighbours requested from `feat_dist`.
    euc_mat : 2-D array
        Pairwise distance matrix forwarded to `feat_dist`.

    Returns
    -------
    pd.DataFrame or None
        One row per neighbour with columns 'Song Name', 'Artist', 'Song ID'
        and 'Distance', nearest first. If `song_id` is unknown, a message is
        printed and None is returned.

    Raises
    ------
    KeyError
        If a neighbour is missing from the name/artist lookup dicts. That
        indicates inconsistent data and is no longer reported as an invalid id.
    '''
    # Only an unknown id gets the friendly message; other failures should surface
    # instead of being swallowed by a bare `except`.
    if song_id not in song_euc_id:
        print('No results available for that id. Please refer to the Song Finder for a list of valid ids.')
        return None

    top_songs_feat, top_songs_feat_dist = feat_dist(song_id, k, euc_mat)
    neighbour_ids = [song_euc_idr[x] for x in top_songs_feat]

    # Built row-wise and transposed, as before, so the column dtypes are unchanged.
    return pd.DataFrame(
        [[song_id_name[sid] for sid in neighbour_ids],
         [song_id_artist[sid] for sid in neighbour_ids],
         neighbour_ids,
         list(top_songs_feat_dist)],
        index=['Song Name', 'Artist', 'Song ID', 'Distance']
    ).T

In [15]:
#Show Me Love (Radio Version) - Robyn
get_recs('6SluaPiV04KOaRTOIScoff')

Unnamed: 0,Song Name,Artist,Song ID,Distance
0,Be Mine!,Robyn,3FtkFLmplS7GGPFMIQ0dSR,6.6241
1,Wannabe,Spice Girls,1Je1IMUlBXcx1Fz0WE7oPT,8.10034
2,Fuck You,Lily Allen,1GCbc1vpkZA2zhjsSFhmHT,8.2114
3,2 Become 1,Spice Girls,36AWdhZIGLUTkWpJDhe7va,8.25594
4,"Genius - with Sia, Diplo & Labrinth",Sia,73F87Sqh6jQWucOOvz1WFx,8.51976
5,Do You Know (What It Takes),Robyn,0idCpkJ2pspfAILbanmERu,8.55333
6,Hang With Me,Robyn,6rW8q1p2GCjGMRAlnxBeo7,8.60341
7,Say You'll Be There - シングル・ミックス,Spice Girls,1yTQ39my3MoNROlFw3RDNy,8.80104
8,Viva Forever,Spice Girls,6BPDPcnbDMDf58srVzbfX9,8.88022
9,Who Do You Think You Are,Spice Girls,1jI1aLmm5HTwiMtvsbwDJw,8.90613


In [16]:
#Finesse (Remix) - Bruno Mars (feat. Cardi B)
get_recs('3Vo4wInECJQuz9BIBMOu8i')

Unnamed: 0,Song Name,Artist,Song ID,Distance
0,That's What I Like - PARTYNEXTDOOR Remix,Bruno Mars,64TkpV7m7f09JY7K436pju,5.12958
1,The Party's Just Begun,The Cheetah Girls,5Au8ObNQvrwFVEgGqQQL6D,6.50115
2,Good Day,DNCE,49X8pWDpmFpAITVUaudkcN,6.52445
3,Love Never Felt so Good,Michael Jackson,48td6xvpokdYwvbl3JIiXP,6.68026
4,Lone Ranger,Rachel Platten,3eoyz3jZOHhko7hEwtEdnf,6.84979
5,TV In The Morning,DNCE,7bsYIRvIUztsOGVn2iW1ZT,6.91253
6,One World - Soundtrack,The Cheetah Girls,7luYEwE4Y9Of5AKbrt0pJ4,7.01698
7,I Kissed A Girl,Katy Perry,14iN3o8ptQ8cFVZTEmyQRV,7.05715
8,Oops!...I Did It Again,Britney Spears,6naxalmIoLFWR0siv8dnQQ,7.08268
9,Malibu - Tiësto Remix,Miley Cyrus,7FKcbKrAi4uiXosKMNTjQI,7.11706


In [17]:
#Clique - Kanye West
get_recs('3rbNV2GI8Vtd8byhUtXZID')

Unnamed: 0,Song Name,Artist,Song ID,Distance
0,Willy Wonka (feat. Offset),Macklemore,7uKSil5r7uHexnM9lukPTc,7.19057
1,Don't Like.1,Kanye West,12D0n7hKpPcjuUpcbAKjjr,8.36335
2,By Design,Kid Cudi,5FxVTEpoBress37MmlMhbZ,8.37384
3,3005,Childish Gambino,3Z2sglqDj1rDRMF5x0Sz2R,8.37561
4,Ghost Town,Kanye West,6Bg7MznA9X0dIhlAsLyBYj,8.45028
5,Ladders,Mac Miller,39NDBdU5Xkm5pCFGa5kZtI,8.50193
6,Freaks And Geeks,Childish Gambino,7xTY3rJMgj0nh6OOPHgsLw,8.82614
7,Romantic Call,Patra,1siSzxLE2nCUZ4VYTMbCQT,9.01005
8,When It Rains (feat. Aloe Blacc),Dag Savage,7x2zy4YgLY731TKaDFFeuj,9.0317
9,Thrift Shop (feat. Wanz),Macklemore & Ryan Lewis,3AYcyxEACnmE6d96RPubID,9.19289


## Searching for Closest Songs (Must be Different Artist)

In [39]:
def feat_dist_da(song_id, k=10, euc_mat=euc_dist_svdg, song_db=main_song_list):
    '''
    Collect nearest-neighbour candidates for a "different artist" recommendation.

    This function does not filter by artist itself. It returns enough of the
    nearest songs that at least k remain once the caller removes `artist_songs`,
    provided the matrix holds that many songs.

    Parameters
    ----------
    song_id : hashable
        Id of the query song; must be in `song_db.index` and in `song_euc_id`.
    k : int, default 10
        Number of different-artist recommendations the caller wants.
    euc_mat : 2-D array
        Pairwise distance matrix; row i is indexed to get the distances from
        song i to every song.
    song_db : pd.DataFrame
        Song table indexed by song id with an 'artist_id' column.

    Returns
    -------
    top_songs_feat : np.ndarray
        Row positions of the nearest candidates, nearest first, query excluded.
    top_songs_feat_dist : np.ndarray
        Distances matching `top_songs_feat`, in the same order.
    artist_songs : pd.Index
        Ids of the other songs in `song_db` that share the query's 'artist_id'.

    Raises
    ------
    KeyError
        If `song_id` is missing from `song_db.index` or `song_euc_id`.
    '''
    artist_id = song_db.loc[song_id, 'artist_id']
    artist_songs = song_db.index[song_db['artist_id'] == artist_id].drop(song_id)

    query_row = song_euc_id[song_id]
    distances = euc_mat[query_row]

    # Sort once and reuse the ordering for both the positions and the distances.
    order = np.argsort(distances)
    # Remove the query song explicitly instead of assuming it sorts into slot 0.
    order = order[order != query_row]

    # Worst case, every same-artist song is among the nearest neighbours, so take k
    # plus that many. The old fixed k+9 window ran short for prolific artists.
    n_candidates = k + len(artist_songs)
    top_songs_feat = order[:n_candidates]
    top_songs_feat_dist = distances[top_songs_feat]

    return top_songs_feat, top_songs_feat_dist, artist_songs

In [40]:
def get_recs_da(song_id, k=10, euc_mat=euc_dist_svdg, song_db=main_song_list):
    '''
    Build a table of the songs closest to `song_id` that are by other artists.

    Parameters
    ----------
    song_id : hashable
        Id of the query song.
    k : int, default 10
        Maximum number of recommendations to return.
    euc_mat : 2-D array
        Pairwise distance matrix forwarded to `feat_dist_da`.
    song_db : pd.DataFrame
        Song table forwarded to `feat_dist_da`, which uses it to find the query
        artist's other songs.

    Returns
    -------
    pd.DataFrame
        Up to k rows, nearest first, with columns 'Song ID', 'Song Name',
        'Artist' and 'Distance' and a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If `song_id` is unknown, or a candidate is missing from the
        name/artist lookup dicts.
    '''
    # Forward the caller's song_db; previously the global main_song_list was
    # always used and the argument was silently ignored.
    top_songs_feat, top_songs_feat_dist, artist_songs = feat_dist_da(song_id, k, euc_mat, song_db)
    candidate_ids = [song_euc_idr[x] for x in top_songs_feat]

    # Built row-wise and transposed, as before, so the column dtypes are unchanged.
    recs = pd.DataFrame(
        [candidate_ids,
         [song_id_name[sid] for sid in candidate_ids],
         [song_id_artist[sid] for sid in candidate_ids],
         list(top_songs_feat_dist)],
        index=['Song ID', 'Song Name', 'Artist', 'Distance']
    ).T

    # Drop the query artist's own songs with one mask instead of an in-place loop.
    other_artists = ~recs['Song ID'].isin(artist_songs)
    return recs[other_artists].head(k).reset_index(drop=True)

In [44]:
#Show Me Love (Radio Version) - Robyn
get_recs_da('6SluaPiV04KOaRTOIScoff')

Unnamed: 0,Song ID,Song Name,Artist,Distance
0,1Je1IMUlBXcx1Fz0WE7oPT,Wannabe,Spice Girls,8.10034
1,1GCbc1vpkZA2zhjsSFhmHT,Fuck You,Lily Allen,8.2114
2,36AWdhZIGLUTkWpJDhe7va,2 Become 1,Spice Girls,8.25594
3,73F87Sqh6jQWucOOvz1WFx,"Genius - with Sia, Diplo & Labrinth",Sia,8.51976
4,1yTQ39my3MoNROlFw3RDNy,Say You'll Be There - シングル・ミックス,Spice Girls,8.80104
5,6BPDPcnbDMDf58srVzbfX9,Viva Forever,Spice Girls,8.88022
6,1jI1aLmm5HTwiMtvsbwDJw,Who Do You Think You Are,Spice Girls,8.90613
7,2HWyXrAd2voTlSFVVqFAVi,Feel so High,Des'ree,9.18988
8,3BsaRV5QIulYz2lV9WWa8T,Show Me the Meaning of Being Lonely,Backstreet Boys,9.27665
9,1NwDWbpg9dPH12xBd2ibrv,Holler,Spice Girls,9.39477


In [41]:
#Clique - Kanye West
get_recs_da('3rbNV2GI8Vtd8byhUtXZID', k=10, euc_mat=euc_dist_svdg, song_db=main_song_list)

Unnamed: 0,Song ID,Song Name,Artist,Distance
0,7uKSil5r7uHexnM9lukPTc,Willy Wonka (feat. Offset),Macklemore,7.19057
1,5FxVTEpoBress37MmlMhbZ,By Design,Kid Cudi,8.37384
2,3Z2sglqDj1rDRMF5x0Sz2R,3005,Childish Gambino,8.37561
3,39NDBdU5Xkm5pCFGa5kZtI,Ladders,Mac Miller,8.50193
4,7xTY3rJMgj0nh6OOPHgsLw,Freaks And Geeks,Childish Gambino,8.82614
5,1siSzxLE2nCUZ4VYTMbCQT,Romantic Call,Patra,9.01005
6,7x2zy4YgLY731TKaDFFeuj,When It Rains (feat. Aloe Blacc),Dag Savage,9.0317
7,3AYcyxEACnmE6d96RPubID,Thrift Shop (feat. Wanz),Macklemore & Ryan Lewis,9.19289
8,5MZjzyQUEhrfDnnzGsLqnx,Hendrix,Wyclef Jean,9.31986
9,0pBcI8Gf1Oe3ziIbE9tBsX,Young Nigga (feat. Puff Daddy),Nipsey Hussle,9.33391


In [43]:
#Finesse (Remix) - Bruno Mars (feat. Cardi B)
get_recs_da('3Vo4wInECJQuz9BIBMOu8i')

Unnamed: 0,Song ID,Song Name,Artist,Distance
0,5Au8ObNQvrwFVEgGqQQL6D,The Party's Just Begun,The Cheetah Girls,6.50115
1,49X8pWDpmFpAITVUaudkcN,Good Day,DNCE,6.52445
2,48td6xvpokdYwvbl3JIiXP,Love Never Felt so Good,Michael Jackson,6.68026
3,3eoyz3jZOHhko7hEwtEdnf,Lone Ranger,Rachel Platten,6.84979
4,7bsYIRvIUztsOGVn2iW1ZT,TV In The Morning,DNCE,6.91253
5,7luYEwE4Y9Of5AKbrt0pJ4,One World - Soundtrack,The Cheetah Girls,7.01698
6,14iN3o8ptQ8cFVZTEmyQRV,I Kissed A Girl,Katy Perry,7.05715
7,6naxalmIoLFWR0siv8dnQQ,Oops!...I Did It Again,Britney Spears,7.08268
8,7FKcbKrAi4uiXosKMNTjQI,Malibu - Tiësto Remix,Miley Cyrus,7.11706
9,2K87XMYnUMqLcX3zvtAF4G,Drag Me Down,One Direction,7.22432
