# Using Cosine Similarity to Determine Recommendations

In [6]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import re

from sklearn.metrics.pairwise import cosine_similarity

%matplotlib inline

##### Loading in `svd_matrix` and `svd_df` from preprocessing

In [3]:
with open('../pickle/svd_matrix.pkl', 'rb') as f:
    svd_matrix = pickle.load(f)
with open('../pickle/')

##### Retrieving the Primary Song Listing to help Lookup Song Titles

In [7]:
main_song_list = pd.read_csv('../data/main_wfeats.csv', index_col='song_id')

## Precomputing Cosine Similarity Matrix

In [4]:
cos_sim_mat = cosine_similarity(svd_matrix, svd_matrix)
cos_sim_mat.shape

(22891, 22891)

### Creating a Series of `dicts` for Lookup

In [8]:
song_id_name = dict(zip(main_song_list.index, main_song_list['song_title']))

In [9]:
song_id_artist = dict(zip(main_song_list.index, main_song_list['artist_name']))

In [10]:
song_id_name['6SluaPiV04KOaRTOIScoff']

'Show Me Love - Radio Version'

In [11]:
svd_song_id = dict(zip(svd_df.index, range(svd_df.shape[0])))

NameError: name 'svd_df' is not defined

In [183]:
svd_song_idr = dict(zip(range(svd_df.shape[0]), svd_df.index))

## Searching for Most Similar Songs (Regardless of Artist)

In [252]:
def feat_sim(song_id, k=20, cos_sim_mat=cos_sim_mat):
    '''
    Returns array of indices for top k songs with greatest similarity to given song, along with 
    their similarity rating, based on precomputed cosine similarity of feature vectors.

    top_songs_feat, top_songs_feat_sim = get_sim.by_feats(ind_song_id, k, cos_sim_mat)

    '''
    top_songs_feat = np.argsort(cos_sim_mat[svd_song_id[song_id]])[-2:-(k+2):-1]
    top_songs_feat_sim = np.sort(cos_sim_mat[svd_song_id[song_id]])[-2:-(k+2):-1]

    return top_songs_feat, top_songs_feat_sim

In [179]:
top_songs_feat, top_songs_feat_sim = feat_sim('6SluaPiV04KOaRTOIScoff')

In [180]:
top_songs_feat

array([18716, 18876,  4009,  7957,  9140, 10296,  9698, 16202, 11310,
       18076, 14464,  4800,  8378, 22322, 18045,  8723, 18600, 14836,
       11385, 18175])

In [248]:
main_song_list.index[main_song_list['artist_id'] == '43sZBwHjahUvgbx1WNIkIz']

Index(['01z3wyn02forxUQHEDAa0R', '42T2QQv3xgBlpQxaSP7lnK',
       '4FzrCRil9uGpGGsnnM0vkE', '0s8OMEGJQJIUr9VFwNEH1v',
       '1ZozJfi8u9cO2Ob8KwiwNT', '35GwlKlVXgFCMF5uTp5r7P',
       '2VSbEXqs6NbNiZSTcHlIDR', '22bX2FwXSvG49G0bPWm5nc',
       '5vRPXm59z8ewWO6WiJHg3m', '0eKyHwckh9vQb8ncZ2DXCs'],
      dtype='object', name='song_id')

## Searching for Most Similar Songs (Must be Different Artist)

In [282]:
cos_sim_mat.shape

(22901, 22901)

In [285]:
def feat_sim_da(song_id, k=20, song_db=main_song_list):
    '''
    Returns array of indices for top k songs with greatest similarity to given song, but only
    from artists who didn't perform the given song, along with 
    their similarity rating, based on precomputed cosine similarity of feature vectors.

    top_songs_feat, top_songs_feat_sim = get_sim.by_feats(ind_song_id, k, cos_sim_mat)

    '''
    artist_id = song_db['artist_id'].loc[song_id]
    artist_songs = song_db.index[song_db['artist_id'] == artist_id].drop(song_id)
    new_svd = svds_df.drop(artist_songs, 0)
    csm = cosine_similarity(new_svd, new_svd)
    
    top_songs_feat = np.argsort(csm[svd_song_id[song_id]])[-2:-(k+2):-1]        
    top_songs_feat_sim = np.sort(csm[svd_song_id[song_id]])[-2:-(k+2):-1]

    return top_songs_feat, top_songs_feat_sim

In [286]:
feat_sim_da('6sbXGUn9V9ZaLwLdOfpKRE')

MemoryError: 

## Function to Derive Most Similar Titles Using Cosine Similarity Matrix

In [267]:
def get_recs(song_id, k=20):
    try:
        top_songs_feat, top_songs_feat_sim = feat_sim_da(song_id, k)
        return pd.DataFrame([[song_id_name[svd_song_idr[x]] for x in top_songs_feat],
                         [song_id_artist[svd_song_idr[x]] for x in top_songs_feat],
                         [svd_song_idr[x] for x in top_songs_feat],
                         list(top_songs_feat_sim)], 
                        index=['Song Name', 'Artist', 'Song ID', 'Similarity']).T
    except:
        print('No results available for that id. Please refer to the Song Finder for a list of valid ids.')

In [268]:
get_recs('6sbXGUn9V9ZaLwLdOfpKRE')

Unnamed: 0,Song Name,Artist,Song ID,Similarity
0,Alabaster Box,CeCe Winans,7FLPCH6fx9gJgrRJBoo5U5,0.877043
1,Heads Will Roll,Yeah Yeah Yeahs,18oWEPapjNt32E6sCM6VLb,0.865936
2,"I'm On Fire - From ""Fifty Shades Of Grey"" Soun...",AWOLNATION,2lthIdb19OihVQMfuPaRZ6,0.863646
3,Garden Party,Ricky Nelson,7J5tyfg3OYVNR97KH66ovw,0.84893
4,We're Off to See the Wizard,Judy Garland,1lP1Ch077UUOpNouwBBYyl,0.846745
5,Move In My Direction,Bananarama,3c5a3kc9n4qLkZj3IWNtLe,0.833622
6,That Time Of The Month,Harley Poe,7dWYhwVvcWIKsXaPBLdDHg,0.824771
7,I Love You Always Forever,Donna Lewis,1PEqh7awkpuepLBSq8ZwqD,0.82404
8,Up Down (Do This All Day),T-Pain,6lbhWl34Il0WXm5pX1fM9E,0.822036
9,Supergroovalisticprosifunkstication (The Bumps...,Parliament,0HMn6Mj64KFPE0FYivxt3c,0.818102


In [232]:
get_recs('6sbXGUn9V9ZaLwLdOfpKRE')

Unnamed: 0,Song Name,Artist,Song ID,Similarity
0,It's Gonna Be Me,*NSYNC,2AW37v0bDyuOzGP3XnmFuA,0.850328
1,Tearin' up My Heart - Radio Edit,*NSYNC,594M0rqYMOo8BhMGEdoi5C,0.755491
2,As Long as You Love Me,Backstreet Boys,00WvmRXTkPBZNhhRK3xfdy,0.749165
3,Every Six Seconds,O-Town,2Gm0dh8gnGqelvChn2x4KK,0.719145
4,Don't Go Breaking My Heart,Backstreet Boys,79Mjfhh393dZdAsTvUFDR6,0.714671
5,I Want You Back - Radio Edit,*NSYNC,5YTMRAT4yKgFrepF8Hi3mY,0.707345
6,Everybody (Backstreet's Back) - Radio Edit,Backstreet Boys,4rTeOSYqwXNz5qPR2DUTFZ,0.704542
7,Everybody (Backstreet's Back) - Extended Version,Backstreet Boys,5WTxbyWTpoqhdxEN2szOnl,0.702513
8,"Always Know Where You Are - From ""Treasure Pla...",BBMak,02k3lrWVf8XHIHmBojESIn,0.699618
9,True To Your Heart,98º,7dPS5GUShBVTcAhM1nLRF9,0.690668


In [241]:
get_recs('66ezChUOWvgzRQAi3Ay1wt', 80).iloc[50:, :]

Unnamed: 0,Song Name,Artist,Song ID,Similarity
50,40 Oz.,D12,3SNIM6IPegio1UqXCYAhoc,0.682224
51,Fantastic Voyage,Coolio,3QlTzofanSqDWywxEzGGE2,0.68038
52,We All Die One Day,Obie Trice,5rurZZeggozpAZIHbI55cm,0.679931
53,I’m On Everything,Bad Meets Evil,3qBxoudRLiLbFl7Ansz1Uh,0.679598
54,California Love - Original Version,2Pac,1JClFT74TYSXlzpagbmj0S,0.664299
55,Real Muthaphuckkin' G's,Eazy-E,53BZ6XygAoubR5DU5w38Vq,0.662093
56,So Good (& Metro Boomin),Big Sean,0fWBFDRTIuk8ZgNdZqhCer,0.654352
57,Go Legend (& Metro Boomin),Big Sean,3wAI7MIQtTgwcRmwchPZBc,0.649675
58,Who Am I (What's My Name)?,Snoop Dogg,5XhkV07Vou38wnrzwURUOC,0.647078
59,The Setup,Obie Trice,6uzesxG4dlBg3faZmzL6Qs,0.641862


In [240]:
get_recs('0eKyHwckh9vQb8ncZ2DXCs', 100).iloc[80:, :]

Unnamed: 0,Song Name,Artist,Song ID,Similarity
80,45,Shinedown,420JGkyLfLUZcgBHKiIK9v,0.602134
81,Call Me,Shinedown,2nBWxgSZ79w9l6t2rUg6pl,0.601257
82,The Day I Tried To Live,Soundgarden,78YJJJH55MSyk7547100sW,0.600679
83,Sound Of A Gun,Audioslave,3zFG5dyH5rJfkZ25fgR173,0.600597
84,Surround Me,Scott Stapp,7CfJG9Kty5UVywjsEFRU9f,0.599598
85,Take Out the Gunman,Chevelle,1z7rvZQkZM1gdSp3meW3dW,0.598424
86,Hunger Strike - 25th Anniversary Mix,Temple Of The Dog,3CtphwpjC0XjIVpLFvGiQR,0.598107
87,TalkTalk,A Perfect Circle,3sPpTsMNzlyJJx9LLJFT0e,0.596926
88,Away From The Sun,3 Doors Down,3THdexHRmED4euIY91Zrxc,0.596529
89,December,Collective Soul,2PrUKG4kAO27esFEtQu9rL,0.596021


## Song Finder - An Easy Lookup Tool to Find Songs for Recommendations

In [None]:
def song_finder(query, type='artist'):
    '''
    Retrieves song name and song id for a requested recording currently indexed
    '''
    