# Using Cosine Similarity to Determine Recommendations

In [6]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import re

from sklearn.metrics.pairwise import cosine_similarity

%matplotlib inline

##### Loading in `svd_matrix` and `svd_df` from preprocessing

In [12]:
# Load the preprocessing outputs: the SVD feature table (CSV) and the SVD matrix (pickle).
# NOTE: pickle.load can execute arbitrary code; only open pickle files you produced yourself.
svd_df = pd.read_csv('../data/svd_df.csv', index_col='song_id')
with open('../pickle/svd_matrix.pkl', 'rb') as pkl_file:
    svd_matrix = pickle.load(pkl_file)

##### Retrieving the Primary Song Listing to help Lookup Song Titles

In [7]:
# Primary song listing, indexed by song_id; the title/artist lookup dicts are built from it.
main_song_list = pd.read_csv(
    '../data/main_wfeats.csv',
    index_col='song_id',
)

## Precomputing Cosine Similarity Matrix

##### SVD on All Features

In [4]:
# Pairwise cosine similarity between all rows of svd_matrix.
# With a single argument, cosine_similarity compares the input against itself.
cos_sim_mat = cosine_similarity(svd_matrix)
cos_sim_mat.shape

(22891, 22891)

##### SVD on Genres Only (20 total) + All Other Song Features

In [None]:
# TODO: the genre-only SVD matrix is not loaded in the cells above, so there is nothing
# to compute yet. Calling cosine_similarity() with no arguments raises TypeError (X is
# required) and would break "Restart & Run All", so keep an explicit placeholder until
# that matrix is available, then replace this with cosine_similarity(<genre SVD matrix>).
cos_sim_mat_g = None

### Creating a Set of `dict`s for Lookup

In [8]:
# song_id -> song title, keyed by main_song_list's index.
song_id_name = main_song_list['song_title'].to_dict()

In [9]:
# song_id -> artist name, keyed by main_song_list's index.
song_id_artist = main_song_list['artist_name'].to_dict()

In [10]:
# Spot-check the title lookup with a known song id.
song_id_name['6SluaPiV04KOaRTOIScoff']

'Show Me Love - Radio Version'

In [13]:
# song_id -> positional row number within svd_df (used to index rows of cos_sim_mat).
# NOTE(review): assumes svd_df rows are in the same order as svd_matrix rows - confirm.
svd_song_id = {sid: pos for pos, sid in enumerate(svd_df.index)}

In [14]:
# Reverse lookup: positional row number within svd_df -> song_id.
svd_song_idr = dict(enumerate(svd_df.index))

## Searching for Most Similar Songs (Regardless of Artist)

In [15]:
def feat_sim(song_id, k=20, cos_sim_mat=cos_sim_mat):
    '''
    Returns array of indices for top k songs with greatest similarity to given song, along with
    their similarity rating, based on precomputed cosine similarity of feature vectors.

    Parameters
    ----------
    song_id : key of `svd_song_id`; identifies the query song.
    k : maximum number of similar songs to return.
    cos_sim_mat : precomputed similarity matrix, indexed by the row positions stored
        in `svd_song_id`.

    Returns
    -------
    top_songs_feat : array of row positions, most similar first (query song excluded).
    top_songs_feat_sim : array of the corresponding similarity values.

    Raises
    ------
    KeyError if `song_id` is not a key of `svd_song_id`.

    top_songs_feat, top_songs_feat_sim = feat_sim(song_id, k, cos_sim_mat)
    '''
    query_idx = svd_song_id[song_id]
    sims = cos_sim_mat[query_idx]

    # Sort once, most similar first.
    ranked = np.argsort(sims)[::-1]
    # Drop the query song by its index rather than by assuming it sorts last: a tie
    # (e.g. a duplicate feature vector) could otherwise leave the query song in the results.
    ranked = ranked[ranked != query_idx]

    top_songs_feat = ranked[:max(k, 0)]
    top_songs_feat_sim = sims[top_songs_feat]

    return top_songs_feat, top_songs_feat_sim

## Searching for Most Similar Songs (Must be Different Artist)

In [54]:
def feat_sim_da(song_id, k=20, song_db=main_song_list, cos_sim_mat=cos_sim_mat):
    '''
    Returns array of indices for top k songs with greatest similarity to a given song, but only
    from artists who didn't perform the given song, along with their similarity rating,
    based on precomputed cosine similarity of feature vectors.

    Parameters
    ----------
    song_id : label in `song_db`'s index and key of `svd_song_id`; identifies the query song.
    k : maximum number of similar songs to return.
    song_db : DataFrame with an 'artist_id' column, indexed by song id.
    cos_sim_mat : precomputed similarity matrix, indexed by the row positions stored
        in `svd_song_id`.

    Returns
    -------
    top_songs_feat : array of row positions, most similar first, with the query song and
        every other song by the same artist removed.
    top_songs_feat_sim : array of the corresponding similarity values.
    artist_songs : index of the other song ids in `song_db` sharing the query song's artist_id.

    Raises
    ------
    KeyError if `song_id` is missing from `song_db` or from `svd_song_id`.

    top_songs_feat, top_songs_feat_sim, artist_songs = feat_sim_da(song_id, k, song_db, cos_sim_mat)
    '''
    artist_id = song_db['artist_id'].loc[song_id]
    artist_songs = song_db.index[song_db['artist_id'] == artist_id].drop(song_id)

    query_idx = svd_song_id[song_id]
    sims = cos_sim_mat[query_idx]

    # Row positions to leave out: the query song itself plus every same-artist song that
    # has a row in the similarity matrix.
    excluded = [svd_song_id[s] for s in artist_songs if s in svd_song_id]
    excluded.append(query_idx)

    # Filter BEFORE truncating to k, so a prolific artist cannot crowd out the results
    # (a fixed over-fetch of a few extra candidates can leave fewer than k after filtering).
    ranked = np.argsort(sims)[::-1]
    ranked = ranked[~np.isin(ranked, excluded)]

    top_songs_feat = ranked[:max(k, 0)]
    top_songs_feat_sim = sims[top_songs_feat]

    return top_songs_feat, top_songs_feat_sim, artist_songs

## Function to Derive Most Similar Titles Using Cosine Similarity Matrix

### For Recs Regardless of Artist

In [16]:
def get_recs(song_id, k=20):
    '''
    Builds a table of the k songs most similar to the given song, regardless of artist.

    Uses `feat_sim` for the ranking and the module-level lookups `svd_song_idr`,
    `song_id_name` and `song_id_artist` to label each result.

    Parameters
    ----------
    song_id : id of the query song.
    k : number of recommendations requested.

    Returns
    -------
    DataFrame with columns 'Song Name', 'Artist', 'Song ID', 'Similarity', most similar
    first; or None (after printing a hint) when a lookup for the id fails.
    '''
    try:
        top_songs_feat, top_songs_feat_sim = feat_sim(song_id, k)
        song_ids = [svd_song_idr[x] for x in top_songs_feat]
        # Build the frame column-wise so 'Similarity' keeps a numeric dtype (transposing a
        # list of mixed rows would turn every column into object dtype).
        return pd.DataFrame(
            {
                'Song Name': [song_id_name[sid] for sid in song_ids],
                'Artist': [song_id_artist[sid] for sid in song_ids],
                'Song ID': song_ids,
                'Similarity': list(top_songs_feat_sim),
            },
            columns=['Song Name', 'Artist', 'Song ID', 'Similarity'],
        )
    except KeyError:
        # Only a failed id lookup is treated as "no results"; any other error is a real
        # bug and is allowed to propagate instead of being silently swallowed.
        print('No results available for that id. Please refer to the Song Finder for a list of valid ids.')
        return None

In [17]:
# Example: recommendations for one song id using the default k.
get_recs('6sbXGUn9V9ZaLwLdOfpKRE')

Unnamed: 0,Song Name,Artist,Song ID,Similarity
0,It's Gonna Be Me,*NSYNC,2AW37v0bDyuOzGP3XnmFuA,0.850973
1,Tearin' up My Heart - Radio Edit,*NSYNC,594M0rqYMOo8BhMGEdoi5C,0.765643
2,As Long as You Love Me,Backstreet Boys,00WvmRXTkPBZNhhRK3xfdy,0.754261
3,Don't Go Breaking My Heart,Backstreet Boys,79Mjfhh393dZdAsTvUFDR6,0.718922
4,Every Six Seconds,O-Town,2Gm0dh8gnGqelvChn2x4KK,0.717603
5,Everybody (Backstreet's Back) - Radio Edit,Backstreet Boys,4rTeOSYqwXNz5qPR2DUTFZ,0.710134
6,I Want You Back - Radio Edit,*NSYNC,5YTMRAT4yKgFrepF8Hi3mY,0.705414
7,Everybody (Backstreet's Back) - Extended Version,Backstreet Boys,5WTxbyWTpoqhdxEN2szOnl,0.701811
8,"Always Know Where You Are - From ""Treasure Pla...",BBMak,02k3lrWVf8XHIHmBojESIn,0.699394
9,True To Your Heart,98º,7dPS5GUShBVTcAhM1nLRF9,0.698136


In [241]:
# Request 80 recommendations and show only those from row position 50 onward.
get_recs('66ezChUOWvgzRQAi3Ay1wt', 80).iloc[50:, :]

Unnamed: 0,Song Name,Artist,Song ID,Similarity
50,40 Oz.,D12,3SNIM6IPegio1UqXCYAhoc,0.682224
51,Fantastic Voyage,Coolio,3QlTzofanSqDWywxEzGGE2,0.68038
52,We All Die One Day,Obie Trice,5rurZZeggozpAZIHbI55cm,0.679931
53,I’m On Everything,Bad Meets Evil,3qBxoudRLiLbFl7Ansz1Uh,0.679598
54,California Love - Original Version,2Pac,1JClFT74TYSXlzpagbmj0S,0.664299
55,Real Muthaphuckkin' G's,Eazy-E,53BZ6XygAoubR5DU5w38Vq,0.662093
56,So Good (& Metro Boomin),Big Sean,0fWBFDRTIuk8ZgNdZqhCer,0.654352
57,Go Legend (& Metro Boomin),Big Sean,3wAI7MIQtTgwcRmwchPZBc,0.649675
58,Who Am I (What's My Name)?,Snoop Dogg,5XhkV07Vou38wnrzwURUOC,0.647078
59,The Setup,Obie Trice,6uzesxG4dlBg3faZmzL6Qs,0.641862


### Must be Different Artist

In [65]:
def get_recs_da(song_id, k=20, song_db=main_song_list, cos_sim_mat=cos_sim_mat):
    '''
    Builds a table of the k songs most similar to the given song, restricted to songs
    whose artist differs from the query song's artist.

    Uses `feat_sim_da` for the ranking and the module-level lookups `svd_song_idr`,
    `song_id_name` and `song_id_artist` to label each result (titles and artist names
    are NOT read from `song_db`).

    Parameters
    ----------
    song_id : id of the query song.
    k : number of recommendations requested.
    song_db : song table passed through to `feat_sim_da` to find same-artist songs.
    cos_sim_mat : similarity matrix passed through to `feat_sim_da`.

    Returns
    -------
    DataFrame with columns 'Song ID', 'Song Name', 'Artist', 'Similarity', most similar
    first; or None (after printing a hint) when a lookup for the id fails.
    '''
    try:
        # Forward song_db and cos_sim_mat so that caller-supplied values are actually used.
        top_songs_feat, top_songs_feat_sim, artist_songs = feat_sim_da(
            song_id, k, song_db=song_db, cos_sim_mat=cos_sim_mat
        )

        same_artist = set(artist_songs)
        candidates = [
            (svd_song_idr[x], sim) for x, sim in zip(top_songs_feat, top_songs_feat_sim)
        ]
        # Remove any remaining same-artist songs, then keep the first k.
        kept = [(sid, sim) for sid, sim in candidates if sid not in same_artist][:k]

        return pd.DataFrame(
            {
                'Song ID': [sid for sid, _ in kept],
                'Song Name': [song_id_name[sid] for sid, _ in kept],
                'Artist': [song_id_artist[sid] for sid, _ in kept],
                'Similarity': [sim for _, sim in kept],
            },
            columns=['Song ID', 'Song Name', 'Artist', 'Similarity'],
        )
    except KeyError:
        # Only a failed id lookup is treated as "no results"; any other error is a real
        # bug and is allowed to propagate instead of being silently swallowed.
        print('No results available for that id. Please refer to the Song Finder for a list of valid ids.')
        return None

In [69]:
# Baseline: unrestricted recommendations for the song id that the next cell passes to get_recs_da.
get_recs('6SluaPiV04KOaRTOIScoff')

Unnamed: 0,Song Name,Artist,Song ID,Similarity
0,Be Mine!,Robyn,3FtkFLmplS7GGPFMIQ0dSR,0.978904
1,Dancing On My Own - Radio Edit,Robyn,7g13jf3zqlP5S68Voo5v9m,0.97019
2,Honey - Single Edit,Robyn,1N0rYVSziD8aPL1NRgsWz2,0.964406
3,Hang With Me,Robyn,6rW8q1p2GCjGMRAlnxBeo7,0.964115
4,Honey,Robyn,4ieJSEFwhgIVJK97Rw4NkJ,0.960998
5,Do You Know (What It Takes),Robyn,0idCpkJ2pspfAILbanmERu,0.959374
6,With Every Heartbeat - with Kleerup,Robyn,53SqGkNJAYLss9AgbduTqQ,0.950063
7,Call Your Girlfriend,Robyn,2sCoROOlNQyFpRQEe6A5lv,0.943466
8,Missing U,Robyn,4Sn5B44sLfQ364FUL98jvN,0.93542
9,Talking Body,Tove Lo,2tpfxAXiI52znho4WE3XFA,0.893097


In [64]:
# Same song id as the previous cell, this time via the different-artist variant.
get_recs_da('6SluaPiV04KOaRTOIScoff')

Unnamed: 0,Song ID,Song Name,Artist,Similarity
0,2tpfxAXiI52znho4WE3XFA,Talking Body,Tove Lo,0.893097
1,3tJ4y2Zqx6gM9xOAuFfsSF,Cool Girl,Tove Lo,0.891052
2,0S0rlh59DhpPheILyXYX76,"bitches (feat. Charli XCX, Icona Pop, Elliphan...",Tove Lo,0.888997
3,6flhrkPIIqB0zU8j2T2yyc,shedontknowbutsheknows,Tove Lo,0.887314
4,45CiDBvF0HNtAHXQFfnSzD,romantics,Tove Lo,0.887224
5,14OxJlLdcHNpgsm4DRwDOB,Habits (Stay High),Tove Lo,0.886395
6,1TIiWomS4i0Ikaf9EKdcLn,disco tits,Tove Lo,0.881939
7,62N2JSA0jHmSH7Va9t7hIf,Habits (Stay High) - Hippie Sabotage Remix,Tove Lo,0.880327
8,6s7PleW93OfE3YnujL9yxw,True Disaster,Tove Lo,0.878036
9,0ADG9OgdVTL7fgREP75BrZ,Ain't My Fault,Zara Larsson,0.843959


In [70]:
# Explicitly request ten recommendations (k=10) from get_recs.
get_recs('6sbXGUn9V9ZaLwLdOfpKRE', k=10)

Unnamed: 0,Song Name,Artist,Song ID,Similarity
0,It's Gonna Be Me,*NSYNC,2AW37v0bDyuOzGP3XnmFuA,0.850973
1,Tearin' up My Heart - Radio Edit,*NSYNC,594M0rqYMOo8BhMGEdoi5C,0.765643
2,As Long as You Love Me,Backstreet Boys,00WvmRXTkPBZNhhRK3xfdy,0.754261
3,Don't Go Breaking My Heart,Backstreet Boys,79Mjfhh393dZdAsTvUFDR6,0.718922
4,Every Six Seconds,O-Town,2Gm0dh8gnGqelvChn2x4KK,0.717603
5,Everybody (Backstreet's Back) - Radio Edit,Backstreet Boys,4rTeOSYqwXNz5qPR2DUTFZ,0.710134
6,I Want You Back - Radio Edit,*NSYNC,5YTMRAT4yKgFrepF8Hi3mY,0.705414
7,Everybody (Backstreet's Back) - Extended Version,Backstreet Boys,5WTxbyWTpoqhdxEN2szOnl,0.701811
8,"Always Know Where You Are - From ""Treasure Pla...",BBMak,02k3lrWVf8XHIHmBojESIn,0.699394
9,True To Your Heart,98º,7dPS5GUShBVTcAhM1nLRF9,0.698136


## Song Finder - An Easy Lookup Tool to Find Songs for Recommendations

In [None]:
def song_finder(query, type='artist'):
    '''
    Retrieves song name and song id for a requested recording currently indexed
    '''
    