# iv_alt_cosine: Alternative Datasets Used to Calculate Cosine Similarity

In [2]:
# Load the pickled `svd_matrix` object and read the companion CSV, using its
# `song_id` column as the DataFrame index.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles produced by this project.
with open('../pickle/svd_matrix.pkl', 'rb') as f:
    svd_matrix = pickle.load(f)
svd_df = pd.read_csv('../data/svd_df.csv', index_col='song_id')

In [4]:
# Load the pickled `main_songs_scaled_nmfg` object; it is the input of the
# "NMF on Genres Only" cosine-similarity cell further down.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles produced by this project.
with open('../pickle/main_songs_scaled_nmfg.pkl', 'rb') as f:
    main_songs_scaled_nmfg = pickle.load(f)

In [121]:
# Load the pickled `main_songs_scaled_af` object; it is the input of the
# `cos_sim_mat_af` similarity matrix used in the "No Genres" results.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles produced by this project.
with open('../pickle/main_songs_scaled_af.pkl', 'rb') as f:
    main_songs_scaled_af = pickle.load(f)

In [4]:
# Load the pickled `main_songs_scaled_svd_ny` object and a companion CSV.
# NOTE(review): the pickle is named `..._svd_ny` but the CSV is `..._svdg_ny`;
# confirm that both files describe the same dataset.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles produced by this project.
with open('../pickle/main_songs_scaled_svd_ny.pkl', 'rb') as f:
    main_songs_scaled_svd_ny = pickle.load(f)
main_songs_svd_ny = pd.read_csv('../data/main_songs_svdg_ny.csv')

##### SVD on All Features

In [6]:
# Similarity of `svd_matrix` against itself; the recorded output below shows a
# square (22891, 22891) result.
# NOTE(review): `cosine_similarity` is presumably
# sklearn.metrics.pairwise.cosine_similarity; the import is not visible here.
cos_sim_mat = cosine_similarity(svd_matrix, svd_matrix)
cos_sim_mat.shape

(22891, 22891)

##### NMF on Genres Only (50 total) + All Other Song Features

In [114]:
# Similarity of `main_songs_scaled_nmfg` against itself; the recorded output
# below shows a square (22891, 22891) result. This matrix is passed to
# get_recs in the "NMF Genres Only" examples.
cos_sim_mat_nmfg = cosine_similarity(main_songs_scaled_nmfg, main_songs_scaled_nmfg)
cos_sim_mat_nmfg.shape

(22891, 22891)

In [122]:
# Similarity of `main_songs_scaled_af` against itself; the recorded output
# below shows a square (22891, 22891) result. This matrix is passed to
# get_recs in the "No Genres" examples.
cos_sim_mat_af = cosine_similarity(main_songs_scaled_af, main_songs_scaled_af)
cos_sim_mat_af.shape

(22891, 22891)

##### SVD, Genres Only, Year Kept

In [7]:
# Similarity of `main_songs_scaled_svd_ny` against itself; the recorded output
# below shows a square (22891, 22891) result. `cos_sim_mat_g` is the default
# `cos_sim_mat` argument of feat_sim, feat_sim_da and get_recs, so this cell
# must run before those functions are defined.
cos_sim_mat_g = cosine_similarity(main_songs_scaled_svd_ny, main_songs_scaled_svd_ny)
cos_sim_mat_g.shape

(22891, 22891)

All of the functions below, which retrieve similarity scores, are slight modifications of code from Douglas Strodtman's repo @ git.generalassemb.ly

#### Searching for Most Similar Songs (Regardless of Artist)

In [10]:
def feat_sim(song_id, k=10, cos_sim_mat=cos_sim_mat_g):
    '''
    Return the k songs most similar to a given song, with their similarity scores.

    Parameters
    ----------
    song_id
        Identifier of the query song. It is translated to a row of
        `cos_sim_mat` through the module-level `song_cosine_id` lookup.
    k : int, default 10
        Maximum number of neighbours to return (fewer are returned when the
        matrix has fewer than k other rows).
    cos_sim_mat : 2-D array, default `cos_sim_mat_g`
        Precomputed song-by-song similarity matrix. The default is bound when
        the function is defined.

    Returns
    -------
    top_songs_feat : np.ndarray
        Row indices into `cos_sim_mat`, most similar first.
    top_songs_feat_sim : np.ndarray
        Similarity values matching `top_songs_feat` position by position.

    Raises
    ------
    Whatever `song_cosine_id[song_id]` raises for an unknown id
    (KeyError if it is a dict or pandas Series -- not visible from here).

    Usage: top_songs_feat, top_songs_feat_sim = feat_sim(song_id, k, cos_sim_mat)
    '''
    row_idx = song_cosine_id[song_id]
    sims = cos_sim_mat[row_idx]

    # Sort once, highest similarity first, and reuse the order for the scores.
    ranked = np.argsort(sims)[::-1]
    # Drop the query song by index rather than assuming it ranks first: a
    # duplicate track with an identical feature vector ties with it, and the
    # query could otherwise be returned as its own recommendation.
    ranked = ranked[ranked != row_idx]

    top_songs_feat = ranked[:max(k, 0)]
    top_songs_feat_sim = sims[top_songs_feat]

    return top_songs_feat, top_songs_feat_sim

#### Searching for Most Similar Songs (Must be Different Artist)

In [11]:
def feat_sim_da(song_id, k=10, song_db=main_song_list, cos_sim_mat=cos_sim_mat_g):
    '''
    Return candidate neighbours for a "different artist" search, plus the
    song ids the caller should exclude.

    This function does NOT filter by artist itself. It returns the k + 10 most
    similar songs (any artist) together with the ids of the other songs by the
    query song's artist, so the caller can drop those and keep the first k.

    Parameters
    ----------
    song_id
        Identifier of the query song; must be a label of `song_db.index` and a
        key of the module-level `song_cosine_id` lookup.
    k : int, default 10
        Number of recommendations ultimately wanted.
    song_db : pandas.DataFrame, default `main_song_list`
        Table indexed by song id with an 'artist_id' column.
    cos_sim_mat : 2-D array, default `cos_sim_mat_g`
        Precomputed song-by-song similarity matrix.

    Returns
    -------
    top_songs_feat : np.ndarray
        Up to k + 10 row indices into `cos_sim_mat`, most similar first.
    top_songs_feat_sim : np.ndarray
        Similarity values matching `top_songs_feat` position by position.
    artist_songs : pandas.Index
        Labels from `song_db.index` of the other songs by the same artist.
        Note these are song ids, whereas `top_songs_feat` holds matrix row
        indices; map one to the other before comparing.

    Usage: top_songs_feat, top_songs_feat_sim, artist_songs = feat_sim_da(song_id, k)
    '''
    artist_id = song_db.loc[song_id, 'artist_id']
    artist_songs = song_db.index[song_db['artist_id'] == artist_id].drop(song_id)

    row_idx = song_cosine_id[song_id]
    sims = cos_sim_mat[row_idx]

    # Sort once, highest similarity first, and reuse the order for the scores.
    ranked = np.argsort(sims)[::-1]
    # Drop the query song by index rather than assuming it ranks first; a
    # duplicate track with an identical feature vector can tie with it.
    ranked = ranked[ranked != row_idx]

    # Ten spare candidates leave room for same-artist songs to be removed.
    n_candidates = max(k + 10, 0)
    top_songs_feat = ranked[:n_candidates]
    top_songs_feat_sim = sims[top_songs_feat]

    return top_songs_feat, top_songs_feat_sim, artist_songs

#### Retrieving Recommendations in a Dataframe

In [12]:
def get_recs(song_id, k=10, cos_sim_mat=cos_sim_mat_g):
    '''
    Return the recommendations for a song as a DataFrame.

    Calls feat_sim and translates each returned matrix row index back to a
    song id via the module-level `song_cosine_idr` lookup, then to a name and
    artist via `song_id_name` and `song_id_artist`.

    Parameters
    ----------
    song_id
        Identifier of the query song.
    k : int, default 10
        Number of recommendations requested.
    cos_sim_mat : 2-D array, default `cos_sim_mat_g`
        Precomputed song-by-song similarity matrix.

    Returns
    -------
    pandas.DataFrame or None
        One row per recommendation with columns 'Song Name', 'Artist',
        'Song ID' and 'Similarity'. If a lookup fails (e.g. unknown id), a
        message is printed and None is returned.
    '''
    try:
        top_songs_feat, top_songs_feat_sim = feat_sim(song_id, k, cos_sim_mat)
        rec_ids = [song_cosine_idr[x] for x in top_songs_feat]
        return pd.DataFrame([[song_id_name[rec_id] for rec_id in rec_ids],
                             [song_id_artist[rec_id] for rec_id in rec_ids],
                             rec_ids,
                             list(top_songs_feat_sim)],
                            index=['Song Name', 'Artist', 'Song ID', 'Similarity']).T
    except LookupError:
        # Only failed lookups (KeyError / IndexError) mean "unknown id"; any
        # other exception is a real bug and is allowed to propagate.
        print('No results available for that id. Please refer to the Song Finder for a list of valid ids.')
        return None

#### NMF Genres Only

In [115]:
# Show Me Love (Radio Version) - Robyn: top 10 from the NMF-genres matrix
get_recs('6SluaPiV04KOaRTOIScoff', k=10, cos_sim_mat=cos_sim_mat_nmfg)

Unnamed: 0,Song Name,Artist,Song ID,Similarity
0,Be Mine!,Robyn,3FtkFLmplS7GGPFMIQ0dSR,0.807298
1,Do You Know (What It Takes),Robyn,0idCpkJ2pspfAILbanmERu,0.736749
2,Fuck You,Lily Allen,1GCbc1vpkZA2zhjsSFhmHT,0.694513
3,Wannabe,Spice Girls,1Je1IMUlBXcx1Fz0WE7oPT,0.684678
4,2 Become 1,Spice Girls,36AWdhZIGLUTkWpJDhe7va,0.671122
5,Hang With Me,Robyn,6rW8q1p2GCjGMRAlnxBeo7,0.665977
6,Viva Forever,Spice Girls,6BPDPcnbDMDf58srVzbfX9,0.661496
7,Who Do You Think You Are,Spice Girls,1jI1aLmm5HTwiMtvsbwDJw,0.655971
8,Say You'll Be There - シングル・ミックス,Spice Girls,1yTQ39my3MoNROlFw3RDNy,0.621182
9,Dancing On My Own - Radio Edit,Robyn,7g13jf3zqlP5S68Voo5v9m,0.620747


In [116]:
# Guess Who's Back - Scarface: top 10 from the NMF-genres matrix
get_recs('27dUo79lgLEy4Rk5w3UvSL', k=10, cos_sim_mat=cos_sim_mat_nmfg)

Unnamed: 0,Song Name,Artist,Song ID,Similarity
0,Young Boy,Clipse,5fuckUyGUsTPJEugfXNsjw,0.733536
1,No Tears,Scarface,6kYTywTCfycHZcH0WdLyRy,0.730979
2,Mr. Me Too,Clipse,6cAldqwmbUwpM8D17FWtPL,0.729955
3,Mary Jane,Scarface,52VpJGxfpghObsAtML7kaN,0.70373
4,I'm Throwed (feat. Jermaine Dupri),Paul Wall,3n5rIcC04MiNx4T2GOS06g,0.696336
5,My Buddy,G-Unit,3iyZRwIGmyXvdZMoxOlur7,0.689718
6,"Akickdoe! - feat. Pimp, Bun B, and Master P",C-Murder,6MGZkm9RcfxTONlFow4Lb6,0.681252
7,Smile,Scarface,2wxO3ZydJTZRi4Bbc02q7i,0.678955
8,On My Block,Scarface,3XcsaghGUCL9WzJz3TlLKo,0.671551
9,Smoke Buddah,Redman,3eY9pixHny3kVRLeqNezQx,0.67098


In [117]:
# Clique - Kanye West: top 10 from the NMF-genres matrix
get_recs('3rbNV2GI8Vtd8byhUtXZID', k=10, cos_sim_mat=cos_sim_mat_nmfg)

Unnamed: 0,Song Name,Artist,Song ID,Similarity
0,Willy Wonka (feat. Offset),Macklemore,7uKSil5r7uHexnM9lukPTc,0.766099
1,3005,Childish Gambino,3Z2sglqDj1rDRMF5x0Sz2R,0.692085
2,Ladders,Mac Miller,39NDBdU5Xkm5pCFGa5kZtI,0.652304
3,No Role Modelz,J. Cole,62vpWI1CHwFy7tMIcSStl8,0.635105
4,Ghost Town,Kanye West,6Bg7MznA9X0dIhlAsLyBYj,0.632292
5,Freaks And Geeks,Childish Gambino,7xTY3rJMgj0nh6OOPHgsLw,0.630839
6,By Design,Kid Cudi,5FxVTEpoBress37MmlMhbZ,0.628362
7,Don't Like.1,Kanye West,12D0n7hKpPcjuUpcbAKjjr,0.624216
8,Thrift Shop (feat. Wanz),Macklemore & Ryan Lewis,3AYcyxEACnmE6d96RPubID,0.618682
9,God's Plan,Drake,6DCZcSspjsKoFjzjrWoCdn,0.615009


In [118]:
# Finesse (Remix) - Bruno Mars (feat. Cardi B): top 10 from the NMF-genres matrix
get_recs('3Vo4wInECJQuz9BIBMOu8i', k=10, cos_sim_mat=cos_sim_mat_nmfg)

Unnamed: 0,Song Name,Artist,Song ID,Similarity
0,That's What I Like - PARTYNEXTDOOR Remix,Bruno Mars,64TkpV7m7f09JY7K436pju,0.806191
1,The Party's Just Begun,The Cheetah Girls,5Au8ObNQvrwFVEgGqQQL6D,0.703548
2,24K Magic,Bruno Mars,6b8Be6ljOzmkOmFslEb23P,0.678122
3,One World - Soundtrack,The Cheetah Girls,7luYEwE4Y9Of5AKbrt0pJ4,0.66753
4,Valerie - Glee Cast Version,Glee Cast,4hBW3h6FnQNh7NRmyxLLG7,0.659563
5,Good Day,DNCE,49X8pWDpmFpAITVUaudkcN,0.651185
6,24/Seven,Big Time Rush,6FkX1akf4mwRZSznL73kDR,0.635122
7,Malibu - Tiësto Remix,Miley Cyrus,7FKcbKrAi4uiXosKMNTjQI,0.634783
8,Run the World (Girls),Beyoncé,1uXbwHHfgsXcUKfSZw5ZJ0,0.634183
9,All In My Head (Flex),Fifth Harmony,7nD9nN3jord9wWcfW3Gkcm,0.633767


#### No Genres

In [123]:
# Show Me Love (Radio Version) - Robyn: top 10 from the no-genres matrix (cos_sim_mat_af)
get_recs('6SluaPiV04KOaRTOIScoff', k=10, cos_sim_mat=cos_sim_mat_af)

Unnamed: 0,Song Name,Artist,Song ID,Similarity
0,Mrs. Potato Head,Melanie Martinez,3K1tdhoeE6koNnWgAxEK9Y,0.680341
1,You Better Listen,Sublime With Rome,0UjhtgLGvbS8pBgQ4RIihW,0.661197
2,I'm Down,Aaliyah,444Rzzl3T2YOoAN4nQNoMI,0.647382
3,Ain't My Fault,Zara Larsson,0ADG9OgdVTL7fgREP75BrZ,0.641046
4,"Genius - with Sia, Diplo & Labrinth",Sia,73F87Sqh6jQWucOOvz1WFx,0.631186
5,Stripsearch - 2016 Remastered Version,Faith No More,0LErw54mr0L2fQDR8DMLh2,0.627506
6,Centuries,Fall Out Boy,04aAxqtGp5pv12UXAg4pkq,0.622403
7,Ain't My Fault - R3hab Remix,Zara Larsson,3YPlsrk0Voe8AOa1jyN891,0.620697
8,Roar,Katy Perry,6F5c58TMEs1byxUstkzVeM,0.619416
9,The Holy Grail,John Fogerty,6dh3RjahKiBrygKA8we05z,0.611681


In [124]:
# Finesse (Remix) (feat. Cardi B) - Bruno Mars: top 10 from the no-genres matrix (cos_sim_mat_af)
get_recs('3Vo4wInECJQuz9BIBMOu8i', k=10, cos_sim_mat=cos_sim_mat_af)

Unnamed: 0,Song Name,Artist,Song ID,Similarity
0,Take You There,Sean Kingston,6AdLyvisRKHLAwARaZMGr6,0.838186
1,I Could Be The One,Stacie Orrico,0cBHjx5NwJFAFMSYO7Uak9,0.734053
2,The Party's Just Begun,The Cheetah Girls,5Au8ObNQvrwFVEgGqQQL6D,0.730922
3,Object Of My Desire,Starpoint,63Ql2zSLiBryLRPihaUToK,0.72717
4,Who's In The House,Carman,602otoaP8ITTHNlpAbe1WT,0.726203
5,Todo Lo Que Sube Baja,Olga Tanon,4U0wBFXkq5pnMVAZdNKpRf,0.722698
6,Disco Nights (Rock Freak) (Edit),G.Q.,5Ftiez8SeofWdg8cqqiEmm,0.715116
7,Everybody Everybody - Lelewel Re-Freak Mix,Black Box,4vG2cYhMz6Oq4aEKo3GwV1,0.705337
8,True Fuschnick,FU-Schnickens,3oaWYEUBZ4QXrx4qgGIRwF,0.700244
9,Last Night,Keyshia Cole,3Cn1FSSTXeQ7hCfrbizrD0,0.697212


In [125]:
# Clique - Kanye West: top 10 from the no-genres matrix (cos_sim_mat_af)
get_recs('3rbNV2GI8Vtd8byhUtXZID', k=10, cos_sim_mat=cos_sim_mat_af)

Unnamed: 0,Song Name,Artist,Song ID,Similarity
0,Master Thesis,Canibus,1xDJGJLS3DwcxRyacQXNbd,0.752429
1,Willy Wonka (feat. Offset),Macklemore,7uKSil5r7uHexnM9lukPTc,0.749893
2,Get Me Home,Foxy Brown,6Xo9osN1HErsEJoqwj4eDg,0.748324
3,Ali Bomaye,The Game,0LFcFGLYTX6UmeIdO7NVd7,0.746301
4,100 Bars,Canibus,4FBzlDdNlnQH03zCwNv3GZ,0.740567
5,Hate Me Now,Nas,158DIbrVt4YbqNnWyRCS3P,0.710441
6,8 Iz Enuff,Big L,5LZ4d69L4TK8hVyh2q7YEJ,0.702978
7,Radio,Keith Murray,7sq0xuxswwUuHgRFEe3vjB,0.665399
8,My Shit Bang,E-40,7eaEXFYxeRs0TMXFyayhdo,0.663826
9,Desperados,Canibus,7xkSrWq4k87x1tYkaTkqMS,0.66093
