In [67]:
from sklearn.neighbors import NearestNeighbors as NN
import numpy as np
import pandas as pd
import glob, os, random

In [68]:
# Lookup tables the helper functions below use to resolve a track's genre:
#   genres.csv       is queried by its 'genre_id' and 'title' columns
#   ids_genre_df.csv is queried by its 'track_id' and 'genre_id' columns
df_genre_keys = pd.read_csv('../fma_metadata/genres.csv')
df_track_genre_pairs = pd.read_csv('ids_genre_df.csv')

In [69]:
def embeddings_and_labels(embedding_dir, seed=None):
    """Load saved song embeddings together with their labels and genre titles.

    Every path matched by ``embedding_dir + '*'`` is treated as a ``.npy``
    file whose base name is a (zero-padded) track id. The track id is mapped
    to a genre id through ``df_track_genre_pairs`` and then to a genre title
    through ``df_genre_keys``. Files are visited in shuffled order.

    Parameters
    ----------
    embedding_dir : str
        Path prefix that is globbed with a trailing ``'*'``; a directory
        therefore has to end with a path separator.
    seed : int, optional
        Seed for the shuffle. When given, the file order (and so the row
        order of the returned arrays) is reproducible. When ``None`` the
        module-level ``random`` state is used, as before.

    Returns
    -------
    avg_song_embeddings : np.ndarray
        The loaded embeddings, one entry per successfully processed file.
    avg_song_labels : np.ndarray
        File base names with ``.npy`` replaced by ``.wav``.
    song_genres : np.ndarray
        Genre title of each song.

    All three arrays have the same length and share the same row order.
    """
    avg_song_embeddings = []
    avg_song_labels = []
    song_genres = []

    # glob order is filesystem dependent; sort first so a seeded shuffle
    # yields the same order on every run.
    songs = sorted(glob.glob(embedding_dir + '*'))
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(songs)

    for song_path in songs:
        base_id = os.path.basename(song_path).replace('.npy', '.wav')
        try:
            # int() already ignores leading zeros, so no manual stripping.
            track_id = int(base_id.replace('.wav', ''))
            g_ids = df_track_genre_pairs.loc[
                df_track_genre_pairs['track_id'] == track_id, 'genre_id'
            ].values
            genre_name = df_genre_keys.loc[
                df_genre_keys['genre_id'] == g_ids[0], 'title'
            ].values[0]
            song_emb = np.load(song_path)
        except Exception:
            # Best effort: skip files whose name is not a track id, whose
            # genre is unknown, or whose embedding cannot be loaded.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            continue

        # Append only after every step succeeded so the three lists can
        # never drift out of alignment.
        avg_song_embeddings.append(song_emb)
        avg_song_labels.append(base_id)
        song_genres.append(genre_name)

    avg_song_embeddings = np.array(avg_song_embeddings)
    avg_song_labels = np.array(avg_song_labels)
    song_genres = np.array(song_genres)

    return avg_song_embeddings, avg_song_labels, song_genres

In [70]:
def neighbor_inds_sims(avg_song_embeddings, n_neighbors=6):
    """Find each song's nearest neighbors by cosine similarity.

    Parameters
    ----------
    avg_song_embeddings : array-like
        One embedding per row.
    n_neighbors : int, optional
        Number of columns to return, counting the song itself in column 0.
        The default of 6 gives the song plus its five nearest neighbors.

    Returns
    -------
    inds : np.ndarray
        Row indices into ``avg_song_embeddings``. Column 0 is always the
        song's own index; columns 1.. are its neighbors, nearest first.
    cos_similarities : np.ndarray
        ``1 - cosine distance`` for every entry of ``inds``. Column 0 is
        fixed at 1.0 (the song compared with itself).

    Raises
    ------
    ValueError
        If ``n_neighbors`` is smaller than 2.
    """
    if n_neighbors < 2:
        raise ValueError('n_neighbors must be at least 2 (the song itself plus one neighbor)')

    avg_song_embeddings = np.asarray(avg_song_embeddings)
    n_songs = avg_song_embeddings.shape[0]

    # Querying the fitted data without passing X excludes every point from
    # its own neighbor list. Passing the data back in would only put the
    # point first by virtue of distance 0, which is not guaranteed when two
    # songs have identical embeddings.
    nbrs = NN(n_neighbors=n_neighbors - 1, metric='cosine').fit(avg_song_embeddings)
    distances, neighbor_inds = nbrs.kneighbors()

    # Re-attach the song itself as column 0 so callers keep the same layout.
    self_inds = np.arange(n_songs).reshape(-1, 1)
    inds = np.hstack((self_inds, neighbor_inds))
    cos_similarities = np.hstack((np.ones((n_songs, 1)), 1 - distances))

    return inds, cos_similarities

In [71]:
def gen_neighbor_data(inds, cos_similarities, avg_song_labels, song_genres):
    """Assemble a table of each song and its nearest neighbors.

    Parameters
    ----------
    inds : np.ndarray
        2-D neighbor indices into ``avg_song_labels``; column 0 is taken to
        be the song itself and every further column one neighbor.
    cos_similarities : np.ndarray
        Similarities with the same layout as ``inds``.
    avg_song_labels : np.ndarray
        Song labels of the form ``'<track id>.wav'``.
    song_genres : np.ndarray
        Genre title for each row, used as the second output column.

    Returns
    -------
    np.ndarray
        One row per song: song label, song genre, then for every neighbor
        column of ``inds`` its label, genre title and cosine similarity.
        Because labels and similarities are stacked into a single array,
        NumPy stores every value (similarities included) as a string.
    """
    genre_by_label = {}

    def genre_of(label):
        # Resolve each distinct label once; the same songs recur in many
        # neighbor lists and every lookup scans both metadata tables.
        if label not in genre_by_label:
            # int() already ignores leading zeros, so no manual stripping.
            track_id = int(label.replace('.wav', ''))
            g_ids = df_track_genre_pairs.loc[
                df_track_genre_pairs['track_id'] == track_id, 'genre_id'
            ].values
            titles = df_genre_keys.loc[
                df_genre_keys['genre_id'] == g_ids[0], 'title'
            ].values
            genre_by_label[label] = titles[0]
        return genre_by_label[label]

    song_idx = inds[:, 0]
    columns = [avg_song_labels[song_idx], song_genres]

    # Use however many neighbor columns were supplied instead of assuming 5.
    for rank in range(1, inds.shape[1]):
        neighbor_labels = avg_song_labels[inds[:, rank]]
        neighbor_genres = np.array([genre_of(lab) for lab in neighbor_labels])
        columns.extend([neighbor_labels, neighbor_genres, cos_similarities[:, rank]])

    return np.column_stack(columns)
        

## Song only D-Vector Model Embeddings

In [72]:
# Song-only model: load the embeddings, find each song's nearest neighbors,
# then assemble the rows of the neighbor table.
avg_song_embeddings, avg_song_labels, song_genres = embeddings_and_labels(
    embedding_dir='avg_song_embeddings/'
)
inds, cos_similarities = neighbor_inds_sims(avg_song_embeddings=avg_song_embeddings)
song_neighbors = gen_neighbor_data(
    inds=inds,
    cos_similarities=cos_similarities,
    avg_song_labels=avg_song_labels,
    song_genres=song_genres,
)

In [73]:
# Name the columns of the neighbor table: the song and its genre, followed by
# (label, genre, cosine similarity) for each of the five nearest neighbors.
df = pd.DataFrame(
    song_neighbors,
    columns=[
        'song', 'song_genre',
        '1st_Neighbor', '1st_Neighbor_Genre', '1st_Neighbor_Cos_Similarity',
        '2nd_Neighbor', '2nd_Neighbor_Genre', '2nd_Neighbor_Cos_Similarity',
        '3rd_Neighbor', '3rd_Neighbor_Genre', '3rd_Neighbor_Cos_Similarity',
        '4th_Neighbor', '4th_Neighbor_Genre', '4th_Neighbor_Cos_Similarity',
        '5th_Neighbor', '5th_Neighbor_Genre', '5th_Neighbor_Cos_Similarity'
    ])
# song_neighbors is a single array mixing labels and similarities, so the
# similarities arrive as strings. Restore them to floats so that sorting and
# averaging operate on numbers rather than on text.
df = df.astype({
    col: np.float64 for col in df.columns if col.endswith('_Cos_Similarity')
})
df.to_csv('song_neighbors.csv')
df.head(10)

Unnamed: 0,song,song_genre,1st_Neighbor,1st_Neighbor_Genre,1st_Neighbor_Cos_Similarity,2nd_Neighbor,2nd_Neighbor_Genre,2nd_Neighbor_Cos_Similarity,3rd_Neighbor,3rd_Neighbor_Genre,3rd_Neighbor_Cos_Similarity,4th_Neighbor,4th_Neighbor_Genre,4th_Neighbor_Cos_Similarity,5th_Neighbor,5th_Neighbor_Genre,5th_Neighbor_Cos_Similarity
0,074560.wav,Electronic,027303.wav,Hip-Hop,0.8399957083035571,109670.wav,Hip-Hop,0.8306614229458059,049478.wav,Hip-Hop,0.8232258129862868,063413.wav,Electronic,0.8222695005396332,071153.wav,Hip-Hop,0.8178388265706124
1,080077.wav,Electronic,133916.wav,Noise,0.9455240364551728,108228.wav,Electronic,0.9183652376414102,027752.wav,Electronic,0.8886250484043219,082218.wav,Breakbeat,0.8863923111435349,107102.wav,Punk,0.8830951692854869
2,107809.wav,Hip-Hop,105942.wav,Psych-Rock,0.9099107371478636,094463.wav,Hip-Hop,0.9098850776659532,056053.wav,Rock,0.899347088636925,022336.wav,Electro-Punk,0.8863142482182272,030735.wav,Chip Music,0.8861146873928989
3,093867.wav,Psych-Rock,010150.wav,Rock,0.9107995811488028,055925.wav,Punk,0.886812871736188,029622.wav,Rock,0.8637684227680692,049443.wav,Post-Punk,0.8570968768899888,113301.wav,Punk,0.8533164134517299
4,137422.wav,Indie-Rock,124458.wav,Metal,0.9076068904658212,051557.wav,Garage,0.9038381554356364,011446.wav,Electro-Punk,0.8977147325530669,141074.wav,Soundtrack,0.8924288210826703,060353.wav,Rock,0.8782187276677044
5,023155.wav,International,147950.wav,Indie-Rock,0.8678043850462239,042732.wav,Ambient Electronic,0.8531346376047991,101104.wav,Rock,0.8521532588991584,046525.wav,Ambient,0.8520847896875234,021957.wav,Ambient Electronic,0.8510651079582658
6,105094.wav,Punk,032685.wav,Rock,0.9633909318834682,105099.wav,Punk,0.9360359350475096,083871.wav,Rock,0.9028556472234718,032678.wav,Rock,0.8940284284001427,032832.wav,Rock,0.890070145947832
7,141990.wav,Electronic,089623.wav,Hip-Hop,0.8854343336738421,097680.wav,Electronic,0.8719400937105307,085698.wav,Shoegaze,0.8713479863771798,020226.wav,Indie-Rock,0.8704841430923894,033541.wav,Rock,0.8600344670397535
8,122958.wav,Electronic,004742.wav,Americana,0.9406045543901438,015139.wav,Jazz,0.940572653451722,118058.wav,Soundtrack,0.93345510684979,144472.wav,Singer-Songwriter,0.9277636546537994,007393.wav,Audio Collage,0.9268166159076874
9,048598.wav,Chip Music,013649.wav,Electronic,0.8935263946486771,054964.wav,Electronic,0.8880702483503446,012189.wav,Trip-Hop,0.8864454128444205,024173.wav,Electronic,0.8793757771963793,129583.wav,Noise,0.8730687575837159


### Nearest Songs (often mislabeled duplicate songs)

In [74]:
# Rank songs by the similarity of their closest neighbor. The column is cast
# to float before sorting so the order is numeric even when the values are
# held as strings (a text sort would misplace values such as '1e-05').
df.loc[
    df['1st_Neighbor_Cos_Similarity']
    .astype(np.float64)
    .sort_values(ascending=False)
    .index
].head(10)

Unnamed: 0,song,song_genre,1st_Neighbor,1st_Neighbor_Genre,1st_Neighbor_Cos_Similarity,2nd_Neighbor,2nd_Neighbor_Genre,2nd_Neighbor_Cos_Similarity,3rd_Neighbor,3rd_Neighbor_Genre,3rd_Neighbor_Cos_Similarity,4th_Neighbor,4th_Neighbor_Genre,4th_Neighbor_Cos_Similarity,5th_Neighbor,5th_Neighbor_Genre,5th_Neighbor_Cos_Similarity
3567,087370.wav,Ambient Electronic,094411.wav,Ambient Electronic,0.9990761021336232,111789.wav,Ambient,0.8639901611648774,127335.wav,Post-Rock,0.8604352446926153,016554.wav,Ambient Electronic,0.8551518029313454,098237.wav,Ambient,0.852532587542077
610,094411.wav,Ambient Electronic,087370.wav,Ambient Electronic,0.9990761021336232,111789.wav,Ambient,0.865732572120132,127335.wav,Post-Rock,0.8654531110745742,098237.wav,Ambient,0.8590843658746372,051151.wav,Ambient Electronic,0.8534860000113695
2078,006719.wav,Metal,006735.wav,Metal,0.9916480828735582,113301.wav,Punk,0.9206122721115028,106338.wav,Garage,0.906084377066822,006718.wav,Metal,0.9043243190153626,121863.wav,Rock,0.9005740700539997
827,006735.wav,Metal,006719.wav,Metal,0.9916480828735582,113301.wav,Punk,0.9348529928267216,106338.wav,Garage,0.923717748155032,121863.wav,Rock,0.9166331776245418,029622.wav,Rock,0.8811661866955129
2743,136404.wav,Punk,136402.wav,Punk,0.9869960417399248,136403.wav,Punk,0.9697697774688024,111703.wav,Garage,0.9306034362496622,066459.wav,Rock,0.927527182870047,068902.wav,Rock,0.9250591424248532
1375,136402.wav,Punk,136404.wav,Punk,0.9869960417399248,136403.wav,Punk,0.9770344435677508,066459.wav,Rock,0.93114406918929,111703.wav,Garage,0.9251195570754738,087155.wav,Metal,0.9221867997981318
3950,060191.wav,Spoken,054079.wav,Interview,0.9863387139532332,004848.wav,Middle East,0.967428608755084,024514.wav,Electroacoustic,0.966320021355192,125813.wav,Soundtrack,0.9661804409475842,049663.wav,Contemporary Classical,0.9631111880910764
887,054079.wav,Interview,060191.wav,Spoken,0.9863387139532332,109373.wav,Radio Art,0.977203182077166,024514.wav,Electroacoustic,0.976595372072724,053675.wav,Experimental,0.9584684076006686,136714.wav,Electronic,0.9574700262893504
3288,066538.wav,Lo-Fi,066535.wav,Lo-Fi,0.9859254187147424,066534.wav,Lo-Fi,0.9739566186298646,128261.wav,Punk,0.942112043822998,050651.wav,Loud-Rock,0.9369135533972686,093397.wav,Lo-Fi,0.9150344899928556
230,066535.wav,Lo-Fi,066538.wav,Lo-Fi,0.9859254187147424,066534.wav,Lo-Fi,0.9854020810039054,128261.wav,Punk,0.9194633074635156,050651.wav,Loud-Rock,0.9180315791352348,093397.wav,Lo-Fi,0.9147173860423308


### Nearest Songs in the Classical Genre 
Classical songs generally have the best sound quality in the dataset. 

In [75]:
# Same ranking restricted to Classical songs. The similarity column is cast
# to float before sorting so the order is numeric even when the values are
# held as strings.
df.loc[
    df.loc[df['song_genre'] == 'Classical', '1st_Neighbor_Cos_Similarity']
    .astype(np.float64)
    .sort_values(ascending=False)
    .index
].head(10)

Unnamed: 0,song,song_genre,1st_Neighbor,1st_Neighbor_Genre,1st_Neighbor_Cos_Similarity,2nd_Neighbor,2nd_Neighbor_Genre,2nd_Neighbor_Cos_Similarity,3rd_Neighbor,3rd_Neighbor_Genre,3rd_Neighbor_Cos_Similarity,4th_Neighbor,4th_Neighbor_Genre,4th_Neighbor_Cos_Similarity,5th_Neighbor,5th_Neighbor_Genre,5th_Neighbor_Cos_Similarity
61,028099.wav,Classical,028102.wav,Classical,0.9858247791862496,059587.wav,Classical,0.9779623947284126,028112.wav,Classical,0.9723486396439872,028100.wav,Classical,0.9680775890891404,065667.wav,Classical,0.9669255378032762
3787,028102.wav,Classical,028099.wav,Classical,0.9858247791862496,059587.wav,Classical,0.9789114936628236,028098.wav,Classical,0.9775456748284386,028100.wav,Classical,0.9650704962355552,053231.wav,Choral Music,0.9594339489893504
1801,065669.wav,Classical,065667.wav,Classical,0.9851470870035628,065680.wav,Classical,0.9745239931394512,028112.wav,Classical,0.9721346860787576,136084.wav,Composed Music,0.9623274280059002,097539.wav,Electronic,0.9602087196682344
805,065667.wav,Classical,065669.wav,Classical,0.9851470870035628,097539.wav,Electronic,0.9834141569704462,065680.wav,Classical,0.9692787210062626,028099.wav,Classical,0.9669255378032762,028112.wav,Classical,0.9642716124551168
2776,143143.wav,Classical,079912.wav,Classical,0.983259257157704,143146.wav,Classical,0.967964866284387,065794.wav,Classical,0.9665974563204006,136089.wav,Composed Music,0.9564329836335408,065667.wav,Classical,0.9539286609622404
1326,079912.wav,Classical,143143.wav,Classical,0.983259257157704,028098.wav,Classical,0.9768734740743514,143146.wav,Classical,0.9755295680896884,031249.wav,Classical,0.965633012545336,065667.wav,Classical,0.9607724736637152
1314,065678.wav,Classical,065653.wav,Classical,0.9799335778317172,136085.wav,Composed Music,0.9685691454314848,026764.wav,Experimental Pop,0.953623612282524,048043.wav,Unclassifiable,0.9519695916207448,067047.wav,Free-Jazz,0.9419238016749923
1128,065653.wav,Classical,065678.wav,Classical,0.9799335778317172,065680.wav,Classical,0.9778242174747804,136089.wav,Composed Music,0.9610134519313214,067047.wav,Free-Jazz,0.9512777674424084,136085.wav,Composed Music,0.9502471538190784
1535,059587.wav,Classical,028102.wav,Classical,0.9789114936628236,028099.wav,Classical,0.9779623947284126,097539.wav,Electronic,0.9747913201435304,065667.wav,Classical,0.9625585092113746,047770.wav,Classical,0.9540847151187054
3406,065680.wav,Classical,065653.wav,Classical,0.9778242174747804,065669.wav,Classical,0.9745239931394512,031233.wav,Classical,0.9700732900074158,065667.wav,Classical,0.9692787210062626,028112.wav,Classical,0.9638018262592024


### Average Neighbor Cosine Similarities for First Five Neighbors

In [76]:
# Mean cosine similarity of the k-th nearest neighbor across all songs.
neighs = ['1st', '2nd', '3rd', '4th', '5th']
for n in neighs:
    # Cast explicitly: the column may hold the similarities as strings.
    avg_cos_sim = np.mean(
        df[n + '_Neighbor_Cos_Similarity'].values.astype(np.float64)
    )
    print(n + ' Neighbor ' + str(avg_cos_sim))

1st Neighbor 0.901009294378329
2nd Neighbor 0.8834145169420191
3rd Neighbor 0.8729802312113795
4th Neighbor 0.8653912632765849
5th Neighbor 0.8592024677800237


## Genre D-Vector Model Embeddings

In [77]:
# Genre model: load the embeddings, find each song's nearest neighbors, then
# assemble the rows of the neighbor table. This rebinds the variables that
# held the song-only model's results.
avg_song_embeddings, avg_song_labels, song_genres = embeddings_and_labels(
    embedding_dir='avg_genre_song_embeddings/'
)
inds, cos_similarities = neighbor_inds_sims(avg_song_embeddings=avg_song_embeddings)
song_neighbors = gen_neighbor_data(
    inds=inds,
    cos_similarities=cos_similarities,
    avg_song_labels=avg_song_labels,
    song_genres=song_genres,
)

In [78]:
# Name the columns of the neighbor table: the song and its genre, followed by
# (label, genre, cosine similarity) for each of the five nearest neighbors.
df = pd.DataFrame(
    song_neighbors,
    columns=[
        'song', 'song_genre',
        '1st_Neighbor', '1st_Neighbor_Genre', '1st_Neighbor_Cos_Similarity',
        '2nd_Neighbor', '2nd_Neighbor_Genre', '2nd_Neighbor_Cos_Similarity',
        '3rd_Neighbor', '3rd_Neighbor_Genre', '3rd_Neighbor_Cos_Similarity',
        '4th_Neighbor', '4th_Neighbor_Genre', '4th_Neighbor_Cos_Similarity',
        '5th_Neighbor', '5th_Neighbor_Genre', '5th_Neighbor_Cos_Similarity'
    ])
# song_neighbors is a single array mixing labels and similarities, so the
# similarities arrive as strings. Restore them to floats so that sorting and
# averaging operate on numbers rather than on text.
df = df.astype({
    col: np.float64 for col in df.columns if col.endswith('_Cos_Similarity')
})
# Written to its own file: 'song_neighbors.csv' already holds the song-only
# model's table from the earlier cell and would otherwise be overwritten.
df.to_csv('genre_song_neighbors.csv')
df.head(10)

Unnamed: 0,song,song_genre,1st_Neighbor,1st_Neighbor_Genre,1st_Neighbor_Cos_Similarity,2nd_Neighbor,2nd_Neighbor_Genre,2nd_Neighbor_Cos_Similarity,3rd_Neighbor,3rd_Neighbor_Genre,3rd_Neighbor_Cos_Similarity,4th_Neighbor,4th_Neighbor_Genre,4th_Neighbor_Cos_Similarity,5th_Neighbor,5th_Neighbor_Genre,5th_Neighbor_Cos_Similarity
0,028702.wav,Chip Music,028802.wav,Pop,0.9224079077182212,138353.wav,Punk,0.9210713316932088,014080.wav,Techno,0.9133144954435094,069776.wav,Post-Punk,0.911529778334485,063766.wav,Hip-Hop,0.9074724222398982
1,133804.wav,Punk,089882.wav,Industrial,0.9449773746372232,137952.wav,Indie-Rock,0.9446004065973642,127275.wav,Pop,0.9441956429964352,126667.wav,Rock,0.9400713730448672,125446.wav,Rock,0.9394817127240572
2,047032.wav,Punk,063925.wav,Avant-Garde,0.9695927046723412,099073.wav,Hip-Hop,0.9645865720819636,007487.wav,Hip-Hop,0.9567744802065922,054024.wav,Electronic,0.9563793916995602,035184.wav,Pop,0.956247798817981
3,001183.wav,Rock,014103.wav,Punk,0.9701784067476604,001888.wav,Rock,0.9511499512118806,105111.wav,Rock,0.950230137490681,140506.wav,Rock,0.9456357146364288,138276.wav,Rock,0.9456049493833528
4,092366.wav,Ambient Electronic,035184.wav,Pop,0.9576676614221916,063656.wav,Ambient Electronic,0.9552446899543922,012520.wav,Electronic,0.9545221331229996,130803.wav,Techno,0.954290326498624,111372.wav,Pop,0.9501944495983272
5,108500.wav,Hip-Hop,011834.wav,Hip-Hop,0.9441231564747647,007483.wav,Hip-Hop,0.9369750002500832,107028.wav,Hip-Hop,0.936274251511416,127359.wav,Reggae - Dancehall,0.9347603582256364,108495.wav,Hip-Hop,0.933231379964886
6,054719.wav,Electronic,075420.wav,Rap,0.9629930086329422,043904.wav,Hip-Hop,0.9551616622924768,043844.wav,Hip-Hop,0.9549145958127402,110774.wav,Hip-Hop,0.9507947639819554,086117.wav,Pop,0.9489130539503766
7,001075.wav,Audio Collage,038880.wav,Field Recordings,0.936087260421438,136636.wav,Electroacoustic,0.9331181024331624,147165.wav,Electronic,0.9230048078523208,100544.wav,Electronic,0.9119174846824972,113203.wav,Noise,0.8918296310635155
8,024999.wav,Sound Collage,050368.wav,Jazz,0.940771212161938,035818.wav,Avant-Garde,0.932256416609976,087104.wav,Soundtrack,0.932015133197381,060037.wav,North African,0.9315975567439434,072387.wav,Jazz,0.9284447217282228
9,041454.wav,Chiptune,137432.wav,Electronic,0.927822689934965,140611.wav,Hip-Hop,0.927638141210172,015867.wav,Hip-Hop,0.9210843576247908,068600.wav,Alternative Hip-Hop,0.919223352623636,154512.wav,Electronic,0.9190624928118126


### Nearest Songs (often mislabeled duplicate songs)

In [79]:
# Rank songs by the similarity of their closest neighbor. The column is cast
# to float before sorting so the order is numeric even when the values are
# held as strings (a text sort would misplace values such as '1e-05').
df.loc[
    df['1st_Neighbor_Cos_Similarity']
    .astype(np.float64)
    .sort_values(ascending=False)
    .index
].head(10)

Unnamed: 0,song,song_genre,1st_Neighbor,1st_Neighbor_Genre,1st_Neighbor_Cos_Similarity,2nd_Neighbor,2nd_Neighbor_Genre,2nd_Neighbor_Cos_Similarity,3rd_Neighbor,3rd_Neighbor_Genre,3rd_Neighbor_Cos_Similarity,4th_Neighbor,4th_Neighbor_Genre,4th_Neighbor_Cos_Similarity,5th_Neighbor,5th_Neighbor_Genre,5th_Neighbor_Cos_Similarity
2011,094411.wav,Ambient Electronic,087370.wav,Ambient Electronic,0.9981734971378148,016554.wav,Ambient Electronic,0.9425432334242988,076293.wav,Electronic,0.9318145610944668,087350.wav,Electronic,0.9317911030872796,127078.wav,House,0.9291783834912852
698,087370.wav,Ambient Electronic,094411.wav,Ambient Electronic,0.9981734971378148,016554.wav,Ambient Electronic,0.9481731527893172,087350.wav,Electronic,0.9342395653833506,017893.wav,Punk,0.9304032427050152,076293.wav,Electronic,0.9303206798784188
2303,101947.wav,Punk,101949.wav,Punk,0.990771734115224,101948.wav,Punk,0.97934446299306,004793.wav,Rock,0.954477237387291,105081.wav,Punk,0.9476589387858518,020347.wav,Punk,0.9471482156645532
528,101949.wav,Punk,101947.wav,Punk,0.990771734115224,101948.wav,Punk,0.9777623094870852,004793.wav,Rock,0.9449494016804022,020347.wav,Punk,0.9447439155208764,105081.wav,Punk,0.9392267593839698
1951,065669.wav,Classical,065680.wav,Classical,0.9904898935453498,097539.wav,Electronic,0.9692911247759002,065653.wav,Classical,0.964202130950885,028102.wav,Classical,0.963373984620206,031233.wav,Classical,0.96266175619194
183,065680.wav,Classical,065669.wav,Classical,0.9904898935453498,065653.wav,Classical,0.976642038669609,031233.wav,Classical,0.9758458219628754,143142.wav,Classical,0.97293499478572,097539.wav,Electronic,0.9720843830337884
2291,004035.wav,Hardcore,004034.wav,Hardcore,0.9885437481729632,004029.wav,Hardcore,0.9742119579597328,130111.wav,Rock,0.9573161433249368,123429.wav,Punk,0.9452717173275128,051309.wav,Punk,0.9423237624831108
461,004034.wav,Hardcore,004035.wav,Hardcore,0.9885437481729632,130111.wav,Rock,0.969985337249191,004029.wav,Hardcore,0.966667417439346,051309.wav,Punk,0.9523762261472096,123429.wav,Punk,0.9497478108959346
1547,006719.wav,Metal,006735.wav,Metal,0.9862878864380904,113301.wav,Punk,0.9237788693766592,055925.wav,Punk,0.9030907226168003,058225.wav,Lo-Fi,0.8979031825797132,004198.wav,Rock,0.8872925297438027
2516,006735.wav,Metal,006719.wav,Metal,0.9862878864380904,113301.wav,Punk,0.940798899067905,055925.wav,Punk,0.9086105281101556,058225.wav,Lo-Fi,0.9053928584435812,109442.wav,Goth,0.8931926336850454


### Nearest Songs in the Classical Genre 
Classical songs generally have the best sound quality in the dataset. 

In [80]:
# Same ranking restricted to Classical songs. The similarity column is cast
# to float before sorting so the order is numeric even when the values are
# held as strings.
df.loc[
    df.loc[df['song_genre'] == 'Classical', '1st_Neighbor_Cos_Similarity']
    .astype(np.float64)
    .sort_values(ascending=False)
    .index
].head(10)

Unnamed: 0,song,song_genre,1st_Neighbor,1st_Neighbor_Genre,1st_Neighbor_Cos_Similarity,2nd_Neighbor,2nd_Neighbor_Genre,2nd_Neighbor_Cos_Similarity,3rd_Neighbor,3rd_Neighbor_Genre,3rd_Neighbor_Cos_Similarity,4th_Neighbor,4th_Neighbor_Genre,4th_Neighbor_Cos_Similarity,5th_Neighbor,5th_Neighbor_Genre,5th_Neighbor_Cos_Similarity
183,065680.wav,Classical,065669.wav,Classical,0.9904898935453498,065653.wav,Classical,0.976642038669609,031233.wav,Classical,0.9758458219628754,143142.wav,Classical,0.97293499478572,097539.wav,Electronic,0.9720843830337884
1951,065669.wav,Classical,065680.wav,Classical,0.9904898935453498,097539.wav,Electronic,0.9692911247759002,065653.wav,Classical,0.964202130950885,028102.wav,Classical,0.963373984620206,031233.wav,Classical,0.96266175619194
805,143142.wav,Classical,031233.wav,Classical,0.9815362941553746,143139.wav,Classical,0.9765856830806507,065680.wav,Classical,0.97293499478572,065669.wav,Classical,0.9594516122255348,001082.wav,International,0.9532399589563484
1857,031233.wav,Classical,143142.wav,Classical,0.9815362941553746,065680.wav,Classical,0.9758458219628754,143139.wav,Classical,0.9721575508265352,065669.wav,Classical,0.96266175619194,048046.wav,Unclassifiable,0.9540462943019414
2723,065653.wav,Classical,065678.wav,Classical,0.9789060759946108,065680.wav,Classical,0.976642038669609,065669.wav,Classical,0.964202130950885,136089.wav,Composed Music,0.9641259774575498,097539.wav,Electronic,0.9613697578330934
2328,065678.wav,Classical,065653.wav,Classical,0.9789060759946108,136085.wav,Composed Music,0.9707648451204292,067047.wav,Free-Jazz,0.9603134274517016,015629.wav,Classical,0.9601548201145794,011432.wav,Classical,0.9503782180941188
1509,143139.wav,Classical,143142.wav,Classical,0.9765856830806507,031233.wav,Classical,0.9721575508265352,001082.wav,International,0.9539992587984004,065680.wav,Classical,0.9510949877100164,122875.wav,Old-Time / Historic,0.94994825880786
1393,028098.wav,Classical,079912.wav,Classical,0.97505184067403,028102.wav,Classical,0.9722444531702688,059944.wav,Jazz,0.9626635928578108,143143.wav,Classical,0.953417313848119,097539.wav,Electronic,0.9509064529763918
2834,079912.wav,Classical,028098.wav,Classical,0.97505184067403,028102.wav,Classical,0.9739350387191102,059587.wav,Classical,0.9667024877449658,065794.wav,Classical,0.9664656695237098,143143.wav,Classical,0.9646146471098422
674,065794.wav,Classical,124877.wav,Soundtrack,0.974836298612944,143146.wav,Classical,0.9733755149787378,136089.wav,Composed Music,0.970673977369053,143143.wav,Classical,0.9680937825326088,079912.wav,Classical,0.9664656695237098


### Average Neighbor Cosine Similarities for First Five Neighbors

In [81]:
# Mean cosine similarity of the k-th nearest neighbor across all songs.
neighs = ['1st', '2nd', '3rd', '4th', '5th']
for n in neighs:
    # Cast explicitly: the column may hold the similarities as strings.
    avg_cos_sim = np.mean(
        df[n + '_Neighbor_Cos_Similarity'].values.astype(np.float64)
    )
    print(n + ' Neighbor ' + str(avg_cos_sim))

1st Neighbor 0.9424774486988764
2nd Neighbor 0.9333961028384342
3rd Neighbor 0.9278806971113569
4th Neighbor 0.9239328104450951
5th Neighbor 0.9209062315770291
