In [1]:
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import operator
import pickle 
import json

In [2]:
# Read the master track catalogue: metadata columns plus the columns that are
# later used to fit the nearest-neighbour model.
track_master_df = pd.read_csv(filepath_or_buffer='track_master_df.csv')

# Display (rows, columns) as a quick sanity check on the load.
track_master_df.shape

(9650, 17)

In [3]:
# Tracks that serve as the query set for the nearest-neighbour lookup.
test_tracks = pd.read_csv(filepath_or_buffer='test_case.csv')

In [4]:
# Identifying/metadata columns: removed from the model inputs here and reused
# further down to pick the columns shown in the recommendation table.
drop_cols = ['artist', 'album', 'track', 'track_id']

# Strip the metadata columns from the catalogue (model inputs) ...
X_train = track_master_df.drop(labels=drop_cols, axis='columns')

# ... and from the query tracks, using the same column list.
X_test = test_tracks.drop(labels=drop_cols, axis='columns')

In [9]:
# Configure a 5-neighbour search backed by a KD-tree, then fit it on the
# catalogue's remaining (non-metadata) columns.
knn = NearestNeighbors(algorithm='kd_tree', n_neighbors=5)
knn = knn.fit(X_train)  # the fitted object returned by fit() is what later cells query

In [12]:
# For every query track, fetch the distances to and the positions of its
# nearest neighbours in the fitted catalogue.
distances, indices = knn.kneighbors(X=X_test, return_distance=True)

In [None]:
# Persist the fitted model to disk. Pickle data is bytes, so the file must be
# opened in binary mode. The `with` block closes (and therefore flushes) the
# file on exit, so a later read of 'knnpickled' sees the complete pickle
# instead of whatever happened to be flushed from an open handle.
with open('knnpickled', 'wb') as knn_pickle:
    # pickle.dump(object to serialize, destination file)
    pickle.dump(knn, knn_pickle)

In [5]:
# Load the pickled model back from disk. The `with` block guarantees the file
# handle is released once the object has been read.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load 'knnpickled' files that were produced by this notebook / a trusted source.
with open('knnpickled', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Query the restored model with the same test features; this overwrites any
# `distances` / `indices` computed from the in-memory model.
distances, indices = loaded_model.kneighbors(X_test)

In [6]:
# Inspect the neighbour indices returned by kneighbors(X_test): one row per
# query track. They are used further down as positional row numbers
# (via .iloc) into track_master_df.
indices

array([[5693, 8126, 7977, 5042, 6045],
       [8705, 3553, 2901, 6352, 6515],
       [8897, 4800, 4039, 8046, 8544],
       [4902, 4064, 1805, 3584, 9416],
       [ 282, 9078, 3728, 6810, 1877],
       [1541, 6254, 9505, 7190, 1536],
       [1021, 2645, 3074, 3678, 2393],
       [1752, 7300, 6257, 5395, 5807],
       [4811, 4285, 1940, 6662, 4304],
       [1808, 4188, 1044, 6815, 6972],
       [3293, 4055, 7476,  875, 4383],
       [5472, 8062, 8429, 4018, 6250],
       [9250, 5585,  413, 3967, 9476]])

In [7]:
# Inspect the neighbour distances returned by kneighbors(X_test): one row per
# query track, aligned element-for-element with `indices`. Entries equal to 0
# are excluded by the filtering step that builds the recommendation list.
distances

array([[39.5792277 , 45.51040963, 46.61482086, 50.45865301, 52.36851431],
       [ 0.        , 22.55081201, 43.19630838, 45.32310325, 45.65762443],
       [12.57199594, 22.1328905 , 43.00935584, 43.32750337, 50.41571789],
       [19.36216389, 25.34454699, 35.55630885, 53.58995679, 63.32647685],
       [ 0.        , 20.82312796, 30.40603846, 32.37235047, 34.89708647],
       [ 0.        , 15.93425035, 40.36314622, 43.05843212, 48.54124602],
       [ 0.        , 16.61446361, 31.37678678, 60.79655261, 67.57502377],
       [ 0.        , 64.3038712 , 64.66443145, 68.07058708, 75.62057531],
       [15.09146261, 26.1371749 , 32.46923998, 32.98035392, 35.49199129],
       [ 0.        ,  6.49446415, 13.30877887, 39.22663807, 40.21411076],
       [40.12189874, 40.23662806, 42.12032638, 45.53288   , 45.79620138],
       [30.03719626, 33.0794943 , 33.74515013, 34.45749801, 43.73032492],
       [ 3.08967913, 31.10158097, 34.58640959, 34.65045427, 46.6986893 ]])

In [6]:
# Neighbours at or beyond this distance are not considered recommendations.
MAX_RECOMMEND_DISTANCE = 50

# Collect (catalogue row position, exact distance) for every neighbour of every
# query track. A distance of exactly 0 is skipped (an identical feature vector,
# presumably the query track itself being present in the catalogue; confirm),
# as is anything at or beyond the cut-off.
recommend_indices = [
    (indices[ii][jj], val)
    for ii, dists in enumerate(distances)
    for jj, val in enumerate(dists)
    if 0 < val < MAX_RECOMMEND_DISTANCE
]

# Rank on the exact distance first and round only afterwards. Sorting on the
# rounded value would make e.g. 19.6 and 20.4 tie at 20 and leave their
# relative rank to loop order, so a farther track could outrank a nearer one.
recommend_indices.sort(key=operator.itemgetter(1))

# Final shape is unchanged: a list of (row position, distance rounded to int),
# closest first.
recommend_indices = [(idx, int(round(dist))) for idx, dist in recommend_indices]

In [7]:
# Split the ranked (row position, distance) pairs into two parallel tuples.
# zip(*[]) yields nothing, so unpacking it would raise ValueError when no
# neighbour passed the distance filter; fall back to two empty tuples instead.
ind, val = zip(*recommend_indices) if recommend_indices else ((), ())

In [22]:
# Take the ten best-ranked catalogue positions, look the rows up by position,
# and keep only the metadata columns (the ones removed before fitting the model).
recommendations = (
    track_master_df
    .iloc[list(ind[:10])]
    .loc[:, drop_cols]
)

recommendations

Unnamed: 0,artist,album,track,track_id
9250,Jaira Burns,Ugly,Ugly,28RluRGmXQFTG2oSgFtR5u
4188,The Shadowboxers,Build The Beat,Build The Beat,1za5fakUqxQMpaFvBT8UuO
8897,Halsey,hopeless fountain kingdom (Deluxe),Eyes Closed,2WQn7Yvs728KZmmY6tgWqH
1044,Unwritten Law,Funk Metal Favorites,Seein' Red,01b3XYqB8CJsOprmOgaXjh
4811,Felipe Muñiz,Deje de Amar,Deje de Amar,4SB87vYiWYWFg9ekXay2Fo
6254,Common,Black America Again,Unfamiliar,3uOvWrcYKPyaNHV7wzHGXL
2645,The Go-Go's,Return To The Valley Of The Go-Go's,This Town,0Fx2Tn4jT0c2Z77R9JM7sJ
4902,Fools Garden,Dish Of The Day,Lemon Tree,4fXGWiVhlOLdhwRDP6pIFG
9078,The Detroit Emeralds,Greatest Hits,Baby Let Me Take You (In My Arms),4CrPVZPXRTGl5EhsKjGL6L
4800,Mackenzie Thoms,Shout,Shout,7cV11UE69pmujhVIzpHJA5


In [23]:
# Serialize the recommendation table to a JSON string.
rec_json = recommendations.to_json(
    orient='table',      # emit a "schema" section alongside the "data" records
    index=False,         # leave the DataFrame index out of the payload
    force_ascii=False,   # keep non-ASCII characters as-is instead of \u escapes
)
rec_json

'{"schema": {"fields":[{"name":"artist","type":"string"},{"name":"album","type":"string"},{"name":"track","type":"string"},{"name":"track_id","type":"string"}],"pandas_version":"0.20.0"}, "data": [{"artist":"Jaira Burns","album":"Ugly","track":"Ugly","track_id":"28RluRGmXQFTG2oSgFtR5u"},{"artist":"The Shadowboxers","album":"Build The Beat","track":"Build The Beat","track_id":"1za5fakUqxQMpaFvBT8UuO"},{"artist":"Halsey","album":"hopeless fountain kingdom (Deluxe)","track":"Eyes Closed","track_id":"2WQn7Yvs728KZmmY6tgWqH"},{"artist":"Unwritten Law","album":"Funk Metal Favorites","track":"Seein\' Red","track_id":"01b3XYqB8CJsOprmOgaXjh"},{"artist":"Felipe Muñiz","album":"Deje de Amar","track":"Deje de Amar","track_id":"4SB87vYiWYWFg9ekXay2Fo"},{"artist":"Common","album":"Black America Again","track":"Unfamiliar","track_id":"3uOvWrcYKPyaNHV7wzHGXL"},{"artist":"The Go-Go\'s","album":"Return To The Valley Of The Go-Go\'s","track":"This Town","track_id":"0Fx2Tn4jT0c2Z77R9JM7sJ"},{"artist":"Fo

In [24]:
# Write the JSON payload to disk.
# - `with` closes the file even if write() raises, so the handle is never leaked.
# - rec_json was built with force_ascii=False, so it can contain non-ASCII
#   characters; without an explicit encoding, open() falls back to a
#   platform-dependent default, which may fail to encode them or produce a
#   file that is not valid UTF-8 JSON.
with open('recommendations.json', 'w', encoding='utf-8') as rec_file:
    rec_file.write(rec_json)