In [34]:
import pandas as pd
import numpy as np
import pickle
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

In [27]:
ratings = pd.read_csv('../data/ratings_cleaned.csv')

In [28]:
ratings.head()

Unnamed: 0,boardgame_id,user_name,ratings,comments,user_id
0,230802,HelloLemur,7.0,,11811
1,178900,Chris569x,6.5,,7267
2,70323,RyokoLainDW,7.0,,14627
3,10547,jimjimbo,5.0,,7726
4,14996,Kuato_NL,8.0,,13089


## Create sparse User-Item-Matrix

In [29]:
user_item = csr_matrix((ratings['ratings'], (ratings['user_id'], ratings['boardgame_id'])))

In [30]:
#shape = (number of users, max boardgame_id)
user_item.shape

(19235, 266193)

## Create Model

In [31]:
model = NearestNeighbors(metric='cosine')

In [32]:
model.fit(user_item)

NearestNeighbors(metric='cosine')

In [36]:
with open('../models/small_model.pickle', 'wb') as file:
    pickle.dump(model, file)

## Create User Vectors

### a. of an existing user

In [38]:
user = ratings[ratings['user_name']=='shadowjump']
user

Unnamed: 0,boardgame_id,user_name,ratings,comments,user_id
1813,84876,shadowjump,9.0,,5
14510,30549,shadowjump,6.8,Storage Box R,5
17573,478,shadowjump,7.0,Keep Sell Trade,5
24332,98778,shadowjump,5.0,,5
28562,120677,shadowjump,8.0,From my Secret Santa 2013!,5
38190,129622,shadowjump,8.0,,5
39735,178900,shadowjump,6.0,Sell or Trade,5


In [62]:
user_boardgames = user['boardgame_id']

In [39]:
vector_length = ratings['boardgame_id'].max()
vector = np.repeat(0, vector_length+1)

In [40]:
vector[user['boardgame_id']] = user['ratings']

In [43]:
vector.shape

(266193,)

## Find Neighbors

In [44]:
distances, neighbor_ids = model.kneighbors([vector], n_neighbors=20)

In [45]:
neighbor_ids

array([[    5,   361,   500,  2670,  1490,   385,   332,  4079,  1462,
         2597,  7142, 17908, 18437, 19078, 10718, 11135,  5087,  6731,
        13134, 14103]])

In [46]:
distances

array([[0.0007876 , 0.19869388, 0.23101355, 0.24395008, 0.29522088,
        0.30941251, 0.31509027, 0.31894285, 0.32605511, 0.35650602,
        0.36089852, 0.36091802, 0.36091802, 0.36112715, 0.36115737,
        0.36135307, 0.36149514, 0.36175912, 0.36200137, 0.36200137]])

In [60]:
ratings[ratings['user_id']==5]

Unnamed: 0,boardgame_id,user_name,ratings,comments,user_id
1813,84876,shadowjump,9.0,,5
14510,30549,shadowjump,6.8,Storage Box R,5
17573,478,shadowjump,7.0,Keep Sell Trade,5
24332,98778,shadowjump,5.0,,5
28562,120677,shadowjump,8.0,From my Secret Santa 2013!,5
38190,129622,shadowjump,8.0,,5
39735,178900,shadowjump,6.0,Sell or Trade,5


In [54]:
neighbor_filter = ratings['user_id'].isin(neighbor_ids[0][1:])
ratings[neighbor_filter]

Unnamed: 0,boardgame_id,user_name,ratings,comments,user_id
555,84876,Ploedminka,8.5,,11135
965,120677,dcpremix,9.5,,5087
2396,30549,galchri,7.0,A modern classic - though it can get a bit pro...,2597
3288,84876,Swindler,8.0,,1462
3957,178900,galchri,7.0,,2597
5003,129622,arhkit,6.0,,6731
5651,110327,BimmyJim,3.0,,385
5669,84876,Mwroskam,9.2,Classic Euro mechanics with a nice dice driven...,361
7376,129622,mesaverde,8.0,,19078
7833,84876,dcpremix,9.8,,5087


In [68]:
neighbor_taste = ratings[neighbor_filter].groupby('boardgame_id').mean()['ratings'].sort_values(ascending=False)
neighbor_taste

boardgame_id
120677    8.357143
84876     8.323529
98778     7.500000
478       7.416667
178900    7.375000
129622    7.238462
30549     7.233333
110327    3.000000
Name: ratings, dtype: float64

In [70]:
played_filter = ~neighbor_taste.index.isin(user_boardgames)
played_filter

array([False, False, False, False, False, False, False,  True])

In [72]:
neighbor_taste[played_filter].index

Int64Index([110327], dtype='int64', name='boardgame_id')

In [74]:
boardgames = pd.read_csv('../data/boardgames.csv', index_col='id')

In [77]:
boardgames.loc[neighbor_taste[played_filter].index]['name'].tolist()

['Lords of Waterdeep']