In [1]:
import pandas as pd
import numpy as np

In [2]:
def initial_matix(n_users=10000, n_restaurants=1000, sparsity=0.9):
    rating_matrix = np.random.choice([1,3,4,5],size=(n_users,n_restaurants))
    zero_indices = np.random.choice(rating_matrix.shape[1]*rating_matrix.shape[0], replace=False, size=int(rating_matrix.shape[1]*rating_matrix.shape[0]*sparsity))
    rating_matrix[np.unravel_index(zero_indices, rating_matrix.shape)] = 0 
    return rating_matrix

In [3]:
from surprise import Dataset, Reader, KNNWithMeans
import pandas as pd

In [4]:
rating_matrix = pd.DataFrame(initial_matix(n_users=1000,n_restaurants=100))
rating_matrix['index'] = list(range(rating_matrix.shape[0]))
ratings = rating_matrix.melt(id_vars='index',var_name='restaurant_id',value_name='rating')
ratings.rename(columns = {'index':'user_id'}, inplace=True)
ratings = ratings[ratings['rating']!=0]
ratings.sort_values('user_id')

Unnamed: 0,user_id,restaurant_id,rating
49000,0,49,4
5000,0,5,3
65000,0,65,3
12000,0,12,5
14000,0,14,1
...,...,...,...
13999,999,13,3
28999,999,28,1
85999,999,85,4
71999,999,71,4


In [21]:
idx = (ratings['user_id'] == 999) & (ratings['restaurant_id'] == 71)

In [26]:
ratings.loc[idx,'rating'] = 3

In [39]:
ratings.loc[idx]['rating'].values[0]

3

In [5]:
reader = Reader(rating_scale=(0, 5))
data = Dataset.load_from_df(ratings, reader)

In [6]:
sim_parameters = {'name': 'cosine', 'user_based': True, 'min_support': 5}
algo = KNNWithMeans(sim_options=sim_parameters, k=10)

In [7]:
trainset = data.build_full_trainset()
algo.fit(trainset)
testset = trainset.build_anti_testset()
predictions = algo.test(testset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [8]:
from collections import defaultdict
def get_top_n(predictions, n=10):
    '''Return the top-N recommendation for each user from a set of predictions.
    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.
    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    '''
    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

top_n = get_top_n(predictions, n=10)

In [12]:
from itertools import islice

def take(n, iterable):
    "Return first n items of the iterable as a list"
    return list(islice(iterable, n))

for uid, user_ratings in take(10,top_n.items()):
    print(uid, [iid for (iid, _) in user_ratings])

11 [41, 42, 74, 36, 76, 23, 84, 97, 1, 3]
32 [1, 2, 3, 4, 5, 6, 8, 9, 10, 11]
33 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
57 [4, 23, 27, 44, 52, 67, 80, 81, 53, 62]
65 [79, 20, 37, 62, 1, 39, 64, 73, 22, 19]
66 [14, 16, 63, 73, 75, 86, 93, 95, 35, 50]
89 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
95 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
96 [1, 30, 31, 64, 66, 73, 42, 69, 79, 29]
103 [58, 93, 96, 11, 65, 35, 30, 57, 91, 51]


In [11]:
top_n[0]

[(3, 5),
 (19, 5),
 (24, 5),
 (58, 5),
 (73, 4.95747334577953),
 (30, 4.900953957598404),
 (93, 4.825174825174825),
 (48, 4.5174825174825175),
 (95, 4.440559440559441),
 (80, 4.247064909273087)]