In [55]:
from collections import defaultdict

from surprise import KNNBasic, Reader, evaluate
from surprise import Dataset


def get_top_n(predictions, n=5):
    '''Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    '''

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

items_stream = open('u.item', 'r')
item_data = items_stream.read().split('\n')
item_data = list(map(lambda item: item.split('|')[:2], item_data))
items_stream.close()

# database = pd.read_csv('ml-100k/u1.base.csv')
# user_set = set(database.user_id)
# item_set = set(database.item_id)
# not_watch = {user: item_set.difference(database.query('user_id == %s' %(user)).item_id) for user in user_set}

# path to dataset folder
files_dir = os.path.expanduser('ml-100k/')

# This time, we'll use the built-in reader.
reader = Reader('ml-100k')

# folds_files is a list of tuples containing file paths:
# [(u1.base, u1.test), (u2.base, u2.test), ... (u5.base, u5.test)]
train_file = files_dir + 'u%d.base'
test_file = files_dir + 'u%d.test'
folds_files = [(train_file % i, test_file % i) for i in [1]]

data = Dataset.load_from_folds(folds_files, reader=reader)
pkf = PredefinedKFold()

sim_options = {
    'name': 'cosine',
    'user_based': True  # compute  similarities between users
}

algo_knn = KNNBasic(sim_options=sim_options, k=4, min_k=2)

for trainset, testset in pkf.split(data):

    # train and test algorithm.
    algo_knn.fit(trainset)
    predictions_knn = algo_knn.test(testset)
    rmse_knn = accuracy.rmse(predictions_knn)


UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 76620: invalid continuation byte

In [52]:
predicoes = {}
for i in range(1, 932):
    user_pred = []
    for filme in mapa_de_filmes:
        p = algo.predict(str(i), str(filme))
        user_pred.append((p.iid, p.est))
    predicoes[str(i)] = sorted(user_pred, key=lambda x: x[1], reverse=True)

In [53]:
predicoes['5']

[('814', 5),
 ('1122', 5),
 ('1189', 5),
 ('1201', 5),
 ('1293', 5),
 ('1467', 5),
 ('1500', 5),
 ('1536', 5),
 ('1599', 5),
 ('1653', 5),
 ('1656', 5),
 ('1463', 4.840989399293286),
 ('100', 4.727438747687303),
 ('1449', 4.679144775768372),
 ('172', 4.639711767451531),
 ('134', 4.636141618474983),
 ('12', 4.608342244204311),
 ('169', 4.597291821335583),
 ('483', 4.57420678030629),
 ('114', 4.56948126711881),
 ('119', 4.56709782383533),
 ('174', 4.5661921617234755),
 ('313', 4.541342502497537),
 ('1191', 4.538461538461538),
 ('1594', 4.53757225433526),
 ('408', 4.5359593738204085),
 ('1233', 4.521472392638036),
 ('285', 4.509121185571423),
 ('513', 4.503348590066939),
 ('64', 4.499596435290144),
 ('1398', 4.458451350380979),
 ('318', 4.451426434253312),
 ('1642', 4.449275362318841),
 ('178', 4.44315425750136),
 ('511', 4.442408055038739),
 ('50', 4.431152369239253),
 ('1592', 4.4272603730177735),
 ('480', 4.403258149005204),
 ('251', 4.401467972687147),
 ('1612', 4.387795931977325),
 (

In [3]:
def get_k_nearest_neighbors(userId):
    # Retrieve inner id of the user
    inner_id = algo.trainset.to_inner_uid(userId)

    # Retrieve inner ids of the nearest neighbors of the requested user
    user_neighbors = algo.get_neighbors(inner_id, k=3)

    # Convert inner ids to raw ids
    neighbors = (algo.trainset.to_raw_uid(inner_id)
                           for inner_id in user_neighbors)
    neigbhors_list = [id for id in neighbors]
    return neigbhors_list


def getRecommendations(uid):
    movies = top_n[uid]
    neighbours = get_k_nearest_neighbors(str(uid))
    return {
        'movies': movies,
        'neighbours': neighbours
    }

In [8]:
top_n

defaultdict(list,
            {'196': [('1189', 5),
              ('1500', 5),
              ('814', 5),
              ('1536', 5),
              ('1293', 5),
              ('1599', 5),
              ('1653', 5),
              ('1467', 5),
              ('1122', 5),
              ('1201', 5)],
             '186': [('1189', 5),
              ('814', 5),
              ('1536', 5),
              ('1599', 5),
              ('1653', 5),
              ('1467', 5),
              ('1201', 5),
              ('1500', 4.999999999999999),
              ('1293', 4.999999999999999),
              ('1122', 4.999999999999999)],
             '22': [('1189', 5),
              ('1500', 5),
              ('814', 5),
              ('1536', 5),
              ('1599', 5),
              ('1656', 5),
              ('1653', 5),
              ('1467', 5),
              ('1122', 5),
              ('1201', 5)],
             '244': [('1189', 5),
              ('814', 5),
              ('1293', 5),
              ('1

In [21]:
import csv
mapa_de_filmes = {}

with open("u.item", encoding="ISO-8859-1") as tsv:
    for line in csv.reader(tsv, dialect="excel-tab"):
        linha = line[0].split('|')
        mapa_de_filmes[linha[0]] = linha[1]

In [1]:
print(mapa_de_filmes['1122'])
mapeados = map(lambda x: mapa_de_filmes[x[0]], getRecommendations('2')['movies'])
for aa in mapeados:
    print(aa)

NameError: name 'mapa_de_filmes' is not defined