In [48]:
import io
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import get_dataset_dir
from collections import defaultdict
from json import dumps

# Used twice by rnk_surprise: as the 'min_support' value in the similarity
# options and as the number of top-ranked films kept for the user.
min_films = 5
# Neighbourhood size; rnk_surprise passes it to KNNWithMeans as both k and min_k.
knn = 4

In [49]:
def get_info():
    """Load the ml-100k item file into a lookup table.

    Reads ``<dataset dir>/ml-100k/ml-100k/u.item`` (ISO-8859-1 encoded) and
    splits every line on ``'|'``.

    Returns:
        dict: first field of each line -> ``(second field, third field)``.
        In the notebook output these are the film id, its title and a date.
        If a key occurs more than once, the last line wins.
    """
    item_path = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
    with io.open(item_path, 'r', encoding='ISO-8859-1') as handle:
        # Lazily split each row; the comprehension consumes the generator
        # while the file is still open.
        rows = (raw.split('|') for raw in handle)
        return {fields[0]: (fields[1], fields[2]) for fields in rows}

In [50]:
def rnk_surprise(usr):
    """Recommend unseen ml-100k films to one user, print them and save as JSON.

    Fits a user-based ``KNNWithMeans`` model (cosine similarity) on the full
    built-in ml-100k dataset, estimates a rating for every film the user has
    not rated, keeps the ``min_films`` highest estimates, prints them as a
    table and writes them to ``user<usr>_surprise.json`` in the current
    working directory.

    Args:
        usr: Raw user id. It is converted with ``str`` so that an ``int`` id
            works as well as the string returned by ``input()``.

    Returns:
        None. Results are printed and written to disk. For a user id that is
        not part of the dataset an empty recommendation set is written.
    """
    # Normalise once: the id is concatenated into strings below and looked up
    # as a raw (string) id in the trainset.
    usr = str(usr)

    data = Dataset.load_builtin('ml-100k')
    trainset = data.build_full_trainset()

    sim_options = {'name': 'cosine', 'user_based': True, 'min_support': min_films}
    algo = KNNWithMeans(k=knn, min_k=knn, sim_options=sim_options)
    algo.fit(trainset)

    # Build the anti-testset for this user only. Calling
    # trainset.build_anti_testset() would materialise the unrated pairs of
    # every user just to throw almost all of them away.
    try:
        inner_uid = trainset.to_inner_uid(usr)
    except ValueError:
        # Unknown user: nothing to predict, but still emit the (empty) report.
        testset = []
    else:
        already_rated = {inner_iid for inner_iid, _ in trainset.ur[inner_uid]}
        fill = trainset.global_mean  # placeholder "true" rating, unused by est
        testset = [
            (usr, trainset.to_raw_iid(inner_iid), fill)
            for inner_iid in trainset.all_items()
            if inner_iid not in already_rated
        ]

    predictions = algo.test(testset)

    # Round first, then rank on the rounded value (stable sort), so ties are
    # resolved exactly as the printed numbers suggest.
    ranked = [(iid, round(est, 3)) for _, iid, _, est, _ in predictions]
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    top_films = ranked[:min_films]

    info = get_info()
    row_format = "{:^10} {:<70} {:^1}"

    print("User " + usr)
    recommend = {}
    for film_rid, rnk in top_films:
        label = str(info[film_rid])
        print(row_format.format(film_rid, label, rnk))
        recommend[film_rid] = (label, rnk)

    output = {
        "user": "User " + usr,
        # Integer key kept for compatibility with existing output files;
        # json.dumps serialises it as the string "1".
        1: recommend,
    }

    file_name = "user" + usr + "_surprise.json"
    # Context manager guarantees the handle is closed even if writing fails.
    with open(file_name, 'w') as json_file:
        json_file.write(dumps(output))

In [51]:
# Ask for a raw user id and produce that user's recommendations.
# Surrounding whitespace is stripped: the id is matched as an exact string
# and is also embedded in the output file name, so "1 " would otherwise be
# treated as an unknown user and produce an oddly named, empty report.
usr = input("Enter user id: ").strip()
rnk_surprise(usr)

Enter user id: 1
Computing the cosine similarity matrix...
Done computing similarity matrix.
User 1
   302     ('L.A. Confidential (1997)', '01-Jan-1997')                            4.953
   902     ('Big Lebowski, The (1998)', '26-Dec-1997')                            4.87
   1367    ('Faust (1994)', '01-Jan-1994')                                        4.761
   516     ('Local Hero (1983)', '01-Jan-1983')                                   4.76
   1449    ('Pather Panchali (1955)', '22-Mar-1996')                              4.736
