In [14]:
# collaborative algorithm

import numpy as np
import pandas as pd



from surprise import Dataset, Reader
from surprise import NMF, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
from surprise.dataset import DatasetAutoFolds

from collections import defaultdict


def collaborative_filtering(input, algorithm, n_suggestion=10):

    def load_data(path_csv):
        reader = Reader()
        ratings = pd.read_csv(path_csv)
        ratings.head()
        data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
        
        return data
        
    def fit_predict(algo, path_csv):
        data = load_data(path_csv)
        
        # fit and predict in data - to know RMSE
        trainset, testset = train_test_split(data, test_size=0.25)

        algo.fit(trainset)
        predictions_test = algo.test(testset)
        
        # Real predict: predict ratings for all pairs (u, i) that are NOT in the training set.
        full_trainset = data.build_full_trainset()
        anti_testset = full_trainset.build_anti_testset()
        predictions = algo.test(anti_testset)
        
        return predictions, accuracy.rmse(predictions_test)
    
    
    def get_top_n(algo, path_csv, n=10):
        """Return the top-N recommendation for each user from a set of predictions.

        Returns:
        A dict where keys are user (raw) ids and values are lists of tuples:
            [(raw item id, rating estimation), ...] of size n.
        """
        
        predictions = fit_predict(algo, path_csv)[0]
        
        # First map the predictions to each user.
        top_n = defaultdict(list)
        for uid, iid, true_r, est, _ in predictions:
            top_n[uid].append((iid, est))

        # Then sort the predictions for each user and retrieve the k highest ones.
        for uid, user_ratings in top_n.items():
            user_ratings.sort(key=lambda x: x[1], reverse=True)
            top_n[uid] = user_ratings[:n]

        return top_n
    
    return get_top_n(algorithm, input)


## Example

top_n = collaborative_filtering(input='/kaggle/input/the-movies-dataset/ratings_small.csv', algorithm=NMF())

# Print the recommended items for each user
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])

RMSE: 0.9550
1 [67504, 4754, 3216, 1251, 26974, 40412, 92494, 32898, 107559, 3030]
2 [2318, 1860, 3030, 3307, 100556, 2066, 8154, 40629, 112515, 2924]
3 [1211, 1251, 1232, 52767, 3437, 5765, 6063, 2285, 3216, 92494]
4 [1172, 1263, 1293, 50, 265, 272, 457, 497, 593, 318]
5 [745, 926, 1649, 1859, 1860, 3010, 3030, 73344, 116897, 2066]
6 [3320, 89904, 92494, 83359, 3216, 116897, 4731, 6021, 3310, 83411]
7 [3320, 3915, 4731, 86781, 116897, 3022, 927, 3216, 92494, 8535]
8 [3030, 6783, 2066, 5017, 599, 4967, 5114, 6918, 42004, 3357]
9 [123, 1859, 1860, 3010, 3030, 4037, 6650, 116897, 4809, 1948]
10 [7502, 123, 1859, 1860, 3010, 3030, 3320, 3915, 4731, 8638]
11 [1293, 1206, 1208, 2692, 1719, 78499, 162, 233, 322, 1060]
12 [123, 3262, 1232, 3676, 71899, 62912, 3054, 67788, 45837, 129514]
13 [994, 1859, 3030, 77846, 90061, 5765, 6063, 9010, 67504, 83359]
14 [3320, 39446, 2043, 8530, 132333, 720, 1859, 8197, 926, 2165]
15 [92494, 26974, 40412, 3216, 83359, 6021, 6273, 4754, 5238, 52952]
16 [1208

In [16]:
## Example

top_n = collaborative_filtering(input='/kaggle/input/the-movies-dataset/ratings_small.csv', algorithm=SVD())

# Print the recommended items for each user
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])

RMSE: 0.8974
1 [318, 858, 88125, 1219, 903, 969, 6016, 1203, 296, 2858]
2 [4226, 3462, 858, 745, 1952, 1228, 1196, 4973, 912, 750]
3 [1172, 608, 922, 1193, 1247, 3462, 2692, 88125, 926, 908]
4 [593, 318, 1193, 1221, 1247, 111, 2692, 923, 162, 750]
5 [1228, 6016, 899, 527, 1254, 969, 1262, 1172, 1276, 318]
6 [1228, 318, 1196, 1131, 750, 2064, 50, 922, 3462, 1198]
7 [2019, 1228, 923, 858, 1221, 475, 608, 903, 1252, 1945]
8 [969, 58559, 1252, 745, 3462, 899, 1228, 608, 162, 109487]
9 [1228, 858, 475, 899, 1263, 1293, 969, 1217, 4226, 908]
10 [527, 858, 904, 6016, 4235, 913, 1247, 608, 1254, 926]
11 [1217, 3462, 1228, 2324, 318, 898, 111, 745, 232, 899]
12 [1089, 858, 1228, 1172, 745, 7153, 6016, 1203, 2542, 2571]
13 [969, 858, 1228, 1254, 1196, 1252, 913, 1299, 898, 5952]
14 [50, 858, 1221, 318, 1254, 926, 912, 913, 2571, 922]
15 [280, 8607, 599, 1351, 928, 1299, 3424, 7669, 1090, 892]
16 [1193, 1254, 969, 905, 1228, 2571, 913, 745, 1252, 858]
17 [88125, 2019, 58559, 750, 1228, 3104, 4851

In [17]:
top_n[14]
# user 14, suggest 10 movies 50, 858,...,922 with rating estimated 4.339, 4.262,....

[(50, 4.339180604686895),
 (858, 4.262432884700491),
 (1221, 4.2142239195147475),
 (318, 4.137740510408003),
 (1254, 4.074826763601229),
 (926, 4.069742792053704),
 (912, 4.02561927823419),
 (913, 4.020813143519046),
 (2571, 4.016863992067038),
 (922, 4.015292343131747)]