In [3]:
! pip install scikit-surprise



In [1]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from collections import defaultdict

In [2]:
df = pd.read_csv("ratings.csv",low_memory=True)[:20000]
df

Unnamed: 0,userID,itemID,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
19995,132,1028,4.0,1157923009
19996,132,1035,3.5,1157922959
19997,132,1042,3.0,1157923521
19998,132,1059,3.0,1329984112


Next, we need to use two objects from the Surprise library. First, we need to create a Reader object, to which we will indicate the rating scale. Remember, the user rated items on a scale of 1 to 5 stars.

In [3]:
reader = Reader(rating_scale=(1, 5))
reader

<surprise.reader.Reader at 0x7f63f20a0c10>

Afterwards, we will create a Dataset object that will load the information from our DataFrame into a data object that we can work with in Surprise.

In [6]:
data = Dataset.load_from_df(df[["userID", "itemID", "rating"]], reader)
data

<surprise.dataset.DatasetAutoFolds at 0x7f63f1dfe2e0>

In [7]:
from surprise.model_selection import train_test_split

trainset, testset = train_test_split(data, test_size=0.25)

#### With two lines of code we train our algorithm

In [8]:
algo = SVD()
algo.fit(data.build_full_trainset())

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f63f2360d30>

In [16]:
predictions = algo.test(trainset.build_anti_testset())
# taking first 4 prediction data
predictions[:4]

[Prediction(uid=73, iid=76751, r_ui=3.6013, est=3.6632193599098524, details={'was_impossible': False}),
 Prediction(uid=73, iid=435, r_ui=3.6013, est=2.6409810212383866, details={'was_impossible': False}),
 Prediction(uid=73, iid=17, r_ui=3.6013, est=3.552442609840415, details={'was_impossible': False}),
 Prediction(uid=73, iid=4993, r_ui=3.6013, est=3.920445912912125, details={'was_impossible': False})]

In [12]:
# top_n = defaultdict(list)
# top_n

In [32]:
top_n = defaultdict(list)
for uid, iid, true_r, est, _ in predictions:
  top_n[uid].append((iid, est))

In [33]:
k = 10
for uid, user_ratings in top_n.items():
  user_ratings.sort(key=lambda x: x[1], reverse=True)
  top_n[uid] = user_ratings[:k]

In [34]:
top_n
# for key in top_n.keys():
#     print(key)
len(top_n)

132

In [40]:
user_ratings

[(1221, 4.894249651591709),
 (1198, 4.865840888398351),
 (1210, 4.755898080457173),
 (16, 4.74299214547761),
 (541, 4.734548697912365),
 (1196, 4.72824979411848),
 (1197, 4.710736309537787),
 (5618, 4.705201467809434),
 (858, 4.702803595443689),
 (527, 4.690310205297098)]

In [42]:
print("Recommendations")
print("=" * 40)
for uid, user_ratings in list(top_n.items())[:10]:
  items = [iid for (iid, _) in user_ratings]
  print(f"UserId {uid}\titems: {items}")

Recommendations
UserId 73	items: [1198, 2858, 1206, 318, 475, 2300, 2423, 296, 3147, 527]
UserId 68	items: [40815, 741, 912, 32587, 70183, 44195, 62439, 55118, 457, 3793]
UserId 21	items: [318, 3082, 912, 2081, 2261, 1097, 2078, 1101, 2692, 68954]
UserId 71	items: [318, 1196, 1198, 2858, 1197, 49272, 1089, 1221, 2028, 8633]
UserId 94	items: [2571, 1197, 260, 858, 1221, 541, 49272, 58559, 50, 1210]
UserId 82	items: [50, 1212, 2160, 912, 593, 2692, 260, 318, 3275, 1206]
UserId 112	items: [1198, 1196, 49272, 260, 480, 2858, 48780, 362, 4848, 54881]
UserId 122	items: [541, 1198, 858, 16, 3147, 1221, 527, 1653, 1210, 912]
UserId 111	items: [2959, 912, 1278, 955, 1198, 50, 1258, 318, 3147, 553]
UserId 119	items: [1221, 1198, 1210, 16, 541, 1196, 1197, 5618, 858, 527]
