In [3]:
!pip install --upgrade pip setuptools wheel Cython numpy==1.23.5

# Step 2: Clone and build scikit-surprise manually
!git clone https://github.com/NicolasHug/Surprise.git
%cd Surprise
!python setup.py install
%cd ..

# Step 1: Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split, cross_validate
from surprise import accuracy

[33mDEPRECATION: Loading egg at /usr/local/lib/python3.11/dist-packages/scikit_surprise-1.1.4-py3.11-linux-x86_64.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m
fatal: destination path 'Surprise' already exists and is not an empty directory.
/content/Surprise
!!

        ********************************************************************************
        Please use a simple string containing a SPDX expression for `project.license`. You can also use `project.license-files`. (Both options available on setuptools>=77.0.0).

        By 2026-Feb-18, you need to update your project and remove deprecated calls
        or your builds will no longer be supported.

        See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#license for details.
        ***************************************************************

In [6]:
file_path = 'drive/MyDrive/datasets/amazon_review.csv'
df = pd.read_csv(file_path, header=None, names=['userId', 'itemId', 'rating', 'timestamp'])
df = df[['userId', 'itemId', 'rating']]
df.dropna(inplace=True)

print(df.head())

           userId      itemId  rating
0   AKM1MP6P0OYPR  0132793040     5.0
1  A2CX7LUOHB2NDG  0321732944     5.0
2  A2NWSAGRHCP8N5  0439886341     1.0
3  A2WNBOD3WNDNKT  0439886341     3.0
4  A1GI0U4ZRJA8WN  0439886341     1.0


In [8]:
min_ratings = 5
df = df.groupby("userId").filter(lambda x: len(x) >= min_ratings)

In [11]:
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[["userId", "itemId", "rating"]], reader)

In [12]:
from surprise.model_selection import train_test_split, GridSearchCV
param_grid = {
    "n_epochs": [10, 20],
    "lr_all": [0.002, 0.005],
    "reg_all": [0.02, 0.1]
}

gs = GridSearchCV(SVD, param_grid, measures=["rmse", "mae"], cv=3)
gs.fit(data)

print("Best RMSE score:", gs.best_score["rmse"])
print("Best parameters:", gs.best_params["rmse"])

Best RMSE score: 1.135482549666239
Best parameters: {'n_epochs': 20, 'lr_all': 0.005, 'reg_all': 0.1}


In [13]:
model = gs.best_estimator["rmse"]
trainset = data.build_full_trainset()
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x787fe6bc1a50>

In [14]:
trainset_split, testset_split = train_test_split(data, test_size=0.2)
model.fit(trainset_split)
predictions = model.test(testset_split)
print("RMSE:", accuracy.rmse(predictions))
print("MAE:", accuracy.mae(predictions))

RMSE: 1.1292
RMSE: 1.1291869472355403
MAE:  0.8504
MAE: 0.8504054680960125


In [15]:
def get_unseen_items(uid, trainset):
    if uid not in trainset._raw2inner_id_users:
        return []
    seen = set(j for (j, _) in trainset.ur[trainset.to_inner_uid(uid)])
    unseen = set(trainset.all_items()) - seen
    return [trainset.to_raw_iid(iid) for iid in unseen]

def recommend_top_n(model, trainset, uid, n=5):
    unseen_items = get_unseen_items(uid, trainset)
    predictions = [model.predict(uid, iid) for iid in unseen_items]
    top_n = sorted(predictions, key=lambda x: x.est, reverse=True)[:n]
    return [(pred.iid, pred.est) for pred in top_n]

example_user = df["userId"].iloc[0]
recommendations = recommend_top_n(model, trainset, example_user, n=5)

print(f"\n🎯 Top 5 recommendations for user {example_user}:")
for item_id, rating in recommendations:
    print(f"Item ID: {item_id}, Predicted Rating: {rating:.2f}")


🎯 Top 5 recommendations for user A3J3BRHTDRFJ2G:
Item ID: B003U4VIWW, Predicted Rating: 4.12
Item ID: B007BXD6DW, Predicted Rating: 4.03
Item ID: B009ACD844, Predicted Rating: 4.03
Item ID: B0007Y791C, Predicted Rating: 4.02
Item ID: B00FA1Y65O, Predicted Rating: 4.02
