In [1]:
# Install the third-party packages this notebook imports into the kernel's environment.
%pip install pandas
%pip install scikit-learn
# -U asks pip to upgrade numpy to the newest release it can find, even if one is already installed.
%pip install -U numpy

Collecting pandas
  Using cached pandas-2.2.3-cp311-cp311-win_amd64.whl.metadata (19 kB)
Collecting numpy>=1.23.2 (from pandas)
  Downloading numpy-2.2.3-cp311-cp311-win_amd64.whl.metadata (60 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.1-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pandas-2.2.3-cp311-cp311-win_amd64.whl (11.6 MB)
Downloading numpy-2.2.3-cp311-cp311-win_amd64.whl (12.9 MB)
   ---------------------------------------- 0.0/12.9 MB ? eta -:--:--
   -- ------------------------------------- 0.8/12.9 MB 6.7 MB/s eta 0:00:02
   ---- ----------------------------------- 1.3/12.9 MB 3.4 MB/s eta 0:00:04
   ----- ---------------------------------- 1.8/12.9 MB 3.0 MB/s eta 0:00:04
   ------ --------------------------------- 2.1/12.9 MB 2.7 MB/s eta 0:00:04
   ------- -------------------------------- 2.4/12.9 MB 2.4 MB/s eta 0:00:05
   ------- -----

# Load Packages

In [21]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD
import random

# Setting random seeds to replicate results easily
random.seed(0)
np.random.seed(0)

# Load the Dataset
We will load a dataset containing three columns: user_id, business_id, and stars. This dataset serves as the foundation for our collaborative filtering methods.

## Handling Missing Data
We have two model choices for handling missing ratings when computing similarity:
- Fill with zero
- Fill with the mean rating of (user/item)

In [22]:
# Read the training and validation splits; the targets are unpacked in the
# same order as the file paths.
train_data, valid_data = (
    pd.read_csv(csv_path)
    for csv_path in ('data/train.csv', 'data/valid.csv')
)

In [23]:
train_data

Unnamed: 0,user_id,business_id,stars
0,ec8f38aa91755dcf5837020d022ad384,ecaa90564e18dca1c7b653038f71d6bf,1.0
1,64fe4dd0a489c9b96a3e8d7fbd337888,ef118bb0ae1fc369e1f47d1b34f6acee,5.0
2,a49909b39426ebb3538aa837b5b88840,e8b182a923810d52981aa02d56dde799,5.0
3,a56726d5676d647e42e2aca54f21b075,250040e979eae9ef5912aa5a1d285e4e,5.0
4,3e19d8260e655ba87bea0922bac92266,e02880faf4d42fe1df7bd370fb1c787b,4.0
...,...,...,...
99995,555883855cca31f06edb517762bc8171,396739eaa8b6cbfd078628567f1f01cf,5.0
99996,634d0478d05af1775d606058521593dc,1bfdf72aac7cbe0fc94caeb4422b629d,4.0
99997,0f37c0d13690b022a55470894714afd6,78a3e9835377387d0a04664abec56143,4.0
99998,54e7e0b4b4a95c6a8a979d24d383e39d,0b0ffffc5987e32fd87dd4782bb75eb1,4.0


In [24]:
valid_data

Unnamed: 0,user_id,business_id,stars
0,961883d7f2eb08e4b284777b46dc5971,580818eb8f57a77ab6eb515b458c9d18,5.0
1,1ec826daf60faf7b04a1d59f6e9dea73,572999170778033bb8ee5663ca65405f,4.0
2,2339479e4b9c6af9dfbebf12bcb3e6b2,98f0f68de27c76c27f47350fe6d0a60d,5.0
3,d9fb027b8e54080bfb252516f7ba127d,8394e1fa45ad59bc7fe82fd33905ce05,3.0
4,20537e5565e978b43e9672415444de79,5b95559eccd88ab36dd6f5ccaa994ed6,4.0
...,...,...,...
9995,69f56168ebf98ff343826df8f2cda55d,28f3d407928ae2d7c36bcea629186843,2.0
9996,3859285f936bfee2a6caf2644a936a19,8d512bca383062d902d9bce89ea8fda0,5.0
9997,491a93d31d9c8cadcbae16ff2fc46f83,b0a12211aef1cd25f9575778d677dfcb,4.0
9998,74fb19ad36602ca0edd9904547f830f5,1f3d76e6addc57dfae8578af06603ccb,3.0


### Fill with zero

In [25]:
# Wide user x business table of star ratings; every (user, business) pair
# that has no rating in the training data is set to 0.
train_user_item_matrix = pd.pivot_table(
    train_data,
    values='stars',
    index='user_id',
    columns='business_id',
).fillna(0)
# The same ratings viewed business x user.
train_item_user_matrix = train_user_item_matrix.transpose()

In [26]:
train_user_item_matrix

business_id,0001cf8af2e89975526c034208d594de,0002d36982e9e94fd7fac3fc0b679fa6,0007914f8b767c8911d0dc3918efbcbd,000895d3383110c195db37741685a61a,0010f8613132238cf232233773e1b5f6,0011d4f704a068dc132189dd461b99c9,00264d2e24dc6e390393fefbf27ca6b2,0028633d685de4e78ec246ed258b9191,00341d8cc185a878381dc6bb8232539f,0035051403ce50ae8c9abbaea203c3cf,...,ffcf3bcc5befe7b4351bbc3983673389,ffdb157261d9f0a88f8d19bfa5f8bfdf,ffe343e346b265cf80fd3967e0faf9b4,ffe7a8a332d17b1d2f12bb00d89fa0a0,ffec2982734fc9ab45d62d7607e423e7,ffec9635f0d9d190a7c56119b2732bd7,ffef4c57ceac6f0d82062c60b48f3d0c,fff167315087fdc06da3139b95225ecf,fff5e5dce52e64c796e6f96dd210f3d5,fffe1a411f2bb390db833242c804df5d
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00014c1f8bf3f3f2b0a6e7e3afbcfcff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00071a5828d6ef5cc9c2304f8a885dac,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
000ce881071b718d42891c8af25db49d,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
000f69e2c9a757b4933f5a84786d3c3f,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
001580c5ebab8c6ad20ee723f1d8ef30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ffd73a462731f7bf0c92aedd758ccd74,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ffd8f1dd687038bfe22b2c670f36ecf4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0
ffe392414354420af3d92adf8fd4f453,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ffe4c99a5648ffacaa9986f2e81e2454,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
train_item_user_matrix

user_id,00014c1f8bf3f3f2b0a6e7e3afbcfcff,00071a5828d6ef5cc9c2304f8a885dac,000ce881071b718d42891c8af25db49d,000f69e2c9a757b4933f5a84786d3c3f,001580c5ebab8c6ad20ee723f1d8ef30,002181e296aceefc175232f0ffc276ac,002bfd30499b9eb1add3b0445e0df3a2,004acdacdaa67159b53d5c7910372867,00587a03f269b979aa831ee3245b65ba,0064c2d6106c4b8701d2ccae390ffb45,...,ffc7ea7264f1fa51983fbb3cda327249,ffce2121b1f090bf902e3266bfc88f2d,ffce74b3ebf62304b453c696fc5fb6a6,ffd2f7fe1530130b6791634e4c4bb8a3,ffd3a2562345ce83fe6a85d8f95f9631,ffd73a462731f7bf0c92aedd758ccd74,ffd8f1dd687038bfe22b2c670f36ecf4,ffe392414354420af3d92adf8fd4f453,ffe4c99a5648ffacaa9986f2e81e2454,ffe8417dde74a95e4199026178aa28f5
business_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0001cf8af2e89975526c034208d594de,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0002d36982e9e94fd7fac3fc0b679fa6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0007914f8b767c8911d0dc3918efbcbd,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
000895d3383110c195db37741685a61a,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0010f8613132238cf232233773e1b5f6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ffec9635f0d9d190a7c56119b2732bd7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ffef4c57ceac6f0d82062c60b48f3d0c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
fff167315087fdc06da3139b95225ecf,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
fff5e5dce52e64c796e6f96dd210f3d5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Fill with the mean rating of (user/item)

In [28]:
# Rebuild the wide user x business table, this time leaving unrated cells as
# NaN so they can be imputed with a mean instead of zero. This re-binds the
# same two names that the zero-fill variant assigns.
train_user_item_matrix = train_data.pivot_table(index='user_id', columns='business_id', values='stars')
train_item_user_matrix = train_user_item_matrix.T

# Fill every missing cell with the mean of its column. DataFrame.mean() yields
# one value per column and fillna() matches that Series on column labels, so
# this gives the same values as a per-column apply() without calling a Python
# lambda once per column.
# Note which mean each matrix receives:
#   - user x business table: columns are businesses -> gaps get the business mean
#   - business x user table: columns are users      -> gaps get the user mean
train_user_item_matrix = train_user_item_matrix.fillna(train_user_item_matrix.mean())
train_item_user_matrix = train_item_user_matrix.fillna(train_item_user_matrix.mean())

In [29]:
print('Number of Training Users:', train_user_item_matrix.shape[0])
print('Number of Training Items:', train_user_item_matrix.shape[1])

Number of Training Users: 4980
Number of Training Items: 10844


In [30]:
train_user_item_matrix.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4980 entries, 00014c1f8bf3f3f2b0a6e7e3afbcfcff to ffe8417dde74a95e4199026178aa28f5
Columns: 10844 entries, 0001cf8af2e89975526c034208d594de to fffe1a411f2bb390db833242c804df5d
dtypes: float64(10844)
memory usage: 412.0+ MB


# Introduction to Collaborative Filtering
Collaborative Filtering (CF) is a widely used technique in recommendation systems. It generates predictions about a user's interests by collecting preferences from multiple users. In this tutorial, we will explore three main methods of collaborative filtering:

- User-Based Collaborative Filtering (User-CF)
- Item-Based Collaborative Filtering (Item-CF)
- Singular Value Decomposition (SVD)

We will implement these methods using a dataset containing user ratings for various businesses.

## Compute Similarity
We will use the normal cosine similarity in both User-based CF and Item-based CF for simplicity.

$sim(\hat{a},\hat{b}) = \cos(\hat{a}, \hat{b}) = \frac{\hat{a} \cdot \hat{b}}{\lVert\hat{a}\rVert \, \lVert\hat{b}\rVert}$

In [31]:
def similarity(matrix):
    """Return the pairwise cosine similarity between the rows of ``matrix``.

    Thin wrapper around ``cosine_similarity`` called with a single argument,
    so every row is compared against every row of the same matrix.
    """
    pairwise_scores = cosine_similarity(matrix)
    return pairwise_scores

# User-based CF

### Prediction Function
We will implement the user-based prediction function as presented in the lecture.

$pred(a,p) = \bar{r}_a + \frac{\sum_{b\in N} sim(a,b) \cdot (r_{b,p} - \bar{r}_b)}{\sum_{b\in N}|sim(a,b)|}$

It utilizes the similarity between users to make predictions.


In [32]:
def predict_user_based(user_item_matrix, user_similarity):
    """Predict a rating for every (user, item) cell with user-based CF.

    Computes, for each user a and item p,
        pred(a, p) = mean(r_a) + sum_b sim(a, b) * (r_{b,p} - mean(r_b)) / sum_b |sim(a, b)|
    where b runs over all rows of ``user_item_matrix``.

    params:
        user_item_matrix <pd.DataFrame>: ratings with one row per user and one
            column per item. NaN cells are skipped by the row mean but would
            propagate through the dot product, so the matrix should already be
            imputed.
        user_similarity <np.array>: square user-user similarity matrix whose
            row/column order matches the rows of ``user_item_matrix``.

    return:
        pd.DataFrame of predicted ratings with the same index and columns as
        ``user_item_matrix``.
    """
    user_similarity = np.asarray(user_similarity)
    user_ratings_mean = user_item_matrix.mean(axis=1).values.reshape(-1, 1)  # (num_users, 1)
    centered_user_item_matrix = user_item_matrix.to_numpy() - user_ratings_mean  # (num_users, num_items)
    sim_sum = np.abs(user_similarity).sum(axis=1)[:, None]  # (num_users, 1)
    # A user whose similarities are all zero has a zero numerator as well;
    # dividing by 1 instead of 0 makes the prediction fall back to that user's
    # own mean rating rather than producing NaN.
    safe_sim_sum = np.where(sim_sum == 0, 1.0, sim_sum)
    weighted_deviation = np.dot(user_similarity, centered_user_item_matrix) / safe_sim_sum
    pred_ratings = user_ratings_mean + weighted_deviation  # (num_users, num_items)
    # return as pd.DataFrame
    return pd.DataFrame(pred_ratings, index=user_item_matrix.index, columns=user_item_matrix.columns)

In [33]:
# Cosine similarity between every pair of user rows of the training matrix.
users_similarity_matrix = similarity(train_user_item_matrix)
# Predicted rating for every (user, business) cell, returned as a DataFrame.
user_based_pred = predict_user_based(train_user_item_matrix, users_similarity_matrix)

In [34]:
user_based_pred

business_id,0001cf8af2e89975526c034208d594de,0002d36982e9e94fd7fac3fc0b679fa6,0007914f8b767c8911d0dc3918efbcbd,000895d3383110c195db37741685a61a,0010f8613132238cf232233773e1b5f6,0011d4f704a068dc132189dd461b99c9,00264d2e24dc6e390393fefbf27ca6b2,0028633d685de4e78ec246ed258b9191,00341d8cc185a878381dc6bb8232539f,0035051403ce50ae8c9abbaea203c3cf,...,ffcf3bcc5befe7b4351bbc3983673389,ffdb157261d9f0a88f8d19bfa5f8bfdf,ffe343e346b265cf80fd3967e0faf9b4,ffe7a8a332d17b1d2f12bb00d89fa0a0,ffec2982734fc9ab45d62d7607e423e7,ffec9635f0d9d190a7c56119b2732bd7,ffef4c57ceac6f0d82062c60b48f3d0c,fff167315087fdc06da3139b95225ecf,fff5e5dce52e64c796e6f96dd210f3d5,fffe1a411f2bb390db833242c804df5d
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00014c1f8bf3f3f2b0a6e7e3afbcfcff,4.428215,2.666311,3.999644,3.999644,4.399644,3.499644,4.999644,2.999644,3.666311,3.166311,...,4.399644,4.181462,3.332977,3.922721,4.627551,4.666311,4.124644,4.999644,4.499644,4.599644
00071a5828d6ef5cc9c2304f8a885dac,4.429428,2.667523,4.000856,4.000856,4.400856,3.500857,5.000856,3.000857,3.667523,3.167523,...,4.400856,4.182675,3.334190,3.923933,4.628763,4.667523,4.125856,5.000856,4.500856,4.600856
000ce881071b718d42891c8af25db49d,4.428750,2.666846,4.000179,4.000179,4.400179,3.500179,5.000179,3.000179,3.666846,3.166846,...,4.400179,4.181997,3.333512,3.923256,4.628086,4.666846,4.125179,5.000179,4.500179,4.600179
000f69e2c9a757b4933f5a84786d3c3f,4.429753,2.667849,4.001182,4.001182,4.401182,3.501182,5.001182,3.001182,3.667849,3.167849,...,4.401182,4.183000,3.334515,3.924259,4.629089,4.667849,4.126182,5.001182,4.501182,4.601182
001580c5ebab8c6ad20ee723f1d8ef30,4.428709,2.666805,4.000138,4.000138,4.400138,3.500138,5.000138,3.000138,3.666805,3.166805,...,4.400138,4.181956,3.333471,3.923215,4.628045,4.666805,4.125138,5.000138,4.500138,4.600138
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ffd73a462731f7bf0c92aedd758ccd74,4.429134,2.667229,4.000563,4.000563,4.400563,3.500563,5.000563,3.000563,3.667229,3.167229,...,4.400563,4.182381,3.333896,3.923640,4.628470,4.667229,4.125563,5.000563,4.500563,4.600563
ffd8f1dd687038bfe22b2c670f36ecf4,4.430496,2.668591,4.001925,4.001925,4.401925,3.501925,5.001925,3.001925,3.668591,3.168591,...,4.401925,4.183743,3.335258,3.925002,4.629832,4.668591,4.126925,5.001925,4.501925,4.601925
ffe392414354420af3d92adf8fd4f453,4.428245,2.666341,3.999674,3.999674,4.399674,3.499674,4.999674,2.999674,3.666341,3.166341,...,4.399674,4.181492,3.333007,3.922751,4.627581,4.666341,4.124674,4.999674,4.499674,4.599674
ffe4c99a5648ffacaa9986f2e81e2454,4.428869,2.666964,4.000298,4.000298,4.400298,3.500298,5.000298,3.000298,3.666964,3.166964,...,4.400298,4.182116,3.333631,3.923375,4.628205,4.666964,4.125298,5.000298,4.500298,4.600298


In [35]:
user_based_pred.shape

(4980, 10844)

# Item-based CF

### Prediction Function
We will implement the item-based prediction function as presented in the lecture.

$pred(u,p) = \frac{\sum_{i} sim(i,p) \cdot r_{u,i}}{\sum_{i} |sim(i,p)|}$

It utilizes the similarity between items to make predictions.


In [36]:
def predict_item_based(item_user_matrix, item_similarity):
    """Predict a rating for every (item, user) cell with item-based CF.

    Computes, for each item p and user u,
        pred(u, p) = sum_i sim(i, p) * r_{u,i} / sum_i |sim(i, p)|
    where i runs over all rows of ``item_user_matrix``.

    params:
        item_user_matrix <pd.DataFrame>: ratings with one row per item and one
            column per user. NaN cells would propagate through the dot
            product, so the matrix should already be imputed.
        item_similarity <np.array>: square item-item similarity matrix whose
            row/column order matches the rows of ``item_user_matrix``.

    return:
        pd.DataFrame of predicted ratings with the same index and columns as
        ``item_user_matrix`` (items x users).
    """
    item_similarity = np.asarray(item_similarity)
    nom = np.dot(item_similarity, item_user_matrix)  # (num_items, num_users)
    denom = np.abs(item_similarity).sum(axis=1)[:, None]  # (num_items, 1)
    # An item with all-zero similarities has a zero numerator too; dividing by
    # 1 instead of 0 yields a prediction of 0 for it rather than NaN.
    safe_denom = np.where(denom == 0, 1.0, denom)
    pred_ratings = nom / safe_denom  # division through broadcasting
    return pd.DataFrame(pred_ratings, index=item_user_matrix.index, columns=item_user_matrix.columns)

In [37]:
# Cosine similarity between every pair of business rows of the training matrix.
items_similarity_matrix = similarity(train_item_user_matrix)
# Predicted rating for every (business, user) cell, returned as a DataFrame.
item_based_pred = predict_item_based(train_item_user_matrix, items_similarity_matrix)

# SVD

In [38]:
def svd_rating_prediction(pivot_table, n_components=2):
    """Reconstruct ``pivot_table`` from its top ``n_components`` singular triplets.

    params:
        pivot_table <array-like>: dense 2-D rating matrix without missing values
        n_components <int>: number of leading singular values/vectors to keep

    return:
        np.array with the same shape as ``pivot_table`` holding the low-rank
        reconstruction U_k * diag(s_k) * Vh_k.

    Note: a later cell defines a TruncatedSVD-based function with the same
    name, which replaces this one once that cell has run.
    """
    # full_matrices=False returns only the min(rows, cols) singular vectors.
    # The leading n_components are the same ones the full decomposition would
    # give, but the large square U / Vh matrices are never materialised.
    U, s, Vh = np.linalg.svd(pivot_table, full_matrices=False)
    # select top n_components
    U_k = U[:, :n_components]
    s_k = s[:n_components]
    Vh_k = Vh[:n_components, :]
    # Scaling the columns of U_k by s_k equals multiplying by diag(s_k).
    return np.dot(U_k * s_k, Vh_k)

You can also use `TruncatedSVD` for faster computation.

In [39]:
def svd_rating_prediction(pivot_table, n_components=2, random_state=None):
    """Low-rank reconstruction of ``pivot_table`` using scikit-learn's TruncatedSVD.

    This definition shadows the earlier ``np.linalg.svd``-based function of
    the same name once this cell has run.

    params:
        pivot_table <array-like>: 2-D rating matrix without missing values
        n_components <int>: number of components kept by TruncatedSVD
        random_state <int | RandomState | None>: forwarded to TruncatedSVD.
            With the default ``None`` the behaviour is unchanged; pass an int
            to make the result independent of any global NumPy seed.

    return:
        np.array with the same shape as ``pivot_table``: the data projected
        onto the fitted components and mapped back to the original space.
    """
    svd = TruncatedSVD(n_components=n_components, random_state=random_state)
    reduced = svd.fit_transform(pivot_table)
    return svd.inverse_transform(reduced)

In [40]:
# Two-component reconstruction of the user x business matrix, used as the SVD rating predictions.
svd_pred = svd_rating_prediction(train_user_item_matrix, n_components=2)

# Evaluation

# Root Mean Squared Error (RMSE)

We need a reliable way to evaluate the performance of recommendation algorithms. RMSE is one of the most popular metrics for estimating how good a recommendation algorithm is. Since RMSE measures prediction error, the smaller the error a model achieves, the better its performance, and vice versa.

$$RMSE=\sqrt{\frac{1}{N}\sum_{i=1}^{N}(\hat{y}_i - y_i)^2}$$

$\hat{y}_i$: The predicted answer of sample $i$

$y_i$: The ground truth answer of sample $i$

$N$: The number of samples

In [41]:
def rmse(pred, actual):
    '''
    Root mean squared error between predicted and ground-truth ratings.

    params:
        pred <np.array>: predicted ratings
        actual <np.array>: ground truth ratings, aligned element-wise with pred

    return:
        a scalar: the square root of the mean squared error
    '''
    mse = mean_squared_error(actual, pred)
    return np.sqrt(mse)

In [42]:
def get_predictions(pred_matrix, user_id_map, business_id_map, valid_data):
    """Collect predicted and true ratings for the validation rows that can be scored.

    A validation row is used only when both its ``user_id`` and its
    ``business_id`` have a position in the id maps; all other rows are skipped.

    params:
        pred_matrix: 2-D array indexed as ``pred_matrix[user_idx, business_idx]``
        user_id_map <dict>: user id -> row position in ``pred_matrix``
        business_id_map <dict>: business id -> column position in ``pred_matrix``
        valid_data <pd.DataFrame>: frame with ``user_id``, ``business_id`` and ``stars`` columns

    return:
        (predictions, actuals): two np.arrays of equal length, in validation-row order
    """
    scored = []
    truth = []

    rows = zip(valid_data['user_id'], valid_data['business_id'], valid_data['stars'])
    for uid, bid, stars in rows:
        # Skip pairs for which no prediction exists.
        if uid not in user_id_map or bid not in business_id_map:
            continue
        scored.append(pred_matrix[user_id_map[uid], business_id_map[bid]])
        truth.append(stars)

    return np.array(scored), np.array(truth)

In [43]:
# Position of every training user (row) and business (column) in the
# user x business prediction matrices.
user_id_map = dict(zip(train_user_item_matrix.index, range(len(train_user_item_matrix.index))))
business_id_map = dict(zip(train_user_item_matrix.columns, range(len(train_user_item_matrix.columns))))

# User-based CF: predictions for the validation pairs, then RMSE.
user_filtered_pred, user_filtered_actual = get_predictions(
    user_based_pred.values, user_id_map, business_id_map, valid_data
)
user_based_rmse = rmse(user_filtered_pred, user_filtered_actual)
print(f'User-based RMSE: {user_based_rmse}')

# Item-based CF: its predictions are business x user, so transpose them to
# user x business before looking up the validation pairs.
item_filtered_pred, item_filtered_actual = get_predictions(
    item_based_pred.T.values, user_id_map, business_id_map, valid_data
)
item_based_rmse = rmse(item_filtered_pred, item_filtered_actual)
print(f'Item-based RMSE: {item_based_rmse}')

# SVD: svd_pred is already passed as a plain array.
svd_filtered_pred, svd_filtered_actual = get_predictions(
    svd_pred, user_id_map, business_id_map, valid_data
)
svd_rmse = rmse(svd_filtered_pred, svd_filtered_actual)
print(f'SVD RMSE: {svd_rmse}')

User-based RMSE: 1.1519717012263793
Item-based RMSE: 1.1333071844426112
SVD RMSE: 1.1518864042405965


The RMSE values indicate that our models have significant errors in predicting the star ratings, which may be due to several reasons:
- Sparse data
- Imputing missing ratings (with zeros or with mean ratings), which treats unobserved entries as if they were real ratings
