# Collaborative Filtering

## Loading the data

In [6]:
import pandas as pd

# Keep only the user, item and rating columns, in that order
# (the order surprise's Dataset.load_from_df expects for its input frame).
rating_columns = ["userId", "movieId", "rating"]
rating_df = pd.read_csv("ratings.csv").loc[:, rating_columns]
rating_df.head()

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


## Creating the dataset

In [7]:
from surprise import Dataset, Reader

# Derive the rating scale from the data instead of hard-coding (1, 5): the
# ratings contain half-star values (e.g. 2.5), so the true lower bound may be
# below 1, and surprise clips predictions to this range.
# NOTE(review): this uses the observed min/max; if the nominal scale is known
# (and wider than what appears in the file), pass it explicitly instead.
rating_scale = (float(rating_df["rating"].min()), float(rating_df["rating"].max()))
reader = Reader(rating_scale=rating_scale)

# load_from_df expects exactly three columns: user id, item id, rating.
dataset = Dataset.load_from_df(rating_df, reader)

## Building the trainset

In [9]:
# Build a trainset from every rating in `dataset` (no hold-out split here).
trainset = dataset.build_full_trainset()

## Training the model

In [13]:
from surprise import SVD

# Seed the algorithm's RNG (used for initialisation) so that fitting and the
# resulting predictions are reproducible across Restart & Run All.
svd = SVD(random_state=42)

In [14]:
# Train on the full trainset. fit() returns the algorithm object itself,
# which is why the cell output is the SVD repr.
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f1fcbca8fd0>

In [15]:
# Estimate the rating user 15 would give to movie 1956 (raw ids).
# No true rating is supplied, so `r_ui` is None in the returned Prediction;
# the estimate itself is the `est` field.
svd.predict(15, 1956)

Prediction(uid=15, iid=1956, r_ui=None, est=3.1464568110313476, details={'was_impossible': False})

## Validation

In [16]:
from surprise import SVD, model_selection

# cross_validate fits the estimator it is given once per fold. Evaluate a
# separate, unfitted SVD so that `svd` remains the model trained on the full
# trainset and earlier predictions stay reproducible when cells are re-run.
# cv=5 spells out the default 5-fold split.
cv_results = model_selection.cross_validate(
    SVD(),
    dataset,
    measures=["RMSE", "MAE"],
    cv=5,
)
cv_results

{'test_rmse': array([0.90833278, 0.89547335, 0.89509458, 0.89485017, 0.89240207]),
 'test_mae': array([0.69682405, 0.68983574, 0.68897801, 0.68766238, 0.68743194]),
 'fit_time': (0.7348694801330566,
  0.7218313217163086,
  0.7597203254699707,
  0.6850697994232178,
  0.6593649387359619),
 'test_time': (0.09359145164489746,
  0.09125852584838867,
  0.08858871459960938,
  0.08927536010742188,
  0.08929157257080078)}

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=5d567609-f49c-4e02-96bb-cb4780f2efca' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>