In [19]:
import pandas as pd
ratings = pd.read_csv("csv_files/ratings.csv")[["userId", "movieId", "rating"]]
ratings

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0
...,...,...,...
99999,671,6268,2.5
100000,671,6269,4.0
100001,671,6365,4.0
100002,671,6385,2.5


### create the dataset

In [25]:
from surprise import Dataset, Reader
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings, reader)
data

<surprise.dataset.DatasetAutoFolds at 0x7fd4c0b476a0>

### build the trainset

In [46]:
trainset = data.build_full_trainset()
list(trainset.all_ratings())

[(0, 0, 2.5),
 (0, 1, 3.0),
 (0, 2, 3.0),
 (0, 3, 2.0),
 (0, 4, 4.0),
 (0, 5, 2.0),
 (0, 6, 2.0),
 (0, 7, 2.0),
 (0, 8, 3.5),
 (0, 9, 2.0),
 (0, 10, 2.5),
 (0, 11, 1.0),
 (0, 12, 4.0),
 (0, 13, 4.0),
 (0, 14, 3.0),
 (0, 15, 2.0),
 (0, 16, 2.0),
 (0, 17, 2.5),
 (0, 18, 1.0),
 (0, 19, 3.0),
 (1, 20, 4.0),
 (1, 21, 5.0),
 (1, 22, 5.0),
 (1, 23, 4.0),
 (1, 24, 4.0),
 (1, 25, 3.0),
 (1, 26, 3.0),
 (1, 27, 4.0),
 (1, 28, 3.0),
 (1, 29, 5.0),
 (1, 30, 4.0),
 (1, 31, 3.0),
 (1, 32, 3.0),
 (1, 33, 3.0),
 (1, 34, 3.0),
 (1, 35, 3.0),
 (1, 36, 3.0),
 (1, 37, 5.0),
 (1, 38, 1.0),
 (1, 39, 3.0),
 (1, 40, 3.0),
 (1, 41, 3.0),
 (1, 42, 4.0),
 (1, 43, 4.0),
 (1, 44, 5.0),
 (1, 45, 5.0),
 (1, 46, 3.0),
 (1, 47, 4.0),
 (1, 48, 3.0),
 (1, 49, 4.0),
 (1, 50, 3.0),
 (1, 51, 4.0),
 (1, 52, 2.0),
 (1, 53, 1.0),
 (1, 54, 3.0),
 (1, 55, 4.0),
 (1, 56, 4.0),
 (1, 57, 3.0),
 (1, 58, 3.0),
 (1, 59, 3.0),
 (1, 60, 3.0),
 (1, 61, 2.0),
 (1, 62, 3.0),
 (1, 63, 3.0),
 (1, 64, 3.0),
 (1, 65, 3.0),
 (1, 66, 2.0),
 (1, 

### train the model

In [49]:
from surprise import SVD
svd = SVD()

In [55]:
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fd4a84662c0>

In [58]:
svd.predict(3,30)

Prediction(uid=3, iid=30, r_ui=None, est=3.724260971780314, details={'was_impossible': False})

### validation

In [64]:
from surprise import model_selection
model_selection.cross_validate(svd, data, measures=['RMSE', 'MAE'])

{'test_rmse': array([0.90076275, 0.89619475, 0.89633631, 0.89701282, 0.90704469]),
 'test_mae': array([0.6945161 , 0.68994947, 0.69248005, 0.68994235, 0.69556855]),
 'fit_time': (1.3276350498199463,
  1.2940537929534912,
  1.240506649017334,
  1.3905057907104492,
  1.395096778869629),
 'test_time': (0.21163535118103027,
  0.18689632415771484,
  0.3777611255645752,
  0.1916947364807129,
  0.18228793144226074)}