In [1]:
import numpy as np
import pandas as pd

from surprise import SVD
from surprise import dataset
from surprise import accuracy
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split

# Locations of the CSV inputs, relative to the notebook's working directory.
# Most files sit inside a directory that carries the same name as the file.

# Ratings table (the file loaded by the parsing cell below).
ratings = "ratings_small.csv/ratings_small.csv"

# Remaining tables; none of them is read in the cells shown here.
movies = "movies_metadata.csv/movies_metadata.csv"
keywords = "keywords.csv/keywords.csv"
credits = "credits.csv/credits.csv"
# NOTE(review): unlike the paths above, this one has no enclosing directory —
# confirm that is the real on-disk layout.
links = "links_small.csv"

### Loading the ratings and wrapping them in a Surprise dataset

In [2]:
# Load the user/movie ratings table and show the first rows as a sanity check.
df = pd.read_csv(ratings)
print(df.head())

# The sample above contains half-star ratings (e.g. 2.5), so a hard-coded
# integer (1, 5) scale is not guaranteed to cover the data. Take the bounds
# from the ratings actually loaded so predictions are clamped to the true range.
rating_min = float(df['rating'].min())
rating_max = float(df['rating'].max())
reader = Reader(line_format='user item rating timestamp',
                rating_scale=(rating_min, rating_max))

class MyDataset(dataset.DatasetAutoFolds):
    """Surprise dataset populated directly from a ratings DataFrame.

    The constructor only sets ``raw_ratings`` and ``reader``; it does not
    call the parent class initializer.
    NOTE(review): confirm that no other attribute normally set by the parent
    initializer is required by the Surprise version in use.
    """

    def __init__(self, df, reader):
        """Store the ratings of ``df`` as 4-tuples together with ``reader``.

        Args:
            df: DataFrame with 'userId', 'movieId', 'rating' and 'timestamp'
                columns.
            reader: object kept as ``self.reader``.
        """
        # One (user, item, rating, timestamp) tuple per DataFrame row.
        rating_columns = (df['userId'], df['movieId'], df['rating'], df['timestamp'])
        self.raw_ratings = list(zip(*rating_columns))
        self.reader = reader

# Build the dataset object passed to train_test_split / cross_validate below.
data = MyDataset(df, reader)

   userId  movieId  rating   timestamp
0       1       31     2.5  1260759144
1       1     1029     3.0  1260759179
2       1     1061     3.0  1260759182
3       1     1129     2.0  1260759185
4       1     1172     4.0  1260759205


### Training the SVD model and evaluating it with cross-validation

In [13]:
# Fix the random seed so that a fresh "Restart & Run All" reproduces the same
# numbers. random_state seeds the SVD initialisation and the hold-out split;
# np.random.seed covers the fold shuffling done for an integer `cv`, which
# is expected to fall back to NumPy's global RNG (see the Surprise FAQ on
# reproducible experiments).
SEED = 42
np.random.seed(SEED)

algo = SVD(random_state=SEED)

# Hold-out evaluation: fit on 75% of the ratings, predict the remaining 25%.
# `predictions` is not consumed further within this cell.
trainset, testset = train_test_split(data, test_size=0.25, random_state=SEED)
algo.fit(trainset)
predictions = algo.test(testset)

# k-fold cross-validation over the full dataset.
# n_jobs=-2 runs the folds in parallel on all CPUs but one.
folds = 5
valid = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=folds,
                       verbose=False, n_jobs=-2)

# cross_validate returns one score per fold; report the mean across folds.
print("rmse: " + str(np.mean(valid['test_rmse'])))
print("mae: " + str(np.mean(valid['test_mae'])))

rmse: 0.8955473527533115
mae: 0.6888858730937051
