In [1]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
import pickle

# Input files: MovieLens "ml-latest-small" CSVs, addressed relative to the working directory
data_path = '../data/ml-latest-small/ratings.csv'
item_path = '../data/ml-latest-small/movies.csv'

# Ratings are read in full; from the movies file only the id and title columns are loaded
ratings = pd.read_csv(data_path)
movies = pd.read_csv(item_path, usecols=['movieId', 'title'])

# 'ratings' arrives with the columns 'userId', 'movieId', 'rating', 'timestamp';
# the timestamp is not needed downstream, so it is removed here
ratings = ratings.drop(columns=['timestamp'])

# Prepare data for Surprise
reader = Reader(rating_scale=(0.5, 5.0))  # MovieLens ratings are between 0.5 and 5.0
# load_from_df expects exactly three columns in (user, item, rating) order,
# hence the explicit column selection
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Split the data into train and test sets (25% of the ratings are held out).
# The shuffle is seeded so the same partition is produced on every run;
# without a seed the held-out ratings change each time the cell executes.
SPLIT_SEED = 42
trainset, testset = train_test_split(data, test_size=0.25, random_state=SPLIT_SEED)

import os

# Train the model
# A fixed random_state seeds the RNG SVD uses to initialise its factors, so
# fitting the same trainset yields the same model on every run.
MODEL_SEED = 42
model = SVD(random_state=MODEL_SEED)
model.fit(trainset)

# Save the model
model_path = '../models/recommendation_model.pkl'
# Create the target directory up front; otherwise open() raises
# FileNotFoundError and the freshly trained model is never persisted.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
with open(model_path, 'wb') as f:
    pickle.dump(model, f)

# Score the held-out ratings and report the root-mean-square error
from surprise import accuracy

predictions = model.test(testset)
# rmse() prints the "RMSE: ..." line and returns the value; keeping it as the
# last expression lets the notebook display the returned number as well
accuracy.rmse(predictions, verbose=True)


RMSE: 0.8751


0.8750562358611308