In [7]:
# Initialise everything from the previous 2 attempts 
from surprise import SVDpp
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise import Reader
from surprise.model_selection import GridSearchCV
import pandas as pd
import numpy as np
import csv

# Load each semicolon-separated, header-less CSV and name its columns right away.
movies = pd.read_csv('movies.csv', sep=';', encoding="utf-8", header=None)
movies.columns = ['movieId', 'movieYear', 'movieTitle']

# (userId, movieId) pairs for which a rating has to be predicted.
predictions = pd.read_csv('predictions.csv', sep=';', header=None)
predictions.columns = ['userId', 'movieId']

users = pd.read_csv('users.csv', sep=';', header=None)
users.columns = ['userId', 'userGender', 'userAge', 'userProfession']

# Known ratings; these are what the model is trained on further down.
ratings = pd.read_csv('ratings.csv', sep=';', header=None)
ratings.columns = ['userId', 'movieId', 'rating']

def number_rounder(number: int) -> int:
    """Clamp a rating to the 1-5 range and round it to a whole number.

    Args:
        number: The value to convert. Fractional values are accepted as well,
            since they go through the built-in ``round``.

    Returns:
        1 for anything below 1, 5 for anything above 5, otherwise ``number``
        rounded with the built-in ``round`` (exact halves go to the even
        neighbour) and converted to ``int``.
    """
    lowest, highest = 1, 5
    if number < lowest:
        return lowest
    if number > highest:
        return highest
    return int(round(number))

In [8]:
# Wrap the (userId, movieId, rating) columns in a Surprise dataset;
# the reader is told that ratings lie on a 1-5 scale.
surprise_data = Dataset.load_from_df(
    ratings[['userId', 'movieId', 'rating']],
    Reader(rating_scale=(1, 5)),
)

In [9]:
# First, coarse search space: epoch counts around 100 and a spread of factor counts.
param_grid = dict(
    n_epochs=[95, 100, 105],
    n_factors=[3, 5, 10, 15],
)

In [10]:
# Grid search over param_grid for SVD++, scored on RMSE and MAE.
# n_jobs=-1 asks for parallel evaluation (all CPUs, by joblib convention).
gs = GridSearchCV(
    algo_class=SVDpp,
    param_grid=param_grid,
    measures=["rmse", "mae"],
    n_jobs=-1,
)

In [11]:
# Run the coarse grid search on the ratings dataset.
gs.fit(data=surprise_data)

In [14]:
# Best parameter combination and its score, both selected by RMSE (one per line).
print(gs.best_params["rmse"], gs.best_score["rmse"], sep="\n")

{'n_epochs': 105, 'n_factors': 5}
0.8660500556131897


In [15]:
# Second, narrower search space: factor counts 4-7 and epoch counts from 105 upwards.
enhanced_param_grid = dict(
    n_epochs=[105, 115, 130],
    n_factors=[4, 5, 6, 7],
)

In [16]:
# Same search setup as the first grid search, but over enhanced_param_grid.
enhanced_grid_search = GridSearchCV(
    algo_class=SVDpp,
    param_grid=enhanced_param_grid,
    measures=["rmse", "mae"],
    n_jobs=-1,
)

In [17]:
# Run the refined grid search on the ratings dataset.
enhanced_grid_search.fit(data=surprise_data)

In [18]:
# Best parameter combination and its score from the refined search (one per line).
print(
    enhanced_grid_search.best_params["rmse"],
    enhanced_grid_search.best_score["rmse"],
    sep="\n",
)

{'n_epochs': 105, 'n_factors': 6}
0.8646176540465186


In [28]:
# Build a training set from every available rating (no hold-out split),
# then fit the estimator the refined search selected by RMSE on it.
full_trainingset = surprise_data.build_full_trainset()
algo = enhanced_grid_search.best_estimator["rmse"]
algo.fit(full_trainingset)

<surprise.prediction_algorithms.matrix_factorization.SVDpp at 0x7f0e0add93d0>

In [34]:
def inner_id_mapper(user_id, movie_id):
    """Translate a raw (user, movie) id pair into the training set's inner ids.

    Both lookups are delegated to ``full_trainingset``; anything they raise
    (presumably for ids that are not in the training set - TODO confirm)
    propagates to the caller.

    Returns:
        A ``(inner_user_id, inner_movie_id)`` tuple.
    """
    return full_trainingset.to_inner_uid(user_id), full_trainingset.to_inner_iid(movie_id)


# Build one [userId, movieId, rating] entry per requested prediction.
# The rating is a placeholder (1): only the estimate (`.est`) is read back below.
# The columns are zipped directly instead of going through `iterrows()`, which
# creates a Series per row, and the list is handed to `algo.test` as-is:
# wrapping it in `np.array` would force ids and rating into a single dtype.
combinations_to_predict = [
    [user_id, movie_id, 1]
    for user_id, movie_id in zip(predictions['userId'], predictions['movieId'])
]

test_answers = algo.test(combinations_to_predict)

pred_ratings = [prediction.est for prediction in test_answers]

# Output rows: 1-based row number followed by the estimated rating.
csv_data = [
    [row_number, estimate]
    for row_number, estimate in enumerate(pred_ratings, start=1)
]

# newline='' lets the csv module control line endings itself.
with open('guess_surprise.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(csv_data)