In [None]:
# pylint: disable=wrong-import-position

# Temporary workaround: step up to the parent directory before importing so
# that the local `utils` package can be resolved by the imports below.
%cd ".."

import clearml
import numpy as np
import pandas as pd
import surprise
from surprise import SVD, Dataset, Reader

from utils.dataset_for_evaluation_collector import collect_unwatched_train_watched_test_movies
from utils.recommendations_evaluation import evaluate_recommendations, generate_recommendations

# Seed NumPy's global RNG so that code drawing from it is repeatable between runs.
np.random.seed(42)

# Step back into "models" (presumably where this notebook lives - TODO confirm),
# so the relative data paths used in later cells resolve from there.
%cd "models"

In [None]:
# Register this run with ClearML; `task` is used at the end of the notebook to
# upload the metric tables and to close the run.
task = clearml.Task.init(
    project_name="MoviesGRS_MFDP",
    task_name="SVDRecommender",
    tags=["SVDRecommender"],
)

In [3]:
# Names of the evaluation groups, "group1" through "group7".
GROUPS_LIST = ["group" + str(number) for number in range(1, 8)]

In [4]:
# Collect the evaluation frame for every group in GROUPS_LIST.
# NOTE(review): the recommenders below read "unwatched" and "userId" from each
# row they receive - presumably those rows come from this frame; confirm
# against the collector's output schema.
unwatched_train_watched_test_movies: pd.DataFrame = collect_unwatched_train_watched_test_movies(GROUPS_LIST)

In [5]:
# Bounds of the rating scale handed to Surprise's Reader.
# NOTE(review): confirm these match the actual rating range in ratings_train.pq.
MIN_RATING = 1
MAX_RATING = 5

# Build a Surprise trainset from the (userId, movieId, rating) columns of the
# training split.
reader = Reader(rating_scale=(MIN_RATING, MAX_RATING))
train_data: pd.DataFrame = pd.read_parquet("../data/ratings_train.pq")
surprise_train_dataset = Dataset.load_from_df(
    df=train_data.loc[:, ["userId", "movieId", "rating"]],
    reader=reader,
)
trainset: surprise.Trainset = surprise_train_dataset.build_full_trainset()

# Fit the SVD model; its bi / qi / pu arrays are read directly by the
# recommender functions defined further down.
svd = SVD(n_factors=17, n_epochs=30)
svd.fit(trainset)

## Average user

In [6]:
def recommend_avg_user(row: pd.Series, top_k: int = 10, model=None) -> np.ndarray:
    """Rank a group's unwatched movies against the mean of its members' factor vectors.

    Each candidate is scored as ``bi[movie] + qi[movie] @ mean(pu[members])``;
    only the item bias and the latent-factor term are used.

    NOTE(review): the values in ``row["unwatched"]`` and ``row["userId"]`` are
    used directly as positions in the model's ``bi`` / ``qi`` / ``pu`` arrays.
    Surprise lays those arrays out by *inner* id - confirm the ids in the frame
    are already inner ids (otherwise map them via ``trainset.to_inner_iid`` /
    ``trainset.to_inner_uid`` first).

    Args:
        row: Record providing ``"unwatched"`` (candidate movie ids) and
            ``"userId"`` (ids of the group members; a single scalar id is
            accepted as a one-member group).
        top_k: Maximum number of movies to return. Defaults to 10.
        model: Fitted model exposing ``bi``, ``qi`` and ``pu`` arrays. Defaults
            to the notebook-level ``svd``.

    Returns:
        Up to ``top_k`` ids taken from ``row["unwatched"]``, highest score first.

    Raises:
        ValueError: If ``top_k`` is negative.
    """
    if top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")
    factor_model = svd if model is None else model

    candidates: np.ndarray = np.asarray(row["unwatched"])
    if candidates.size == 0:
        # Nothing to rank; also avoids fancy-indexing with an empty float array.
        return candidates
    members: np.ndarray = np.atleast_1d(row["userId"])

    # One "average user" vector for the whole group.
    avg_user_factors: np.ndarray = np.mean(factor_model.pu[members], axis=0)
    movie_pseudorating: np.ndarray = factor_model.bi[candidates] + (
        factor_model.qi[candidates] @ avg_user_factors
    )
    # Negate so that argsort's ascending order yields the best movies first.
    return candidates[np.argsort(-movie_pseudorating)[:top_k]]

In [None]:
# Produce average-user recommendations by handing the recommender, the
# evaluation frame and the group names to the project helper.
# NOTE(review): presumably the helper calls `recommend_avg_user` once per row
# of the frame for each group - confirm in utils.recommendations_evaluation.
recommends_avg_user: pd.DataFrame = generate_recommendations(
    recommend_avg_user, unwatched_train_watched_test_movies, GROUPS_LIST
)

In [8]:
# Score the average-user recommendations for every group. The bare expression
# on the last line renders the resulting table (MAP and NDCG per group) as the
# cell output.
average_user_results: pd.DataFrame = evaluate_recommendations(recommends_avg_user, GROUPS_LIST)
average_user_results  # pylint: disable=pointless-statement

ClearML Monitor: Could not detect iteration reporting, falling back to iterations as seconds-from-start


Unnamed: 0,MAP,NDCG
group1,0.013029,0.119228
group2,0.013027,0.117886
group3,0.012804,0.115516
group4,0.012553,0.11302
group5,0.012593,0.113308
group6,0.012281,0.11105
group7,0.012091,0.109125


## Group Sum

In [9]:
def recommend_group_sum(row: pd.Series, top_k: int = 10, model=None) -> np.ndarray:
    """Rank a group's unwatched movies by the summed factor scores of all members.

    Each candidate is scored as ``bi[movie] + sum_u(qi[movie] @ pu[u])`` over
    the group members ``u``.

    NOTE(review): the item bias is added once while the factor term is summed
    over every member, so the bias carries less weight as the group grows -
    confirm this weighting is intended.

    NOTE(review): the values in ``row["unwatched"]`` and ``row["userId"]`` are
    used directly as positions in the model's ``bi`` / ``qi`` / ``pu`` arrays.
    Surprise lays those arrays out by *inner* id - confirm the ids in the frame
    are already inner ids (otherwise map them via ``trainset.to_inner_iid`` /
    ``trainset.to_inner_uid`` first).

    Args:
        row: Record providing ``"unwatched"`` (candidate movie ids) and
            ``"userId"`` (ids of the group members; a single scalar id is
            accepted as a one-member group).
        top_k: Maximum number of movies to return. Defaults to 10.
        model: Fitted model exposing ``bi``, ``qi`` and ``pu`` arrays. Defaults
            to the notebook-level ``svd``.

    Returns:
        Up to ``top_k`` ids taken from ``row["unwatched"]``, highest score first.

    Raises:
        ValueError: If ``top_k`` is negative.
    """
    if top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")
    factor_model = svd if model is None else model

    candidates: np.ndarray = np.asarray(row["unwatched"])
    if candidates.size == 0:
        # Nothing to rank; also avoids fancy-indexing with an empty float array.
        return candidates
    members: np.ndarray = np.atleast_1d(row["userId"])

    # (n_candidates, n_members) matrix of per-member factor scores.
    per_member_scores: np.ndarray = factor_model.qi[candidates] @ factor_model.pu[members].T
    movie_pseudorating: np.ndarray = factor_model.bi[candidates] + np.sum(per_member_scores, axis=1)
    # Negate so that argsort's ascending order yields the best movies first.
    return candidates[np.argsort(-movie_pseudorating)[:top_k]]

In [None]:
# Produce group-sum recommendations by handing the recommender, the evaluation
# frame and the group names to the project helper.
# NOTE(review): presumably the helper calls `recommend_group_sum` once per row
# of the frame for each group - confirm in utils.recommendations_evaluation.
recommends_group_sum: pd.DataFrame = generate_recommendations(
    recommend_group_sum, unwatched_train_watched_test_movies, GROUPS_LIST
)

In [11]:
# Score the group-sum recommendations for every group. The bare expression on
# the last line renders the resulting table (MAP and NDCG per group) as the
# cell output.
group_sum_results: pd.DataFrame = evaluate_recommendations(recommends_group_sum, GROUPS_LIST)
group_sum_results  # pylint: disable=pointless-statement

Unnamed: 0,MAP,NDCG
group1,0.013029,0.119228
group2,0.011667,0.109666
group3,0.010804,0.103025
group4,0.010118,0.097659
group5,0.010304,0.098881
group6,0.009949,0.096097
group7,0.009546,0.093057


In [None]:
# Attach both metric tables to the ClearML task as named artifacts.
task.upload_artifact(name="avg_user_metrics", artifact_object=average_user_results)
task.upload_artifact(name="group_sum_metrics", artifact_object=group_sum_results)

In [13]:
# Close the ClearML task now that both metric tables have been uploaded.
task.close()