In [None]:
import clearml
import numpy as np
import pandas as pd
from dotenv import load_dotenv

from utils.evaluation import evaluate_recommendations, generate_recommendations
from utils.recommender_data_preprocessor import get_recommender_data

# Seed NumPy's legacy global RNG so any np.random-based step in this run is repeatable.
np.random.seed(42)
# Load variables from a local .env file into the process environment
# (presumably the ClearML credentials used below — TODO confirm).
load_dotenv()

In [None]:
# Register this notebook run as a ClearML task. The returned handle is used
# further down to upload the metrics table and to close the task.
task = clearml.Task.init(
    project_name="MoviesGRS_MFDP", task_name="TopPopularMoviesRecommender", tags=["TopPopularMoviesRecommender"]
)

In [3]:
# Names of the seven evaluation groups: "group1" through "group7".
GROUPS_LIST = [f"group{group_number}" for group_number in range(1, 7 + 1)]

In [4]:
# Build the recommender input frame for every group in GROUPS_LIST via the project helper.
# NOTE(review): the rows are presumably what `recommend` receives (it reads an
# "unwatched" field) — confirm against utils.recommender_data_preprocessor.
recommender_data: pd.DataFrame = get_recommender_data(GROUPS_LIST)

In [5]:
# Popularity ranking: one row per movie with the number of distinct users that
# rated it in the training split, ordered from most to least popular.
top_popular_movies: pd.DataFrame = (
    pd.read_parquet("data/ratings_train.pq")
    .groupby("movieId")
    .agg(userCount=("userId", "nunique"))
    .sort_values("userCount", ascending=False)
    .reset_index()
)

In [6]:
def recommend(row: pd.Series, top_n: int = 10) -> np.ndarray:
    """Return the most popular movies that are still unwatched for ``row``.

    Walks the module-level ``top_popular_movies`` frame in its stored order
    (most popular first) and keeps every movie id found in ``row["unwatched"]``
    until ``top_n`` ids have been collected.

    Args:
        row: A record exposing an ``"unwatched"`` entry that supports the
            ``in`` operator.
            NOTE(review): membership semantics depend on the container type
            (e.g. a pandas Series tests its index, not its values) — confirm
            what the preprocessor stores here.
        top_n: Maximum number of recommendations to return. Defaults to 10,
            which reproduces the previous hard-coded behaviour.

    Returns:
        A NumPy array with at most ``top_n`` movie ids in popularity order.
        It is shorter when fewer unwatched movies exist, and empty when there
        are none or when ``top_n`` is not positive.
    """
    # Look the container up once instead of on every loop iteration.
    unwatched = row["unwatched"]
    top_movies = []
    for movie in top_popular_movies.movieId:
        # Stop scanning as soon as the quota is filled.
        if len(top_movies) >= top_n:
            break
        if movie in unwatched:
            top_movies.append(movie)
    return np.array(top_movies)

In [None]:
# Run `recommend` through the project helper for the groups in GROUPS_LIST.
# NOTE(review): presumably applied row-wise to recommender_data, once per group —
# confirm in utils.evaluation.generate_recommendations.
recommends: pd.DataFrame = generate_recommendations(recommend, recommender_data, GROUPS_LIST)

In [8]:
# Score the generated recommendations per group; the rendered table below has
# one row per group with MAP and NDCG columns.
results: pd.DataFrame = evaluate_recommendations(recommends, GROUPS_LIST)
# Bare expression so the notebook renders the frame as the cell output.
results  # pylint: disable=pointless-statement

ClearML Monitor: Could not detect iteration reporting, falling back to iterations as seconds-from-start


Unnamed: 0,MAP,NDCG
group1,0.056851,0.277816
group2,0.049428,0.258088
group3,0.043397,0.240154
group4,0.038745,0.226101
group5,0.03979,0.228874
group6,0.036306,0.217078
group7,0.03388,0.208103


In [None]:
# Attach the metrics table to the ClearML task as an artifact named "metrics".
task.upload_artifact("metrics", results)

In [10]:
# Close the ClearML task now that the metrics artifact has been uploaded.
task.close()