In [1]:
# Data libraries. NumPy's global RNG is seeded so that any NumPy-based
# randomness used later in the notebook is repeatable between runs.
import pandas as pd
import numpy as np
np.random.seed(42)

# Helpers for building the input data, generating recommendations and scoring
# them (these look like project-local modules — TODO confirm).
from evaluation import generate_recommendations, evaluate_recommendations
from recommender_data_preprocessor import get_recommender_data

# Experiment tracking.
import clearml

# Read environment variables from a .env file.
# NOTE(review): presumably this supplies the ClearML credentials — confirm.
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Register this notebook run as a ClearML task so that the metrics produced
# below can be attached to it.
task = clearml.Task.init(
    project_name='MoviesGRS_MFDP',
    task_name='TopPopularMoviesRecommender',
    tags=['TopPopularMoviesRecommender', 'Evaluation', 'TimeSeriesSplit'],
)

ClearML Task: created new task id=4c80f417962142c2a89705999579622f
2023-06-01 22:15:21,405 - clearml.Task - INFO - Storing jupyter notebook directly as code
ClearML results page: https://app.clear.ml/projects/f3cb8157bfe7443abdc531a44bb15332/experiments/4c80f417962142c2a89705999579622f/output/log


In [3]:
# Names of the seven group columns: 'group1' through 'group7'.
GROUPS_LIST = ['group{}'.format(number) for number in range(1, 8)]

In [4]:
# Build the evaluation input for the groups named in GROUPS_LIST.
# NOTE(review): the exact schema comes from recommender_data_preprocessor;
# `recommend` below relies on each row exposing an "unwatched" entry.
recommender_data = get_recommender_data(GROUPS_LIST)

In [5]:
# Popularity table: one row per movieId with the number of distinct users who
# rated it in the training split ("userCount"), most popular first.
top_popular_movies = (
    pd.read_parquet('data/ratings_train.pq')
    .groupby('movieId')['userId']
    .nunique()
    .sort_values(ascending=False)
    .rename('userCount')
    .reset_index()
)

In [6]:
def recommend(row, top_n=10):
    """Return the most popular movies that are still unwatched for ``row``.

    Walks the module-level ``top_popular_movies`` table in its stored order
    (most popular first) and collects the movie ids that are contained in
    ``row["unwatched"]``.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by ``"unwatched"``; that entry must support the
        ``in`` operator for movie ids.
        NOTE(review): in this notebook it appears to be a list/array of
        movie ids per row — confirm against ``get_recommender_data``.
    top_n : int, default 10
        Maximum number of movies to return.

    Returns
    -------
    numpy.ndarray
        Up to ``top_n`` movie ids ordered by popularity. The array is shorter
        (possibly empty) when fewer popular movies are unwatched. Previously
        the function fell off the end of the loop in that case and returned
        ``None``.
    """
    # Look the container up once; it does not change during the scan.
    unwatched = row["unwatched"]
    top_movies = []
    for movie in top_popular_movies.movieId:
        # Stop as soon as the quota is filled (also covers top_n <= 0).
        if len(top_movies) >= top_n:
            break
        if movie in unwatched:
            top_movies.append(movie)
    # Always return an array, even when the quota could not be filled.
    return np.array(top_movies)

In [7]:
# Run `recommend` through the generate_recommendations helper for the groups in
# GROUPS_LIST. Judging by the preview output, the result carries one
# `<group>_rec` column per group — NOTE(review): confirm in evaluation.py.
recommends = generate_recommendations(recommend, recommender_data, GROUPS_LIST)
# Preview a single row rather than dumping the whole frame.
recommends.head(1)

Unnamed: 0,userId,group1,group2,group3,group4,group5,group6,group7,movieId,rating,unwatched,group1_rec,group2_rec,group3_rec,group4_rec,group5_rec,group6_rec,group7_rec
0,1,1,14465,39625,6774,4424,23830,7737,"[613, 176, 734, 114, 270, 485, 352, 201, 571, ...","[5.0, 5.0, 5.0, 5.0, 4.5, 4.5, 4.5, 4.5, 4.5, ...","[22, 80, 14, 26, 310, 376, 99, 96, 75, 70, 74,...","[22, 80, 14, 26, 310, 376, 99, 96, 75, 70]","[80, 310, 376, 99, 96, 75, 70, 74, 98, 243]","[176, 407, 550, 251, 41, 95, 111, 406, 54, 69]","[26, 99, 96, 75, 74, 98, 243, 63, 267, 156]","[80, 26, 98, 243, 204, 166, 197, 33, 71, 86]","[22, 80, 310, 376, 75, 70, 74, 98, 243, 265]","[1, 50, 95, 94, 54, 61, 69, 361, 79, 77]"


In [8]:
# Score the generated recommendations for every group in GROUPS_LIST; the
# displayed table has one row per group with MAP and NDCG columns.
results = evaluate_recommendations(recommends, GROUPS_LIST)
results

ClearML Monitor: Could not detect iteration reporting, falling back to iterations as seconds-from-start


Unnamed: 0,MAP,NDCG
group1,0.056851,0.277816
group2,0.049428,0.258088
group3,0.043397,0.240154
group4,0.038745,0.226101
group5,0.03979,0.228874
group6,0.036306,0.217078
group7,0.03388,0.208103


In [9]:
# Attach the metrics table to the ClearML task under the artifact name 'metrics'.
task.upload_artifact('metrics', results)

True

In [10]:
# Close the ClearML task now that the metrics artifact has been uploaded.
task.close()