In [None]:
from typing import List
import sys

import pandas as pd
import numpy as np
from tqdm.autonotebook import tqdm

from recsys24_daos.models import OpenPop
from recsys24_daos.datasets import to_microsoft
from recsys24_daos.model_selection import time_freq_split_current

In [None]:
# Frequency string passed to time_freq_split_current when building the folds.
# "W-THU" is the pandas offset alias for weekly periods anchored on Thursday
# (assuming the splitter forwards it to pandas — TODO confirm).
SPLITS_FREQ = "W-THU"
# Forwarded as the `normalize` keyword of time_freq_split_current.
SPLITS_NORMALIZE = True
# NOTE(review): not referenced by any visible cell — presumably meant to
# restrict the evaluation to the last N folds; confirm or remove.
LAST_SPLITS = 10

# Cutoffs k at which recommendations are generated and the metrics are computed.
K_RECOMMENDATIONS: List[int] = [5, 10, 15]

## Obtain dataset

In [None]:
# Load the proposals and the votes, parsing their timestamp columns as datetimes.
dfp = pd.read_csv("../data/decentraland/proposals.csv", parse_dates=['date', 'start', 'end'])
dfv = pd.read_csv("../data/decentraland/votes.csv", parse_dates=['date'])

# Convert the votes with the project helper; presumably this yields the
# userID / itemID / timestamp layout that later cells access — confirm in
# to_microsoft.
df = to_microsoft(dfv)

# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly: wrapping it in print() emitted a stray "None" line after
# every summary.
dfp.info()
dfv.info()
df.info()

## Running the OpenPop baseline

In [None]:
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k

In [None]:
from collections import defaultdict

# Materialise every (train, test) fold; a list has a length, so tqdm can show
# a total for the progress bar.
folds = list(time_freq_split_current(df, SPLITS_FREQ, dfp, remove_not_in_train_col='userID', normalize=SPLITS_NORMALIZE))

# Ranking metrics evaluated at every cutoff in K_RECOMMENDATIONS.
# (The original literal listed 'ndcg' twice; the duplicate key was a no-op.)
metrics_f = {
    'precision': precision_at_k,
    'ndcg': ndcg_at_k,
    'map': map_at_k,
    'recall': recall_at_k,
}

# Column name -> one value per fold. `metrics` holds the OpenPop baseline,
# `perfectmetrics` the scores obtained when the ground truth is fed back as
# the prediction (a reference ceiling for each metric).
metrics = defaultdict(list)
perfectmetrics = defaultdict(list)

for i, (train, test) in enumerate(tqdm(folds)):
    # Latest timestamp seen in training; used as the reference time of the fold.
    t = train.timestamp.max()

    assert not train.empty, f"Train should not be empty on fold {i}"
    if test.empty:
        # Best effort: the fold is still evaluated, only a warning is emitted.
        print(f"Warning, empty test fold {i}", file=sys.stderr)

    # Proposals that are open at time t (start and end strictly around t).
    open_proposals = dfp[(dfp['start'] < t) & (t < dfp['end'])]['id'].unique()

    # NOTE(review): train_filtered is computed but not used below — the model
    # is built from the unfiltered `train`. Confirm which one is intended.
    train_filtered = train[train['itemID'].isin(open_proposals)]
    test_filtered = test[test['itemID'].isin(open_proposals)]

    metrics['t'].append(t)
    perfectmetrics['t'].append(t)

    # Neither the model nor the "perfect" predictions depend on k, so they are
    # built once per fold instead of once per cutoff.
    model = OpenPop(train)
    test_users = test_filtered['userID'].unique()
    perfect_recs = test_filtered.copy()
    perfect_recs['prediction'] = True

    for k_recs in K_RECOMMENDATIONS:
        recs = model.recommend_k_items(test_users, k_recs, recommend_from=open_proposals)

        for m, f in metrics_f.items():
            metrics[m+f'@{k_recs}'].append(f(test_filtered, recs, k=k_recs))
            perfectmetrics[m+f'@{k_recs}'].append(f(test_filtered, perfect_recs, k=k_recs))


## Plotting some graphs

In [None]:
# One row per fold, one column per baseline metric@k (plus the fold time 't').
mdf = pd.DataFrame(metrics)

# Add the ground-truth-as-prediction precision next to the baseline precision
# for the two plotted cutoffs, remembering the column order for the plot.
plot_columns = []
for cutoff in (5, 10):
    baseline_col = f'precision@{cutoff}'
    perfect_col = f'perfect {baseline_col}'
    mdf[perfect_col] = perfectmetrics[baseline_col]
    plot_columns.extend([baseline_col, perfect_col])

mdf[plot_columns].plot(title='Evaluación modelo baseline MP')

# Last expression of the cell: summary statistics shown as the cell output.
mdf.describe()