In [None]:
import torch
import os

from src.rl.evaluators.evaluator_dqn import EvaluatorDQN
from src.rl.evaluators.evaluator_c51 import EvaluatorC51
from src.rl.evaluators.evaluator_qr import EvaluatorQR
from src.rl.evaluators.evaluator_iqn import EvaluatorIQN
from src.rl.evaluators.evaluator_fqf import EvaluatorFQF
from src.rl.evaluators.evaluator_ddpg import EvaluatorDDPG
from src.rl.evaluators.evaluator_td3 import EvaluatorTD3
from src.rl.evaluators.evaluator_reinforce import EvaluatorREINFORCE

In [None]:
# Enable IPython's autoreload extension so that edits to imported project
# modules are picked up without restarting the kernel (mode 2 = reload all
# modules before executing each cell).
%load_ext autoreload
%autoreload 2

# Random Baseline

In the following cells, we create random predictions to be used as a lower-bound baseline. We compute random baselines for the average return on the development set, as well as predictions on the test set to be evaluated via the MIND competition.

In [None]:
from csv import writer
from io import StringIO
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

import src.constants as constants
from src.common_utils import read_pickled_data

In [None]:
# Seeds for the random-baseline runs: one baseline run is performed per entry.
# Change the values, if wanted.
seeds = [7, 42]

In [None]:
# Discount factor applied per ranking position when computing the return.
gamma = 0.9

# Directory under which all random-baseline artefacts are stored.
model_dir = os.path.join(constants.MODELS_PATH, "Baseline")

# Preprocessed development-set behaviours; the path is passed as a list of
# components (presumably joined by `read_pickled_data` -- verify in
# src.common_utils).
eval_data = read_pickled_data(
    [constants.DEV_PATH, "preprocessed", "behaviors.pkl"]
)

In [None]:
def _discounted_return(ranked_news, clicked_news, gamma):
    """Compute the discounted return of one ranked impression.

    Args:
        ranked_news: Iterable of news ids in the order they are "shown".
        clicked_news: Container of clicked news ids (membership-tested).
        gamma: Discount factor; the item at 0-based position ``t`` is
            weighted by ``gamma**t``.

    Returns:
        Sum of ``gamma**t`` over every position ``t`` whose news id was
        clicked (reward 1 for a click, 0 otherwise).
    """
    # Explicit accumulation loop (rather than ``sum``) keeps the floating-point
    # addition order identical to a plain running total.
    total = 0.0
    for t, news_id in enumerate(ranked_news):
        if news_id in clicked_news:
            total += gamma**t
    return total


# Run one random baseline per seed and store its mean return on disk.
for seed in seeds:
    columns = ["checkpoint", "mean_return"]

    pred_dir_name = "predictions" if seed is None else f"predictions_{seed}"
    pred_dir = os.path.join(model_dir, pred_dir_name)
    os.makedirs(pred_dir, exist_ok=True)

    # Seed the global NumPy generator so that each run is reproducible.
    np.random.seed(seed)

    returns = []
    for row in tqdm(eval_data.itertuples(), total=len(eval_data)):
        # Shuffle a COPY of the candidates. `np.random.shuffle` works in place,
        # and `row.shown_news` is the very object stored in `eval_data`;
        # shuffling it directly would permanently reorder the data, making
        # every later seed (and every re-run of this cell) start from a
        # different candidate order and thus be irreproducible.
        ranking = list(row.shown_news)
        np.random.shuffle(ranking)

        returns.append(
            _discounted_return(ranking, set(row.clicked_news), gamma)
        )

    # Average return over all impressions for this seed.
    mean_return = np.array(returns).mean()
    data_eval_results = pd.DataFrame(
        [["Baseline", mean_return]],
        columns=columns,
    )

    print(f"[INFO] writing evaluation results file to {pred_dir}")
    data_eval_results.to_csv(
        os.path.join(pred_dir, "eval_results.txt"),
        sep='\t',
        index=False,
        header=True
    )

### Development Set

We compute the average return for multiple random baselines, as well as the average over all baselines.

# Evaluation

In [None]:
# Evaluate the trained DQN model identified by `model_name`.
model_name = "DQN-nf-trainnorm-noeng-m"
development = True

# Prefer the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

evaluator = EvaluatorDQN(development, model_name, device, seed=7)
# NOTE(review): `set_evaluatee` presumably selects/loads the model to be
# evaluated before `evaluate` runs -- confirm in src.rl.evaluators.
evaluator.set_evaluatee()
evaluator.evaluate()