In [1]:
# Imports

import argparse
import json
import logging
import os
import random
import sys
from pathlib import PurePath
from dataclasses import dataclass
from typing import Tuple, Iterable
from multiprocessing import Pool

import numpy as np
import pandas as pd
import torch
from torch import Tensor

from reagent.ope.estimators.estimator import Estimator, EstimatorResult, Evaluator
from reagent.ope.estimators.contextual_bandits_estimators import (
    Action,
    ActionDistribution,
    ActionRewards,
    BanditsEstimatorInput,
    BanditsModel,
    DMEstimator,
    DoublyRobustEstimator,
    IPSEstimator,
    LogSample,
    SwitchEstimator,
    SwitchDREstimator
)
from reagent.ope.estimators.types import ActionSpace, Policy, Trainer
from reagent.ope.trainers.linear_trainers import (
    LogisticRegressionTrainer,
    SGDClassifierTrainer,
    TrainingData,
    DecisionTreeTrainer,
    LinearTrainer,
    NNTrainer
)
from reagent.ope.test.multiclass_bandits import (
    MultiClassDataRow,
    UCIMultiClassDataset,
    MultiClassContext,
    MultiClassModel,
    MultiClassPolicy,
    evaluate_all
)
from reagent.ope.utils import RunningAverage, Clamper

import matplotlib
import matplotlib.pyplot as plt

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## Configuration Settings

Edit the `experiments` list below so that it contains the names of the UCI datasets (given in `reagent/test/data`) you want to evaluate; results are produced for each dataset in the list.

In [2]:
# Configuration

DEFAULT_ITERATIONS = 500
TEST_ROOT_PATH = '..'
UCI_DATASET_CONFIGS = os.path.join(TEST_ROOT_PATH, 'configs')
MAX_METRIC_NAME_LENGTH = 20

# Names of the datasets to evaluate; each needs a "<name>_config.json" file
# in UCI_DATASET_CONFIGS.
experiments = ["ecoli", "letter_recog", "pendigits", "optdigits", "satimage"]
#experiments = ["ecoli"]

# One {"name": ..., "params": ...} entry per experiment, in the order above.
experiment_params = []
for exp in experiments:
    config_file = os.path.join(UCI_DATASET_CONFIGS, exp + '_config.json')
    with open(config_file, "r") as f:
        params = json.load(f)

    # Dataset file paths in the config are re-rooted under TEST_ROOT_PATH.
    if "dataset" in params and "file" in params["dataset"]:
        params["dataset"]["file"] = os.path.join(
            TEST_ROOT_PATH, params["dataset"]["file"]
        )

    experiment_params.append({"name": exp, "params": params})

## Run an experiment

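We load the given dataset and create trainers (which are used to generate the logging and target policies). To try different trainers, modify the `log_trainer` and `tgt_trainer` variables with different `LinearTrainer`s. Note that `evaluate_all_noisy`, as written below, builds the logging policy from a uniform action distribution, so `log_trainer` does not currently influence the logging policy.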

Note that DM's performance is highly dependent on the reward model. To try different reward models, modify the trainer passed into `DMEstimator` and `DoublyRobustEstimator` with different `LinearTrainer`s. 

In [3]:
def evaluate_all_noisy(
    experiments: Iterable[Tuple[Iterable[Estimator], int]],
    dataset: UCIMultiClassDataset,
    log_trainer: Trainer,
    log_epsilon: float,
    tgt_trainer: Trainer,
    tgt_epsilon: float,
    max_num_workers: int,
    random_reward_prob: float = 0.0,
    device=None,
):
    """Evaluate estimators on simulated bandit logs whose rewards may be noisy.

    A target policy is built from ``tgt_trainer``'s predicted probabilities and
    a logging policy is built from a uniform distribution over the actions.
    For every ``(estimators, num_samples)`` entry, ``num_samples`` rows are
    drawn (with replacement) from the rows outside the training split, both
    policies are queried, and a ``LogSample`` is recorded. With probability
    ``random_reward_prob`` the logged reward is replaced by a random 0/1 value.

    Args:
        experiments: iterable of ``(estimators, num_samples)`` pairs; each pair
            becomes one evaluation task.
        dataset: dataset providing features, labels and the train/val/test split.
        log_trainer: accepted for interface compatibility; the logging policy
            is uniform, so this trainer is neither trained nor queried here.
        log_epsilon: epsilon passed to the logging ``MultiClassPolicy``.
        tgt_trainer: trainer for the target policy; trained (and saved next to
            the dataset config file) if it is not trained yet.
        tgt_epsilon: epsilon passed to the target ``MultiClassPolicy``.
        max_num_workers: forwarded to ``Evaluator``.
        random_reward_prob: probability of replacing a logged reward with
            ``random.randint(0, 1)``.
        device: currently unused.

    Returns:
        Whatever ``Evaluator.evaluate()`` returns for the generated tasks.
    """
    action_space = ActionSpace(dataset.num_actions)
    config_path = PurePath(dataset.config_file)
    data_name = config_path.stem
    tgt_model_name = data_name + "_" + tgt_trainer.__class__.__name__ + ".pickle"
    tgt_model_file = str(config_path.with_name(tgt_model_name))

    # The split is always required, even when the trainer is already trained:
    # `train_choices` decides which rows may be sampled for evaluation below.
    # (Previously it was only bound when a trainer was untrained, which raised
    # NameError for pre-trained trainers.)
    (
        train_x,
        train_y,
        _train_r,
        val_x,
        val_y,
        _val_r,
        _test_x,
        _test_y,
        _test_r,
        train_choices,
    ) = dataset.train_val_test_split((0.5, 0.8))

    if not tgt_trainer.is_trained:
        trainer_data = TrainingData(train_x, train_y, None, val_x, val_y, None)
        tgt_trainer.train(trainer_data)
        # The saved model is not loaded back by this function, so an untrained
        # trainer is retrained on every call.
        tgt_trainer.save_model(tgt_model_file)

    tgt_results = tgt_trainer.predict(dataset.features)
    assert tgt_results.probabilities is not None
    tgt_policy = MultiClassPolicy(action_space, tgt_results.probabilities, tgt_epsilon)

    # Uniform logging policy. To use a trained logging policy instead, train
    # `log_trainer` on the same TrainingData and pass
    # `log_trainer.predict(dataset.features).probabilities` here.
    uniform = torch.full(tgt_results.probabilities.shape, 1.0 / len(action_space))
    log_policy = MultiClassPolicy(action_space, uniform, log_epsilon)

    tasks = []
    # Only rows that were not part of the training split are sampled.
    test_queries = list(set(range(len(dataset))) - set(train_choices))
    for estimators, num_samples in experiments:
        samples = []
        for _ in range(num_samples):
            # random.sample returns a one-element list, so `qid` is a list and
            # the tensor lookups below use list indexing.
            qid = random.sample(test_queries, 1)
            label = int(dataset.labels[qid].item())
            log_action, log_action_probabilities = log_policy(qid)
            log_reward = 1.0 if log_action.value == label else 0.0
            tgt_action, tgt_action_probabilities = tgt_policy(qid)
            ground_truth_reward = 1.0 if tgt_action.value == label else 0.0
            item_feature = dataset.features[qid]
            # Decide whether this sample's logged reward is replaced by noise.
            random_reward = random.random() < random_reward_prob
            samples.append(
                LogSample(
                    context=qid,
                    log_action=log_action,
                    log_reward=random.randint(0, 1) if random_reward else log_reward,
                    log_action_probabilities=log_action_probabilities,
                    tgt_action_probabilities=tgt_action_probabilities,
                    tgt_action=tgt_action,
                    ground_truth_reward=ground_truth_reward,
                    item_feature=item_feature,
                )
            )
        tasks.append((estimators, BanditsEstimatorInput(action_space, samples, False)))

    evaluator = Evaluator(tasks, max_num_workers)
    results = evaluator.evaluate()
    Evaluator.report_results(results)
    return results

In [4]:
def load_dataset(params):
    """Build a ``UCIMultiClassDataset`` from the ``"dataset"`` section of a config dict."""
    dataset_config = params["dataset"]
    return UCIMultiClassDataset(dataset_config)

In [5]:
# Experiment(s)
def run_experiment(
    dataset,
    seed=1234,
    num_runs=100,
    num_samples=1000,
    random_reward_prob=0.5,
    max_num_workers=0,
):
    """Run the noisy-reward evaluation of all estimators on one dataset.

    Args:
        dataset: dataset object passed through to ``evaluate_all_noisy``.
        seed: seed applied to ``random``, ``numpy`` and ``torch`` before the run.
        num_runs: number of independent evaluation tasks to create.
        num_samples: number of logged samples generated for each task.
        random_reward_prob: probability that a logged reward is replaced by a
            random 0/1 value.
        max_num_workers: forwarded to ``evaluate_all_noisy``.

    Returns:
        The results returned by ``evaluate_all_noisy``.
    """
    # Seed every RNG involved so repeated runs draw the same samples.
    random.seed(seed)
    np.random.seed(seed)
    torch.random.manual_seed(seed)

    log_trainer = LogisticRegressionTrainer()
    log_epsilon = 0.1
    tgt_trainer = SGDClassifierTrainer()
    tgt_epsilon = 0.1

    def make_estimators():
        # Fresh estimator (and reward-model trainer) instances for every task.
        return (
            SwitchEstimator(LogisticRegressionTrainer(), rmax=1.0),
            SwitchDREstimator(LogisticRegressionTrainer(), rmax=1.0),
            DMEstimator(LogisticRegressionTrainer()),
            IPSEstimator(),
            DoublyRobustEstimator(LogisticRegressionTrainer()),
        )

    # Named `runs` so the notebook-level `experiments` list is not shadowed.
    runs = [(make_estimators(), num_samples) for _ in range(num_runs)]
    results = evaluate_all_noisy(
        runs,
        dataset,
        log_trainer,
        log_epsilon,
        tgt_trainer,
        tgt_epsilon,
        max_num_workers,
        random_reward_prob,
    )
    return results


## Result Generation

For each UCI dataset, we generate a logging policy and a target policy, create a simulated dataset using the logging policy, and evaluate the target policy using the DM, IPS, DR, Switch, and Switch-DR estimators. The bias, RMSE, and variance against the ground truth are plotted for each dataset.


For the settings with the logging policy trained with a `LogisticRegressionTrainer`, the target policy with a `SGDClassifierTrainer`, and the reward model for DM and DR trained with a `LogisticRegressionTrainer`, a sample result gives:


![Bias](img/bias.png "Bias")![Variance](img/variance.png "Variance")![RMSE](img/rmse.png "RMSE")

In [6]:
# One dataset object per configured experiment, in the same order as
# `experiment_params`.
datasets = [load_dataset(entry['params']) for entry in experiment_params]

# Names of the datasets whose results have been recorded so far.
labels = []

# Estimator name -> list of metric values, one value per evaluated dataset.
bias_result_mapping, var_result_mapping, rmse_result_mapping = {}, {}, {}

In [None]:
# Run every experiment that has not been recorded yet and collect, per
# estimator, the bias, RMSE and variance of its estimates.
for dataset, params in zip(datasets, experiment_params):
    experiment_name = params["name"]
    if experiment_name in labels:
        # Already recorded by an earlier execution of this cell; skip it
        # without announcing a run that does not happen.
        continue
    print("Running experiment " + experiment_name)
    exp_results = run_experiment(dataset)

    # Compute all metrics first and commit them afterwards, so a failure
    # part-way through cannot leave `labels` and the result mappings with
    # different lengths.
    experiment_metrics = []
    for estimator_name, result in exp_results.items():
        _, _, _, tgt_gt, _, _ = result.report()
        # Variance of the estimated rewards across the individual runs.
        result_var = torch.tensor(
            [res.estimated_reward for res in result.results],
            dtype=torch.double,
        ).var().item()
        experiment_metrics.append(
            (
                estimator_name,
                tgt_gt.bias.cpu().numpy(),
                result_var,
                tgt_gt.rmse.cpu().numpy(),
            )
        )

    for estimator_name, result_bias, result_var, result_rmse in experiment_metrics:
        bias_result_mapping.setdefault(estimator_name, []).append(result_bias)
        var_result_mapping.setdefault(estimator_name, []).append(result_var)
        rmse_result_mapping.setdefault(estimator_name, []).append(result_rmse)
    labels.append(experiment_name)



Running experiment ecoli
SwitchEstimator(trainer(logistic_regression),weight_clamper(Clamper(-inf,inf)),device(None)) rewards: log_reward0.3145700000000001 tgt_reward[0.5792702929675579] gt_reward[0.6479199999999998], diffs: tgt-gt[samples=100, rmse=0.08826843444395464, bias=-0.06864970703244203, variance=0.003109630549036968] tgt-log[samples=100, rmse=0.269050868616176, bias=0.2647002929675578, variance=0.0023455806121291606]
SwitchDREstimator(trainer(logistic_regression),weight_clamper(Clamper(-inf,inf)),device(None)) rewards: log_reward0.3145700000000001 tgt_reward[0.5768058840930462] gt_reward[0.6479199999999998], diffs: tgt-gt[samples=100, rmse=0.08041803618813323, bias=-0.07111411590695375, variance=0.001424083902149753] tgt-log[samples=100, rmse=0.26409560671470594, bias=0.2622358840930461, variance=0.000988717757522366]
DMEstimator(trainer(logistic_regression,device(None)) rewards: log_reward0.3145700000000001 tgt_reward[0.4787884335635231] gt_reward[0.6479199999999998], diffs:

In [None]:
# Generate Bar Charts, a la https://arxiv.org/pdf/1511.03722.pdf
print(labels)
def create_and_show_chart(labels, results, title):
    """Show a grouped bar chart: one group per label, one bar per metric.

    Args:
        labels: x-axis tick labels, one per group of bars.
        results: mapping from metric name to a sequence of values, one value
            per label; metric names are truncated to MAX_METRIC_NAME_LENGTH
            characters in the legend.
        title: text used as the y-axis label.
    """
    metrics = list(results.keys())

    # Width of each bar: 0.1 by default, narrower only when that many bars
    # would no longer fit inside one group.
    width = min(0.1, 0.8 / max(len(metrics), 1))

    # Left-most bar position of every group on the x axis.
    group_start = np.arange(len(results[metrics[0]]))

    fig, ax = plt.subplots()
    for offset, metric in enumerate(metrics):
        ax.bar(
            group_start + offset * width,
            results[metric],
            width,
            label=metric[:MAX_METRIC_NAME_LENGTH],
        )

    ax.set_ylabel(title)
    # Put each tick under the middle of its group, whatever the number of
    # metrics (a fixed offset of one bar width is only centred for three bars).
    group_center = (len(metrics) - 1) * width / 2
    ax.set_xticks([r + group_center for r in range(len(labels))])
    ax.set_xticklabels(labels)
    ax.legend()

    fig.tight_layout()

    plt.show()

# One chart per metric, drawn in the order bias, RMSE, variance.
for chart_title, chart_results in (
    ('Bias', bias_result_mapping),
    ('RMSE', rmse_result_mapping),
    ('Variance', var_result_mapping),
):
    create_and_show_chart(labels, chart_results, chart_title)