In [77]:
import os
# Expose only GPU 0 to CUDA. This is set here, before the deep-learning
# imports in the following cell are executed.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [78]:
import sys
# Put the parent directory first on the module search path - presumably so
# that the project modules imported below (run, utils) resolve from there;
# confirm against the repository layout.
root_path = '../'
sys.path.insert(0, root_path)

import lightning as L
from natsort import natsorted

# NOTE(review): later cells use names that are never imported explicitly in
# the visible cells (json, plt, partial, DPTSolver, ProblemDataModule, run,
# print_sample, print_metrics). Presumably they arrive through these star
# imports - confirm, and consider importing them explicitly.
from run import *
from utils import *

Load checkpoints

In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [51]:
# Select the run whose checkpoint is loaded; run_name is also used further
# down to build the directory for the online-inference results.
run_name = "max_reward"
root_dir = os.path.join("../results", "DPT_3", run_name, "checkpoints")

# Take the last file name in natural sort order (presumably the most recent
# checkpoint - confirm against the checkpoint naming scheme). Fail with an
# explicit message instead of a bare IndexError when the directory is empty.
checkpoint_names = natsorted(os.listdir(root_dir))
if not checkpoint_names:
    raise FileNotFoundError(f"No checkpoints found in {root_dir!r}")
checkpoint = os.path.join(root_dir, checkpoint_names[-1])

# Restore the solver from the checkpoint and move it to the CPU.
model = DPTSolver.load_from_checkpoint(checkpoint).cpu()
config = model.config
# Optional override kept from the original notebook (presumably limits the
# number of problems that are used - confirm before enabling):
# config["problem_params"]["use_problems"] = 1000 #250000
datamodule = ProblemDataModule(config, path='..')
datamodule.setup()

In [52]:
# Override the "online_steps" entry of the model config in place
# (presumably the number of steps of an online test episode - confirm).
model.config["online_steps"] = 51

In [53]:
def online_inference(model, save_path):
    """Run the test loop under two action-selection strategies and save the metrics.

    For every strategy the function overwrites ``model.config["do_sample"]``
    and ``model.config["temperature"]``, runs ``Trainer.test`` on the test
    dataloader of the notebook-level ``datamodule``, reads
    ``model.save_results`` and writes four MAE series to
    ``<save_path>/<strategy>.json``.

    Strategies:
        * ``argmax``   - ``do_sample=False``, ``temperature=0.0``: the
          predicted action is the argmax of the predicted distribution.
        * ``sampling`` - ``do_sample=True``, ``temperature=1.0``: the
          predicted action is sampled from the predicted distribution.

    Args:
        model: solver to evaluate. Its ``config`` mapping must contain
            ``"precision"``; it is mutated in place and, after the call, still
            holds the settings of the last strategy (``sampling``).
        save_path: directory that receives the JSON files; it is created if
            it does not exist yet.

    Note:
        The function depends on the notebook-level global ``datamodule``.
    """
    # json is not imported explicitly in the visible cells; import it here so
    # the function does not depend on what a star import happens to export.
    import json

    # exist_ok=True already covers the "directory exists" case, so no
    # separate os.path.exists check is needed.
    os.makedirs(save_path, exist_ok=True)

    # Read the precision from the model that was passed in rather than from
    # the notebook-global `config`, so the argument is actually honoured.
    tester = L.Trainer(logger=False, precision=model.config["precision"])
    test_dataloader = datamodule.test_dataloader()

    hparams = [
        ("argmax", {"do_sample": False, "temperature": 0.0}),
        ("sampling", {"do_sample": True, "temperature": 1.0}),
    ]
    for label, hparam in hparams:
        model.config["do_sample"] = hparam["do_sample"]
        model.config["temperature"] = hparam["temperature"]
        tester.test(model=model, dataloaders=test_dataloader)
        # presumably populated by the model during the test run - confirm
        results = model.save_results
        # Convert each metric to a plain list so it can be serialised as JSON.
        results = {
            "MAE(best x, x*)": results["best_x_mae"].cpu().tolist(),
            "MAE(best y, y*)": results["best_y_mae"].cpu().tolist(),
            "MAE(all x, x*)": results["all_x_mae"].cpu().tolist(),
            "MAE(all y, y*)": results["all_y_mae"].cpu().tolist(),
        }
        with open(os.path.join(save_path, f'{label}.json'), 'w') as f:
            json.dump(results, f)

In [None]:
# Results of the currently selected run (run_name) are written next to its
# checkpoints directory; save_dir is reused by the plotting cells below.
save_dir = f'../results/DPT_3/{run_name}/online_inference'
online_inference(model, save_dir)

In [43]:
from matplotlib import colormaps as cm

def plot(results, label, axes=None, c='red'):
    """Draw the MAE curves of one result set on a pair of axes.

    The first axes shows the x-error curves, the second the y-error curves
    (log-scaled, with a legend). "all" curves are solid, "best" curves dashed.
    The function neither shows nor lays out the figure; the axes are returned
    so that several result sets can be overlaid by passing them back in.

    Args:
        results: mapping with the keys "MAE(all x, x*)", "MAE(best x, x*)",
            "MAE(all y, y*)" and "MAE(best y, y*)".
        label: prefix of the legend entries on the second axes.
        axes: indexable pair of axes to draw on; a new 1x2 figure is created
            when omitted.
        c: currently unused.

    Returns:
        The axes that were drawn on.
    """
    if axes is None:
        _, axes = plt.subplots(1, 2, figsize=(10, 4))

    axes[0].set_title('MAE (x, x*)')
    axes[1].set_title('MAE (y, y*)')

    # (key in `results`, line style); only the y-panel curves get legend labels.
    x_curves = (("MAE(all x, x*)", '-'), ("MAE(best x, x*)", '--'))
    y_curves = (("MAE(all y, y*)", '-', 'all'), ("MAE(best y, y*)", '--', 'best'))

    for key, line_style in x_curves:
        axes[0].plot(results[key], line_style)
    for key, line_style, suffix in y_curves:
        axes[1].plot(results[key], line_style, label=f'{label} {suffix}')

    # Applied after plotting: pin the left x-limit at 0 on every panel.
    for panel in axes:
        panel.set_xlim(0, None)
        panel.set_xlabel('Step')

    axes[1].set_yscale('log', base=10)
    axes[1].legend(loc=1)

    return axes

max reward

In [None]:
# Overlay the saved curves of every strategy found in save_dir on one pair
# of axes.
read_dir = save_dir
axes = None
# os.listdir returns entries in arbitrary order and may contain entries that
# are not result files (e.g. a checkpoint folder created by Jupyter). Sort the
# names for a stable legend/colour order and keep only the JSON files.
for file_name in sorted(os.listdir(read_dir)):
    label, extension = os.path.splitext(file_name)
    if extension != '.json':
        continue
    file_path = os.path.join(read_dir, file_name)
    with open(file_path) as f:
        results = json.load(f)
    axes = plot(results, label, axes)

dpt reward

In [None]:
# Overlay the saved curves of every strategy found in save_dir on one pair
# of axes.
# NOTE(review): this cell sits under the "dpt reward" heading but reads the
# same save_dir as the other plotting cells; presumably run_name was changed
# and the notebook re-run to produce this figure - confirm.
read_dir = save_dir
axes = None
# os.listdir returns entries in arbitrary order and may contain entries that
# are not result files (e.g. a checkpoint folder created by Jupyter). Sort the
# names for a stable legend/colour order and keep only the JSON files.
for file_name in sorted(os.listdir(read_dir)):
    label, extension = os.path.splitext(file_name)
    if extension != '.json':
        continue
    file_path = os.path.join(read_dir, file_name)
    with open(file_path) as f:
        results = json.load(f)
    axes = plot(results, label, axes)

my reward

In [None]:
# Overlay the saved curves of every strategy found in save_dir on one pair
# of axes.
# NOTE(review): this cell sits under the "my reward" heading but reads the
# same save_dir as the other plotting cells; presumably run_name was changed
# and the notebook re-run to produce this figure - confirm.
read_dir = save_dir
axes = None
# os.listdir returns entries in arbitrary order and may contain entries that
# are not result files (e.g. a checkpoint folder created by Jupyter). Sort the
# names for a stable legend/colour order and keep only the JSON files.
for file_name in sorted(os.listdir(read_dir)):
    label, extension = os.path.splitext(file_name)
    if extension != '.json':
        continue
    file_path = os.path.join(read_dir, file_name)
    with open(file_path) as f:
        results = json.load(f)
    axes = plot(results, label, axes)

In [14]:
# %config InlineBackend.figure_format='retina'

In [36]:
# Pull the underlying datasets out of the datamodule's dataloaders.
# Note that val_online_dataset is taken from the *test* dataloader.
train_offline_dataset = datamodule.train_dataloader().dataset
val_offline_dataset = datamodule.val_dataloader().dataset
val_online_dataset = datamodule.test_dataloader().dataset

In [None]:
# Run the model on the first item of the offline validation dataset, then
# print the sample together with the predictions, followed by the metrics.
sample, outputs, predictions, metrics = run(model, val_offline_dataset[0])
print_sample(sample, predictions)
print_metrics(metrics)

An example of online mode for a problem from the validation dataset.

In [None]:
# Choose the action-selection strategy for the online run.
# Active: argmax of the predicted distribution.
model.config["do_sample"] = False
model.config["temperature"] = 0.0
# Alternative: sampling with temperature 1.0 (uncomment both lines to use).
# model.config["do_sample"] = True
# model.config["temperature"] = 1.0

# Run the model on the first item of val_online_dataset with n_steps=50 and
# print the resulting sample and metrics.
sample, outputs, predictions, metrics = run(model, val_online_dataset[0], n_steps=50)
print_sample(sample, print_ta=True, print_fm=True)
print_metrics(metrics)

Other solvers

In [81]:
from solvers import *

In [None]:
# Use the "problem" entry of the first offline-validation item as the
# problem handed to the solvers in the following cells.
problem = val_offline_dataset[0]["problem"]

In [None]:
# Run a single PROTES solver on the problem and plot logs['y_list'] against
# logs['m_list'] (presumably objective value vs. number of evaluations -
# confirm against the solver's log format).
solver = PROTES(problem=problem, budget=100, k_init=0, k_samples=50, k_top=5, seed=0)
logs = solver.optimize()
plt.plot(logs['m_list'], logs['y_list'], '-o')

In [None]:
# Compare several solvers on the same problem; every solver is given the same
# budget, initialisation and seed.
# Keyword arguments shared by all solvers that draw one sample per step.
single_sample_kwargs = dict(budget=100, k_init=0, k_samples=1, seed=0)
single_sample_solvers = (
    ("OnePlusOne", OnePlusOne),
    ("PSO", PSO),
    ("NoisyBandit", NoisyBandit),
    ("SPSA", SPSA),
    ("Portfolio", Portfolio),
)

# PROTES is the only entry with its own sampling settings (k_samples, k_top).
solvers = [
    ("PROTES", partial(PROTES, budget=100, k_init=0, k_samples=50, k_top=5, seed=0)),
]
solvers.extend(
    (solver_label, partial(solver_factory, **single_sample_kwargs))
    for solver_label, solver_factory in single_sample_solvers
)

# One curve per solver: logs['y_list'] against logs['m_list'].
for name, solver in solvers:
    logs = solver(problem=problem).optimize()
    plt.plot(logs['m_list'], logs['y_list'], label=name)
plt.legend()
plt.show()