In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Render all figure text through LaTeX.
plt.rcParams["text.usetex"] = True

# Global font settings applied to every figure below.
# "normal" is not a font family matplotlib recognises (the generic families
# are serif, sans-serif, cursive, fantasy and monospace). "serif" is named
# explicitly because it is the family matplotlib's TeX manager falls back to
# for unsupported names when text.usetex is on, so the rendered text is
# expected to be unchanged while the setting itself becomes valid.
font = {"family": "serif", "weight": "bold", "size": 18}
import matplotlib

matplotlib.rc("font", **font)

from vgi import *
from examples import *

# Create problem instance

In [None]:
# Problem setup: build the commitments problem instance and query V_lb,
# which the cells below pass in as the starting value function (V0).
m = 6
problem = CommitmentsProblem.create_problem_instance(m, processes=5, seed=0)
V_lb = problem.V_lb()

# Both policies are compiled with cvxpygen; switch "compile" to False in the
# option dicts below to create them without compiling.

# CE-MPC policy
mpc_options = {"lookahead": 30, "compile": True, "name": "commitments_mpc"}
mpc = problem.create_policy(**mpc_options)

# ADP policy, initialised with V_lb
adp_options = {"compile": True, "name": "commitments_policy", "V": V_lb}
policy = problem.create_policy(**adp_options)

# Run VGI

In [None]:
# Set up VGI on the ADP policy with a QuadGradReg regressor.
vgi_settings = dict(trajectory_len=50, num_trajectories=1, damping=0.5)
vgi = VGI(problem, policy, QuadGradReg(), **vgi_settings)

# Run 20 iterations starting from V_lb, with eval_freq=1.
vgi_policy = vgi(20, eval_freq=1, V0=V_lb)

# MPC

In [None]:
# Cost of the CE-MPC policy with the seed fixed at 0; the plotting cell below
# draws this value as a horizontal reference line.
mpc_cost = problem.cost(mpc, seed=0)
mpc_report = "mpc cost: {:.2f}".format(mpc_cost)
print(mpc_report)

# Plot VGI progress

In [None]:
# x positions for the recorded VGI costs: 0, 50, 100, ... (one per entry).
vgi_steps = list(range(0, 50 * len(vgi.costs), 50))

# VGI cost curve with the CE-MPC cost drawn as a dashed horizontal line.
fig, ax = plt.subplots(figsize=(8, 5))
ax.step(vgi_steps, vgi.costs, label="VGI", linewidth=4)
ax.hlines(
    mpc_cost,
    0,
    vgi_steps[-1],
    label="CE-MPC",
    linestyles="dashed",
    color="orange",
    linewidth=4,
)
ax.legend()
ax.set_ylabel("Cost")
ax.set_xlabel("Policy evaluations")
fig.tight_layout()
ax.grid()
fig.savefig("commitments_vgi_ce.pdf", dpi=500)
plt.show()

# Numeric summary: final cost and step count, then first cost, last cost,
# relative change from first to last, and relative gap to the CE-MPC cost.
first_cost = vgi.costs[0]
last_cost = vgi.costs[-1]
print("vgi", last_cost, "steps", vgi_steps[-1])
print(
    first_cost,
    last_cost,
    (last_cost - first_cost) / first_cost,
    (last_cost - mpc_cost) / mpc_cost,
)

# Fitted value iteration

In [None]:
# Set up FVI on the same ADP policy with a QuadReg regressor.
fvi_settings = dict(trajectory_len=200, num_trajectories=2, damping=0.5)
fvi = FVI(problem, policy, QuadReg(), **fvi_settings)

# Run 25 iterations starting from V_lb, with eval_freq=20.
fvi_policy = fvi(25, eval_freq=20, V0=V_lb)

# COCP gradient

In [None]:
# Settings for the COCP gradient run.
trajectory_len, num_trajectories = 100, 3
num_iters, learning_rate = 100, 1e-4

# Keyword options for the run: fixed seed, V_lb as the starting value
# function, and the ADP policy created earlier.
cocp_options = dict(
    seed=2,
    V0=V_lb,
    num_trajectories=num_trajectories,
    policy=policy,
    eval_freq=10,
)
cocp_grad = commitment_cocp_grad(
    problem, trajectory_len, num_iters, learning_rate, **cocp_options
)