In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# (except those excluded via %aimport) before each cell is executed, so edits
# to local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# Render all figure text through LaTeX. If LaTeX is not installed, set this to
# False and remove the TeX symbols from the plot labels used in this notebook.
plt.rcParams["text.usetex"] = True

# Global font settings applied to every figure.
# The family used to be "normal", which is not a font family matplotlib knows
# (valid generic families are serif, sans-serif, cursive, monospace, fantasy)
# and only produced font-fallback warnings. "serif" is what matplotlib falls
# back to for unrecognised families when usetex is enabled, so the rendered
# output should be unchanged.
font = {"family": "serif", "weight": "bold", "size": 18}
matplotlib.rc("font", **font)

# Project-local modules. The notebook relies on these star imports for names
# it never defines itself (VGI, FVI, QuadReg, QuadGradReg, CommitmentsProblem,
# commitment_cocp_grad). They are kept after the configuration above, in the
# same relative order as before.
from vgi import *
from commitments import *

# Create problem instance

In [None]:
# Build the problem instance. `m` is the first argument of the factory
# (presumably the number of assets -- TODO confirm in the commitments module).
m = 6
problem = CommitmentsProblem.create_problem_instance(
    m,
    seed=0,
    processes=5,
)

# Value function returned by problem.V_lb(); it is handed to the ADP policy
# below and reused as the V0 argument in later cells of this notebook.
V_lb = problem.V_lb()

# Both policies are compiled with cvxpygen. Pass compile=False to create them
# without compiling.

# CE-MPC policy with a lookahead of 30.
mpc = problem.create_policy(
    lookahead=30,
    compile=True,
    name="commitments_mpc",
)

# ADP policy that uses V_lb as its value function.
cocp = problem.create_policy(
    compile=True,
    name="commitments_policy",
    V=V_lb,
)

# Run VGI

In [None]:
# Set up VGI on the problem with the ADP policy and a QuadGradReg regressor.
vgi = VGI(
    problem, cocp, QuadGradReg(),
    trajectory_len=50, num_trajectories=1, damping=0.5,
)

# Run it starting from V_lb. The leading 20 is presumably the number of VGI
# iterations -- TODO confirm against the VGI call signature.
vgi_policy = vgi(
    20,
    V0=V_lb,
    eval_freq=1,
    seed=0,
)

# MPC

In [None]:
# Baseline: cost of the CE-MPC policy as reported by problem.cost (seed 1),
# printed with two decimals. mpc_cost is reused by the progress plot.
mpc_cost = problem.cost(
    mpc,
    seed=1,
)
print(
    "mpc cost: {:.2f}".format(mpc_cost)
)

# Plot VGI progress

In [None]:
# x positions: one per recorded VGI cost, spaced 50 apart. The 50 presumably
# mirrors trajectory_len=50 from the VGI cell -- keep the two in sync.
vgi_steps = list(range(0, 50 * len(vgi.costs), 50))

# VGI cost as a step curve, with the CE-MPC cost as a dashed horizontal line.
fig, ax = plt.subplots(figsize=(8, 5))
ax.step(vgi_steps, vgi.costs, label="VGI", linewidth=4)
ax.hlines(
    mpc_cost, 0, vgi_steps[-1],
    label="CE-MPC", linestyles="dashed", color="orange", linewidth=4,
)
ax.legend()
ax.set_ylabel("Cost")
ax.set_xlabel("Policy evaluations")
fig.tight_layout()
ax.grid()
fig.savefig("commitments_vgi_ce.pdf", dpi=500)
plt.show()

# Numeric summary: last cost and last x position, then first cost, last cost,
# relative change from first to last, and relative gap of last to CE-MPC.
vgi_initial_cost = vgi.costs[0]
vgi_final_cost = vgi.costs[-1]
print("vgi", vgi_final_cost, "steps", vgi_steps[-1])
print(
    vgi_initial_cost,
    vgi_final_cost,
    (vgi_final_cost - vgi_initial_cost) / vgi_initial_cost,
    (vgi_final_cost - mpc_cost) / mpc_cost,
)

# Plot trajectory

In [None]:
# Policy under test: a clone of the ADP policy loaded with the last VGI
# iterate (cloning presumably keeps `cocp` itself unmodified -- TODO confirm).
vgi_cocp = cocp.clone()
vgi_cocp.update_value(vgi.iterates[-1])

# Simulate that policy for T steps and pull out the state/control matrices.
T = 50
traj = problem.simulate(vgi_cocp, T, seed=0)
vgi_states, vgi_controls = traj.states_matrix, traj.controls_matrix

# Column index of the asset to plot, and three stacked panels sharing the x-axis.
asset = 1
fig, axs = plt.subplots(3, 1, figsize=(8, 6), sharex=True, dpi=500)
u_ax, n_ax, l_ax = axs

# Top panel: control for the chosen asset against the reference problem.u_ss.
u_ax.set_ylabel("$(u_t)_1$")
u_ax.grid(True)
u_ax.plot(vgi_controls[:, asset], linewidth=4, color="purple")
u_ax.hlines(
    problem.u_ss[asset], 0, T,
    color="black", linestyle="dashed", label="$u^{\\textrm{sso}}_1$", linewidth=4,
)
u_ax.legend(loc="upper right")

# Middle panel: state column `asset` against problem.target_nav.
n_ax.set_ylabel("$(n_t)_1$")
n_ax.grid(True)
n_ax.plot(vgi_states[:, asset], linewidth=4)
n_ax.hlines(
    problem.target_nav[asset], 0, T,
    color="orange", linestyle="dashed", label="$n^{\\textrm{tar}}_1$", linewidth=4,
)
n_ax.legend(loc="upper right")

# Bottom panel: state column `asset + 6`.
# NOTE(review): the offset 6 presumably equals m (set when the problem is
# created); confirm the state layout before changing m.
l_ax.set_ylabel("$(l_t)_1$")
l_ax.set_xlabel("Quarters $t$")
l_ax.grid(True)
l_ax.plot(vgi_states[:, asset + 6], linewidth=4, color="green")

fig.savefig("commitments_trajectory.pdf", bbox_inches="tight")
plt.show()

# Fitted value iteration

In [None]:
# Set up FVI on the same problem and ADP policy, with a QuadReg regressor.
fvi = FVI(
    problem, cocp, QuadReg(),
    trajectory_len=100, num_trajectories=2, damping=0.5,
)

# Run it starting from V_lb. The leading 20 is presumably the number of FVI
# iterations -- TODO confirm against the FVI call signature.
fvi_policy = fvi(
    20,
    V0=V_lb,
    eval_freq=5,
    seed=1,
)

# COCP gradient

In [None]:
# Settings for the COCP gradient run.
trajectory_len, num_trajectories = 100, 2
num_iters, learning_rate = 200, 5e-5

# Run the COCP gradient method on the ADP policy, starting from V_lb.
cocp_grad = commitment_cocp_grad(
    problem, trajectory_len, num_iters, learning_rate,
    policy=cocp,
    V0=V_lb,
    num_trajectories=num_trajectories,
    seed=2,
    eval_freq=5,
    restart_simulations=False,
)

# Save data

In [None]:
import pickle

# Collect what each method produced: costs and iterates for VGI and FVI, and
# the object returned by commitment_cocp_grad stored as-is.
results = {
    "vgi": {"costs": vgi.costs, "iterates": vgi.iterates},
    "fvi": {"costs": fvi.costs, "iterates": fvi.iterates},
    "cocp-grad": cocp_grad,
}

# Write through a context manager so the file is flushed and closed even if
# pickling fails; the bare open() used before never closed the handle.
with open("commitments_results.pkl", "wb") as results_file:
    pickle.dump(results, results_file)