In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Render all figure text through LaTeX. If LaTeX is not installed, set this to
# False and remove TeX symbols from the plot labels.
plt.rcParams["text.usetex"] = True

# "normal" is not a font family matplotlib recognises: it is silently replaced
# by a fallback (and produces "font family not found" warnings when usetex is
# off). With usetex enabled the fallback for an unrecognised family is "serif",
# so name it explicitly to keep the same rendering without the invalid value.
font = {"family": "serif", "weight": "bold", "size": 18}
import matplotlib

# Apply the font settings globally to every figure created in this notebook.
matplotlib.rc("font", **font)

from vgi import *
from box_lqr import *

# Create problem instance

In [None]:
# Problem dimensions handed to the instance factory below; n is also reused as
# the size of the initial QuadForm.eye(n) in later cells.
# NOTE(review): presumably n = state dimension and m = input dimension — confirm
# against BoxLQRProblem.create_problem_instance.
n, m = 12, 3
# Build a seeded problem instance. `processes=5` is forwarded to the factory
# (presumably a worker-process count — TODO confirm).
problem = BoxLQRProblem.create_problem_instance(n, m, seed=1, processes=5)
# J_lb() returns a pair: J_lb is printed later as the "lower bound", and V_lb is
# used below (and again later) as the value function of the ADP policy.
J_lb, V_lb = problem.J_lb()

# to create policies without compiling with cvxpygen, set compile=False
# create CE-MPC policy
mpc = problem.create_policy(lookahead=30, compile=True, name="box_lqr_mpc")

# create ADP policy
# This single `cocp` object is shared by the VGI, FVI and COCP-gradient cells,
# so its value function depends on which of those cells ran last.
cocp = problem.create_policy(compile=True, name="box_lqr_policy", V=V_lb)

# Run VGI

In [None]:
# Set up VGI on `problem`, driving the shared ADP policy `cocp` with a symmetric
# QuadGradReg regressor.
# NOTE(review): trajectory_len * num_trajectories is 50 here, which appears to be
# what the step size of 50 on the progress plot's x-axis relies on — keep the
# two in sync if either setting changes.
vgi = VGI(
    problem,
    cocp,
    QuadGradReg(symmetric=True),
    trajectory_len=50,
    num_trajectories=1,
    damping=0.5,
)
# Run VGI starting from an identity QuadForm of size n with a fixed seed.
# NOTE(review): the first positional argument (40) is presumably the iteration
# count, and eval_freq=1 presumably records a cost every iteration — confirm.
# Later cells read vgi.costs and vgi.iterates rather than the returned policy.
vgi_policy = vgi(40, V0=QuadForm.eye(n), eval_freq=1, seed=0)

# MPC and upper and lower bounds

In [None]:
# J_lb was returned by problem.J_lb() when the problem instance was created.
print("lower bound: {:.2f}".format(J_lb))

# Point the ADP policy back at V_lb before evaluating it: the same `cocp` object
# was handed to VGI earlier, which presumably replaced its value function.
cocp.update_value(V_lb)
# Cost of the V_lb-based policy under a fixed seed, reported as the upper bound.
J_ub = problem.cost(cocp, seed=1)
print("upper bound: {:.2f}".format(J_ub))

# Cost of the CE-MPC policy, evaluated with the same seed for comparison.
mpc_cost = problem.cost(mpc, seed=1)
print("mpc cost: {:.2f}".format(mpc_cost))

# Plot VGI progress

In [None]:
# Policy evaluations represented by one recorded VGI cost.
# NOTE(review): presumably trajectory_len * num_trajectories (50 * 1) from the
# VGI(...) call, with a cost recorded every iteration — keep in sync with it.
evals_per_vgi_cost = 50
vgi_steps = [evals_per_vgi_cost * i for i in range(len(vgi.costs))]

# Horizontal reference levels in legend order: (value, label, line style, colour).
reference_levels = [
    (J_ub, "Upper bound", "dotted", "green"),
    (J_lb, "Lower bound", "solid", "purple"),
    (mpc_cost, "CE-MPC", "dashed", "orange"),
]

plt.figure(figsize=(8, 5))
# Draw each reference level across the full x-range covered by the VGI run.
for level, level_label, level_style, level_color in reference_levels:
    plt.hlines(
        level,
        0,
        vgi_steps[-1],
        label=level_label,
        linestyles=level_style,
        color=level_color,
        linewidth=4,
    )
# VGI cost as a step curve over the cumulative number of policy evaluations.
plt.step(vgi_steps, vgi.costs, label="VGI", linewidth=4)
plt.grid()
plt.legend()
plt.ylabel("Cost")
plt.xlabel("Total policy evaluations")
plt.tight_layout()
# Save before plt.show() so the written file contains the drawn figure.
plt.savefig("lqr_vgi_ce.pdf", dpi=500)
plt.show()

# Final VGI cost and the x-position it was recorded at.
print("vgi", vgi.costs[-1], "steps", vgi_steps[-1])
# First cost, last cost, and the relative change between them.
print(vgi.costs[0], vgi.costs[-1], (vgi.costs[-1] - vgi.costs[0]) / vgi.costs[0])
# Relative gap between the final VGI cost and the CE-MPC cost.
print((vgi.costs[-1] - mpc_cost) / mpc_cost)

# Fitted value iteration

In [None]:
# Set up fitted value iteration on the same problem and the same shared `cocp`
# policy object, here with a symmetric QuadReg regressor and longer / more
# trajectories than the VGI run (200 x 2 versus 50 x 1).
fvi = FVI(
    problem,
    cocp,
    QuadReg(symmetric=True),
    trajectory_len=200,
    num_trajectories=2,
    damping=0.5,
)
# Run FVI from an identity QuadForm of size n with a fixed seed.
# NOTE(review): 50 is presumably the iteration count and eval_freq=5 presumably
# evaluates the cost every fifth iteration — confirm against FVI.__call__.
# The save cell reads fvi.costs and fvi.iterates rather than the returned policy.
fvi_policy = fvi(50, V0=QuadForm.eye(n), eval_freq=5, seed=1)

# COCP gradient

In [None]:
# Settings for the COCP gradient baseline; they are passed straight into
# box_lqr_cocp_grad below.
trajectory_len = 100
num_trajectories = 3
num_iters = 100
learning_rate = 1e-2
# Run the baseline on the same problem and the same shared `cocp` policy object,
# starting from an identity QuadForm of size n. The return value is stored
# as-is under the "cocp-grad" key of the saved results.
# NOTE(review): the positional order (problem, trajectory_len, num_iters,
# learning_rate) is assumed to match the function's signature — confirm.
cocp_grad = box_lqr_cocp_grad(
    problem,
    trajectory_len,
    num_iters,
    learning_rate,
    seed=2,
    V0=QuadForm.eye(n),
    num_trajectories=num_trajectories,
    policy=cocp,
    eval_freq=10,
)

# Save data

In [None]:
import pickle

# Collect the cost histories and iterates of each method under one dict so the
# figures can be regenerated later without re-running the experiments.
results = {
    "vgi": {"costs": vgi.costs, "iterates": vgi.iterates},
    "fvi": {"costs": fvi.costs, "iterates": fvi.iterates},
    "cocp-grad": cocp_grad,
}
# Use a context manager so the file handle is flushed and closed even if
# pickling fails part-way; the original left the handle open.
# Only unpickle this file from a trusted location: pickle.load can run code.
with open("box_lqr_results.pkl", "wb") as results_file:
    pickle.dump(results, results_file)