In [2]:
import torch
from torch.autograd import Variable
from mpc import mpc
from mpc.mpc import QuadCost, LinDx

In [None]:
torch.manual_seed(0)

n_batch, n_state, n_ctrl, T = 1, 1, 1, 20
n_sc = n_state + n_ctrl

# Build a random quadratic cost and random linear dynamics.
# In the thesis project the cost is instead represented by a neural network:
# x^T net(x) x + 2 c^T x.
# NOTE: the order of the torch.randn / torch.rand calls below determines the
# sampled values under the fixed seed, so it must not be rearranged.
C_factor = torch.randn(T*n_batch, n_sc, n_sc)  # shape (T*n_batch, n_sc, n_sc)
# C_factor @ C_factor^T is positive semi-definite by construction.
C = torch.bmm(C_factor, C_factor.transpose(1, 2))
C = C.view(T, n_batch, n_sc, n_sc)  # quadratic cost term, shape (T, n_batch, n_sc, n_sc)
c = torch.randn(T, n_batch, n_sc)  # linear cost term

alpha = 0.2
# State-transition part: identity plus a small random perturbation, shared by
# every time step and batch element.
R = torch.eye(n_state) + alpha*torch.randn(n_state, n_state)
R = R.repeat(T, n_batch, 1, 1)  # shape (T, n_batch, n_state, n_state)
# Control-input part of the dynamics.
S = torch.randn(T, n_batch, n_state, n_ctrl)  # shape (T, n_batch, n_state, n_ctrl)
# System dynamics matrix [R | S]; the thesis project uses a generative model here.
F = torch.cat((R, S), dim=3)  # shape (T, n_batch, n_state, n_sc)

# The initial state.
x_init = torch.randn(n_batch, n_state)

# The upper and lower control bounds.
u_lower = torch.rand(T, n_batch, n_ctrl).neg()
u_upper = torch.rand(T, n_batch, n_ctrl)

# Configure the solver first, then run it on the problem defined above.
controller = mpc.MPC(
    n_state=n_state,
    n_ctrl=n_ctrl,
    T=T,
    u_lower=u_lower,
    u_upper=u_upper,
    lqr_iter=20,
    verbose=1,
    backprop=False,
    exit_unconverged=False,  # this is an LQR problem, so convergence is essentially assumed
)
cost_fn = QuadCost(C, c)
dynamics = LinDx(F)
x_lqr, u_lqr, objs_lqr = controller(x_init, cost_fn, dynamics)

In [None]:
# Replay the LQR solution: accumulate the quadratic cost along the planned
# trajectory and push every (state, control) pair through the linear dynamics.
# Only one batch element is replayed; it is selected by explicit indexing
# rather than a bare .squeeze(), which would also collapse the state axis
# whenever n_state == 1 and would break for n_batch > 1.
batch_idx = 0

cost = 0
x = x_init
print(x_init)

# Stack states and controls along the last axis: tau_t = [x_t; u_t].
n_sc_lqr = torch.cat((x_lqr, u_lqr), dim=2)

cost_list = []

for i in range(T):
    n_sc_i = n_sc_lqr[i, batch_idx]  # shape (n_sc,)
    C_i = C[i, batch_idx]            # shape (n_sc, n_sc)
    # cost: 0.5 * tau^T C tau + c^T tau, accumulated over time
    cost += 0.5 * n_sc_i @ C_i @ n_sc_i + c[i, batch_idx] @ n_sc_i
    cost_list.append(cost.item())
    # dynamics: x_{t+1} = F_t tau_t, kept as a vector of length n_state
    x = F[i, batch_idx] @ n_sc_i
    print(f'x{i}_next: {x}')

# plot the cumulative cost
import matplotlib.pyplot as plt
fig_cost, ax_cost = plt.subplots()
ax_cost.plot(cost_list)
ax_cost.set(xlabel='time step', ylabel='cumulative cost', title='LQR cumulative cost')

# plot the state trajectory
x_plot = x_lqr[:, batch_idx].detach().cpu().numpy()  # shape (T, n_state)
fig_state, ax_state = plt.subplots()
if x_plot.shape[1] >= 2:
    # Phase portrait of the first two state dimensions.
    ax_state.plot(x_plot[:, 0], x_plot[:, 1])
    ax_state.set(xlabel='x[0]', ylabel='x[1]', title='LQR state trajectory')
else:
    # With a single state dimension there is no second coordinate to plot
    # against (indexing column 1 raises IndexError), so plot the state over time.
    ax_state.plot(x_plot[:, 0])
    ax_state.set(xlabel='time step', ylabel='x[0]', title='LQR state trajectory')