In [None]:
import path_ocpy


In [None]:
# from ocpy import OCP, DDPSolver, iLQRSolver
from ocpy import OCP
from ocpy import RiccatiRecursionSolver
from ocpy import symutils

import numpy as np
import sympy as sym
from sympy import sin, cos, tan, exp, log, ln, sinh, cosh, tanh, diff, sqrt
from IPython.display import display, Math


In [None]:
# Problem sizes: state dimension, input dimension and number of
# inequality constraints g(x, u, t) <= 0.
n_x, n_u, n_g = 4, 1, 2

# Name handed to the OCP object (and later to the animator).
sim_name = 'cartpole'

# Create the optimal control problem container.
ocp = OCP(sim_name, n_x, n_u, n_g)

# Symbols for time, state and input provided by the OCP object.
t, x, u = ocp.get_t(), ocp.get_x(), ocp.get_u()


In [None]:
# Symbolic expressions of constants.
m_c, m_p, l, g_c, u_min, u_max, u_eps = ocp.define_scalar_constants(
         [('m_c', 2), ('m_p', 0.2), ('l', 0.5), ('g_c', 9.80665), 
          ('u_min', -15),  ('u_max', 15), ('u_eps', 0.001)]
          )

# Cost weight
q = ocp.define_vector_constant('q', [2.5, 10, 0.01, 0.01])
r = ocp.define_vector_constant('r', [1])
q_f = ocp.define_vector_constant('q_{f}', [2.5, 10, 0.01, 0.01])
Q = symutils.diag(q)
Q_f = symutils.diag(q_f)
R = symutils.diag(r)

# Reference state. 
x_ref = ocp.define_vector_constant('x_{ref}', [0, np.pi, 0, 0])


In [None]:
# State-space equation
f = ocp.get_zero_vector(n_x)
f[0] = x[2]
f[1] = x[3]
f[2] = (u[0] + m_p*sin(x[1])*(l*x[3]*x[3] + g_c*cos(x[1])) )/( m_c+m_p*sin(x[1])*sin(x[1]) )
f[3] = (-u[0] * cos(x[1]) - m_p*l*x[3]*x[3]*cos(x[1])*sin(x[1]) - (m_c+m_p)*g_c*sin(x[1]) ) / ( l*(m_c + m_p*sin(x[1])*sin(x[1])))

# constraints. g(x, u, t) <= 0.
g = ocp.get_zero_vector(n_g)
g[0] = u_min - u[0]
g[1] = u[0] - u_max

# Stage cost and terminal cost.
l = 0.5 * (x - x_ref).T * Q * (x - x_ref) + 0.5 * u.T * R * u
lf = 0.5 * (x - x_ref).T * Q_f * (x - x_ref)

# Display state equation and cost function
display(Math(r"\dot{x} = f(x, u, t) \equiv %s" % sym.latex(f)))
display(Math(r"l(x, u) = %s" % sym.latex(l)))
display(Math(r"l_f(x) = %s" % sym.latex(lf)))
display(Math(r"g(x, u) = %s \leq 0" % sym.latex(g)))



In [None]:
# Horizon length T and number of discretization grids N.
T, N = 5.0, 200

# Initial time and initial state (all four state components are zero).
t0 = 0.0
x0 = np.zeros(4)

# Hand dynamics, costs, constraints and the discretization to the OCP.
ocp.define(f, l, lf, g, t0=t0, x0=x0, T=T, N=N)


In [None]:
# Build the solver from the OCP.
solver = RiccatiRecursionSolver(ocp)

# Initial guess: the state trajectory repeats x0 at each of the N + 1 grid
# points, the input trajectory is all zeros over the N stages.
xs_guess = np.tile(x0, (N + 1, 1))
us_guess = np.zeros((N, n_u))
# NOTE(review): lmds_guess is built but not passed to set_guess below --
# confirm whether the solver is supposed to receive it.
lmds_guess = np.zeros((N + 1, n_x))

solver.set_guess(us_guess=us_guess, xs_guess=xs_guess)


In [None]:
# Solver hyperparameters.
# NOTE(review): line-search parameters are set here, but solve() below is
# called with enable_line_search=False -- confirm they are meant to be unused.
solver.set_line_search_param(alpha_min=1e-4, r_alpha=0.8)
# NOTE(review): solve() below also receives gamma_fixed=0.0 -- confirm how
# that interacts with these regularization settings.
solver.set_regularization_param(
    gamma_init=1e-3,
    r_gamma=5.0,
    gamma_min=0.0,
    gamma_max=1e6,
)
solver.set_kkt_tol(kkt_tol=1e-5)
solver.set_barrier_param(mu_init=1e-1, r_mu=0.1)
solver.set_max_iters(1000)

# Solve the OCP; returns the state, input and time trajectories plus a
# success flag.
xs, us, ts, is_success = solver.solve(
    gamma_fixed=0.0,
    enable_line_search=False,
    update_mu=True,
    result=True,
    log=True,
    plot=True,
)


In [None]:
%matplotlib inline
# Visualize
from ocpy.animator import CartPoleAnimator
animator = CartPoleAnimator(solver.get_log_directory(), sim_name)
animator.generate_animation(False)


In [None]:
import matplotlib.pyplot as plt
import math

def _plot_history(series, title, log_scale=False):
    """Show one figure containing the given convergence histories.

    Args:
        series: iterable of ``(values, label)`` pairs. A ``None`` label plots
            the curve without a legend entry.
        title: figure title.
        log_scale: if True, use a logarithmic y axis.
    """
    fig, ax = plt.subplots()
    if log_scale:
        ax.set_yscale('log')
    needs_legend = False
    for values, label in series:
        if label is None:
            ax.plot(values)
        else:
            ax.plot(values, label=label)
            needs_legend = True
    if needs_legend:
        ax.legend()
    ax.set_title(title)
    plt.show()


# Per-iteration histories recorded by the solver.
result = solver.get_result()

cost_hist = result['cost_hist']
_plot_history([(cost_hist, None)], 'cost')

kkt_error_hist = result['kkt_error_hist']
kkt_error_mu_hist = result['kkt_error_mu_hist']
_plot_history(
    [(kkt_error_hist, 'kkt_error'), (kkt_error_mu_hist, 'kkt_error_mu')],
    'KKT error', log_scale=True,
)

dyn_error_hist = result['dyn_error_hist']
# Values below 1e-20 are replaced by NaN so they leave a gap in the curve.
dyn_error_hist = np.where(dyn_error_hist < 1e-20, np.nan, dyn_error_hist)
_plot_history(
    [(kkt_error_hist, 'kkt_error'), (dyn_error_hist, 'dyn_error')],
    'dynamics feasibility error', log_scale=True,
)

gamma_hist = result['gamma_hist']
_plot_history([(gamma_hist, None)], 'gamma')

alpha_hist = result['alpha_hist']
_plot_history([(alpha_hist, None)], 'alpha')
# The sum is divided by len - 1 rather than len.
# NOTE(review): presumably the first entry is an initial placeholder, not a
# real step -- confirm against the solver.
n_alpha_steps = len(alpha_hist) - 1
if n_alpha_steps > 0:
    print('average alpha:', np.sum(alpha_hist) / n_alpha_steps)
else:
    # Avoid a 0/0 (NaN plus RuntimeWarning) when at most one entry exists.
    print('average alpha: n/a (no iterations recorded)')

mu_hist = result['mu_hist']
_plot_history([(mu_hist, None)], 'barrier parameter', log_scale=True)

r_merit_hist = result['r_merit_hist']
_plot_history([(r_merit_hist, None)], 'penalty coefficient of merit function')
