# Stability Verification for a Cart-Pole System

In [None]:
from __future__ import division, print_function

import numpy as np
import tensorflow as tf
import gpflow
from scipy.linalg import block_diag
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib notebook

import safe_learning
from utilities import uniform_state_sampler, compute_closedloop_response, CartPole, get_max_parameter_change

try:
    from tqdm import tqdm
except ImportError:
    def tqdm(iterable=None, *args, **kwargs):
        """Fallback used when the optional `tqdm` package is missing.

        Returns `iterable` unchanged. Extra positional and keyword
        arguments (e.g. `desc=` or `total=`) are accepted and ignored so
        that calls written for the real progress bar still work.
        """
        return iterable
    
# Numeric types configured in safe_learning (NumPy and TensorFlow aliases)
np_dtype, tf_dtype = safe_learning.config.np_dtype, safe_learning.config.dtype

# When this cell is re-run, close the session left over from the previous
# run before opening a new one.
try:
    session.close()
except NameError:
    # First execution: `session` has not been defined yet
    pass
session = tf.InteractiveSession()

# Flag reset together with the session (it is only set here, not read)
initialized = False

## Dynamics

In [None]:
# Simulation constants
dt = 0.01   # sampling time
g = 9.81    # gravity

# True plant parameters: pendulum mass, cart mass, pole length
m, M, L = 0.1, 5., 0.5

# Normalization constants for the states and the action
x_max, x_dot_max = 10, 5
theta_max, theta_dot_max = np.deg2rad(10), np.deg2rad(5)
u_max = 5*g*m*theta_max     # evaluated with the true pendulum mass (m = 0.1)

state_norm = (x_max, theta_max, x_dot_max, theta_dot_max)
input_norm = (u_max,)

# True system and the linear model obtained from its linearization
true_cart_pole = CartPole(m, M, L, dt, [state_norm, input_norm])
A_true, B_true = true_cart_pole.linearize()
true_dynamics = safe_learning.LinearSystem((A_true, B_true), name='true_dynamics')

# "Wrong" system: same structure with perturbed masses.
# NOTE: m, M and L are rebound here; from this point on they describe the
# mis-specified model, not the true plant.
m, M, L = 0.2, 4.5, 0.5
cart_pole = CartPole(m, M, L, dt, [state_norm, input_norm])
A, B = cart_pole.linearize()
mean_dynamics = safe_learning.LinearSystem((A, B), name='mean_dynamics')

## Rewards

In [None]:
# Quadratic cost weights: Q penalizes the state (the third state carries the
# largest weight), R penalizes the action with an identity weight.
Q = np.diag([1., 1., 50., 1.])
R = np.identity(cart_pole.input_dim)

# The reward is the negated quadratic cost over the stacked (state, action)
reward_function = safe_learning.QuadraticFunction(
    block_diag(-Q, -R),
    name='reward_function',
)

## Discretization

In [None]:
# Same [-1, 1] interval and the same number of grid points (71) for every
# state dimension.
state_limits = [[-1., 1.]] * cart_pole.state_dim
num_states = [71] * cart_pole.state_dim
state_discretization = safe_learning.GridWorld(state_limits, num_states)

# Discretization constant: smallest entry of the grid's `unit_maxes`
tau = np.min(state_discretization.unit_maxes)

print('Grid size: {0}'.format(state_discretization.nindex))
print('Discretization constant: {0}'.format(tau))

## GP Model

In [None]:
# Stack [A, B] for the true and for the mean model. The squared element-wise
# mismatch (floored at 1e-5) is used as the variance of the linear kernels.
m_true = np.hstack((A_true, B_true))
m = np.hstack((A, B))   # NOTE: rebinds `m`, which held the pendulum mass before
variances = (m_true - m) ** 2
np.clip(variances, 1e-5, None, out=variances)

noise_var = 0.001 ** 2

# Input dimension of the GPs: state and action stacked
full_dim = cart_pole.state_dim + cart_pole.input_dim


def _build_kernel(row, matern_dim, scale_col):
    """Return Linear(ARD) + Matern32 * Linear for output dimension `row`.

    `matern_dim` is the active input dimension of the Matern32 factor and
    `scale_col` selects the entry of `variances[row]` used as the variance of
    the one-dimensional Linear factor.
    """
    linear_part = gpflow.kernels.Linear(full_dim, variance=variances[row, :], ARD=True)
    matern_part = gpflow.kernels.Matern32(1, lengthscales=1, active_dims=[matern_dim])
    scale_part = gpflow.kernels.Linear(1, variance=variances[row, scale_col])
    return linear_part + matern_part * scale_part


def _mean_row(row, name):
    """Return the linear mean model restricted to output dimension `row`."""
    return safe_learning.LinearSystem((A[[row], :], B[[row], :]), name=name)


def _prior_gp(kernel, mean_function):
    """Return a GPR model with zero training points and fixed noise variance."""
    no_inputs = np.empty((0, full_dim), dtype=np_dtype)
    no_targets = np.empty((0, 1), dtype=np_dtype)
    model = gpflow.gpr.GPR(no_inputs, no_targets, kernel, mean_function=mean_function)
    model.likelihood.variance = noise_var
    return model


# Kernels, one per state dimension.
# NOTE(review): the Matern32 factor acts on input dimension 0 for x and x_dot
# and on dimension 1 for theta and theta_dot, while the multiplied
# Linear(1, ...) factor is given no active_dims -- confirm this is intended.
kernel_x = _build_kernel(0, 0, 0)
kernel_theta = _build_kernel(1, 1, 1)
kernel_x_dot = _build_kernel(2, 0, 2)
kernel_theta_dot = _build_kernel(3, 1, 3)

# Mean dynamics: one row of the (A, B) mean model per output
mean_function_x = _mean_row(0, 'mean_dynamics_x')
mean_function_theta = _mean_row(1, 'mean_dynamics_theta')
mean_function_x_dot = _mean_row(2, 'mean_dynamics_x_dot')
mean_function_theta_dot = _mean_row(3, 'mean_dynamics_theta_dot')

# GP models over the dynamics, created without any data
gp_x = _prior_gp(kernel_x, mean_function_x)
gp_theta = _prior_gp(kernel_theta, mean_function_theta)
gp_x_dot = _prior_gp(kernel_x_dot, mean_function_x_dot)
gp_theta_dot = _prior_gp(kernel_theta_dot, mean_function_theta_dot)

# Wrap each GPflow model for use with safe_learning
gp_x_fun = safe_learning.GaussianProcess(gp_x)
gp_theta_fun = safe_learning.GaussianProcess(gp_theta)
gp_x_dot_fun = safe_learning.GaussianProcess(gp_x_dot)
gp_theta_dot_fun = safe_learning.GaussianProcess(gp_theta_dot)

# Combine the four scalar models into one dynamics model
dynamics = safe_learning.FunctionStack(
    (gp_x_fun, gp_theta_fun, gp_x_dot_fun, gp_theta_dot_fun)
)

## Lyapunov Function

In [None]:
# The controller is the LQR gain of the "wrong" (mean) model, with its
# output saturated to [-1, 1].
K, P = safe_learning.utilities.dlqr(A, B, Q, R)
policy = safe_learning.Saturation(
    safe_learning.LinearSystem(-K, name='policy'), -1, 1)

# Placeholder for batches of states (one state per row)
states = tf.placeholder(tf_dtype, shape=[None, cart_pole.state_dim], name='states')

# Quadratic Lyapunov candidate built from the matrix P returned by dlqr
lyapunov_function = safe_learning.QuadraticFunction(P)

# Lipschitz constants from induced 1-norms; note that the dynamics constant
# uses the true system matrices.
L_pol = np.linalg.norm(-K, 1)
L_dyn = np.linalg.norm(A_true, 1) + np.linalg.norm(B_true, 1)*L_pol

# Approximate Lipschitz constant for value function over [-1, 1]**d by sampling on the boundaries
# num_boundaries = int(2*cart_pole.state_dim)
# samples_per_boundary = int(1e3)
# test_set = np.random.uniform(-1., 1., (num_boundaries*samples_per_boundary, cart_pole.state_dim))
# for i in range(cart_pole.state_dim):
#     start_idx = (2*i)*samples_per_boundary
#     mid_idx = (2*i + 1)*samples_per_boundary
#     end_idx = (2*i + 2)*samples_per_boundary
#     test_set[start_idx:mid_idx, i] = -1.
#     test_set[mid_idx:end_idx, i] = 1.
# boundary_values = session.run(lyapunov_function(states), {states: test_set})
# L_V = 1.05*np.max(np.abs(boundary_values))

# Approximation actually used: 1.05 times the largest absolute value of the
# Lyapunov function over all grid points.
L_V = 1.05*np.max(np.abs(
    session.run(lyapunov_function(states),
                {states: state_discretization.all_points})
))

print('Lyapunov function Lipschitz constant approx.: {0}'.format(L_V))

lyapunov = safe_learning.Lyapunov(
    state_discretization, lyapunov_function, dynamics, L_dyn, L_V, tau, policy)

## Safety

In [None]:
def plot_safe_set(lyapunov, show=True):
    """Plot two 2-D slices of the safe set for a given Lyapunov function.

    The left panel shows the safe set over the first two state dimensions
    with the last two fixed at zero; the right panel shows the last two
    dimensions with the first two fixed at zero.

    Parameters
    ----------
    lyapunov : object
        Must provide `safe_set` (indexable by a boolean mask over all grid
        points) and `discretization.limits`. The grid points and grid shape
        are read from the module-level `state_discretization` and
        `num_states`.
    show : bool, optional
        If True (default), call `plt.show()` after drawing.
    """
    # Grid coordinates are floats, so the centre point is not guaranteed to
    # be bit-exactly 0.0. Compare with an absolute tolerance rather than
    # `== 0`, which could select nothing and break the reshape below.
    at_zero = np.isclose(state_discretization.all_points, 0., rtol=0., atol=1e-8)
    pos_set = np.logical_and(at_zero[:, 2], at_zero[:, 3])
    vel_set = np.logical_and(at_zero[:, 0], at_zero[:, 1])
    safe_pos = lyapunov.safe_set[pos_set].reshape(num_states[:2])
    safe_vel = lyapunov.safe_set[vel_set].reshape(num_states[2:])

    fig = plt.figure(figsize=(10, 5), dpi=100)
    fig.suptitle('Safe Set')
    fig.subplots_adjust(wspace=0.5, hspace=0.5)

    limits = lyapunov.discretization.limits
    panels = (
        # Safe positions set, with zero velocities
        (121, safe_pos, limits[:2, :],
         r'$\dot{x} = \dot{\theta} = 0$', r'$x$', r'$\theta$'),
        # Safe velocities set, with zero positions
        (122, safe_vel, limits[2:, :],
         r'$x = \theta = 0$', r'$\dot{x}$', r'$\dot{\theta}$'),
    )
    for position, safe_slice, slice_limits, title, xlabel, ylabel in panels:
        ax = fig.add_subplot(position)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        # Transpose so the first slice dimension runs along the horizontal axis
        im = ax.imshow(safe_slice.T,
                       origin='lower',
                       extent=slice_limits.ravel(),
                       vmin=0,
                       vmax=1)
        fig.colorbar(im)

#     if isinstance(lyapunov.dynamics, safe_learning.UncertainFunction):
#         X = lyapunov.dynamics.functions[0].X
#         plt.plot(X[:, 0], X[:, 1], 'rx')

    if show:
        plt.show()

# Initial safe set: the sub-level set of the Lyapunov function that contains
# every grid point whose value is at most 0.5% of the maximum grid value.
values = session.run(
    lyapunov_function(states),
    {states: state_discretization.all_points},
)
cutoff = 0.005 * np.max(values)
lyapunov.initial_safe_set = np.squeeze(values, axis=1) <= cutoff
lyapunov.update_safe_set()

plot_safe_set(lyapunov)

## Online Learning