# Stability Verification for a Cart-Pole

In [None]:
from __future__ import division, print_function

import numpy as np
import tensorflow as tf
import gpflow
import safe_learning
import matplotlib.pyplot as plt
import time
import pandas
import os

from scipy.linalg import block_diag
from utilities import CartPole, debug
from safe_learning.utilities import get_storage, set_storage
from matplotlib.font_manager import FontProperties
from matplotlib.colors import ListedColormap

%matplotlib inline
    
# Shorthand for the numpy / TensorFlow dtypes configured in safe_learning
NP_DTYPE = safe_learning.config.np_dtype
TF_DTYPE = safe_learning.config.dtype

# Small constant (the same value is also stored as OPTIONS.eps further down)
EPS = 1e-8

# Colormap for heat maps; values above/below the color range are drawn in
# white/black. NOTE: HEAT_MAP is assigned again, identically, later in this cell.
HEAT_MAP = plt.get_cmap('inferno', lut=None)
HEAT_MAP.set_over('white')
HEAT_MAP.set_under('black')

# Colormap with 15 discrete levels; out-of-range values are gold/white.
# NOTE: LEVEL_MAP is re-created with lut=21 later in this cell, which replaces
# this definition.
LEVEL_MAP = plt.get_cmap('viridis', lut=15)
LEVEL_MAP.set_over('gold')
LEVEL_MAP.set_under('white')

# Display settings: 4 decimals for pandas and numpy, no wrapping of wide frames
pandas.options.display.float_format = '{:,.4f}'.format
pandas.set_option('expand_frame_repr', False)
np.set_printoptions(precision=4)


# TODO testing ****************************************#

class Options(object):
    """Plain attribute container populated from keyword arguments.

    Each keyword given to the constructor is stored as an instance attribute
    with the same name, e.g. ``Options(dpi=100).dpi == 100``.
    """

    def __init__(self, **kwargs):
        super(Options, self).__init__()
        # Write straight into the instance namespace
        vars(self).update(kwargs)

# Notebook-wide settings. In the visible part of the notebook the attributes
# tf_dtype, np_dtype, dpi, train_hyperparameters, save_figs and fig_path are
# read; use_linear_dynamics, saturate and use_zero_threshold duplicate the
# module-level flags of the "Flags" cell (the flags, not these attributes, are
# what the later cells test).
OPTIONS = Options(np_dtype              = safe_learning.config.np_dtype,
                  tf_dtype              = safe_learning.config.dtype,
                  fontproperties        = FontProperties(size=10),
                  dpi                   = 100,
                  eps                   = 1e-8,
                  use_linear_dynamics   = False,
                  saturate              = True,
                  use_zero_threshold    = False,
                  train_hyperparameters = True,
                  save_figs             = True,
                  fig_path              = 'figures/cartpole_stability/')

# Dictionary handed to get_storage / set_storage in the "TensorFlow Graph" cell
# (presumably so graph nodes are reused when that cell is re-run -- confirm
# against safe_learning.utilities).
_STORAGE = {}

# Heat-map colormap: out-of-range values are white (over) / black (under)
HEAT_MAP = plt.get_cmap('inferno', lut=None)
HEAT_MAP.set_over('white')
HEAT_MAP.set_under('black')

# Level-set colormap with 21 discrete levels: gold (over) / white (under)
LEVEL_MAP = plt.get_cmap('viridis', lut=21)
LEVEL_MAP.set_over('gold')
LEVEL_MAP.set_under('white')

# BINARY_MAP = ListedColormap([(1., 1., 1., 0.), (0., 1., 0., 0.65)])

def binary_cmap(color='red', alpha=1.):
    """Return a two-entry colormap: transparent white, then `color`.

    `color` may be one of the names 'red', 'green' or 'blue', which is turned
    into the pure RGB primary with opacity `alpha`. Any other value is passed
    to ``ListedColormap`` unchanged, in which case `alpha` is not applied.
    """
    primaries = (('red', (1., 0., 0.)),
                 ('green', (0., 1., 0.)),
                 ('blue', (0., 0., 1.)))

    # Default: use the caller's color specification as-is
    foreground = color
    for name, rgb in primaries:
        if color == name:
            foreground = rgb + (alpha,)
            break

    background = (1., 1., 1., 0.)
    return ListedColormap([background, foreground])

#******************************************************#


## TensorFlow Session

In [None]:
# os.cpu_count() returns None when the number of CPUs cannot be determined;
# fall back to one device so that `device_count` below always gets an integer.
MAX_CPU_COUNT = os.cpu_count() or 1
NUM_CORES = 8
NUM_SOCKETS = 2

# OpenMP / KMP threading settings, exported through the environment before the
# session is created
os.environ["KMP_BLOCKTIME"]    = str(0)
os.environ["KMP_SETTINGS"]     = str(1)
os.environ["KMP_AFFINITY"]     = 'granularity=fine,noverbose,compact,1,0'
os.environ["OMP_NUM_THREADS"]  = str(NUM_CORES)

config = tf.ConfigProto(intra_op_parallelism_threads  = NUM_CORES,
                        inter_op_parallelism_threads  = NUM_SOCKETS,
                        allow_soft_placement          = False,
#                         log_device_placement          = True,
                        device_count                  = {'CPU': MAX_CPU_COUNT})

# TODO manually for CPU-only?
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

# Re-running this cell: close the session left over from the previous run.
# On the first run `session` does not exist yet, hence the NameError guard.
try:
    session.close()
except NameError:
    pass
session = tf.InteractiveSession(config=config)

# print('Found MAX_CPU_COUNT =', MAX_CPU_COUNT)
# for dev in session.list_devices():
#     print(dev)

## Flags

In [None]:
# Saturate the action so that it lies in [-1, 1]
SATURATE = True

# Use the true physical parameters in the GP model
# (replaces the linearized "wrong" model (A, B) by (A_true, B_true))
USE_TRUE_PARAMETERS = False

# Use the linearized discrete-time model as the true underlying dynamics
USE_LINEAR_DYNAMICS = False

# Use a threshold of zero when checking for stability
# (sets the discretization constant tau to 0)
USE_ZERO_THRESHOLD = False

# Use purely linear GP kernels; otherwise each kernel is the linear kernel
# plus a (Linear + Matern32 + Linear) * Matern32 term
USE_LINEAR_KERNELS = False

# Use the element-wise absolute gradient of the Lyapunov function as its
# Lipschitz term; otherwise the 1-norm of the gradient is used
USE_LIPSCHITZ_SCALING = True

# Scaling factor for GP confidence intervals
BETA = 2.

# Scaling passed to safe_learning.GPRCached; only referenced by the
# commented-out GPRCached construction in the GP model section
GP_SCALING = 1e3

# Measurement noise variance assigned to every GP likelihood
NOISE_VAR = 0.001 ** 2

# Forwarded as `adaptive=` to safe_learning.Lyapunov
ADAPTIVE = True


## Dynamics

In [None]:
# Constants
dt = 0.01   # sampling time
g  = 9.81   # gravity (not passed to CartPole in this cell)

# True system parameters
m = 0.175    # pendulum mass
M = 1.732    # cart mass
L = 0.28     # pole length
b = 0.01     # rotational friction

# State and action normalizers
# (u_max is computed from the TRUE masses, before m and M are re-bound below)
x_max     = 0.5
theta_max = np.deg2rad(30)
v_max     = 2.
omega_max = np.deg2rad(30)
u_max     = (m + M) * (v_max ** 2) / x_max

state_norm = (x_max, theta_max, v_max, omega_max)
action_norm = (u_max, )

# Constraints for initial 'safe' states
# (safe_norm is only referenced by the commented-out hypercube safe set in the
# Lyapunov section)
x_safe     = x_max
theta_safe = 0.3 * theta_max
v_safe     = 0.5 * v_max
omega_safe = 0.25 * omega_max
safe_norm  = np.array([[x_safe / x_max, theta_safe / theta_max, v_safe / v_max, omega_safe / omega_max]])

# Dimensions and domains (limits are in normalized coordinates)
state_dim     = 4
action_dim    = 1
state_limits  = np.array([[-1., 1.]] * state_dim)
action_limits = np.array([[-1., 1.]] * action_dim)

# True system
true_cartpole = CartPole(m, M, L, b, dt, [state_norm, action_norm])
A_true, B_true = true_cartpole.linearize()

# NOTE(review): this branch uses safe_learning.functions.LinearSystem while
# the rest of the notebook uses safe_learning.LinearSystem -- confirm both
# names resolve to the same class.
if USE_LINEAR_DYNAMICS:
    true_dynamics = safe_learning.functions.LinearSystem((A_true, B_true), name='true_dynamics')
else:
    true_dynamics = true_cartpole.__call__

# "Wrong" system
# The names m, M, L, b are re-bound here; from this point on they hold the
# mismatched model parameters, not the true ones.
m = 0.2     # pendulum mass
M = 1.5     # cart mass
L = 0.25    # pole length
b = 0.0     # rotational friction
cartpole = CartPole(m, M, L, b, dt, [state_norm, action_norm])
A, B = cartpole.linearize()

# Optionally discard the model mismatch in the linear prior mean
if USE_TRUE_PARAMETERS:
    A = A_true
    B = B_true
mean_dynamics = safe_learning.LinearSystem((A, B), name='mean_dynamics')

## GP Model

In [None]:
# Prior variances of the linear kernels: squared element-wise mismatch between
# the true and the "wrong" linearized model, one row per state dimension and
# one column per GP input (4 states followed by the action).
# NOTE: `m` re-binds the name that held the pendulum mass in the dynamics cell.
m_true = np.hstack((A_true, B_true))
m = np.hstack((A, B))
variances = (m_true - m) ** 2

# Make sure at least some non-zero prior variance is maintained
np.clip(variances, 1e-3, None, out=variances)

# Measurement noise
noise_var = NOISE_VAR

# Input to GP is of the form (x,u)
full_dim = state_dim + action_dim


def _build_dynamics_kernel(input_dim, prior_variances, linear_only=False):
    """Build the GP kernel for one output dimension of the dynamics.

    Parameters
    ----------
    input_dim : int
        Dimension of the GP input (state and action stacked).
    prior_variances : ndarray
        One row of `variances`; entry j is the prior variance of input j.
    linear_only : bool
        If True, return just the ARD linear kernel over all inputs.

    Returns
    -------
    A gpflow kernel: the ARD linear kernel, optionally plus
    ``(Linear(theta) + Matern32(omega) + Linear(u)) * Matern32(theta)``.
    """
    linear = gpflow.kernels.Linear(input_dim, variance=prior_variances, ARD=True)
    if linear_only:
        return linear

    # Input columns: 1 = theta, 3 = omega, 4 = u
    nonlinear = (
        gpflow.kernels.Linear(1, variance=prior_variances[1], active_dims=[1])
        + gpflow.kernels.Matern32(1, variance=prior_variances[3], lengthscales=1, active_dims=[3])
        + gpflow.kernels.Linear(1, variance=prior_variances[4], active_dims=[4])
    )
    return linear + nonlinear * gpflow.kernels.Matern32(1, lengthscales=1, active_dims=[1])


# Kernels (one per state dimension, same structure, row-specific variances)
kernel_x = _build_dynamics_kernel(full_dim, variances[0, :], USE_LINEAR_KERNELS)
kernel_theta = _build_dynamics_kernel(full_dim, variances[1, :], USE_LINEAR_KERNELS)
kernel_v = _build_dynamics_kernel(full_dim, variances[2, :], USE_LINEAR_KERNELS)
kernel_omega = _build_dynamics_kernel(full_dim, variances[3, :], USE_LINEAR_KERNELS)

# Mean dynamics: row i of the linear model (A, B) is the prior mean of GP i
mean_function_x = safe_learning.LinearSystem((A[[0], :], B[[0], :]), name='mean_dynamics_x')
mean_function_theta = safe_learning.LinearSystem((A[[1], :], B[[1], :]), name='mean_dynamics_theta')
mean_function_v = safe_learning.LinearSystem((A[[2], :], B[[2], :]), name='mean_dynamics_v')
mean_function_omega = safe_learning.LinearSystem((A[[3], :], B[[3], :]), name='mean_dynamics_omega')

In [None]:
# Define a GP model over the dynamics
X_init = np.zeros((1, full_dim), dtype=NP_DTYPE)
Y_init = np.zeros((1, 1), dtype=NP_DTYPE)

gp_x = gpflow.gpr.GPR(X_init, Y_init, kernel_x, mean_function_x)
gp_theta = gpflow.gpr.GPR(X_init, Y_init, kernel_theta, mean_function_theta)
gp_v = gpflow.gpr.GPR(X_init, Y_init, kernel_v, mean_function_v)
gp_omega = gpflow.gpr.GPR(X_init, Y_init, kernel_omega, mean_function_omega)



# TODO Tensorflow spits out a lot of allocator errors when creating 0-length dataholders in gpflow. Occurs when:
#     - initializing with empty data matrices X and Y
#     - using GPRCached (initializes empty dataholders for Cholesky decomposition)

# X_init = np.empty((0, full_dim), dtype=NP_DTYPE)
# Y_init = np.empty((0, 1), dtype=NP_DTYPE)

# gp_x = safe_learning.GPRCached(X_init, Y_init, kernel_x, mean_function_x, scaling=GP_SCALING)
# gp_theta = safe_learning.GPRCached(X_init, Y_init, kernel_theta, mean_function_theta, scaling=GP_SCALING)
# gp_v = safe_learning.GPRCached(X_init, Y_init, kernel_v, mean_function_v, scaling=GP_SCALING)
# gp_omega = safe_learning.GPRCached(X_init, Y_init, kernel_omega, mean_function_omega, scaling=GP_SCALING)

In [None]:
# The four per-dimension GP models, in state order (x, theta, v, omega)
_gp_models = (gp_x, gp_theta, gp_v, gp_omega)

# Give every likelihood the same measurement-noise variance
for _gp_model in _gp_models:
    _gp_model.likelihood.variance = noise_var

# Wrap each model as a safe_learning function with confidence scaling BETA
gp_x_fun, gp_theta_fun, gp_v_fun, gp_omega_fun = [
    safe_learning.GaussianProcess(_gp_model, BETA) for _gp_model in _gp_models
]

# Stack GP functions => block-diagonal kernel matrix
dynamics = safe_learning.FunctionStack((gp_x_fun, gp_theta_fun, gp_v_fun, gp_omega_fun))

def optimize_gp_hyperparameters(lyapunov, X=None, Y=None, print_params=False):
    """Optimize GP hyperparameters via MLE on a given data set.

    For every GP wrapper in ``lyapunov.dynamics.functions`` the current data
    matrices are saved, optionally replaced by the given training data,
    ``gaussian_process.optimize()`` is run, and the saved data is restored.

    Parameters
    ----------
    lyapunov : object
        Must expose ``dynamics.functions``, an iterable of GP wrappers with
        the attributes ``gaussian_process``, ``scope_name``,
        ``hyperparameters`` and the method ``update_feed_dict()``.
    X : ndarray, optional
        Training inputs assigned to every GP. If None, each GP keeps its
        current inputs during optimization.
    Y : ndarray, optional
        Training targets; column ``i`` (reshaped to a column vector) is
        assigned to the i-th GP. If None, each GP keeps its current targets.
    print_params : bool
        If True, print each GP before and after optimization.
    """
    
    print('Optimizing ...')
    
    for i, gp in enumerate(lyapunov.dynamics.functions):
        if print_params:
            print('Original parameters:\n', gp.gaussian_process, '\n')

        # Remember the current data so it can be restored afterwards
        X_save = gp.gaussian_process.X.value
        Y_save = gp.gaussian_process.Y.value
        
        if X is not None:
            gp.gaussian_process.X = X
        if Y is not None:
            gp.gaussian_process.Y = Y[:, i].reshape((-1, 1))
        gp.update_feed_dict()

        gp.gaussian_process.optimize()
       
        # Reset GP data matrices
        gp.gaussian_process.X = X_save
        gp.gaussian_process.Y = Y_save

        # Re-create the TF parameter array inside the wrapper's variable scope
        # and refresh the feed dict (presumably so the wrapper picks up the
        # optimized hyperparameters -- confirm against gpflow/safe_learning).
        with tf.variable_scope(gp.scope_name):
            gp.gaussian_process.make_tf_array(gp.hyperparameters[0])
            gp.update_feed_dict()
        
        if print_params:
            print('New parameters:\n', gp.gaussian_process, '\n')
        
    print('Done!')

## State Discretization

In [None]:
# Number of states along each dimension
num_states = 51

# State grid
grid_limits = np.array([[-1., 1.], [-1., 1.], [-1., 1.], [-1., 1.]])
state_discretization = safe_learning.GridWorld(grid_limits, num_states)

# Discretization constant
if USE_ZERO_THRESHOLD:
    tau = 0.0
else:
    tau = np.sum(state_discretization.unit_maxes) / 2

print('Grid size: {}'.format(state_discretization.nindex))
print('Discretization constant: {}'.format(tau))

## Cost Function

In [None]:
# State cost matrix
Q = np.diag([0.1, 0.1, 0.1, 0.1]).astype(NP_DTYPE)

# Action cost matrix
R = 0.1 * np.identity(action_dim).astype(NP_DTYPE)

# Normalize cost matrices
# cost_norm = np.max([Q.max(), R.max()])
# Q = Q / cost_norm
# R = R / cost_norm

# Quadratic cost function
cost_function = safe_learning.QuadraticFunction(block_diag(Q, R), name='cost_function')

## Policy

In [None]:
# Fix policy to the LQR solution for the true system
K, P = safe_learning.utilities.dlqr(A_true, B_true, Q, R)
policy = safe_learning.LinearSystem(-K, name='policy')

if SATURATE:
    policy = safe_learning.Saturation(policy, -1, 1)


In [None]:
def plot_policy(lyapunov, tf_states, fixed_state, state_norm=None):
    """Plot the policy output on two 2-D slices of the 4-D state grid.

    The left panel holds theta and omega fixed and shows the policy over
    (x, v); the right panel holds x and v fixed and shows it over
    (theta, omega). The fixed values are the grid points closest to
    `fixed_state`.

    Parameters
    ----------
    lyapunov : object
        Provides ``discretization`` (with ``discrete_points``, ``all_points``,
        ``num_points`` and ``limits``) and the callable ``policy``.
    tf_states : tf.placeholder
        Placeholder that is fed with the grid states.
    fixed_state : sequence of 4 floats
        Requested (x, theta, v, omega) slice location. It is not modified.
    state_norm : tuple, optional
        ``(x_max, theta_max, v_max, omega_max)``. When given, axis limits and
        the values in the titles are rescaled by it, with the two angular
        entries converted from radians to degrees. When None, normalized
        values are shown (the axis labels still name physical units).
    """
    fig, ax = plt.subplots(1, 2, figsize=(12, 5), dpi=300)
    fig.subplots_adjust(wspace=0.4, hspace=0.2)
    ticks = np.linspace(-1., 1., 9)
    cutoff = 1. - 1e-5

    discretization = lyapunov.discretization

    # Copy before snapping: np.asarray would alias an ndarray argument of the
    # same dtype, and the in-place writes below would then change the
    # caller's array.
    fixed_state = np.array(fixed_state, dtype=NP_DTYPE)
    for i in range(4):
        dist = np.square(discretization.discrete_points[i] - fixed_state[i])
        idx = np.argmin(dist)
        fixed_state[i] = discretization.discrete_points[i][idx]
    x_fix, theta_fix, v_fix, omega_fix = fixed_state
    pos_set = np.logical_and(discretization.all_points[:, 1] == theta_fix, discretization.all_points[:, 3] == omega_fix)
    vel_set = np.logical_and(discretization.all_points[:, 0] == x_fix, discretization.all_points[:, 2] == v_fix)

    if state_norm is not None:
        x_max, theta_max, v_max, omega_max = state_norm
        scale = np.array([x_max, np.rad2deg(theta_max), v_max, np.rad2deg(omega_max)]).reshape((-1, 1))
        limits = scale * discretization.limits
        x_fix, theta_fix, v_fix, omega_fix = fixed_state * scale.ravel()
    else:
        limits = discretization.limits

    # Build the policy op once and evaluate it for both slices
    policy_op = lyapunov.policy(tf_states)

    # Fix theta and omega, plot policy over x and v
    grid = discretization.all_points[pos_set, :]
    z = session.run(policy_op, feed_dict={tf_states: grid}).reshape(discretization.num_points[[0, 2]])
    im = ax[0].imshow(z.T, origin='lower', extent=limits[(0, 2), :].ravel(), aspect=limits[0, 0] / limits[2, 0], cmap=HEAT_MAP, vmin=-cutoff, vmax=cutoff)
    cbar = fig.colorbar(im, ax=ax[0], label=r'$u = \pi(x)$', ticks=ticks)
    ax[0].set_title(r'$\theta = %.3g$ deg, $\omega = %.3g$ deg/s' % (theta_fix, omega_fix))
    ax[0].set_xlabel(r'$x$ [m]')
    ax[0].set_ylabel(r'$v$ [m/s]')

    # Fix x and v, plot policy over theta and omega
    grid = discretization.all_points[vel_set, :]
    z = session.run(policy_op, feed_dict={tf_states: grid}).reshape(discretization.num_points[[1, 3]])
    im = ax[1].imshow(z.T, origin='lower', extent=limits[(1, 3), :].ravel(), aspect=limits[1, 0] / limits[3, 0], cmap=HEAT_MAP, vmin=-cutoff, vmax=cutoff)
    cbar = fig.colorbar(im, ax=ax[1], label=r'$u = \pi(x)$', ticks=ticks)
    ax[1].set_title(r'$x = %.3g$ m, $v = %.3g$ m/s' % (x_fix, v_fix))
    ax[1].set_xlabel(r'$\theta$ [deg]')
    ax[1].set_ylabel(r'$\omega$ [deg/s]')

    plt.show()


# Visualize policy
# fixed_state = [0., 0., 0., 0.]
# plot_policy(lyapunov, tf.placeholder(OPTIONS.tf_dtype, shape=[None, state_dim], name='temp_states'), fixed_state, state_norm)

## Lyapunov Function

In [None]:
# Define the Lyapunov function corresponding to the known policy
lyapunov_function = safe_learning.QuadraticFunction(P)
grad_lyapunov_function = safe_learning.LinearSystem((2*P,))

# Lipschitz constants
L_pol = lambda s: tf.constant(np.linalg.norm(-K, 1), dtype=TF_DTYPE)
L_dyn = lambda s: np.linalg.norm(A_true, 1) + np.linalg.norm(B_true, 1)*L_pol(s)

if USE_LIPSCHITZ_SCALING:
    L_v = lambda s: tf.abs(grad_lyapunov_function(s))
else:
    L_v = lambda s: tf.norm(grad_lyapunov_function(s), ord=1, axis=1, keep_dims=True)
    
# Set initial safe set as a level set of the Lyapunov function
states = tf.placeholder(OPTIONS.tf_dtype, shape=[None, state_dim], name='temp_states')
values = lyapunov_function(states).eval({states: state_discretization.all_points})
cutoff = 5e-3 * np.max(values)
initial_safe_set = np.squeeze(values, axis=1) <= cutoff

# Set initial safe set as a hypercube in the state space
# norm_states = state_discretization.all_points / safe_norm
# initial_safe_set = np.all(np.logical_and(norm_states >= -1, norm_states <= 1), axis=1, keepdims=False)

# Initialize class
print('Initializing ...')
lyapunov = safe_learning.Lyapunov(state_discretization, lyapunov_function, dynamics, 
                                  L_dyn, L_v, tau, policy, initial_safe_set, adaptive=ADAPTIVE)
print('Done!')

## TensorFlow Graph

In [None]:
# Build the evaluation graph once; on later runs of this cell the nodes are
# taken from _STORAGE instead of being created again.
storage = get_storage(_STORAGE)
if storage is None:
    # Current
    states = tf.placeholder(OPTIONS.tf_dtype, shape=[None, lyapunov.discretization.ndim], name='states')
    actions = policy(states)
    values = lyapunov.lyapunov_function(states)
    
    # Predicted future: GP mean and error of the next state, and the resulting
    # bound on the change of the Lyapunov value
    future_states_mean, future_states_error = lyapunov.dynamics(states, actions)
    future_values_mean = lyapunov.lyapunov_function(future_states_mean)
    lv = lyapunov.lipschitz_lyapunov(future_states_mean)
    future_values_error = tf.reduce_sum(lv * future_states_error, axis=1, keepdims=True)
    dv_mean = future_values_mean - values
    dv_bound = dv_mean + future_values_error
    
    # True future: change of the Lyapunov value under the true dynamics
    future_states = true_dynamics(states, actions)
    future_values = lyapunov.lyapunov_function(future_states)
    dv = future_values - values
    
    # Discretization effects
    # NOTE: `tau` is re-bound here from the float of the discretization cell
    # to a placeholder that must be fed at evaluation time.
    tau = tf.placeholder(OPTIONS.tf_dtype, shape=[None, 1], name='discretization_constant')
    threshold = lyapunov.threshold(states, tau)
    negative = tf.less(dv_bound, threshold)
    
    # Place into storage (the predicted-future intermediates are not stored)
    storage = [('states', states), ('actions', actions), ('values', values), 
               ('future_states', future_states), ('future_values', future_values), ('dv', dv),
               ('tau', tau), ('threshold', threshold), ('negative', negative)]
    set_storage(_STORAGE, storage)
else:
    # Get from storage
    # (assumes storage.values() yields the nodes in the order they were
    # stored above -- confirm against get_storage)
    states, actions, values, future_states, future_values, dv, tau, threshold, negative  = storage.values()


## Visualize Lyapunov ROA and Discretization Effects

In [None]:
def find_nearest(array, value, sorted_1d=True):
    """Return the index and value of the entry of `array` closest to `value`.

    Parameters
    ----------
    array : 1-D array_like
        Candidate values. Must be in ascending order when `sorted_1d` is True.
    value : scalar
        Query value.
    sorted_1d : bool
        Set to False when `array` is not sorted.

    Returns
    -------
    idx : int
        Position of the nearest entry in the array that was passed in.
    nearest : scalar
        ``array[idx]``.

    Raises
    ------
    IndexError
        If `array` is empty.

    Notes
    -----
    When `value` is exactly halfway between two neighbouring entries, the
    larger one is returned.
    """
    if sorted_1d:
        order = None
        ordered = array
    else:
        # Search in a sorted view, but remember where each entry came from so
        # the returned index refers to the caller's array, not to the sorted
        # copy.
        array = np.asarray(array)
        order = np.argsort(array)
        ordered = array[order]

    pos = np.searchsorted(ordered, value, side='left')
    # searchsorted gives the first entry >= value; step back when the left
    # neighbour is strictly closer or when value lies beyond the last entry.
    if pos > 0 and (pos == len(ordered) or np.abs(value - ordered[pos - 1]) < np.abs(value - ordered[pos])):
        pos -= 1

    idx = pos if order is None else order[pos]
    return idx, array[idx]


def gridify(norms, maxes=None, num_points=25):
    """Create a symmetric ``safe_learning.GridWorld`` in normalized coordinates.

    Dimension ``i`` of the grid spans ``[-maxes[i] / norms[i], maxes[i] / norms[i]]``.

    Parameters
    ----------
    norms : array_like
        Normalization constant per dimension (flattened).
    maxes : array_like, optional
        Maximum value per dimension (flattened). Defaults to `norms`, which
        gives limits of [-1, 1] in every dimension.
    num_points : int or sequence
        Number of grid points. A plain int is repeated for every dimension;
        anything else is passed to ``GridWorld`` unchanged.
    """
    norms = np.asarray(norms).ravel()
    maxes = norms if maxes is None else np.asarray(maxes).ravel()

    upper = maxes / norms
    limits = np.column_stack((-upper, upper))

    if isinstance(num_points, int):
        num_points = [num_points] * len(norms)
    return safe_learning.GridWorld(limits, num_points)


In [None]:
# Physical scale of each state dimension (angular entries in degrees); used by
# the plotting cell below to convert normalized grid limits to plot limits
norms = np.asarray([x_max, np.rad2deg(theta_max), v_max, np.rad2deg(omega_max)])
maxes = np.copy(norms)
grid = lyapunov.discretization

# Snap fixed_state to the closest grid point
fixed_state = np.asarray([0., 0., 0., 0.], dtype=OPTIONS.np_dtype)
fixed_index = np.zeros_like(fixed_state, dtype=int)
for d in range(grid.ndim):
    fixed_index[d], fixed_state[d] = find_nearest(grid.discrete_points[d], fixed_state[d])

# Get 2d-planes of the discretization (x vs. v, theta vs. omega) according to fixed_state
# Each entry of `planes` lists the two dimensions that are HELD FIXED for one
# slice: [1, 3] fixes theta and omega (slice over x and v), [0, 2] fixes x and
# v (slice over theta and omega). The plotting cell iterates planes[::-1] to
# obtain the matching free dimensions.
planes = [[1, 3], [0, 2]]
grid_slices = []
for p in planes:
    grid_slices.append(np.logical_and(grid.all_points[:, p[0]] == fixed_state[p[0]], 
                                      grid.all_points[:, p[1]] == fixed_state[p[1]]).ravel())

# Adaptive discretization
# For each slice evaluate the ratio threshold / dv on the slice's grid points.
# NaN entries and negative ratios are mapped to -1 (drawn with the colormap's
# transparent "under" color in the plotting cell); the rest is rounded up.
refinements = []
for mask in grid_slices:
    feed_dict = {states: grid.all_points[mask], tau: [[np.sum(grid.unit_maxes) / 2]]}
    N = (threshold / dv).eval(feed_dict)
    N[np.isnan(N)] = -1
    N[N < 0] = -1
    N = np.ceil(N)
    refinements.append(N)

In [None]:
fontsize = 16
plt.rc('font', size=fontsize)

# Color scale for the refinement values: values above Nmax are gold, values
# below vmin=0 (the -1 markers) are fully transparent
Nmax = 2000
cmap = plt.get_cmap('viridis', lut=Nmax)
cmap.set_over('gold')
cmap.set_under((1., 1., 1., 0.))
# Grid limits converted from normalized coordinates to plot units
plot_limits = np.asarray(norms).reshape((-1, 1)) * grid.limits
pad = 10

# fig.subplots_adjust(wspace=0.4)

fig1, ax1 = plt.subplots(1, 1, figsize=(8, 8), dpi=OPTIONS.dpi)
fig2, ax2 = plt.subplots(1, 1, figsize=(8, 8), dpi=OPTIONS.dpi)

# planes[::-1] gives, for each refinement array, the two FREE dimensions of
# the slice it was computed on (first x/v, then theta/omega)
for i, (p, N, fig, ax) in enumerate(zip(planes[::-1], refinements, [fig1, fig2], [ax1, ax2])):
    z = N.reshape(grid.num_points[p])
    im = ax.imshow(z.T, origin='lower', extent=plot_limits[p].ravel(), aspect=plot_limits[p[0], 1] / plot_limits[p[1], 1], 
                   cmap=cmap, vmin=0, vmax=Nmax)
    cbar = fig.colorbar(im, ax=ax, label=r'$N({\bf x})$')
    grid_string = (r'$M = {}$'.format(grid.num_points[0] - 1) 
                   + ',  ' + r'$|\mathcal{X}_\tau|$ = ' + r'{:.1e}'.format((grid.num_points[0] - 1) ** grid.ndim) 
                   + ',  ' + r'$\tau$ = ' + r'{:.0e}'.format(np.sum(grid.unit_maxes) / 2))
    
    if i == 0:
        ax.set_title(r'$\phi = \dot{\phi} = 0$' + '\n' + grid_string, fontsize=fontsize)
        ax.set_xlabel(r'$x$ [m]', labelpad=pad)
        ax.set_ylabel(r'$\dot{x}$ [m/s]', labelpad=pad)
    else:
        ax.set_title(r'$x = \dot{x} = 0$' + '\n' + grid_string, fontsize=fontsize)
        ax.set_xlabel(r'$\phi$ [deg]', labelpad=pad)
        ax.set_ylabel(r'$\dot{\phi}$ [deg/s]', labelpad=pad)

    # Relabel the colorbar ticks and mark the last one as ">= Nmax".
    # NOTE(review): multiplying by Nmax assumes the tick positions are
    # normalized to [0, 1] -- confirm for the matplotlib version in use.
    yticks = cbar.ax.get_yticks()
    tick_labels = ['{:.0f}'.format(y * Nmax) for y in yticks]
    tick_labels[-1] = r'$\geq {}$'.format(Nmax)
    cbar.ax.set_yticklabels(tick_labels)

plt.show()

if OPTIONS.save_figs:
    # savefig does not create missing directories, so make sure the target
    # folder exists before writing
    if not os.path.isdir(OPTIONS.fig_path):
        os.makedirs(OPTIONS.fig_path)
    fig1.savefig(OPTIONS.fig_path + 'cartpole_stability_Nreq_x.pdf', bbox_inches='tight')
    fig2.savefig(OPTIONS.fig_path + 'cartpole_stability_Nreq_phi.pdf', bbox_inches='tight')

In [None]:
# Grid for computing c_max
N = 51
norms = np.asarray([x_max, np.rad2deg(theta_max), v_max, np.rad2deg(omega_max)])
maxes = np.copy(norms)
grid = gridify(norms, maxes, N)

vals, true_dv = session.run([values, dv], {states: grid.all_points})
D = (true_dv < 0).ravel()

# Set value order
value_order = np.argsort(vals.ravel())
safe        = D[value_order]
safe[0]     = True                        # set origin to be safe
max_index   = np.argmin(safe) - 1         # argmin returns index of first False, otherwise 0

c_max = vals[value_order[max_index]]
V_max = (vals <= c_max).ravel()

print(c_max)
print(D.sum() / grid.nindex)
print(V_max.sum())


# #------------------------------------------------------------------------------#
# # Grid for nice plotting
# N = 151
# grid = gridify(norms, maxes, N)

# # Snap fixed_state to the closest grid point
# fixed_state = np.asarray([0., 0., 0., 0.], dtype=OPTIONS.np_dtype)
# fixed_index = np.zeros_like(fixed_state, dtype=int)
# for d in range(grid.ndim):
#     fixed_index[d], fixed_state[d] = find_nearest(grid.discrete_points[d], fixed_state[d])

# # Get 2d-planes of the discretization (x vs. v, theta vs. omega) according to fixed_state
# planes = [[1, 3], [0, 2]]
# grid_slices = []
# for p in planes:
#     grid_slices.append(np.logical_and(grid.all_points[:, p[0]] == fixed_state[p[0]], 
#                                       grid.all_points[:, p[1]] == fixed_state[p[1]]).ravel())

# decrease_sets = [(dv.eval({states: grid.all_points[mask]}) < 0).ravel() for mask in grid_slices]
# safe_sets = [(values.eval({states: grid.all_points[mask]}) <= c_max).ravel() for mask in grid_slices]

In [None]:
# fontsize = 16
# plt.rc('font', size=fontsize)

# colors      = [(1., 1., 1., 0.), (0., 0., 0., 0.3), (1., 0., 0., 0.75)]
# cmap        = ListedColormap(colors)
# plot_limits = np.asarray(norms).reshape((-1, 1)) * grid.limits
# pad         = 10

# # fig.subplots_adjust(wspace=0.4)
# fig1, ax1 = plt.subplots(1, 1, figsize=(8, 8), dpi=OPTIONS.dpi)
# fig2, ax2 = plt.subplots(1, 1, figsize=(8, 8), dpi=OPTIONS.dpi)

# for i, (p, dec, safe, fig, ax) in enumerate(zip(planes[::-1], decrease_sets, safe_sets, [fig1, fig2], [ax1, ax2])):
#     z = (dec.astype(int) + safe.astype(int)).reshape(grid.num_points[p])
#     z[75, 75] = 2
#     im = ax.imshow(z.T, origin='lower', extent=plot_limits[p].ravel(), aspect=plot_limits[p[0], 1] / plot_limits[p[1], 1], cmap=cmap)
    
#     if i == 0:
#         ax.set_title(r'$\phi = \dot{\phi} = 0$' + '\n', fontsize=fontsize)
#         ax.set_xlabel(r'$x$ [m]', labelpad=pad)
#         ax.set_ylabel(r'$\dot{x}$ [m/s]', labelpad=pad)
#     else:
#         ax.set_title(r'$x = \dot{x} = 0$' + '\n', fontsize=fontsize)
#         ax.set_xlabel(r'$\phi$ [deg]', labelpad=pad)
#         ax.set_ylabel(r'$\dot{\phi}$ [deg/s]', labelpad=pad)

#     proxy = [plt.Rectangle((0,0), 1, 1, fc=c) for c in colors[1:]]
#     labels = [r'$\mathcal{D}$',
#               r'$\mathcal{V}\!\ (c_{max})$']
#     ax.legend(proxy, labels, loc='upper right')
        
# plt.show()
# if OPTIONS.save_figs:
#     fig1.savefig(OPTIONS.fig_path + 'cartpole_stability_Vmax_x.pdf', bbox_inches='tight')
#     fig2.savefig(OPTIONS.fig_path + 'cartpole_stability_Vmax_phi.pdf', bbox_inches='tight')

## True ROA

In [None]:
def compute_roa(grid, closed_loop_dynamics, horizon=250, tol=1e-3, equilibrium=None, no_traj=True):
    """Approximate the region of attraction by forward simulation.

    Every point of `grid` is propagated through `closed_loop_dynamics` for
    ``horizon - 1`` steps; a point counts as attracted when its final state
    lies within Euclidean distance `tol` of `equilibrium`.

    Parameters
    ----------
    grid : object
        Provides ``all_points`` (one state per row), ``nindex`` and ``ndim``.
    closed_loop_dynamics : callable
        Maps an array of states to the array of successor states.
    horizon : int
        Trajectory length, counting the initial state.
    tol : float
        Distance threshold for membership.
    equilibrium : ndarray, optional
        Target state; defaults to the origin with shape ``(1, grid.ndim)``.
    no_traj : bool
        If True only the end states are kept; otherwise the complete
        trajectories are stored and returned as well.

    Returns
    -------
    ``(roa, dists)`` when `no_traj` is True, otherwise
    ``(roa, dists, trajectories)`` with `trajectories` of shape
    ``(grid.nindex, grid.ndim, horizon)``.
    """
    steps = range(1, horizon)
    trajectories = None

    # Forward-simulate every grid point
    if no_traj:
        end_states = grid.all_points
        for _ in steps:
            end_states = closed_loop_dynamics(end_states)
    else:
        trajectories = np.empty((grid.nindex, grid.ndim, horizon))
        trajectories[:, :, 0] = grid.all_points
        for k in steps:
            trajectories[:, :, k] = closed_loop_dynamics(trajectories[:, :, k - 1])
        end_states = trajectories[:, :, -1]

    if equilibrium is None:
        equilibrium = np.zeros((1, grid.ndim))

    # States that end up "close" to the equilibrium form the approximate ROA
    dists = np.linalg.norm(end_states - equilibrium, ord=2, axis=1)
    roa = dists <= tol

    if no_traj:
        return roa, dists
    return roa, dists, trajectories


# Settings for the (currently commented-out) ROA computation below
grid = lyapunov.discretization
# One closed-loop step: `future_states` is the graph node
# true_dynamics(states, policy(states)) built in the "TensorFlow Graph" cell
closed_loop_dynamics = lambda x: future_states.eval({states: x})
horizon = 500
tol     = 0.1

# roa, _ = compute_roa(grid, closed_loop_dynamics, horizon, tol, no_traj=True)
# print(roa.sum() / grid.nindex)

## Hyperparameter Training via MLE

In [None]:
if OPTIONS.train_hyperparameters:
    # Sample safe state-action pairs (x, u) and observations from the dynamics
    safe_states = lyapunov.discretization.all_points[lyapunov.safe_set.ravel()]

    # np.random.choice(..., replace=False) raises a ValueError when more
    # samples are requested than points are available, so cap the request at
    # the size of the current safe set.
    num_samples = min(int(1e3), safe_states.shape[0])

    if num_samples == 0:
        print('Safe set is empty; skipping hyperparameter optimization.')
    else:
        idx = np.random.choice(safe_states.shape[0], num_samples, replace=False)
        training_states = safe_states[idx, :]
        training_actions = lyapunov.policy(training_states).eval()
        # GP inputs are stacked (state, action) rows; targets are the true
        # next states
        X = np.concatenate((training_states, training_actions), axis=1)
        Y = true_dynamics(training_states, training_actions).eval()
        optimize_gp_hyperparameters(lyapunov, X, Y)


## Initial Safe Set Visualization

In [None]:
# Compare safe set before and after checking the decrease condition for the first time
c_max = lyapunov.feed_dict[lyapunov.c_max]
init_safe_set_size = np.sum(lyapunov.safe_set)

print('Before update ...')
print('c_max: {}'.format(c_max))
print('Safe set size: {}\n'.format(init_safe_set_size))

# old_safe_set = np.copy(lyapunov.safe_set)
lyapunov.update_safe_set()

c_max = lyapunov.feed_dict[lyapunov.c_max]
init_safe_set_size = lyapunov.safe_set.sum()

print('After update ...')
print('c_max: {}'.format(c_max))
print('Safe set size: {}'.format(init_safe_set_size))

# debug(lyapunov, true_dynamics, state_norm, Nmax=50, plot='cartpole')


## Online Learning and Exploration

In [None]:
# action_variation = np.array([-0.1, -0.01, -0.001, 0., 0.001, 0.01, 0.1], dtype=np_dtype).reshape((-1, 1))
# Action perturbations passed to get_safe_sample in update_gp; here only the
# unperturbed action (0) is used
action_variation = np.array([0.], dtype=NP_DTYPE).reshape((-1, 1))

# Graph nodes for measuring the true dynamics at one stacked (state, action)
# row.
# NOTE(review): true_dynamics is called with a single stacked argument here
# but with separate (states, actions) elsewhere -- confirm it accepts both.
with tf.name_scope('add_new_measurement'):
    full_dim = state_dim + action_dim 
    tf_max_state_action = tf.placeholder(TF_DTYPE, shape=[1, full_dim])
    tf_measurement = true_dynamics(tf_max_state_action)
    
def update_gp():
    """Add one actively selected measurement to the GP dynamics model.

    Picks a state-action pair with `safe_learning.get_safe_sample`, evaluates
    `tf_measurement` at that pair, and appends the (input, output) pair to
    `lyapunov.dynamics`. As a side effect the sampled pair is stored in
    `lyapunov.feed_dict` under `tf_max_state_action`.
    """
    # The second return value of get_safe_sample is not needed here.
    sample, _ = safe_learning.get_safe_sample(
        lyapunov,
        action_variation,
        action_limits,
        positive=True,
        num_samples=1000,
    )

    # Feed the sample through the shared feed dict and evaluate the true dynamics.
    feed = lyapunov.feed_dict
    feed[tf_max_state_action] = sample
    observation = tf_measurement.eval(feed_dict=feed)

    lyapunov.dynamics.add_data_point(sample, observation)

In [None]:
# Schedule: number of GP measurements gathered before each safe-set update,
# and number of safe-set updates performed by one execution of this cell.
data_per_update = 0
safe_set_updates = 1

# Arguments passed positionally to lyapunov.update_safe_set below.
batch_size = int(1e3)
can_shrink = True
n_max = 5000
safety_factor = 1.
parallel_iterations = NUM_CORES

print('Discretization size: {}\n'.format(lyapunov.discretization.nindex))

# When the history arrays already exist (cell executed before), extend them;
# otherwise the NameError branch creates them with the current values in slot 0.
try:
    e = len(level) - 1
    padding = np.zeros(safe_set_updates)
    level = np.concatenate((level, padding))
    safe_size = np.concatenate((safe_size, padding))
    temp = data_size[-1] + data_per_update * np.arange(1, safe_set_updates + 1)
    data_size = np.concatenate((data_size, temp))
except NameError:
    e = 0
    level = np.zeros(safe_set_updates + 1)
    safe_size = np.zeros(safe_set_updates + 1)
    level[0] = lyapunov.feed_dict[lyapunov.c_max]
    safe_size[0] = np.sum(lyapunov.safe_set)
    data_size = data_per_update * np.arange(safe_set_updates + 1)

for i in range(safe_set_updates):
    step = e + i + 1
    print('Iteration {} with c_max: {}'.format(step, lyapunov.feed_dict[lyapunov.c_max]))

    # TODO single-threaded bottleneck for large state spaces?
    if data_per_update > 0:
        start = time.time()
        for _ in range(data_per_update):
            update_gp()
        end = time.time()
        duration_gp = end - start
        print('Duration (gp update, total): {}'.format(duration_gp))
        print('Duration (gp update, avg): {}'.format(duration_gp / data_per_update))

    start = time.time()
    lyapunov.update_safe_set(batch_size, can_shrink, n_max, safety_factor, parallel_iterations)
    end = time.time()
    duration_lyap = end - start
    print('Duration (safe set update): {}'.format(duration_lyap))

    # Read the post-update state once and record it in the histories.
    new_c_max = lyapunov.feed_dict[lyapunov.c_max]
    num_safe = lyapunov.safe_set.sum()
    num_data = lyapunov.dynamics.functions[0].X.shape[0]
    level[step] = new_c_max
    safe_size[step] = num_safe

    print("Data points collected: {}".format(num_data))
    print('Safe set size: {} ({:.2f}%)'.format(num_safe, 100 * num_safe / lyapunov.discretization.nindex))
    print('Growth: {}'.format(num_safe - init_safe_set_size))
    print("NEW C_MAX: {}".format(new_c_max))
    print('')

In [None]:
# Font size shared by the figures drawn further down in this cell.
fontsize = 16
plt.rc('font', size=fontsize)

# Work on a copy of lyapunov._n so the edits made later in this cell do not touch the original.
N = np.array(lyapunov._n, copy=True)
num_states = len(N)
num_refined_states = (N[N > 1] ** state_dim).sum()

summary = [
    ('Grid size:', num_states),
    ('Safe set size:', num_safe),
    ('Refined grids size:', num_refined_states),
    ('Effective total grid size:', num_refined_states + np.count_nonzero(N <= 1)),
    ('Effective safe grid size:', num_refined_states + np.count_nonzero(N == 1)),
]
for label, value in summary:
    print(label, value)

# debug(lyapunov, true_dynamics, state_norm, Nmax=n_max, plot='cartpole', fixed_state=(0., 0., 0., 0.))

# Evaluate `values` and `dv` on every grid point and derive the boolean masks used for plotting
grid = lyapunov.discretization
vals, true_dv = session.run([values, dv], feed_dict={states: grid.all_points})
D = np.ravel(true_dv < 0)
V = np.ravel(vals <= c_max)
Vn = np.ravel(vals <= lyapunov.feed_dict[lyapunov.c_max])
# Points inside the current level set with a non-positive N are set to 1; remaining zeros become -1
N[Vn & (N <= 0)] = 1
N[N == 0] = -1

# Snap fixed_state to the closest grid point
fixed_state = np.zeros(4, dtype=OPTIONS.np_dtype)
fixed_index = np.zeros(fixed_state.shape, dtype=int)
for d in range(grid.ndim):
    nearest_index, nearest_value = find_nearest(grid.discrete_points[d], fixed_state[d])
    fixed_index[d] = nearest_index
    fixed_state[d] = nearest_value

# Get 2d-planes of the discretization (x vs. v, theta vs. omega) according to fixed_state
planes = [[1, 3], [0, 2]]
grid_slices = []
for p in planes:
    first, second = p
    on_first = grid.all_points[:, first] == fixed_state[first]
    on_second = grid.all_points[:, second] == fixed_state[second]
    grid_slices.append((on_first & on_second).ravel())
    
    
# One figure per 2-D slice of the state grid.
fig1, ax1 = plt.subplots(1, 1, figsize=(8, 8), dpi=OPTIONS.dpi)
fig2, ax2 = plt.subplots(1, 1, figsize=(8, 8), dpi=OPTIONS.dpi)

# grid_slices[k] selects the points where the dimensions in planes[k] equal fixed_state,
# so the dimensions that vary within that slice are the other pair, planes[::-1][k].
# Hence `p` holds the two plotted dimensions for `mask`.
for i, (p, mask, fig, ax) in enumerate(zip(planes[::-1], grid_slices, [fig1, fig2], [ax1, ax2])):
    
    # Bottom layer: the boolean mask D (true_dv < 0) on this slice.
    # NOTE(review): binary_cmap is defined elsewhere; presumably it maps False/True to
    # transparent / the given RGBA colour — confirm.
    z = D[mask].reshape(grid.num_points[p])
    im = ax.imshow(z.T, origin='lower', extent=plot_limits[p].ravel(), aspect=plot_limits[p[0], 1] / plot_limits[p[1], 1], 
                   cmap=binary_cmap((0., 0., 0., 0.3)))
    # Disabled alternative layer showing the mask V (vals <= c_max):
#     z = V[mask].reshape(grid.num_points[p])
#     im = ax.imshow(z.T, origin='lower', extent=plot_limits[p].ravel(), aspect=plot_limits[p[0], 1] / plot_limits[p[1], 1], 
#                    cmap=binary_cmap('blue', 0.3))
    
    # Middle layer: the values of N on this slice, drawn with viridis.
    # Values below vmin=0 are fully transparent (set_under) and values above vmax are gold (set_over).
    # NOTE(review): vmax is hard-coded to 1000 while the colormap has n_max entries — confirm this is intended.
    z = N[mask].reshape(grid.num_points[p])
    cmap = plt.get_cmap('viridis', lut=n_max)
    cmap.set_over('gold')
    cmap.set_under((1., 1., 1., 0.))
    im = ax.imshow(z.T, origin='lower', extent=plot_limits[p].ravel(), aspect=plot_limits[p[0], 1] / plot_limits[p[1], 1], 
                   cmap=cmap, vmin=0, vmax=1000)
    cbar = fig.colorbar(im, ax=ax, label=r'$N({\bf x})$')
    # Second title line, built from grid.num_points[0], grid.ndim and grid.unit_maxes.
    grid_string = (r'$M = {}$'.format(grid.num_points[0] - 1) 
               + ',  ' + r'$|\mathcal{X}_\tau|$ = ' + r'{:.1e}'.format((grid.num_points[0] - 1) ** grid.ndim) 
               + ',  ' + r'$\tau$ = ' + r'{:.0e}'.format(np.sum(grid.unit_maxes) / 2))
    
    # First figure: dimensions 0 and 2 are plotted (labelled x and x-dot);
    # second figure: dimensions 1 and 3 (labelled phi and phi-dot).
    if i == 0:
        ax.set_title(r'$\phi = \dot{\phi} = 0$' + '\n' + grid_string, fontsize=fontsize)
        ax.set_xlabel(r'$x$ [m]', labelpad=pad)
        ax.set_ylabel(r'$\dot{x}$ [m/s]', labelpad=pad)
    else:
        ax.set_title(r'$x = \dot{x} = 0$' + '\n' + grid_string, fontsize=fontsize)
        ax.set_xlabel(r'$\phi$ [deg]', labelpad=pad)
        ax.set_ylabel(r'$\dot{\phi}$ [deg/s]', labelpad=pad)

    # Top layer: the initial safe set on this slice; the two-colour map draws
    # the first entry fully transparent and the second in opaque red.
    initial_safe_set = lyapunov.initial_safe_set[mask].reshape(grid.num_points[p])
    cmap = ListedColormap([(1., 1., 1., 0.), (1., 0., 0., 1)])
    im = ax.imshow(initial_safe_set.T, origin='lower', extent=plot_limits[p].ravel(), aspect=plot_limits[p[0], 1] / plot_limits[p[1], 1], 
                   cmap=cmap, vmin=None, vmax=None)
    
    # Legend: imshow layers have no legend handles, so coloured rectangles stand in as proxies.
    colors = [(0., 0., 0., 0.3), 'red']
    proxy = [plt.Rectangle((0,0), 1, 1, fc=c) for c in colors]
    labels = [r'$\mathcal{D}$', r'$\mathcal{S}^{\!\ 0}_\pi$']
    ax.legend(proxy, labels, loc='upper right', fontsize=fontsize)
    
plt.show()

# Write both figures as PDFs under OPTIONS.fig_path when saving is enabled.
if OPTIONS.save_figs:
    fig1.savefig(OPTIONS.fig_path + 'cartpole_stability_safe_x.pdf', bbox_inches='tight')
    fig2.savefig(OPTIONS.fig_path + 'cartpole_stability_safe_phi.pdf', bbox_inches='tight')

## Training Results

In [None]:
# Plot the recorded c_max and safe-set-size histories against the data-point counts.
plt.rc('font', size=8)
stop = len(data_size)

fig, ax = plt.subplots(1, 2, sharex=False, figsize=(10, 3), dpi=300)
fig.subplots_adjust(wspace=0.3, hspace=0.2)

# Left panel: c_max history; right panel: safe set size history.
panels = ((level, r'$c_{max}$'), (safe_size, r'Safe set size'))
for axis, (series, ylabel) in zip(ax, panels):
    axis.step(data_size[:stop], series[:stop], 'o--', where='post')
    axis.set_xlabel(r'Number of data points collected', fontsize=12)
    axis.set_ylabel(ylabel, fontsize=12)

plt.show()

# Dump the raw histories for reference.
for history in (level, data_size, safe_size):
    print(history)

In [None]:
# Hard-coded result series (presumably copied from an earlier run — verify before reuse).
# n: x-axis values, cmax: level c_n per entry of n, safe: safe set size per entry of n.
n = np.array([0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1100, 1100])
cmax = np.array([1.7935, 1.797, 1.797, 1.8433, 3.9234, 4.0195, 4.0214,
                 4.0214, 4.1209, 4.1209, 4.1209, 4.1209, 4.1209, 4.1209])
safe = np.array([110491, 110847, 110847, 115307, 369831, 383585, 383865,
                 383865, 398003, 398003, 398003, 398003, 398003, 398003])
# NOTE(review): this rebinds `grid`, which other cells use for the discretization
# object; those cells reassign it before use, but re-running cells out of order can break.
grid = 6765201
# dv_ normalises the safe set sizes in the second figure; vmax / dv_ is drawn as the 'c = c_max' line.
dv_ = 2619588
vmax = 430693

# Legend: coloured rectangles stand in as handles for the two series.
colors = ['red', 'blue']
proxy = [plt.Rectangle((0, 0), 1, 1, fc=c) for c in colors]
labels = [r'$c = c_n$', r'$c = c_{max}$']

plt.rc('font', size=16)

# Figure 1: level c_n against n.
fig, ax = plt.subplots(1, 1, figsize=(6, 5), dpi=100)
ax.step(n, cmax, 'ro', where='post', linestyle='--')
ax.set_xlabel(r'$n$')
ax.set_ylabel(r'$c_n$')
ax.set_ylim([1.5, 4.5])
# Honour the same switch as the other figure-saving code in this notebook.
if OPTIONS.save_figs:
    fig.savefig(OPTIONS.fig_path + 'cartpole_cn.pdf', bbox_inches='tight')

# Figure 2: safe set size relative to dv_, with the vmax / dv_ reference line.
fig, ax = plt.subplots(1, 1, figsize=(6, 5), dpi=100)
ax.step(n, safe / dv_, 'ro', where='post', linestyle='--')
ax.plot(n, (vmax / dv_) * np.ones_like(n), 'b', linewidth=2)
ax.set_xlabel(r'$n$')
ax.set_ylabel(r'$|\mathcal{V}(c) \cap \mathcal{X}_\tau|\ / \ |\mathcal{D} \cap \mathcal{X}_\tau|$')
ax.set_ylim([0.04, 0.18])
ax.legend(proxy, labels, loc='lower right', fontsize=16)
if OPTIONS.save_figs:
    fig.savefig(OPTIONS.fig_path + 'cartpole_safesize.pdf', bbox_inches='tight')

plt.show()

print(c_max)