# Learning a Lyapunov Function for a Cart-Pole

In [None]:
from __future__ import division, print_function

import numpy as np
import tensorflow as tf
import gpflow
import safe_learning
import matplotlib.pyplot as plt
import time
import pandas
import mosek
import cvxpy as cvx
import os

from scipy.linalg import block_diag
from utilities import CartPole, debug, LyapunovNetwork
from safe_learning.utilities import get_storage, set_storage
from tqdm import tqdm
from tensorflow.python.client import timeline
from matplotlib.colors import ListedColormap
from matplotlib.font_manager import FontProperties

%matplotlib inline

# TODO testing ****************************************#
class Options(object):
    """Plain attribute container populated from arbitrary keyword arguments."""

    def __init__(self, **kwargs):
        """Expose every keyword argument as an instance attribute of the same name."""
        super(Options, self).__init__()
        vars(self).update(kwargs)

# Notebook-wide settings, read by the cells below as OPTIONS.<name>.
OPTIONS = Options(np_dtype              = safe_learning.config.np_dtype,
                  tf_dtype              = safe_learning.config.dtype,
                  saturate              = True,    # wrap the policy in a Saturation to [-1, 1]
                  eps                   = 1e-8,    # small constant passed to LyapunovNetwork and added to denominators
                  use_linear_dynamics   = False,   # True: use the linearized (A, B) system instead of the CartPole model
                  use_lipschitz_scaling = True,    # True: L_v is the element-wise |gradient|; False: its 1-norm
                  use_zero_threshold    = True,    # True: discretization constant tau is set to 0
                  dpi                   = 200,     # resolution of the figures created by the plot helpers
                  fontproperties        = FontProperties(size=5),
                  log_path              ='./tensorflow_logs/cartpole/')  # directory given to tf.summary.FileWriter
#******************************************************#

# Dictionary handed to get_storage/set_storage to keep graph tensors between cell executions
_STORAGE = {}

# Colormap for heat maps; out-of-range values are drawn white (over) / black (under)
HEAT_MAP = plt.get_cmap('inferno', lut=None)
HEAT_MAP.set_over('white')
HEAT_MAP.set_under('black')

# Colormap with 21 entries for value-function plots
LEVEL_MAP = plt.get_cmap('viridis', lut=21)
LEVEL_MAP.set_over('gold')
LEVEL_MAP.set_under('white')

# Two-color map for Boolean images: fully transparent white, then semi-transparent green
BINARY_MAP = ListedColormap([(1., 1., 1., 0.), (0., 1., 0., 0.65)])

# Display formatting for pandas and NumPy output
pandas.options.display.float_format = '{:,.4f}'.format
pandas.set_option('expand_frame_repr', False)
np.set_printoptions(precision=4)

# Default font size for all matplotlib figures
plt.rc('font', size=5)

## TensorFlow Session

In [None]:
# Thread/device counts used to configure the TensorFlow session below.
# NUM_CORES and NUM_SOCKETS are hard-coded — NOTE(review): presumably matched to the original machine; adjust as needed.
MAX_CPU_COUNT = os.cpu_count()
NUM_CORES = 8
NUM_SOCKETS = 2

# OpenMP-related environment variables, set before the session is created
os.environ["KMP_BLOCKTIME"]    = str(0)
os.environ["KMP_SETTINGS"]     = str(1)
os.environ["KMP_AFFINITY"]     = 'granularity=fine,noverbose,compact,1,0'
os.environ["OMP_NUM_THREADS"]  = str(NUM_CORES)

config = tf.ConfigProto(intra_op_parallelism_threads  = NUM_CORES,
                        inter_op_parallelism_threads  = NUM_SOCKETS,
                        allow_soft_placement          = False,
#                         log_device_placement          = True,
                        device_count                  = {'CPU': MAX_CPU_COUNT},
                       )

# TODO manually for CPU-only?
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

# Close the session left over from a previous run of this cell, if one exists
try:
    session.close()
except NameError:
    pass
session = tf.InteractiveSession(config=config)

# print('Found MAX_CPU_COUNT =', MAX_CPU_COUNT)
# for dev in session.list_devices():
#     print(dev)

## Dynamics

In [None]:
# Constants
dt = 0.01   # sampling time
g = 9.81    # gravity

# True system parameters
m = 0.175    # pendulum mass
M = 1.732    # cart mass
L = 0.28     # pole length
b = 0.1      # rotational friction

# State and action normalizers
x_max     = 0.5
theta_max = np.deg2rad(30)
v_max     = 2. # 5.
omega_max = np.sqrt(g / L)
u_max     = (M + m) * g * np.tan(theta_max)

# State order is (x, theta, v, omega); the action is a single input
state_norm = (x_max, theta_max, v_max, omega_max)
action_norm = (u_max, )

# Dimensions and domains
state_dim = 4
action_dim = 1
# Limits are [-1, 1] in every dimension (normalized units)
state_limits = np.array([[-1., 1.]] * state_dim)
action_limits = np.array([[-1., 1.]] * action_dim)

# True system
cartpole = CartPole(m, M, L, b, dt, [state_norm, action_norm])
# Linearization of the cart-pole; A and B are reused for the LQR design and Lipschitz constants
A, B = cartpole.linearize()

# Choose between the linearized model and the CartPole object's own __call__
if OPTIONS.use_linear_dynamics:
    dynamics = safe_learning.functions.LinearSystem((A, B), name='dynamics')
else:
    dynamics = cartpole.__call__
    
print(state_norm)
print(action_norm)

## State Discretization

In [None]:
# Number of states along each dimension
num_states = [51, ] * state_dim

# State grid over [-1, 1] in each of the four state dimensions
grid_limits = np.array([[-1., 1.], [-1., 1.], [-1., 1.], [-1., 1.]])
state_discretization = safe_learning.GridWorld(grid_limits, num_states)

# Discretization constant: zero when OPTIONS.use_zero_threshold is set,
# otherwise half the sum of the grid's unit_maxes
if OPTIONS.use_zero_threshold:
    tau = 0.0
else:
    tau = np.sum(state_discretization.unit_maxes) / 2

print('Grid size: {}'.format(state_discretization.nindex))
print('Discretization constant: {}'.format(tau))

## Cost Function

In [None]:
# State cost matrix (diagonal, equal weight 0.1 on every state)
Q = np.diag([0.1, 0.1, 0.1, 0.1]).astype(OPTIONS.np_dtype)

# Action cost matrix
R = 0.1 * np.identity(action_dim).astype(OPTIONS.np_dtype)

# Normalize cost matrices
# cost_norm = np.max([Q.max(), R.max()])
# Q = Q / cost_norm
# R = R / cost_norm

# Quadratic cost function built from the block-diagonal matrix diag(Q, R)
cost_function = safe_learning.QuadraticFunction(block_diag(Q, R), name='cost_function')

## Policy

In [None]:
# Fix policy to the LQR solution for the true system
# (K is the feedback gain; P is reused below as the quadratic Lyapunov candidate)
K, P = safe_learning.utilities.dlqr(A, B, Q, R)
policy = safe_learning.LinearSystem(- K, name='policy')
# Optionally saturate the policy output to [-1, 1]
if OPTIONS.saturate:
    policy = safe_learning.Saturation(policy, -1, 1)


def find_nearest(array, value, sorted_1d=True):
    """Locate the entry of a 1-D array that is closest to `value`.

    Parameters
    ----------
    array : 1-D sequence
        Candidate values; sorted ascending unless `sorted_1d` is False.
    value : scalar
        Query value.
    sorted_1d : bool
        If False, `array` is sorted first and the returned index refers to
        the sorted copy.

    Returns
    -------
    (index, nearest) : the position of the closest entry and its value.
        On an exact tie the upper neighbor is chosen.
    """
    candidates = array if sorted_1d else np.sort(array)
    upper = np.searchsorted(candidates, value, side='left')

    # No lower neighbor exists: the first entry is the answer
    if upper == 0:
        return upper, candidates[upper]

    lower = upper - 1
    # Value lies beyond the last entry: only the lower neighbor exists
    if upper == len(candidates):
        return lower, candidates[lower]

    # Both neighbors exist: take the lower one only if it is strictly closer
    if np.abs(value - candidates[lower]) < np.abs(value - candidates[upper]):
        return lower, candidates[lower]
    return upper, candidates[upper]
    
    
def plot_policy(policy, discretization, state_norm=None, fixed_state=[0., 0., 0., 0.]):
    """Plot the policy output over the (x, v) and (theta, omega) planes of a grid.

    Parameters
    ----------
    policy : callable
        Called on an array of grid points; its result must provide `.eval()`.
    discretization : grid object
        Must provide `ndim`, `discrete_points`, `all_points`, `num_points`
        and `limits`.
    state_norm : tuple, optional
        (x_max, theta_max, v_max, omega_max). When given, the axis extents are
        scaled by these values, with the angular entries converted from
        radians to degrees.
    fixed_state : sequence of float
        State whose entries are used for the two coordinates NOT shown in a
        panel. It is snapped to the nearest grid point in every dimension.
    """
    # Snap fixed_state to the closest grid point. np.array always copies, so
    # neither the list default nor a caller-supplied array is modified.
    fixed_state = np.array(fixed_state, dtype=OPTIONS.np_dtype)
    for d in range(discretization.ndim):
        _, fixed_state[d] = find_nearest(discretization.discrete_points[d], fixed_state[d])

    # Get 2d-planes of the discretization (x vs. v, theta vs. omega) according to fixed_state.
    # For each plane the coordinates that are *not* plotted are held at fixed_state,
    # so the selected points vary exactly along the two plotted axes.
    planes = [[0, 2], [1, 3]]
    all_points = discretization.all_points
    disc_slices = []
    for p in planes:
        held = [d for d in range(discretization.ndim) if d not in p]
        disc_slices.append(np.all(all_points[:, held] == fixed_state[held], axis=1))

    # Plot x vs. v, and theta vs. omega
    if state_norm is not None:
        x_max, theta_max, v_max, omega_max = state_norm
        scale = np.array([x_max, np.rad2deg(theta_max), v_max, np.rad2deg(omega_max)]).reshape((-1, 1))
        limits = scale * discretization.limits
    else:
        limits = discretization.limits

    plt.rc('font', size=5)
    fig, axes = plt.subplots(1, 2, figsize=(6, 3), dpi=OPTIONS.dpi)
    fig.subplots_adjust(wspace=0.4, hspace=0.2)
    ticks = np.linspace(-1., 1., 9)
    # Color range slightly inside [-1, 1] so that saturated outputs fall into the over/under colors
    cutoff = 1. - 1e-10

    for i, p in enumerate(planes):
        z = policy(all_points[disc_slices[i]]).eval()
        # assumes all_points is ordered with earlier dimensions varying slowest — TODO confirm
        z = z.reshape(discretization.num_points[p])
        im = axes[i].imshow(z.T, origin='lower', extent=limits[p, :].ravel(), aspect=limits[p[0], 0] / limits[p[1], 0],
                            cmap=HEAT_MAP, vmin=-cutoff, vmax=cutoff)
        fig.colorbar(im, ax=axes[i], label=r'$u = \pi(x)$', ticks=ticks)
        if i == 0:
            axes[i].set_xlabel(r'$x$ [m]')
            axes[i].set_ylabel(r'$v$ [m/s]')
        else:
            axes[i].set_xlabel(r'$\theta$ [deg]')
            axes[i].set_ylabel(r'$\omega$ [deg/s]')

    plt.show()

# Visualize policy on the state grid, with axes rescaled by state_norm
plot_policy(policy, state_discretization, state_norm)

## LQR Lyapunov Candidate

In [None]:
# Define the Lyapunov function corresponding to the known policy
lyapunov_function = safe_learning.QuadraticFunction(P)
grad_lyapunov_function = safe_learning.LinearSystem((2 * P,))

# Initial safe set: grid points whose value is at most 1% of the largest value on the grid
# initial_safe_set = np.all(state_discretization.all_points == 0.0, axis=1)
values = session.run(lyapunov_function(state_discretization.all_points))
cutoff = 0.01 * np.max(values)
initial_safe_set = np.squeeze(values, axis=1) <= cutoff

# Scaling: rebuild both functions with P divided by the largest grid value computed above
lyapunov_function = safe_learning.QuadraticFunction(P / np.max(values))
grad_lyapunov_function = safe_learning.LinearSystem((2 * P / np.max(values),))

# Lipschitz constants, computed from 1-norms of the LQR gain and the linearization (A, B)
L_pol = lambda s: tf.constant(np.linalg.norm(-K, 1), dtype=OPTIONS.tf_dtype)
L_dyn = lambda s: np.linalg.norm(A, 1) + np.linalg.norm(B, 1)*L_pol(s)

# Lipschitz "constant" of the Lyapunov function: element-wise |gradient| or its row-wise 1-norm
if OPTIONS.use_lipschitz_scaling:
    L_v = lambda s: tf.abs(grad_lyapunov_function(s))
else:
    L_v = lambda s: tf.norm(grad_lyapunov_function(s), ord=1, axis=1, keep_dims=True)

# Initialize class
lyapunov_lqr = safe_learning.Lyapunov(state_discretization, lyapunov_function, dynamics, L_dyn, L_v, tau, policy, initial_safe_set)

In [None]:
# Report the LQR candidate's level c_max and safe-set size before and after
# calling update_values() / update_safe_set().
c = lyapunov_lqr.feed_dict[lyapunov_lqr.c_max]
num_grid = lyapunov_lqr.discretization.nindex
num_safe = lyapunov_lqr.safe_set.sum()

print('Before update ...')
print('c_max: {}'.format(c))
print('grid size: {}'.format(num_grid))
print('safe set size: {} ({:.2f}%)'.format(num_safe, 100 * num_safe / num_grid))
debug(lyapunov_lqr, dynamics, state_norm, plot='cartpole')

lyapunov_lqr.update_values()
lyapunov_lqr.update_safe_set()
# Re-read both quantities, since the update calls above may have changed them
c = lyapunov_lqr.feed_dict[lyapunov_lqr.c_max]
num_safe = lyapunov_lqr.safe_set.sum()

print('After update ...')
print('c_max: {}'.format(c))
print('grid size: {}'.format(num_grid))
print('safe set size: {} ({:.2f}%)'.format(num_safe, 100 * num_safe / num_grid))
debug(lyapunov_lqr, dynamics, state_norm, plot='cartpole')

## Neural Network Lyapunov Candidate

In [None]:
# Neural-network Lyapunov candidate: two layers of 64 units with tanh activations.
# NOTE: this rebinds `lyapunov_function`, which previously held the LQR quadratic.
layer_dims = [64, 64]
activations = [tf.tanh, tf.tanh]
lyapunov_function = LyapunovNetwork(state_dim, layer_dims, activations, OPTIONS.eps)

# TODO outputs tensor of shape (1, ?, 2)
# grad_lyapunov_function = lambda s: tf.gradients(lyapunov_function(s), s)
# if USE_LIPSCHITZ_SCALING:
#     L_v = lambda s: tf.abs(grad_lyapunov_function(s))
# else:
#     L_v = lambda s: tf.norm(grad_lyapunov_function(s), ord=1, axis=1, keepdims=True)

# Constant stand-in used instead of the gradient-based L_v commented out above
L_v = 1.

# TODO need to use template before variables exist in the graph
# Call the network once on a placeholder, then initialize the variables listed in `.parameters`
tf_states = tf.placeholder(OPTIONS.tf_dtype, shape=[None, state_dim], name='states')
temp = lyapunov_function(tf_states)
session.run(tf.variables_initializer(lyapunov_function.parameters))

# Same grid, dynamics, L_dyn, tau, policy and initial safe set as the LQR instance
lyapunov = safe_learning.Lyapunov(state_discretization, lyapunov_function, dynamics, L_dyn, L_v, tau, policy, initial_safe_set)

## TensorFlow Graph

In [None]:
# Build the evaluation tensors once and keep them in _STORAGE; if get_storage
# returns something other than None, the stored tensors are reused instead.
storage = get_storage(_STORAGE)
if storage is None:
    # Closed loop: states -> policy actions -> next states
    tf_states = tf.placeholder(OPTIONS.tf_dtype, shape=[None, state_dim], name='states')
    tf_actions = policy(tf_states)
    tf_future_states = dynamics(tf_states, tf_actions)
    
    # LQR candidate: value now, value after one step, and their difference
    tf_values_lqr = lyapunov_lqr.lyapunov_function(tf_states)
    tf_future_values_lqr = lyapunov_lqr.lyapunov_function(tf_future_states)
    tf_dv_lqr = tf_future_values_lqr - tf_values_lqr

    # Neural-network candidate: same three quantities
    tf_values = lyapunov.lyapunov_function(tf_states)
    tf_future_values = lyapunov.lyapunov_function(tf_future_states)
    tf_dv = tf_future_values - tf_values
    
    # Boolean per state: is the one-step change below the threshold?
    tf_threshold = lyapunov.threshold(tf_states, lyapunov.tau)
    tf_negative = tf.squeeze(tf.less(tf_dv, tf_threshold), axis=1)
    
    # The order of this list must match the unpacking order in the else-branch below
    storage = [('states', tf_states), 
               ('future_states', tf_future_states), 
               ('values_lqr', tf_values_lqr), 
               ('values', tf_values), 
               ('future_values_lqr', tf_future_values_lqr), 
               ('future_values', tf_future_values),
               ('dv_lqr', tf_dv_lqr), 
               ('dv', tf_dv),
               ('threshold', tf_threshold), 
               ('negative', tf_negative)]
    set_storage(_STORAGE, storage)
else:
    # presumably get_storage returns an ordered mapping keyed by the names above — verify
    (tf_states, tf_future_states, tf_values_lqr, tf_values, tf_future_values_lqr, tf_future_values, 
     tf_dv_lqr, tf_dv, tf_threshold, tf_negative)  = storage.values()

## True Region of Attraction

In [None]:
def gridify(norms, maxes=None, num_points=25):
    """Build a GridWorld whose limits are +/- maxes / norms in every dimension.

    Parameters
    ----------
    norms : array_like
        Per-dimension normalizers; flattened to 1-D.
    maxes : array_like, optional
        Per-dimension maxima; defaults to `norms`.
    num_points : int or sequence
        Number of grid points per dimension; a single int is repeated for
        every dimension.

    Returns
    -------
    safe_learning.GridWorld
    """
    norms = np.asarray(norms).ravel()
    maxes = norms if maxes is None else np.asarray(maxes).ravel()

    lower = - maxes / norms
    upper = maxes / norms
    limits = np.column_stack((lower, upper))

    # Broadcast a single count to all dimensions
    per_dim = [num_points, ] * len(norms) if isinstance(num_points, int) else num_points
    return safe_learning.GridWorld(limits, per_dim)


def compute_roa(grid, closed_loop_dynamics, horizon=250, tol=1e-3, equilibrium=None, no_traj=True):
    """Approximate the region of attraction by forward simulation.

    Parameters
    ----------
    grid : np.ndarray or grid object
        Either an array of initial states with one row per state, or an
        object providing `all_points`, `nindex` and `ndim`.
    closed_loop_dynamics : callable
        Maps an array of states to the array of successor states.
    horizon : int
        Trajectory length including the initial state, i.e. the dynamics are
        applied `horizon - 1` times.
    tol : float
        A state belongs to the ROA if its final 2-norm distance to the
        equilibrium is at most `tol`.
    equilibrium : np.ndarray, optional
        Target state; defaults to the origin.
    no_traj : bool
        If True only the final states are kept; otherwise the full
        trajectories are stored and returned as well.

    Returns
    -------
    (roa, dists) if `no_traj`, else (roa, dists, trajectories), where `roa`
    is a flat Boolean mask and `dists` the flat final distances.
    """
    if isinstance(grid, np.ndarray):
        all_points = grid
        nindex, ndim = grid.shape[0], grid.shape[1]
    else:
        all_points, nindex, ndim = grid.all_points, grid.nindex, grid.ndim

    if equilibrium is None:
        equilibrium = np.zeros((1, ndim))

    num_steps = len(range(1, horizon))
    trajectories = None
    if no_traj:
        # Only the most recent states are kept in memory
        end_states = all_points
        for _ in range(num_steps):
            end_states = closed_loop_dynamics(end_states)
    else:
        # Store every intermediate state along the last axis
        trajectories = np.empty((nindex, ndim, horizon))
        trajectories[:, :, 0] = all_points
        for prev in range(num_steps):
            trajectories[:, :, prev + 1] = closed_loop_dynamics(trajectories[:, :, prev])
        end_states = trajectories[:, :, -1]

    # States that end up "close" to the equilibrium form the approximate ROA
    offsets = end_states - equilibrium
    dists = np.ravel(np.linalg.norm(offsets, ord=2, axis=1, keepdims=True))
    roa = dists <= tol

    if no_traj:
        return roa, dists
    return roa, dists, trajectories


# Plot limits in physical units, with the angular entries converted from radians to degrees
norms = np.array([x_max, np.rad2deg(theta_max), v_max, np.rad2deg(omega_max)])
maxes = np.copy(norms)
plot_limits = np.column_stack((- maxes, maxes))
# N = 21
# grid = gridify(norms, maxes, N)
grid = lyapunov.discretization

# One closed-loop step, evaluated through the TensorFlow graph on NumPy states
closed_loop_dynamics = lambda x: tf_future_states.eval({tf_states: x})
horizon = 1000
tol = 0.001
# With no_traj=True, compute_roa returns only (roa, dists); unpacking a third
# value here raised a ValueError.
roa, dists = compute_roa(grid, closed_loop_dynamics, horizon, tol, no_traj=True)


In [None]:
def plot_roa(roa, grid, plot_limits, fixed_state=[0., 0., 0., 0.]):
    """Plot 2-D slices of a Boolean region-of-attraction mask.

    Parameters
    ----------
    roa : np.ndarray of bool
        ROA membership for every grid point, either flat (one entry per grid
        point, as returned by `compute_roa`) or already shaped like
        `grid.num_points`.
    grid : grid object
        Must provide `ndim`, `discrete_points` and `num_points`.
    plot_limits : np.ndarray
        One (lower, upper) row per state dimension, used for the image
        extents and aspect ratios.
    fixed_state : sequence of float
        State at which the two dimensions not shown in a panel are held;
        snapped to the nearest grid point per dimension.

    Returns
    -------
    (fig, axes) : the matplotlib figure and its two axes,
        (x vs. v) and (theta vs. omega).
    """
    # A flat mask cannot be indexed per dimension, so bring it into grid shape first
    roa = np.reshape(roa, grid.num_points)

    # Snap fixed_state to the closest grid point (np.array copies, so the input is untouched)
    fixed_state = np.array(fixed_state, dtype=OPTIONS.np_dtype)
    fixed_index = np.zeros_like(fixed_state, dtype=int)
    for d in range(grid.ndim):
        fixed_index[d], fixed_state[d] = find_nearest(grid.discrete_points[d], fixed_state[d])

    plt.rc('font', size=5)
    fig, axes = plt.subplots(1, 2, figsize=(6, 3), dpi=OPTIONS.dpi)
    fig.subplots_adjust(wspace=0.4, hspace=0.2)

    # x vs. v, with theta and omega held at the fixed state
    ax = axes[0]
    z = roa[:, fixed_index[1], :, fixed_index[3]]
    ax.imshow(z.T, origin='lower', extent=plot_limits[[0, 2], :].ravel(), aspect=plot_limits[0, 1] / plot_limits[2, 1], cmap=BINARY_MAP, vmin=0)
    ax.set_xlabel(r'$x$ [m]')
    ax.set_ylabel(r'$v$ [m/s]')

    # theta vs. omega, with x and v held at the fixed state
    ax = axes[1]
    z = roa[fixed_index[0], :, fixed_index[2], :]
    ax.imshow(z.T, origin='lower', extent=plot_limits[[1, 3], :].ravel(), aspect=plot_limits[1, 1] / plot_limits[3, 1], cmap=BINARY_MAP, vmin=0)
    ax.set_xlabel(r'$\theta$ [deg]')
    ax.set_ylabel(r'$\omega$ [deg/s]')

    return fig, axes

# Fraction of grid points classified as inside the ROA, followed by the slice plots
print('ROA fraction: {}'.format(roa.sum() / grid.nindex))
fig, axes = plot_roa(roa, grid, plot_limits)


### Neural Network: Supervised Training with LQR Solution

In [None]:
# Supervised objective: mean absolute difference between the network values and the
# LQR values, divided by (LQR value + eps).
with tf.name_scope('supervised_lyapunov_learning'):
    tf_costs = tf.abs(tf_values_lqr - tf_values)
    tf_inverse_weights = tf_values_lqr + OPTIONS.eps
#     tf_inverse_weights = 1
    tf_objective = tf.reduce_mean(tf_costs / tf_inverse_weights, name='objective')
    
    # Plain gradient descent on the network parameters only
    tf_learning_rate = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='learning_rate')
    optimizer = tf.train.GradientDescentOptimizer(tf_learning_rate)
    lyapunov_update = optimizer.minimize(tf_objective, var_list=lyapunov.lyapunov_function.parameters)

# Re-initialize the network parameters before training
session.run(tf.variables_initializer(lyapunov_function.parameters))
# lyapunov.update_values()
# lyapunov.update_safe_set()
# debug(lyapunov, dynamics, state_norm, plot='cartpole')

# History of the test objective; training states are the grid points in the initial safe set
obj = []
level_states = lyapunov_lqr.discretization.all_points[lyapunov.initial_safe_set]
# level_states = lyapunov_lqr.discretization.all_points
# level_states = lyapunov_lqr.discretization.all_points

In [None]:
# Training batch from level set: random row indices into level_states
tf_batch_size = tf.placeholder(tf.int32, [], 'batch_size')
tf_batch = tf.random_uniform([tf_batch_size, ], 0, level_states.shape[0], dtype=tf.int32, name='batch_sample')

# Test set: a fixed random sample (drawn with replacement) of the training states
test_size = int(1e4)
idx = tf_batch.eval({tf_batch_size: test_size})
test_set = level_states[idx, :]

# Uniformly-distributed test set
# test_size = int(1e5)
# grid_length = np.power(test_size, 1 / state_dim)        # test_size = N^d, solve for N
# grid_length = int(2 * np.floor(grid_length / 2) + 1)    # round N to the nearest odd integer to include 0 in grid
# state_limits = np.array([[-1., 1.]] * state_dim)        # states are normalized to [-1, 1]^d
# num_points = [grid_length, ] * state_dim
# test_set = safe_learning.GridWorld(state_limits, num_points).all_points
    
feed_dict = {
    tf_states:         level_states,
    tf_learning_rate:  1e-3,
    tf_batch_size:     int(1e2),
}
max_iters = 250

for i in tqdm(range(max_iters)):
    # One gradient step on a freshly sampled mini-batch
    idx = tf_batch.eval(feed_dict)
    feed_dict[tf_states] = level_states[idx, :]
    session.run(lyapunov_update, feed_dict)

    # Record the objective on the fixed test set
    feed_dict[tf_states] = test_set
    obj.append(tf_objective.eval(feed_dict).ravel())

In [None]:
# Test objective recorded after every supervised training iteration
fig, ax = plt.subplots(1, 1, figsize=(3, 2), dpi=300)
ax.set_xlabel(r'iteration')
ax.set_ylabel(r'objective')
ax.plot(obj, '.-r')

plt.show()

In [None]:
def plot_value_functions(grid, maxes, fixed_state=[0., 0., 0., 0.]):
    """Plot value functions and their one-step changes for both candidates.

    The top row shows the neural network ('NN'), the bottom row the LQR
    quadratic ('LQR'). Columns are: values on (x, v), values on
    (theta, omega), one-step change on (x, v), one-step change on
    (theta, omega).

    Parameters
    ----------
    grid : grid object
        Must provide `all_points`, `num_points`, `ndim` and `discrete_points`.
    maxes : np.ndarray
        Per-dimension plot maxima; the image extents are [-maxes, maxes].
    fixed_state : sequence of float
        State at which the two dimensions not shown in a panel are held;
        snapped to the nearest grid point per dimension.
    """
    fetches = [tf_values, tf_values_lqr, tf_dv, tf_dv_lqr]
    evaluated = session.run(fetches, {tf_states: grid.all_points})
    nn_values, lqr_values, nn_change, lqr_change = [arr.reshape(grid.num_points) for arr in evaluated]

    # Snap fixed_state to the closest grid point
    fixed_state = np.asarray(fixed_state, dtype=OPTIONS.np_dtype)
    fixed_index = np.zeros_like(fixed_state, dtype=int)
    for d in range(grid.ndim):
        fixed_index[d], fixed_state[d] = find_nearest(grid.discrete_points[d], fixed_state[d])

    plt.rc('font', size=6)
    fig, axes = plt.subplots(2, 4, figsize=(12, 6), dpi=OPTIONS.dpi)
    fig.subplots_adjust(wspace=0.6, hspace=0.1)
    limits = np.column_stack((- maxes, maxes))

    # Each plane: index into the 4-D arrays, plotted dimensions, axis labels
    everything = slice(None)
    cart_plane = ((everything, fixed_index[1], everything, fixed_index[3]),
                  [0, 2], r'$x$ [m]', r'$v$ [m/s]')
    pole_plane = ((fixed_index[0], everything, fixed_index[2], everything),
                  [1, 3], r'$\theta$ [deg]', r'$\omega$ [deg/s]')

    value_style = (LEVEL_MAP, {'vmin': 0}, r'$v(x)$')
    change_style = (HEAT_MAP, {'vmax': 0}, r'$v(f(x)) - v(x)$')

    rows = [('NN', nn_values, nn_change), ('LQR', lqr_values, lqr_change)]
    for row, (title, v, dv) in enumerate(rows):
        panels = [(v, cart_plane, value_style),
                  (v, pole_plane, value_style),
                  (dv, cart_plane, change_style),
                  (dv, pole_plane, change_style)]
        for col, (data, plane, style) in enumerate(panels):
            index, dims, xlabel, ylabel = plane
            cmap, color_limit, bar_label = style
            ax = axes[row, col]
            z = data[index]
            im = ax.imshow(z.T, origin='lower', extent=limits[dims, :].ravel(),
                           aspect=limits[dims[0], 0] / limits[dims[1], 0], cmap=cmap, **color_limit)
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            fig.colorbar(im, ax=ax, label=bar_label)
            ax.set_title(title)

    plt.show()

# Compare the trained network with the LQR candidate on the grid
plot_value_functions(grid, maxes)

In [None]:
# Refresh the network candidate's values and safe set, then report the result
lyapunov.update_values()
lyapunov.update_safe_set()

print('c_max: {}'.format(lyapunov.feed_dict[lyapunov.c_max]))
print('safe set size: {}'.format(lyapunov.safe_set.sum()))
debug(lyapunov, dynamics, state_norm, plot='cartpole')

### Neural Network: Training

In [None]:
# Save checkpoint for neural net weights, so the training cells below can restore
# the supervised solution via saver.restore(session, ckpt_path).
# NOTE(review): the checkpoint path is hard-coded under /tmp.
saver = tf.train.Saver(var_list=lyapunov.lyapunov_function.parameters)
ckpt_path = saver.save(session, "/tmp/spencerr_cartpole_lyapunov.ckpt")

In [None]:
# Training graph: treat "value <= c_max" as a classifier for ROA membership and
# penalize increases of the value function on states labeled as inside the ROA.
with tf.name_scope('roa_classification'):
    # Current maximum level set we want to push the ROA in to
    # (tf_level_multiplier is not used by any tensor here; the training loop reads its value from feed_dict)
    tf_level_multiplier = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='level_multiplier')
    tf_c_max = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='c_max')
    
    # True class labels, converted from Boolean ROA labels {0, 1} to {-1, 1}
    tf_weights = tf.placeholder(OPTIONS.tf_dtype, shape=[None, 1], name='class_weights')
    tf_roa = tf.placeholder(OPTIONS.tf_dtype, shape=[None, 1], name='labels')
    tf_labels = 2 * tf_roa - 1

    # Construct classifier with output (-1, 1)
#     tf_classifier_output = tf.tanh(100 * (tf_c_max - tf_values))
#     tf_classifier_output = (tf_c_max - tf_values) / (tf.abs(tf_c_max - tf_values) + OPTIONS.eps)
    # Active variant: the raw margin c_max - V(x), positive inside the level set
    tf_classifier_output = tf_c_max - tf_values
    
    # Use hinge or perceptron loss for the classification performance
    # (as written: per-sample weight times max(-label * margin, 0))
    tf_classifier_loss = tf_weights * tf.maximum(- tf_labels * tf_classifier_output, 0, name='hinge_loss')
        
    # Enforce decrease constraint with Lagrangian relaxation
    # (only on samples labeled inside the ROA; the change is divided by V(x) + eps)
    tf_lagrange_multiplier = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='lagrange_multiplier')
    tf_decrease_loss = tf_roa * tf.maximum(tf_dv / (tf_values + OPTIONS.eps), 0)
    
    # Construct objective and optimizer
    tf_objective = tf.reduce_mean(tf_classifier_loss + tf_lagrange_multiplier * tf_decrease_loss, name='objective')
    tf_learning_rate = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='learning_rate')
    # tf_epsilon is only referenced by the commented-out Adam optimizer below
    tf_epsilon = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='adam_epsilon')
    
    optimizer = tf.train.GradientDescentOptimizer(tf_learning_rate)
#     optimizer = tf.train.AdamOptimizer(tf_learning_rate, epsilon=tf_epsilon)
#     training_update = optimizer.minimize(tf_objective, var_list=lyapunov.lyapunov_function.parameters)
    # Gradients are computed separately so they can also be written to histogram summaries
    grads_and_vars = optimizer.compute_gradients(tf_objective, lyapunov.lyapunov_function.parameters)
    training_update = optimizer.apply_gradients(grads_and_vars)
      

# Random mini-batch indices in [0, tf_idx_range)
with tf.name_scope('sampling'):
    tf_batch_size = tf.placeholder(tf.int32, [], 'batch_size')
    tf_idx_range = tf.placeholder(tf.int32, shape=[], name='indices_to_sample')
    tf_idx_batch = tf.random_uniform([tf_batch_size, ], 0, tf_idx_range, dtype=tf.int32, name='batch_sample')
    
#     with tf.name_scope('accuracy'):
#         tf_accuracy = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
#         acc = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
#         acc = tf.reduce_mean(tf.cast(acc, tf.float32))

In [None]:
def balanced_confusion_weights(y, y_true, scale_by_total=True):
    """Weight samples inversely to the size of their confusion-matrix cell.

    Parameters
    ----------
    y : array_like
        Predicted labels in {0, 1} (anything castable to bool).
    y_true : array_like
        True labels in {0, 1}, same shape as `y`.
    scale_by_total : bool
        If True, multiply all weights by the total number of samples.

    Returns
    -------
    weights : np.ndarray of float
        Same shape as `y`; each sample gets 1 / (size of its cell), optionally
        scaled by the sample count.
    confusion_counts : np.ndarray
        2x2 array [[TN, FN], [FP, TP]].
    """
    # The builtin bool replaces the np.bool alias, which NumPy 1.24 removed
    y = np.asarray(y).astype(bool)
    y_true = np.asarray(y_true).astype(bool)

    # Assuming labels in {0, 1}, build one mask per confusion-matrix cell
    tp_mask = y & y_true
    tn_mask = ~y & ~y_true
    fp_mask = y & ~y_true
    fn_mask = ~y & y_true
    TP, TN, FP, FN = tp_mask.sum(), tn_mask.sum(), fp_mask.sum(), fn_mask.sum()
    confusion_counts = np.array([[TN, FN], [FP, TP]])

    # Scale each sample by the inverse of its cell count. An empty cell selects
    # no samples, so its zero count never divides anything.
    weights = np.ones_like(y, dtype=float)
    for mask, count in ((tp_mask, TP), (tn_mask, TN), (fp_mask, FP), (fn_mask, FN)):
        weights[mask] /= count
    if scale_by_total:
        weights *= y.size

    return weights, confusion_counts


def balanced_class_weights(y_true, scale_by_total=True):
    """Weight samples inversely to the size of their class.

    Parameters
    ----------
    y_true : array_like
        True labels in {0, 1} (anything castable to bool).
    scale_by_total : bool
        If True, multiply all weights by the total number of samples.

    Returns
    -------
    weights : np.ndarray of float
        Same shape as `y_true`; 1 / (class size) per sample, optionally scaled
        by the sample count.
    class_counts : np.ndarray
        [number of negatives, number of positives].
    """
    # The builtin bool replaces the np.bool alias, which NumPy 1.24 removed
    y = np.asarray(y_true).astype(bool)
    nP = y.sum()
    nN = y.size - nP
    class_counts = np.array([nN, nP])

    # An empty class selects no samples, so its zero count never divides anything
    weights = np.ones_like(y, dtype=float)
    weights[y] /= nP
    weights[~y] /= nN
    if scale_by_total:
        weights *= y.size

    return weights, class_counts

In [None]:
# Restore checkpoint (the supervised solution saved earlier) and refresh the safe set
saver.restore(session, ckpt_path)
lyapunov.update_values()
lyapunov.update_safe_set()
session.run(tf.variables_initializer(optimizer.variables()))  # TODO

# Running counters for the outer / inner training iterations (inner_offset is the summary step)
outer_offset = 0
inner_offset = 0

# Training histories, appended to by the training loop
obj          = []
loss_class   = []
loss_dec     = []
# Running ROA estimate, seeded with the current safe set
roa_estimate = np.copy(lyapunov.safe_set)

c_max        = [lyapunov.feed_dict[lyapunov.c_max], ]
safe_size    = [lyapunov.safe_set.sum() / lyapunov.discretization.nindex, ]
grid         = lyapunov.discretization
iters_to_converge = []


# TensorBoard summaries: the objective plus histograms of every parameter and its gradient
summaries = []
summaries.append(tf.summary.scalar('objective', tf_objective))

# param.name[:-2] drops the last two characters of the variable name (presumably the ':0' suffix)
for param in lyapunov.lyapunov_function.parameters:
    summaries.append(tf.summary.histogram(param.name[:-2], param))

for grad, var in grads_and_vars:
    summaries.append(tf.summary.histogram(var.name[:-2] + '/gradient', grad))

# Merge all summaries into a single op
merged_summary_op = tf.summary.merge(summaries)

# TODO
# Writer for the summaries and the graph definition, logging to OPTIONS.log_path
summary_writer = tf.summary.FileWriter(OPTIONS.log_path, graph=tf.get_default_graph())


In [None]:
# Hyper-parameters for the training loop
outer_iters      = 3          # number of safe-set update rounds
inner_iters      = 15         # maximum SGD steps per round
tol              = 1e-8       # stop a round early once the recorded objective falls below this
horizon          = 50         # simulation steps used to label states in the gap between level sets
batch_size       = int(1e2)
test_size        = int(1e4)   # only referenced by the commented-out test-set sampling in the loop

feed_dict = {
    tf_states:               np.zeros((1, lyapunov.discretization.ndim)), # placeholder
    tf_idx_range:            1,                                           # placeholder
    tf_batch_size:           batch_size,
    tf_c_max:                1.,
    tf_lagrange_multiplier:  100,
    # Optimizer settings and the level multiplier read back by the training loop
    tf_learning_rate:        6e-3,
    tf_epsilon:              1e-1,
    tf_level_multiplier:     3.,
}


In [None]:
# Main training loop. Each outer iteration (1) labels the states between the level
# sets c and level_multiplier * c by forward simulation, (2) runs SGD on the
# classification + decrease objective, and (3) recomputes the values and safe set.
print('Current metrics ...')
c = lyapunov.feed_dict[lyapunov.c_max]
num_safe = lyapunov.safe_set.sum()
print('c_max: {}'.format(c))
print('grid size: {}'.format(grid.nindex))
print('safe set size: {} ({:.2f}% of grid, {:.2f}% of ROA)\n'.format(int(num_safe), 100 * num_safe / grid.nindex, 100 * num_safe / roa.sum()))
print('')
time.sleep(0.5)

for i in range(outer_iters):
    outer_offset += 1
    c = lyapunov.feed_dict[lyapunov.c_max]
#     feed_dict[tf_c_max] = c
#     time.sleep(0.5)
    
    # Get states inside V(a * c_max), a > 1    
    idx_small = lyapunov.values.ravel() <= c
    idx_big = lyapunov.values.ravel() <= feed_dict[tf_level_multiplier] * c
    idx_gap = np.logical_and(idx_big, ~idx_small)
    
    # Simulate the gap states `horizon` steps forward; a gap state is labeled as
    # ROA if its value after the simulation is at most c
    V_gap = grid.all_points[idx_gap]
    V_future = np.copy(V_gap)
    for _ in range(horizon):
        V_future = tf_future_states.eval({tf_states: V_future})
    V_future = tf_values.eval({tf_states: V_future})
    safe_in_future = (V_future <= c).ravel()
    
    roa_estimate[idx_gap] |= safe_in_future
    
    # Training set: every state in the enlarged level set or in the current ROA estimate
    target_idx = np.logical_or(idx_big, roa_estimate)
    target_set = grid.all_points[target_idx]
    target_labels = roa_estimate[target_idx].astype(OPTIONS.np_dtype).reshape([-1, 1])
#     target_set = grid.all_points
#     target_labels = roa_estimate.astype(OPTIONS.np_dtype).reshape([-1, 1])
    feed_dict[tf_idx_range] = target_set.shape[0]
    
    # Test set
#     feed_dict[tf_batch_size] = test_size
#     idx_test = tf_idx_batch.eval(feed_dict)
#     test_set = target_set[idx_test]
#     test_labels = target_labels[idx_test]

    # NOTE(review): these two assignments are overwritten immediately below, so the
    # recorded losses are computed on the training targets, not on the true ROA.
    test_set = grid.all_points
    test_labels = roa.reshape([-1, 1])
    
    test_set = target_set
    test_labels = target_labels
    
    # SGD for classification
    converged = False
    feed_dict[tf_batch_size] = batch_size

    for j in tqdm(range(inner_iters)):
        inner_offset += 1
        # Training step
        idx_batch = tf_idx_batch.eval(feed_dict)
        feed_dict[tf_states] = target_set[idx_batch]
        feed_dict[tf_roa] = target_labels[idx_batch]
#         feed_dict[tf_weights], counts = balanced_confusion_weights(tf_values.eval(feed_dict) <= feed_dict[tf_c_max], feed_dict[tf_roa].astype(bool))
        feed_dict[tf_weights], counts = balanced_class_weights(feed_dict[tf_roa].astype(bool))
        _, summary = session.run([training_update, merged_summary_op], feed_dict=feed_dict)
        summary_writer.add_summary(summary, inner_offset)

        # Record objectives
        feed_dict[tf_states] = test_set
        feed_dict[tf_roa] = test_labels
#         feed_dict[tf_weights], counts = balanced_confusion_weights(tf_values.eval(feed_dict) <= feed_dict[tf_c_max], feed_dict[tf_roa].astype(bool))
        feed_dict[tf_weights], counts = balanced_class_weights(feed_dict[tf_roa].astype(bool))
    
        results = session.run([tf_classifier_loss, tf_decrease_loss], feed_dict)
        loss_class.append(results[0].mean())
        loss_dec.append(results[1].mean())
        obj.append(loss_class[-1] + feed_dict[tf_lagrange_multiplier] * loss_dec[-1])

        # Stop this round early once the recorded objective is below tol
        if obj[-1] < tol:
            converged = True
            break

    # NOTE(review): `j` and `counts` are only defined if the inner loop ran at least once
    iters_to_converge.append(j + 1)
    if converged:
        print('Converged in {} iterations!'.format(j + 1))
    else:
        print('Did not converge!')

    print('Updating values ...')
    lyapunov.update_values()

    print('Updating c_max ...')
    lyapunov.update_safe_set()
    # The ROA estimate only grows: add the newly computed safe set
    roa_estimate |= lyapunov.safe_set

    c_max.append(lyapunov.feed_dict[lyapunov.c_max])
    safe_size.append(lyapunov.safe_set.sum() / grid.nindex)
    print('Done!')
#     print(class_ratio)
    print(counts)
    print('c_max: {}'.format(c_max[-1]))
    print('grid size: {}'.format(grid.nindex))
    print('safe set size: {} ({:.2f}% of grid, {:.2f}% of ROA)\n'.format(int(safe_size[-1] * grid.nindex), 
                                                                         100 * safe_size[-1], 
                                                                         100 * safe_size[-1] * roa.size / roa.sum()))
    time.sleep(0.5)


### Neural Network: Results

In [None]:
plt.rc('font', size=6)
# Fraction of grid points in the simulated ROA; used to normalize safe-set sizes in the next cell
roa_fraction = roa.sum() / roa.size

# Classification loss (red) and weighted decrease loss (blue) over all accumulated SGD iterations
fig, ax = plt.subplots(1, 1, figsize=(3, 3), dpi=300)
ax.plot(loss_class, '.-r')
# ax.tick_params('y', colors='r')
# ax.set_ylim([None, 0.9])

# One x-tick every inner_iters iterations
ax.set_xlabel(r'SGD iteration (accumulated)')
ax.set_xticks(list(range(0, len(loss_class) + 1, inner_iters)))

# ax = ax.twinx()
ax.plot(feed_dict[tf_lagrange_multiplier] * np.asarray(loss_dec), '.-b')
# ax.tick_params('y', colors='b')
# ax.set_ylim([None, 0.0016])

ax.set_ylabel(r'Training loss')


# Colored rectangles serve as legend handles for the two curves
proxy = [plt.Rectangle((0,0), 1, 1, fc=c) for c in ['red', 'blue']]    
legend = ax.legend(proxy, ['Classification loss', 'Lyapunov decrease loss'], loc='upper right')
legend.get_frame().set_alpha(1.)

plt.show()

In [None]:
# Level c_k (red, left axis) and safe-set size relative to the simulated ROA
# (blue, right axis) after every safe-set update
plt.rc('font', size=6)
fig, ax = plt.subplots(1, 1, figsize=(3, 3), dpi=300)

ax.plot(c_max, '.-r')
ax.set_ylabel(r'$c_k$')
ax.tick_params('y', colors='r')
ax.set_ylim([0, 1])

ax.set_xlabel(r'Safe set update iteration $k$')
ax.set_xticks(list(range(0, len(c_max) + 1, 1)))

# Second y-axis sharing the same x-axis
ax = ax.twinx()
ax.plot(np.array(safe_size) / roa_fraction, '.-b')
ax.set_ylabel(r'$|\mathcal{V}(c_k) \cap \mathcal{X}_\tau|\ /\ |\mathcal{R} \cap \mathcal{X}_\tau|$')
ax.tick_params('y', colors='b')
ax.set_ylim([0, 1])

plt.show()

print(np.array(safe_size) / roa_fraction)

### Neural Network: Visualization

In [None]:
# Final comparison of the safe-set sizes of the neural-network and LQR candidates
c = lyapunov.feed_dict[lyapunov.c_max]
num_grid = lyapunov.discretization.nindex
num_safe = lyapunov.safe_set.sum()
num_safe_lqr = lyapunov_lqr.safe_set.sum()

print('c_max: {}'.format(c))
print('grid size: {}'.format(num_grid))
print('ROA size: {:.2f}%'.format(100 * roa.sum() / roa.size))
print('lqr safe set size: {} ({:.2f}%)'.format(num_safe_lqr, 100 * num_safe_lqr / num_grid))
print('nn safe set size: {} ({:.2f}%)'.format(num_safe, 100 * num_safe / num_grid))

# debug(lyapunov, dynamics, state_norm, plot='pendulum')
# debug(lyapunov_lqr, dynamics, state_norm, plot='pendulum')

# LQR safe-set size as a fraction of the simulated ROA
print(num_safe_lqr / roa.sum())