# Lyapunov Neural Network vs. SOS Method

In [None]:
import numpy as np
import tensorflow as tf
import safe_learning
from scipy.linalg import solve_discrete_lyapunov
from utilities import VanDerPol, LyapunovNetwork

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib.font_manager import FontProperties

import os
import time
from tqdm import tqdm

# TODO testing ****************************************#
class Options(object):
    """Plain settings container: each keyword argument becomes an attribute."""

    def __init__(self, **kwargs):
        super().__init__()
        # Store the given settings directly in the instance dictionary.
        vars(self).update(kwargs)

# Notebook-wide settings, gathered in a single attribute-style container.
OPTIONS = Options(np_dtype           = safe_learning.config.np_dtype,  # NumPy dtype used when casting training labels
                  tf_dtype           = safe_learning.config.dtype,     # dtype of the TensorFlow placeholders
                  eps                = 1e-8,                           # small offset passed to LyapunovNetwork and added to loss denominators
                  use_zero_threshold = True,                           # if True, the decrease-threshold parameter tau is set to 0
                  pre_train          = False,                          # if True, run the supervised pre-training loop
                  dpi                = 150,                            # resolution used for every figure
                  fontproperties     = FontProperties(size=10),        # not referenced in the visible part of the notebook
                  save_figs          = False,                          # not referenced in the visible part of the notebook
                  fig_path           = 'figures/sos_lyapunov/')        # not referenced in the visible part of the notebook
#******************************************************#

## SOS Lyapunov Function (from SOSTOOLS)

In [None]:
def monomials(x, deg):
    """Build the monomial feature matrix of a two-dimensional state.

    Columns are ordered by total degree and, within one degree ``d``, by
    decreasing power of the first coordinate:
    ``x, y, x^2, x*y, y^2, x^3, x^2*y, x*y^2, y^3, ...``.
    The constant monomial is not included.

    Parameters
    ----------
    x : array_like
        A single point ``(x, y)`` or an ``(N, 2)`` array of points.
    deg : int
        Highest total degree to generate. Any degree is supported (the
        earlier hand-written version silently stopped at degree 4). For
        ``deg <= 1`` only a copy of the input is returned.

    Returns
    -------
    ndarray
        One row per input point; for two-column input and ``deg >= 1`` there
        are ``deg * (deg + 3) / 2`` columns.
    """
    x = np.atleast_2d(x)
    # Degree-1 features are the coordinates themselves.
    blocks = [np.copy(x)]
    for d in range(2, int(deg) + 1):
        # Degree-d features: x^d, x^(d-1) * y, ..., x * y^(d-1), y^d
        block = np.empty([len(x), d + 1])
        for k in range(d + 1):
            block[:, k] = (x[:, 0] ** (d - k)) * (x[:, 1] ** k)
        blocks.append(block)
    return np.hstack(blocks)


def sos_lyapunov(x, deg):
    """Evaluate the precomputed SOS Lyapunov function for monomial degree ``deg``.

    The value at each point is the quadratic form ``z(x)^T Q z(x)``, where
    ``z(x)`` is the monomial vector returned by ``monomials(x, deg)`` and
    ``Q`` is a hard-coded coefficient matrix for that degree (the resulting
    polynomial therefore has degree up to ``2 * deg``).

    Parameters
    ----------
    x : array_like
        A single point or an ``(N, 2)`` array of points.
    deg : int
        Degree of the monomial basis; must be 1, 2, 3 or 4.

    Returns
    -------
    ndarray
        ``(N, 1)`` array of function values.

    Raises
    ------
    ValueError
        If no coefficient matrix is tabulated for ``deg``.
    """
    # Select the coefficient matrix first so that an unsupported degree fails
    # immediately with a clear message instead of an unbound-variable error.
    if deg==1:
        Q = np.array([[  2.706, -1.012],
                      [ -1.012,  2.675]])
    elif deg==2:
        Q = np.array([[     3.546,    -1.654,   2.75e-9, -2.662e-9,  1.46e-9],
                      [    -1.654,     3.136, -2.662e-9,   1.46e-9, 1.309e-9],
                      [   2.75e-9, -2.662e-9,      1.13,  -0.01511,    1.064],
                      [ -2.662e-9,   1.46e-9,  -0.01511,     1.064,   -1.318],
                      [   1.46e-9,  1.309e-9,     1.064,    -1.318,   0.9461]])
    elif deg==3:
        Q = np.array([
            [      6.301,     -3.172,   1.053e-7, -1.463e-10,    1.0e-8,   -0.2146,  -0.01918,    0.6756,   -0.4285],
            [     -3.172,      4.386, -1.463e-10,     1.0e-8, -6.955e-9,  -0.01918,    0.6756,   -0.4285,    0.2466],
            [   1.053e-7, -1.463e-10,    -0.2146,   -0.01918,    0.6756, -2.935e-8,  3.347e-9, -1.397e-8,  7.877e-9],
            [ -1.463e-10,     1.0e-8,   -0.01918,     0.6756,   -0.4285,  3.347e-9, -1.397e-8,  7.877e-9, -6.379e-9],
            [     1.0e-8,  -6.955e-9,     0.6756,    -0.4285,    0.2466, -1.397e-8,  7.877e-9, -6.379e-9,   2.45e-9],
            [    -0.2146,   -0.01918,  -2.935e-8,   3.347e-9, -1.397e-8,     0.341,    0.2417,   -0.1313,   -0.1159],
            [   -0.01918,     0.6756,   3.347e-9,  -1.397e-8,  7.877e-9,    0.2417,   -0.1313,   -0.1159,     0.193],
            [     0.6756,    -0.4285,  -1.397e-8,   7.877e-9, -6.379e-9,   -0.1313,   -0.1159,     0.193,   -0.1061],
            [    -0.4285,     0.2466,   7.877e-9,  -6.379e-9,   2.45e-9,   -0.1159,     0.193,   -0.1061,   0.01121]])
    elif deg==4:
        Q = np.array([
            [      24.26,      -14.7,   7.798e-9,  -2.459e-8,   2.035e-8,     -1.386,     0.8215,      0.292,   -0.06087,   2.203e-9,   3.081e-9,  -1.233e-9, -3.724e-10,  2.847e-11],
            [      -14.7,      15.45,  -2.459e-8,   2.035e-8,   -1.38e-8,     0.8215,      0.292,   -0.06087,   0.003462,   3.081e-9,  -1.233e-9, -3.724e-10,  2.847e-11, -1.962e-11],
            [   7.798e-9,  -2.459e-8,     -1.386,     0.8215,      0.292,   2.203e-9,   3.081e-9,  -1.233e-9, -3.724e-10,     0.1009,      0.072,    0.01911,   -0.01406,   0.002982],
            [  -2.459e-8,   2.035e-8,     0.8215,      0.292,   -0.06087,   3.081e-9,  -1.233e-9, -3.724e-10,  2.847e-11,      0.072,    0.01911,   -0.01406,   0.002982,  0.0002575],
            [   2.035e-8,   -1.38e-8,      0.292,   -0.06087,   0.003462,  -1.233e-9, -3.724e-10,  2.847e-11, -1.962e-11,    0.01911,   -0.01406,   0.002982,  0.0002575, -0.0001809],
            [     -1.386,     0.8215,   2.203e-9,   3.081e-9,  -1.233e-9,     0.1009,      0.072,    0.01911,   -0.01406,  -2.083e-9, -7.041e-10, -1.186e-10, -3.768e-10,  2.868e-10],
            [     0.8215,      0.292,   3.081e-9,  -1.233e-9, -3.724e-10,      0.072,    0.01911,   -0.01406,   0.002982, -7.041e-10, -1.186e-10, -3.768e-10,  2.868e-10, -1.317e-10],
            [      0.292,   -0.06087,  -1.233e-9, -3.724e-10,  2.847e-11,    0.01911,   -0.01406,   0.002982,  0.0002575, -1.186e-10, -3.768e-10,  2.868e-10, -1.317e-10,  4.177e-11],
            [   -0.06087,   0.003462, -3.724e-10,  2.847e-11, -1.962e-11,   -0.01406,   0.002982,  0.0002575, -0.0001809, -3.768e-10,  2.868e-10, -1.317e-10,  4.177e-11, -1.496e-10],
            [   2.203e-9,   3.081e-9,     0.1009,      0.072,    0.01911,  -2.083e-9, -7.041e-10, -1.186e-10, -3.768e-10,    0.07419,    0.03174,   -0.01252,    0.00105,   0.000431],
            [   3.081e-9,  -1.233e-9,      0.072,    0.01911,   -0.01406, -7.041e-10, -1.186e-10, -3.768e-10,  2.868e-10,    0.03174,   -0.01252,    0.00105,   0.000431,  0.0001552],
            [  -1.233e-9, -3.724e-10,    0.01911,   -0.01406,   0.002982, -1.186e-10, -3.768e-10,  2.868e-10, -1.317e-10,   -0.01252,    0.00105,   0.000431,  0.0001552, -0.0003108],
            [ -3.724e-10,  2.847e-11,   -0.01406,   0.002982,  0.0002575, -3.768e-10,  2.868e-10, -1.317e-10,  4.177e-11,    0.00105,   0.000431,  0.0001552, -0.0003108,   9.085e-5],
            [  2.847e-11, -1.962e-11,   0.002982,  0.0002575, -0.0001809,  2.868e-10, -1.317e-10,  4.177e-11, -1.496e-10,   0.000431,  0.0001552, -0.0003108,   9.085e-5,  -1.182e-5]])
    else:
        raise ValueError('sos_lyapunov: no coefficients for deg={!r}; supported degrees are 1, 2, 3, 4'.format(deg))
    Z = monomials(x, deg)
    # Row-wise quadratic form: sum_j (Z Q)_ij * Z_ij = z_i^T Q z_i
    linear_form = np.matmul(Z, Q)
    quadratic = np.sum(linear_form * Z, axis=1, keepdims=True)
    return quadratic


# One constant per SOS degree, meant to be indexed by `deg` (index 0 is a filler entry).
# NOTE(review): presumably the level-set values bounding the SOS ROA estimates -- confirm against the SOSTOOLS output.
gamma = [0, 2.938, 11.99, 23.17, 65.57]



In [None]:
def binary_cmap(color='red', alpha=1.):
    """Return a two-entry colormap: fully transparent white, then ``color``.

    ``color`` may be one of the names 'red', 'green' or 'blue', in which case
    the matching pure RGB color with opacity ``alpha`` is used. Any other
    value is handed to ``ListedColormap`` unchanged (``alpha`` is then ignored).
    """
    named_rgb = (('red', (1., 0., 0.)),
                 ('green', (0., 1., 0.)),
                 ('blue', (0., 0., 1.)))
    # Fall back to the caller's color specification when no name matches.
    foreground = color
    for name, rgb in named_rgb:
        if color == name:
            foreground = rgb + (alpha,)
            break
    background = (1., 1., 1., 0.)
    return ListedColormap([background, foreground])

# Colormap for heat-map images; values above / below the color range are drawn white / black.
HEAT_MAP = plt.get_cmap('inferno', lut=None)
HEAT_MAP.set_over('white')
HEAT_MAP.set_under('black')

# 21-entry colormap for level-set images; values above / below the color range are drawn gold / white.
LEVEL_MAP = plt.get_cmap('viridis', lut=21)
LEVEL_MAP.set_over('gold')
LEVEL_MAP.set_under('white')


def balanced_confusion_weights(y, y_true, scale_by_total=True):
    """Weight samples so that each confusion-matrix cell has equal total weight.

    Parameters
    ----------
    y : array_like
        Predicted labels; any non-zero entry counts as the positive class.
    y_true : array_like
        True labels, same shape as ``y``.
    scale_by_total : bool, optional
        If True, multiply all weights by the number of samples.

    Returns
    -------
    weights : ndarray of float
        Same shape as ``y``; each sample gets the inverse of the size of the
        confusion cell (TP / TN / FP / FN) it falls into.
    confusion_counts : ndarray
        ``[[TN, FN], [FP, TP]]``.
    """
    # Builtin `bool` replaces the `np.bool` alias, which newer NumPy releases no longer provide.
    y = np.asarray(y).astype(bool)
    y_true = np.asarray(y_true).astype(bool)

    # Assuming labels in {0, 1}, count entries from confusion matrix
    TP = ( y &  y_true).sum()
    TN = (~y & ~y_true).sum()
    FP = ( y & ~y_true).sum()
    FN = (~y &  y_true).sum()
    confusion_counts = np.array([[TN, FN], [FP, TP]])

    # Scale up each sample by inverse of confusion weight.
    # A cell with zero count selects no samples, so no division by zero is performed on any element.
    weights = np.ones_like(y, dtype=float)
    weights[ y &  y_true] /= TP
    weights[~y & ~y_true] /= TN
    weights[ y & ~y_true] /= FP
    weights[~y &  y_true] /= FN
    if scale_by_total:
        weights *= y.size

    return weights, confusion_counts


def balanced_class_weights(y_true, scale_by_total=True):
    """Weight samples so that both classes have equal total weight.

    Parameters
    ----------
    y_true : array_like
        True labels; any non-zero entry counts as the positive class.
    scale_by_total : bool, optional
        If True, multiply all weights by the number of samples.

    Returns
    -------
    weights : ndarray of float
        Same shape as ``y_true``; each sample gets the inverse of its class size.
    class_counts : ndarray
        ``[number of negatives, number of positives]``.
    """
    # Builtin `bool` replaces the `np.bool` alias, which newer NumPy releases no longer provide.
    y = np.asarray(y_true).astype(bool)
    nP = y.sum()
    nN = y.size - nP
    class_counts = np.array([nN, nP])

    # An empty class selects no samples, so no element is ever divided by zero.
    weights = np.ones_like(y, dtype=float)
    weights[ y] /= nP
    weights[~y] /= nN
    if scale_by_total:
        weights *= y.size

    return weights, class_counts

## TensorFlow Session

In [None]:
# Thread-count settings for the TensorFlow session created below.
MAX_CPU_COUNT = os.cpu_count()
NUM_CORES = 8
NUM_SOCKETS = 2

# OpenMP / KMP runtime settings, passed through environment variables.
# NOTE(review): presumably only effective with an MKL-enabled TensorFlow build -- confirm.
os.environ["KMP_BLOCKTIME"]    = str(0)
os.environ["KMP_SETTINGS"]     = str(1)
os.environ["KMP_AFFINITY"]     = 'granularity=fine,noverbose,compact,1,0'
os.environ["OMP_NUM_THREADS"]  = str(NUM_CORES)

# Session configuration: NUM_CORES threads within an op, NUM_SOCKETS ops in parallel,
# no soft device placement, and every CPU reported by the OS made available as a device.
config = tf.ConfigProto(intra_op_parallelism_threads  = NUM_CORES,
                        inter_op_parallelism_threads  = NUM_SOCKETS,
                        allow_soft_placement          = False,
#                         log_device_placement          = True,
                        device_count                  = {'CPU': MAX_CPU_COUNT},
                       )

# TODO manually for CPU-only?
# Set the graph-level JIT option to ON_1.
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

# Close the session left over from an earlier run of this cell; on the first
# run `session` does not exist yet and the resulting NameError is ignored.
try:
    session.close()
except NameError:
    pass
session = tf.InteractiveSession(config=config)

## Dynamics

In [None]:
dt        = 0.01  # time step handed to VanDerPol
state_dim = 2     # number of state variables
x_max     = 2.5   # normalization constant and plot half-width for the first state
y_max     = 3     # normalization constant and plot half-height for the second state

# Per-dimension normalization constants, the [-1, 1] limits of each state
# dimension, and the un-normalized axis extents used by the plots.
state_norm   = (x_max, y_max)
state_limits = np.array([[-1., 1.]] * state_dim)
plot_limits  = np.array([[- x_max, x_max], [- y_max, y_max]])
# Dynamics model built from the time step and the normalization constants.
vanderpol    = VanDerPol(dt, state_norm)

## State Discretization

In [None]:
def gridify(norms, maxes=None, num_points=25):
    """Create a ``safe_learning.GridWorld`` over a normalized, symmetric box.

    Along each dimension the grid limits are ``-maxes / norms`` and
    ``maxes / norms``; when ``maxes`` is omitted it defaults to ``norms``.
    An integer ``num_points`` is repeated for every dimension; any other
    value is passed to ``GridWorld`` as given.
    """
    scale = np.asarray(norms).ravel()
    bounds = scale if maxes is None else np.asarray(maxes).ravel()
    limits = np.column_stack((- bounds / scale, bounds / scale))
    points_per_dim = [num_points, ] * len(scale) if isinstance(num_points, int) else num_points
    return safe_learning.GridWorld(limits, points_per_dim)


# Discretize the normalized state space with 201 points per dimension.
grid = gridify(state_norm, num_points=201)
# Discretization parameter tau: zero when requested, otherwise half the summed grid spacing.
tau = 0 if OPTIONS.use_zero_threshold else np.sum(grid.unit_maxes) / 2


## Neural Network Lyapunov Function

In [None]:
# Network specification handed to LyapunovNetwork: three entries of 64
# (presumably hidden-layer widths -- confirm) with tanh activations.
layer_dims             = [64, 64, 64]
activations            = [tf.tanh, tf.tanh, tf.tanh]
lyapunov_function      = LyapunovNetwork(state_dim, layer_dims, activations, OPTIONS.eps)
# Symbolic gradient of the network output with respect to the input states.
grad_lyapunov_function = lambda X: tf.gradients(lyapunov_function(X), X)[0]

# Callables passed to safe_learning.Lyapunov as L_f and L_v
# (presumably Lipschitz-constant estimates of the dynamics and of the Lyapunov function -- confirm).
# NOTE(review): L_f applies np.abs to an expression containing a TensorFlow tensor; tf.abs may be what is intended.
L_f = lambda X: tf.maximum( np.abs(1 - 2 * dt), np.abs(1 + dt * (2 * tf.reduce_prod(X, axis=1, keepdims=True) + 1)) )
# L_v is the row-wise 1-norm of the Lyapunov gradient.
L_v = lambda X: tf.norm(grad_lyapunov_function(X), ord=1, axis=1, keepdims=True)

# Dummy policy: one output column that is always zero.
policy = lambda X: 0.0 * tf.reduce_sum(X, axis=1, keepdims=True)
# Initial safe set: all grid points within Euclidean distance 0.1 of the origin.
initial_safe_set = np.linalg.norm(grid.all_points, axis=1) <= 0.1

# TODO need to use template before variables exist in the graph
# Call the network once on a placeholder, then initialize the parameters it exposes.
tf_states = tf.placeholder(OPTIONS.tf_dtype, shape=[None, state_dim], name='states')
temp = lyapunov_function(tf_states)
session.run(tf.variables_initializer(lyapunov_function.parameters))

lyapunov = safe_learning.Lyapunov(grid, lyapunov_function, vanderpol, L_f, L_v, tau, policy, initial_safe_set)

# Cache for the graph nodes created in the next cell.
_STORAGE = {}

## TensorFlow Graph

In [None]:
# Build the evaluation graph once and cache its nodes in _STORAGE; when the
# cache already holds them (get_storage returns something other than None),
# reuse the cached nodes instead.
storage = safe_learning.utilities.get_storage(_STORAGE)
if storage is None:
    tf_states        = tf.placeholder(OPTIONS.tf_dtype, shape=[None, state_dim], name='states')
    tf_actions       = policy(tf_states) # dummy variable
    tf_future_states = vanderpol(tf_states, tf_actions)
    tf_values        = lyapunov.lyapunov_function(tf_states)
    tf_future_values = lyapunov.lyapunov_function(tf_future_states)
    # One-step change of the Lyapunov function, v(f(x)) - v(x)
    tf_dv            = tf_future_values - tf_values
    tf_threshold     = lyapunov.threshold(tf_states, lyapunov.tau)
    # Boolean per state: True where the one-step change lies below the threshold
    tf_negative      = tf.squeeze(tf.less(tf_dv, tf_threshold), axis=1)
    
    storage = [('states', tf_states), 
               ('future_states', tf_future_states), 
               ('values', tf_values),
               ('future_values', tf_future_values),
               ('dv', tf_dv),
               ('threshold', tf_threshold), 
               ('negative', tf_negative)]
    safe_learning.utilities.set_storage(_STORAGE, storage)
else:
    # The unpacking order must match the order of the list stored above.
    tf_states, tf_future_states, tf_values, tf_future_values, tf_dv, tf_threshold, tf_negative = storage.values()

## True ROA

In [None]:
def compute_roa(grid, closed_loop_dynamics, horizon=250, tol=1e-3, cutoff=None):
    """Approximate the region of attraction by forward simulation.

    Parameters
    ----------
    grid : ndarray or discretization object
        Either an array of start states (one row per state), or an object
        exposing ``all_points``, ``nindex`` and ``ndim``.
    closed_loop_dynamics : callable
        Maps an array of states to the array of successor states.
    horizon : int, optional
        Number of time steps stored per trajectory, the start state included.
    tol : float, optional
        A start state counts as inside the ROA when the 2-norm of its final
        state is at most ``tol``.
    cutoff : float, optional
        If given, every simulated state is clipped to ``[-cutoff, cutoff]``.

    Returns
    -------
    roa : ndarray of bool
        One flag per start state.
    trajectories : ndarray
        Array of shape ``(number of states, state dimension, horizon)``.
    """
    # Accept either a raw array of start states or a discretization object.
    if isinstance(grid, np.ndarray):
        start_states = grid
        n_states = grid.shape[0]
        n_dims = grid.shape[1]
    else:
        start_states = grid.all_points
        n_states = grid.nindex
        n_dims = grid.ndim

    # Roll every start state forward, writing each step into its time slice.
    trajectories = np.empty((n_states, n_dims, horizon))
    trajectories[:, :, 0] = start_states
    for step in range(1, horizon):
        current = trajectories[:, :, step]
        current[...] = closed_loop_dynamics(trajectories[:, :, step - 1])
        if cutoff is not None:
            np.clip(current, - cutoff, cutoff, out=current)

    # States whose trajectory ends "close" to the origin form the approximate ROA.
    final_distance = np.linalg.norm(trajectories[:, :, -1], ord=2, axis=1)
    return final_distance <= tol, trajectories


# Simulation settings for the reference ("true") ROA on the full grid.
horizon  = 500
tol      = 0.3
cutoff   = 20
# One-step closed-loop map, evaluated through the TensorFlow graph.
dynamics = lambda x: tf_future_states.eval({tf_states: x})
roa, trajectories = compute_roa(grid, dynamics, horizon, tol, cutoff)

# Sub-sample discretization for faster and clearer plotting later
# (keep every `skip`-th grid index along both axes)
N_traj = 11
skip = int(grid.num_points[0] / N_traj)
sub_idx = np.arange(grid.nindex).reshape(grid.num_points)
sub_idx = sub_idx[::skip, ::skip].ravel()
sub_trajectories = trajectories[sub_idx, :, :]


## Supervised Pre-Training

In [None]:
# Supervised pre-training: fit the network to a fixed quadratic target function.
with tf.name_scope('supervised_lyapunov_learning'):
#     Q            = np.array([[1, -10], [-10, 1]])
#     P            = solve_discrete_lyapunov(vanderpol.linearize(), Q)
    # NOTE(review): this P is not symmetric -- confirm that QuadraticFunction accepts that as intended.
    P            = np.array([[1, 0], [-0.5, 1]])
    quad         = safe_learning.QuadraticFunction(P)
    tf_target    = quad(tf_states)
    # One-step change of the quadratic target along the dynamics
    tf_dv_lqr    = quad(tf_future_states) - tf_target
    
    # Absolute error relative to the target value; eps keeps the denominator
    # away from zero and no gradient flows through the denominator.
    tf_costs     = tf.abs(tf_values - tf_target) / tf.stop_gradient(tf_target + OPTIONS.eps)
    tf_objective = tf.reduce_mean(tf_costs, name='objective')
    
    tf_learning_rate = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='learning_rate')
    optimizer        = tf.train.GradientDescentOptimizer(tf_learning_rate)
    training_update  = optimizer.minimize(tf_objective, var_list=lyapunov.lyapunov_function.parameters)
    
    # Random integer indices in [0, size of the initial safe set) for mini-batch sampling
    tf_batch_size = tf.placeholder(tf.int32, [], 'batch_size')
    tf_batch = tf.random_uniform([tf_batch_size, ], 0, lyapunov.initial_safe_set.sum(), dtype=tf.int32, name='batch_sample')

# Re-initialize the network parameters and refresh the values and safe set held by `lyapunov`.
session.run(tf.variables_initializer(lyapunov_function.parameters))
lyapunov.update_values()
lyapunov.update_safe_set()

# Objective history and the pool of states used for pre-training (the initial safe set).
obj = []
level_states = grid.all_points[lyapunov.initial_safe_set]

In [None]:
# Test set: a fixed random sample (with replacement) drawn from the pre-training states
# NOTE(review): test_size is assigned but the literal int(1e3) is used below instead.
test_size = int(1e3)
idx = tf_batch.eval({tf_batch_size: int(1e3)})
test_set = level_states[idx, :]
    
feed_dict = {
    tf_states:         level_states,
    tf_learning_rate:  1e-3,
    tf_batch_size:     int(1e3),
}
max_iters = 100

# Mini-batch gradient descent; runs only when pre-training is enabled in OPTIONS.
if OPTIONS.pre_train:
    for i in tqdm(range(max_iters)):
        # Draw a fresh batch and take one gradient step on it
        idx = tf_batch.eval(feed_dict)
        feed_dict[tf_states] = level_states[idx, :]
        session.run(training_update, feed_dict)

        # Record the objective on the fixed test set
        feed_dict[tf_states] = test_set
        obj.append(tf_objective.eval(feed_dict))

In [None]:
# Plot the recorded pre-training objective per iteration
# (`obj` is empty when pre-training was skipped).
fig, ax = plt.subplots(1, 1, figsize=(4, 2), dpi=OPTIONS.dpi)
ax.set_xlabel(r'iteration')
ax.set_ylabel(r'objective')
ax.plot(obj, '.-r')

plt.show()

In [None]:
# Evaluate the network and the quadratic target (and their one-step changes) on the whole grid.
values, values_lqr, dv, dv_lqr = session.run([tf_values, tf_target, tf_dv, tf_dv_lqr], {tf_states: grid.all_points})
# NOTE(review): value_norm is computed but not used below; each panel is normalized by its own maximum.
value_norm = np.amax([values.max(), values_lqr.max()])

# 2x2 figure: top row = quadratic ("LQR") function, bottom row = neural network;
# left column = normalized values, right column = one-step change.
fig, axes = plt.subplots(2, 2, figsize=(7, 6), dpi=OPTIONS.dpi)
fig.subplots_adjust(wspace=0.3, hspace=0.2)
for ax in axes.ravel():
    ax.set_xlabel(r'$x$')
    ax.set_ylabel(r'$y$')

z = values_lqr.reshape(grid.num_points) / values_lqr.max()
ax = axes[0, 0]
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=x_max / y_max, cmap=LEVEL_MAP, vmin=0, vmax=1)  
ax.set_title('LQR Lyapunov function')
cbar = fig.colorbar(im, ax=ax, label=r'$v(x)$')

z = values.reshape(grid.num_points) / values.max()
ax = axes[1, 0]
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=x_max / y_max, cmap=LEVEL_MAP, vmin=0, vmax=1)   
ax.set_title('NN Lyapunov function')
cbar = fig.colorbar(im, ax=ax, label=r'$v(x)$')

# With vmax=0 every non-decreasing state falls into the "over" color of HEAT_MAP.
z = dv_lqr.reshape(grid.num_points)
ax = axes[0, 1]
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=x_max / y_max, cmap=HEAT_MAP, vmax=0.0)   
ax.set_title('LQR Lyapunov function')
cbar = fig.colorbar(im, ax=ax, label=r'$v(f(x)) - v(x)$')

z = dv.reshape(grid.num_points)
ax = axes[1, 1]
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=x_max / y_max, cmap=HEAT_MAP, vmax=0.0)   
ax.set_title('NN Lyapunov function')
cbar = fig.colorbar(im, ax=ax, label=r'$v(f(x)) - v(x)$')

plt.show()

## Training

In [None]:
# Save checkpoint for neural net weights
# Snapshot the current network parameters so that training can be restarted from this state.
# NOTE(review): the checkpoint path is hard-coded to a user-specific file under /tmp.
saver = tf.train.Saver(var_list=lyapunov.lyapunov_function.parameters)
ckpt_path = saver.save(session, "/tmp/spencerr_sos_lyapunov.ckpt")

In [None]:
# Training graph: treat the level set {v(x) <= c_max} as a classifier for ROA
# membership and penalize violations of the Lyapunov decrease condition.
with tf.name_scope('roa_classification'):
    # Current maximum level set we want to push the ROA into
    tf_c_max            = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='c_max')
    # Not connected to any graph node in this cell; its fed value is read back from feed_dict on the Python side.
    tf_level_multiplier = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='level_multiplier')
    
    # True class labels, converted from ROA booleans {0, 1} to data labels {-1, 1}
    tf_roa     = tf.placeholder(OPTIONS.tf_dtype, shape=[None, 1], name='roa')
    tf_labels  = 2 * tf_roa - 1

    # Classifier output (signed distance to decision boundary c_max = c);
    # tf_y_est maps its sign to a {0, 0.5, 1} class estimate
    tf_decision_dist = tf_c_max - tf_values
    tf_y_est = 0.5 * (tf.sign(tf_decision_dist) + 1)
    
    
    # Use perceptron / hinge / logistic loss with class weights
    tf_weights         = tf.placeholder(OPTIONS.tf_dtype, shape=[None, 1], name='class_weights')
    tf_classifier_loss = tf_weights * tf.maximum(- tf_labels * tf_decision_dist, 0, name='perceptron_loss')
#     tf_classifier_loss = tf_weights * tf.maximum(1 - tf_labels * tf_decision_dist, 0, name='hinge_loss')
#     tf_classifier_loss = tf_weights * tf.log(1 + tf.exp(- tf_labels * tf_decision_dist), name='logistic_loss')
        
    # Enforce decrease constraint with Lagrangian relaxation: only states labelled
    # as ROA contribute, and the violation is scaled by the (gradient-free) function value
    tf_lagrange_multiplier = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='lagrange_multiplier')
    tf_decrease_loss       = tf_roa * tf.maximum(tf_dv - tf_threshold, 0) / tf.stop_gradient(tf_values + OPTIONS.eps)
#     tf_decrease_loss       = tf_y_est * tf.maximum(tf_dv - tf_threshold, 0) / tf.stop_gradient(tf_values + OPTIONS.eps)
    
    # Define update step (this rebinds tf_objective, tf_learning_rate, optimizer and training_update)
    tf_objective     = tf.reduce_mean(tf_classifier_loss + tf_lagrange_multiplier * tf_decrease_loss, name='objective')
    tf_learning_rate = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='learning_rate')
    optimizer        = tf.train.GradientDescentOptimizer(tf_learning_rate)
    training_update  = optimizer.minimize(tf_objective, var_list=lyapunov.lyapunov_function.parameters)
    
    # TODO
#     tf_dec_obj  = tf.reduce_mean(tf_decrease_loss, name='decrease_objective')
#     tf_dec_rate = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='learning_rate')
#     optimizer   = tf.train.GradientDescentOptimizer(tf_dec_rate)
#     dec_update  = optimizer.minimize(tf_dec_obj, var_list=lyapunov.lyapunov_function.parameters)
    

# Random integer indices in [0, indices_to_sample) for mini-batch sampling
with tf.name_scope('sampling'):
    tf_batch_size = tf.placeholder(tf.int32, [], 'batch_size')
    tf_idx_range  = tf.placeholder(tf.int32, shape=[], name='indices_to_sample')
    tf_idx_batch  = tf.random_uniform([tf_batch_size, ], 0, tf_idx_range, dtype=tf.int32, name='batch_sample')

In [None]:
# Restore checkpoint
# Restore checkpoint, then refresh the values and safe set held by `lyapunov`
saver.restore(session, ckpt_path)
lyapunov.update_values()
lyapunov.update_safe_set()
# session.run(tf.variables_initializer(optimizer.variables()))

# Training histories and per-grid-point bookkeeping
obj          = []
loss_class   = []
loss_dec     = []
roa_estimate = np.copy(lyapunov.safe_set)         # running ROA estimate, seeded with the current safe set
idx_visited  = np.zeros_like(lyapunov.safe_set)   # grid points that have been part of a training set so far

# Histories of the level c_max and of the safe-set size as a fraction of the grid
c_max = [lyapunov.feed_dict[lyapunov.c_max], ]
safe_size = [lyapunov.safe_set.sum() / lyapunov.discretization.nindex, ]
grid = lyapunov.discretization

In [None]:
outer_iters = 10   # number of safe-set update rounds
inner_iters = 30   # SGD steps per round

# dec_iters = 100
horizon   = 250    # forward-simulation steps used to label the gap states

feed_dict = {
    tf_states:               np.zeros((1, lyapunov.discretization.ndim)), # placeholder
    tf_batch_size:           int(1e3),
    tf_c_max:                1,
    tf_lagrange_multiplier:  350,
    tf_idx_range:            grid.nindex,
    #
    tf_learning_rate:        3e-4,
    tf_level_multiplier:     3.,
    #
#     tf_dec_rate:             1e-3,
}

# Losses are recorded on the full grid, labelled with the simulated reference ROA.
test_set = grid.all_points
test_labels = roa.reshape([-1, 1])

In [None]:
# Report the starting point: current level, grid size and safe-set size.
print('Current metrics ...')
c = lyapunov.feed_dict[lyapunov.c_max]
num_safe = lyapunov.safe_set.sum()
print('c_max: {}'.format(c))
print('grid size: {}'.format(grid.nindex))
print('safe set size: {} ({:.2f}% of grid, {:.2f}% of ROA)\n'.format(int(num_safe), 100 * num_safe / grid.nindex, 100 * num_safe / roa.sum()))
print('')
# Short pause before the loop starts (presumably so the prints appear before the tqdm bars -- confirm)
time.sleep(0.5)

for i in range(outer_iters):
    # Identify current safe set and gap states around it:
    # idx_small = level set at c, idx_big = level set at level_multiplier * c,
    # idx_gap = states in the bigger level set but not in the smaller one
    c         = lyapunov.feed_dict[lyapunov.c_max]
    idx_small = lyapunov.values.ravel() <= c
    idx_big   = lyapunov.values.ravel() <= feed_dict[tf_level_multiplier] * c
    idx_gap   = np.logical_and(idx_big, ~idx_small)
    
    # Update ROA estimate by propagating gap states forward: a gap state is added
    # when, after `horizon` steps (clipped to [-1, 1]), its value is at most c
    propagated_states = grid.all_points[idx_gap]
    for _ in range(horizon):
        propagated_states = tf_future_states.eval({tf_states: propagated_states})
        np.clip(propagated_states, -1, 1, out=propagated_states)
    safe_in_future = (tf_values.eval({tf_states: propagated_states}) <= c).ravel()
    roa_estimate[idx_gap] |= safe_in_future
    
    # Train classifier on current ROA estimate and any states from scaled level set
    target_idx              = np.logical_or(idx_big, roa_estimate)
    target_set              = grid.all_points[target_idx]
    target_labels           = roa_estimate[target_idx].astype(OPTIONS.np_dtype).reshape([-1, 1])
    feed_dict[tf_idx_range] = target_set.shape[0]
    idx_visited |= target_idx
    
    # Test set
#     test_set = target_set
#     test_labels = target_labels

    # SGD for classification
    for _ in tqdm(range(inner_iters)):
        # Training step: sample a batch (with replacement), weight it by class balance, take one gradient step
        idx_batch                     = tf_idx_batch.eval(feed_dict)
        feed_dict[tf_states]          = target_set[idx_batch]
        feed_dict[tf_roa]             = target_labels[idx_batch]
#         feed_dict[tf_weights], counts = balanced_confusion_weights(tf_values.eval(feed_dict) <= feed_dict[tf_c_max], feed_dict[tf_roa].astype(bool))
        feed_dict[tf_weights], counts = balanced_class_weights(feed_dict[tf_roa].astype(bool))
        session.run(training_update, feed_dict=feed_dict)

        # Record objectives on the test set (this overwrites the batch entries in feed_dict)
        feed_dict[tf_states]          = test_set
        feed_dict[tf_roa]             = test_labels
#         feed_dict[tf_weights], counts = balanced_confusion_weights(tf_values.eval(feed_dict) <= feed_dict[tf_c_max], feed_dict[tf_roa].astype(bool))
        feed_dict[tf_weights], counts = balanced_class_weights(feed_dict[tf_roa].astype(bool))

        results = session.run([tf_classifier_loss, tf_decrease_loss], feed_dict)
        loss_class.append(results[0].mean())
        loss_dec.append(results[1].mean())
        obj.append(loss_class[-1] + feed_dict[tf_lagrange_multiplier] * loss_dec[-1])
    

    # TODO
#     lyapunov.update_values()
#     lyapunov.update_safe_set()
#     for _ in tqdm(range(dec_iters)):
#         idx_edge                      = lyapunov.values.ravel() >= lyapunov.feed_dict[lyapunov.c_max]
#         target_idx                    = np.logical_and(idx_edge, roa_estimate)
#         target_set                    = grid.all_points[target_idx]
#         target_labels                 = roa_estimate[target_idx].astype(OPTIONS.np_dtype).reshape([-1, 1])
#         feed_dict[tf_idx_range]       = target_set.shape[0]
#         idx_batch                     = tf_idx_batch.eval(feed_dict)
#         feed_dict[tf_states]          = target_set[idx_batch]
#         feed_dict[tf_roa]             = target_labels[idx_batch]
#         feed_dict[tf_weights], counts = balanced_class_weights(feed_dict[tf_roa].astype(bool))
#         session.run(dec_update, feed_dict=feed_dict)
    

    # Refresh values and safe set with the updated network, merge the new safe
    # set into the ROA estimate, and log the safe-set size and level
    lyapunov.update_values()
    lyapunov.update_safe_set()
    roa_estimate |= lyapunov.safe_set
    safe_size.append(lyapunov.safe_set.sum() / grid.nindex)
    c_max.append(lyapunov.feed_dict[lyapunov.c_max])
    
    # `counts` holds the class counts of the last test-set evaluation: [negatives, positives]
    print(counts)
    print('c_max: {}'.format(c_max[-1]))
    print('safe set size: {} ({:.2f}% of grid, {:.2f}% of ROA)\n'.format(int(safe_size[-1] * grid.nindex), 
                                                                         100 * safe_size[-1], 
                                                                         100 * safe_size[-1] * roa.size / roa.sum()))


In [None]:
# Plot both loss terms over all accumulated SGD steps; the decrease loss is
# scaled by the Lagrange multiplier so it is shown as it enters the objective.
fig, ax = plt.subplots(1, 1, figsize=(6, 3), dpi=OPTIONS.dpi)

ax.plot(loss_class, '.-r')
ax.plot(feed_dict[tf_lagrange_multiplier] * np.asarray(loss_dec), '.-b')

ax.set_xlabel(r'SGD iteration (accumulated)')
ax.set_ylabel(r'Training loss')
# ax.set_xticks(list(range(0, len(loss_class) + 1, inner_iters)))

# Colored proxy rectangles serve as legend handles for the two curves
proxy = [plt.Rectangle((0,0), 1, 1, fc=c) for c in ['red', 'blue']]    
legend = ax.legend(proxy, ['Classification loss', 'Lyapunov decrease loss'], loc='upper right', fontsize=8)
legend.get_frame().set_alpha(0.5)

plt.show()

In [None]:
# Plot the level c_k (left axis, red) and the safe-set size relative to the
# reference ROA (right axis, blue) over the safe-set update rounds.
fig, ax = plt.subplots(1, 1, figsize=(6, 3), dpi=OPTIONS.dpi)
# Fraction of grid points that belong to the reference ROA
roa_fraction = roa.sum() / roa.size

ax.plot(c_max, '.-r')
ax.set_ylabel(r'$c_k$')
ax.tick_params('y', colors='r')
# ax.set_ylim([0, 1])

ax.set_xlabel(r'Safe set update iteration $k$')
# ax.set_xticks(list(range(0, len(c_max) + 1, 1)))

# Second y-axis sharing the same x-axis
ax = ax.twinx()
ax.plot(np.array(safe_size) / roa_fraction, '.-b')
ax.set_ylabel(r'$|\mathcal{V}(c_k) \cap \mathcal{X}_\tau|\ /\ |\mathcal{R} \cap \mathcal{X}_\tau|$')
ax.tick_params('y', colors='b')
# ax.set_ylim([0, 1])

plt.show()

print(np.array(safe_size) / roa_fraction)

## Plotting

In [None]:
# Overview figure in un-normalized coordinates: safe set, decrease region,
# ROA estimate, network level set, sample trajectories and vector field.
fig, ax = plt.subplots(1, 1, figsize=(4, 4), dpi=OPTIONS.dpi)
ax.set_aspect(x_max / y_max)
ax.set_xlim([- x_max, x_max])
ax.set_ylim([- y_max, y_max])
    
# True ROA (computed but currently not drawn; both draw calls are commented out)
z = roa.reshape(grid.num_points)
# im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=x_max / y_max, cmap=binary_cmap('blue', 0.5), vmin=0)
# im = ax.contour(z.T, origin='lower', extent=plot_limits.ravel(), colors='darkgreen', linestyles='dashed')

# Safe set (boundary drawn as a blue contour)
z = lyapunov.safe_set.reshape(grid.num_points)
# im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=x_max / y_max, cmap=binary_cmap('blue', 1), vmin=0)
im = ax.contour(z.T, origin='lower', extent=plot_limits.ravel(), cmap=ListedColormap(['blue']))

# Decrease region (states with v(f(x)) - v(x) < 0, shaded in translucent blue)
z = tf_dv.eval({tf_states: grid.all_points}).reshape(grid.num_points) < 0
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=x_max / y_max, cmap=binary_cmap('blue', 0.3), vmin=0)
# im = ax.contour(z.T, origin='lower', extent=plot_limits.ravel(), cmap=ListedColormap(['blue']), levels=0, linestyles='dashed')

# Estimated ROA: per grid point, (1 if in the ROA estimate) + (1 if visited during training)
alpha = 0.5
cmap = ListedColormap([(1., 1., 1., 0.), (1., 0., 0., alpha), (0., 1., 0., alpha)])
# z = tf_negative.eval({tf_states: grid.all_points}).reshape(grid.num_points)
z = roa_estimate.astype(int)
z[idx_visited] += 1
z = z.reshape(grid.num_points)
# im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=x_max / y_max, cmap=cmap, vmin=0)
im = ax.contour(z.T, origin='lower', extent=plot_limits.ravel(), cmap=ListedColormap(['blue', 'red', 'green']))

# Neural-network level sets
# NOTE(review): an integer `levels` is a number of contour levels in matplotlib, not a contour value;
# if the level set v(x) = 1 is intended, this presumably should be levels=[1] -- confirm.
z = tf_values.eval({tf_states: grid.all_points}).reshape(grid.num_points)
im = ax.contour(z.T, origin='lower', extent=plot_limits.ravel(), colors='darkviolet', levels=1, linestyles='dashed')

# SOS Lyapunov function, evaluated on the de-normalized grid
# (computed but currently not drawn; both draw calls are commented out)
deg = 4
temp = vanderpol.denormalize(grid.all_points).eval()
z = sos_lyapunov(temp, deg).reshape(grid.num_points)
# im = ax.contour(z.T, origin='lower', extent=plot_limits.ravel(), cmap=plt.get_cmap('plasma'), levels=np.linspace(0, z.max(), 41))
# im = ax.contour(z.T, origin='lower', extent=plot_limits.ravel(), colors='blue', levels=gamma[deg])

# Trajectories of the sub-sampled start states, rescaled to un-normalized coordinates
for n in range(sub_trajectories.shape[0]):
    x = sub_trajectories[n, 0, :] * state_norm[0]
    y = sub_trajectories[n, 1, :] * state_norm[1]
    ax.plot(x, y, 'k--', linewidth=0.25)
# Unit-length direction arrows from a finite-difference estimate of the state derivative
sub_states = grid.all_points[sub_idx]
dx_dt = (dynamics(sub_states) - sub_states) / dt
dx_dt = dx_dt / np.linalg.norm(dx_dt, ord=2, axis=1, keepdims=True)
ax.quiver(sub_states[:, 0] * state_norm[0], sub_states[:, 1] * state_norm[1], dx_dt[:, 0], dx_dt[:, 1], 
          scale=None, pivot='mid', headwidth=3, headlength=6, color='k')

plt.show()