# Learning a Lyapunov Function for an Inverted Pendulum

In [None]:
import numpy as np
import tensorflow as tf
import safe_learning
from utilities import InvertedPendulum, LyapunovNetwork

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib.font_manager import FontProperties

import os
import time
import pickle
from tqdm import tqdm

# TODO testing **********************************************************#

class Options(object):
    """Attribute-style bag of settings built from keyword arguments.

    Every keyword passed to the constructor becomes an instance attribute
    with the same name, e.g. ``Options(dpi=150).dpi == 150``.
    """

    def __init__(self, **kwargs):
        super().__init__()
        # Store the keywords directly in the instance namespace.
        vars(self).update(kwargs)

# Notebook-wide settings.
#   np_dtype / tf_dtype    : NumPy / TensorFlow dtypes taken from safe_learning.config
#   eps                    : small constant passed to LyapunovNetwork and added to loss denominators
#   saturate               : wrap the linear policy in safe_learning.Saturation
#   use_zero_threshold     : use tau = 0 instead of a grid-derived tau
#   use_lipschitz_scaling  : choose the element-wise |grad V| form of L_v over its row-wise 1-norm
#   pre_train              : fit the network to a quadratic candidate before ROA training
#   dpi, fontproperties    : figure resolution and font used by the plots
#   save_figs, fig_path    : not read anywhere in the visible part of this notebook
OPTIONS = Options(np_dtype              = safe_learning.config.np_dtype,
                  tf_dtype              = safe_learning.config.dtype,
                  eps                   = 1e-8,
                  saturate              = True,
                  use_zero_threshold    = True,
                  use_lipschitz_scaling = True,
                  pre_train             = True,
                  dpi                   = 150,
                  fontproperties        = FontProperties(size=10),
                  save_figs             = False,
                  fig_path              = 'figures/pendulum_lyapunov/')

#************************************************************************#


In [None]:
def binary_cmap(color='red', alpha=1.):
    """Build a two-entry colormap: fully transparent white, then ``color``.

    ``color`` may be one of the names 'red', 'green' or 'blue', in which case
    the RGBA tuple is built with the given ``alpha``; any other value is
    handed to ``ListedColormap`` unchanged (and ``alpha`` is then ignored).
    """
    named_rgb = (
        ('red',   (1., 0., 0.)),
        ('green', (0., 1., 0.)),
        ('blue',  (0., 0., 1.)),
    )
    # Compare with == (not a dict lookup) so unhashable colour specs still work.
    for name, rgb in named_rgb:
        if color == name:
            opaque = rgb + (alpha,)
            break
    else:
        opaque = color
    clear = (1., 1., 1., 0.)
    return ListedColormap([clear, opaque])

# Shared colormaps with explicit colours for out-of-range values.
# NOTE(review): set_over/set_under mutate the object returned by plt.get_cmap;
# depending on the matplotlib version this may be the globally registered
# colormap rather than a copy -- confirm, and copy first if that matters.
HEAT_MAP = plt.get_cmap('inferno', lut=None)
HEAT_MAP.set_over('white')
HEAT_MAP.set_under('black')

# 21-entry viridis map for level-set plots.
LEVEL_MAP = plt.get_cmap('viridis', lut=21)
LEVEL_MAP.set_over('gold')
LEVEL_MAP.set_under('white')


def confusion_weights(y, y_true, scale_by_total=True):
    """Weight each sample by the inverse size of its confusion-matrix cell.

    Parameters
    ----------
    y : ndarray
        Predicted labels; converted to boolean with ``astype(bool)``.
    y_true : ndarray
        True labels with the same shape as ``y``; converted to boolean.
    scale_by_total : bool, optional
        If True, multiply all weights by the total number of samples.

    Returns
    -------
    weights : ndarray of float
        Same shape as ``y``; each entry is ``1 / count`` of the cell
        (TP, TN, FP or FN) the sample falls into, optionally scaled.
    confusion_counts : ndarray, shape (2, 2)
        ``[[TN, FN], [FP, TP]]``.
    """
    # Use the builtin ``bool``: the ``np.bool`` alias was removed from NumPy.
    y = y.astype(bool)
    y_true = y_true.astype(bool)

    # Assuming labels in {0, 1}, build one mask per confusion-matrix cell
    masks = {
        'TP': y & y_true,
        'TN': ~y & ~y_true,
        'FP': y & ~y_true,
        'FN': ~y & y_true,
    }
    counts = {cell: mask.sum() for cell, mask in masks.items()}
    confusion_counts = np.array([[counts['TN'], counts['FN']],
                                 [counts['FP'], counts['TP']]])

    # Scale each sample by the inverse of its cell count. An empty cell has an
    # empty mask, so no division by zero is actually carried out for it.
    weights = np.ones_like(y, dtype=float)
    for cell, mask in masks.items():
        weights[mask] /= counts[cell]
    if scale_by_total:
        weights *= y.size

    return weights, confusion_counts


def class_weights(y_true, scale_by_total=True):
    """Weight each sample by the inverse size of its class.

    Parameters
    ----------
    y_true : ndarray
        True labels; converted to boolean with ``astype(bool)``.
    scale_by_total : bool, optional
        If True, multiply all weights by the total number of samples.

    Returns
    -------
    weights : ndarray of float
        Same shape as ``y_true``; ``1 / nP`` for positive samples and
        ``1 / nN`` for negative ones, optionally scaled.
    class_counts : ndarray, shape (2,)
        ``[nN, nP]``, the number of negative and positive samples.
    """
    # Use the builtin ``bool``: the ``np.bool`` alias was removed from NumPy.
    y = y_true.astype(bool)
    nP = y.sum()
    nN = y.size - nP
    class_counts = np.array([nN, nP])

    # A class with no samples has an empty mask, so nothing is divided by zero.
    weights = np.ones_like(y, dtype=float)
    weights[y] /= nP
    weights[~y] /= nN
    if scale_by_total:
        weights *= y.size

    return weights, class_counts


def tf_function_compose(tf_input, tf_function, num_compositions, output_name='function_composition', **kwargs):
    """Apply ``tf_function`` to ``tf_input`` ``num_compositions`` times.

    The repeated application is built as a single ``tf.while_loop``.

    Parameters
    ----------
    tf_input : tf.Tensor
        Initial value fed to the first application.
    tf_function : callable
        Called as ``tf_function(tensor, **kwargs)``. Its output must keep the
        static shape of ``tf_input``, because that shape is used as the loop's
        shape invariant.
    num_compositions : int or scalar integer tensor
        Number of times the function is applied.
    output_name : str, optional
        Name given to the ``tf.while_loop`` op.
    **kwargs
        Extra keyword arguments forwarded to every ``tf_function`` call.

    Returns
    -------
    tf.Tensor
        Result of the final application.
    """

    def body(intermediate, idx):
        return tf_function(intermediate, **kwargs), idx + 1

    # The loop carries two variables, so the condition takes exactly two.
    def condition(intermediate, idx):
        return idx < num_compositions

    # Integer loop counter (the original referenced an undefined TF_DTYPE).
    initial_idx = tf.constant(0, dtype=tf.int32)
    shape_invariants = [tf_input.get_shape(), initial_idx.get_shape()]
    tf_output, _ = tf.while_loop(condition, body, [tf_input, initial_idx],
                                 shape_invariants=shape_invariants, name=output_name)

    return tf_output


from safe_learning import config, DeterministicFunction
from safe_learning.utilities import concatenate_inputs
from scipy import signal

class ApproxPendulum(DeterministicFunction):
    """Pendulum dynamics with ``sin`` replaced by a degree-7 Taylor polynomial.

    Parameters
    ----------
    mass : float
    length : float
    friction : float, optional
        Friction coefficient; the friction term is only added when positive.
    dt : float, optional
        Sampling time of one call to the dynamics.
    normalization : sequence of two sequences, optional
        ``(state_norms, action_norms)``. When given, inputs and outputs of the
        dynamics are expressed in normalized coordinates (physical value
        divided by its normalizer).
    """

    def __init__(self, mass, length, friction=0, dt=1 / 80, normalization=None):
        super(ApproxPendulum, self).__init__()
        self.mass = mass
        self.length = length
        self.gravity = 9.81
        self.friction = friction
        self.dt = dt

        self.normalization = normalization
        # Define inv_norm unconditionally so the attribute always exists.
        self.inv_norm = None
        if normalization is not None:
            self.normalization = [np.array(norm, dtype=OPTIONS.np_dtype)
                                  for norm in normalization]
            self.inv_norm = [norm ** -1 for norm in self.normalization]

    @property
    def inertia(self):
        """Return inertia of the pendulum (``mass * length ** 2``)."""
        return self.mass * self.length ** 2

    def normalize(self, state, action):
        """Map physical states (and actions, if not None) to normalized ones."""
        if self.normalization is None:
            return state, action

        Tx_inv, Tu_inv = map(np.diag, self.inv_norm)
        state = tf.matmul(state, Tx_inv)

        if action is not None:
            action = tf.matmul(action, Tu_inv)

        return state, action

    def denormalize(self, state, action):
        """Map normalized states (and actions, if not None) to physical ones."""
        if self.normalization is None:
            return state, action

        Tx, Tu = map(np.diag, self.normalization)

        state = tf.matmul(state, Tx)
        if action is not None:
            action = tf.matmul(action, Tu)

        return state, action

    def linearize(self):
        """Return the discrete-time linearization ``(A, B)`` of the dynamics.

        The continuous-time matrices use ``sin(angle) ~ angle``, are
        transformed to normalized coordinates when a normalization is set,
        and are then discretized with sampling time ``self.dt``.
        """
        gravity = self.gravity
        length = self.length
        friction = self.friction
        inertia = self.inertia

        A = np.array([[0, 1],
                      [gravity / length, -friction / inertia]],
                     dtype=OPTIONS.np_dtype)

        B = np.array([[0],
                      [1 / inertia]],
                     dtype=OPTIONS.np_dtype)

        if self.normalization is not None:
            Tx, Tu = map(np.diag, self.normalization)
            Tx_inv, Tu_inv = map(np.diag, self.inv_norm)

            A = np.linalg.multi_dot((Tx_inv, A, Tx))
            B = np.linalg.multi_dot((Tx_inv, B, Tu))

        sys = signal.StateSpace(A, B, np.eye(2), np.zeros((2, 1)))
        sysd = sys.to_discrete(self.dt)
        return sysd.A, sysd.B

    @concatenate_inputs(start=1)
    def build_evaluation(self, state_action):
        """Evaluate the dynamics: return the next normalized state.

        ``state_action`` holds two state columns followed by one action
        column. The ODE is integrated over ``self.dt`` with ten explicit
        Euler sub-steps, holding the action constant.
        """
        # Denormalize
        state, action = tf.split(state_action, [2, 1], axis=1)
        state, action = self.denormalize(state, action)

        n_inner = 10
        dt = self.dt / n_inner
        for _ in range(n_inner):
            state_derivative = self.ode(state, action)
            state = state + dt * state_derivative

        return self.normalize(state, None)[0]

    def ode(self, state, action):
        """Return the time derivative of the physical (denormalized) state."""
        # Physical dynamics
        gravity = self.gravity
        length = self.length
        friction = self.friction
        inertia = self.inertia

        angle, angular_velocity = tf.split(state, 2, axis=1)

        # Degree-7 Taylor polynomial of sin(angle) instead of tf.sin(angle)
        sinx = angle - tf.pow(angle, 3) / 6 + tf.pow(angle, 5) / 120 - tf.pow(angle, 7) / 5040
        x_ddot = gravity / length * sinx + action / inertia

        if friction > 0:
            x_ddot -= friction / inertia * angular_velocity

        state_derivative = tf.concat((angular_velocity, x_ddot), axis=1)

        # Returned in physical units; normalization happens in build_evaluation
        return state_derivative

## TensorFlow Session

In [None]:
# Threading configuration for the CPU-only TensorFlow session.
# NOTE(review): os.cpu_count() can return None; device_count below would then
# receive None -- confirm this cannot happen on the target machines.
MAX_CPU_COUNT = os.cpu_count()
NUM_CORES = 8
NUM_SOCKETS = 2

# KMP_* / OMP_* environment variables (presumably read by the OpenMP runtime
# backing TensorFlow's CPU kernels -- TODO confirm for the installed build).
os.environ["KMP_BLOCKTIME"]    = str(0)
os.environ["KMP_SETTINGS"]     = str(1)
os.environ["KMP_AFFINITY"]     = 'granularity=fine,noverbose,compact,1,0'
os.environ["OMP_NUM_THREADS"]  = str(NUM_CORES)

# NOTE(review): this rebinds the name `config`, shadowing the
# `from safe_learning import config` import made earlier in the notebook.
config = tf.ConfigProto(intra_op_parallelism_threads  = NUM_CORES,
                        inter_op_parallelism_threads  = NUM_SOCKETS,
                        allow_soft_placement          = False,
#                         log_device_placement          = True,
                        device_count                  = {'CPU': MAX_CPU_COUNT},
                       )

# TODO manually for CPU-only?
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

# Close the session left over from a previous run of this cell, if any,
# before opening a fresh one.
try:
    session.close()
except NameError:
    pass
session = tf.InteractiveSession(config=config)

_STORAGE = {}


## Dynamics

In [None]:
# Constants
dt = 0.01   # sampling time
g  = 9.81    # gravity

# True system parameters
m = 0.15    # pendulum mass
L = 0.5     # pole length
b = 0.1     # rotational friction

# State and action normalizers
theta_max = np.deg2rad(180)
omega_max = np.deg2rad(360)
# Gravitational torque m * g * L * sin(angle) evaluated at 60 degrees
u_max     = g * m * L * np.sin(np.deg2rad(60))

# Dimensions and domains
# Limits are [-1, 1] per dimension, i.e. expressed in normalized units.
state_dim     = 2
action_dim    = 1
state_limits  = np.array([[-1., 1.]] * state_dim)
action_limits = np.array([[-1., 1.]] * action_dim)

# Dynamics
# The last argument groups the state normalizers and the action normalizer.
pendulum = InvertedPendulum(m, L, b, dt, [(theta_max, omega_max), (u_max,)])
dynamics = pendulum.__call__


## State Discretization and Initial Safe Set

In [None]:
def gridify(norms, maxes=None, num_points=25):
    """Build a ``safe_learning.GridWorld`` over a symmetric, normalized box.

    Each dimension spans ``[-maxes / norms, maxes / norms]``; ``maxes``
    defaults to ``norms``. An integer ``num_points`` is repeated for every
    dimension, any other value is passed through unchanged.
    """
    scale = np.asarray(norms).ravel()
    bounds = scale if maxes is None else np.asarray(maxes).ravel()

    upper = bounds / scale
    limits = np.column_stack((-upper, upper))

    points_per_dim = [num_points] * len(scale) if isinstance(num_points, int) else num_points
    return safe_learning.GridWorld(limits, points_per_dim)


# Discretize the normalized state space with 501 points per dimension.
norms       = [theta_max, omega_max]
maxes       = np.copy(norms)
grid        = gridify(norms, maxes, 501)
# Axis limits for plotting, converted from radians to degrees.
plot_limits = np.column_stack((- np.rad2deg(maxes), np.rad2deg(maxes)))

# Discretization constant tau handed to safe_learning.Lyapunov.
if OPTIONS.use_zero_threshold:
    tau = 0
else:
    # presumably half the summed grid spacing; verify against GridWorld.unit_maxes
    tau = np.sum(grid.unit_maxes) / 2

# Initial safe set (ball)
# Boolean mask over grid.all_points: points within cutoff_radius of the origin.
cutoff_radius    = 0.1
initial_safe_set = np.linalg.norm(grid.all_points, ord=2, axis=1)  <= cutoff_radius


## Policy

In [None]:
# State cost matrix
Q = np.diag([0.1, 0.1])

# Action cost matrix
R = 0.1 * np.identity(action_dim)

# Normalize cost matrices
# (divide both by the largest entry found in either matrix)
cost_norm = np.amax([Q.max(), R.max()])
Q = Q / cost_norm
R = R / cost_norm

# Fix policy to the LQR solution for linearized system and some cost matrices
A, B = pendulum.linearize()
K, P_lqr = safe_learning.utilities.dlqr(A, B, Q, R)

# Normalize cost
# (in place, so the largest entry of P_lqr becomes 1)
P_lqr /= P_lqr.max()

# K[0, 0] = 2.05
# K[0, 1] = 0
# print(K)
# Linear state feedback u = -K x
policy = safe_learning.LinearSystem(-K, name='policy')

# Optionally clip the policy output to [-1, 1]
if OPTIONS.saturate:
    policy = safe_learning.Saturation(policy, -1, 1)


## Global Lipschitz Constants

In [None]:
# Both constants are callables of the state, but ignore their argument.

# Policy (linear)
# Matrix 1-norm of the feedback gain, as a TensorFlow constant.
L_pol = lambda x: tf.constant(np.linalg.norm(-K, 1), dtype=OPTIONS.tf_dtype)

# Dynamics (linear approximation)
# Mixes NumPy scalars with the TensorFlow constant returned by L_pol.
L_dyn = lambda x: np.linalg.norm(A, 1) + np.linalg.norm(B, 1) * L_pol(x)


## LQR Lyapunov Candidate

In [None]:
# Quadratic candidate built from the normalized LQR cost matrix, with a
# linear map by 2 * P_lqr as its gradient.
lyapunov_function      = safe_learning.QuadraticFunction(P_lqr)
grad_lyapunov_function = safe_learning.LinearSystem((2 * P_lqr,))

# Lipschitz "constant" of the candidate: element-wise |grad V| or its row-wise 1-norm.
# NOTE(review): the lambdas look up the global name `grad_lyapunov_function`
# at call time, and later cells rebind that name. If safe_learning.Lyapunov
# evaluates L_v lazily, this object would silently use a later candidate's
# gradient -- confirm.
if OPTIONS.use_lipschitz_scaling:
    L_v = lambda x: tf.abs(grad_lyapunov_function(x))
else:
    L_v = lambda x: tf.norm(grad_lyapunov_function(x), ord=1, axis=1, keepdims=True)

# Initialize class
lyapunov_lqr = safe_learning.Lyapunov(grid, lyapunov_function, dynamics, L_dyn, L_v, tau, policy, initial_safe_set)
lyapunov_lqr.update_values()
lyapunov_lqr.update_safe_set()


## SOS Lyapunov Candidate

In [None]:
# Hard-coded quadratic-form matrix for the SOS candidate (its origin is not
# shown in this notebook).
P_sos = np.array([[  0.04883,  7.794e-5],
                  [ 7.794e-5,  0.0002801]])
lyapunov_function      = safe_learning.QuadraticFunction(P_sos)
grad_lyapunov_function = safe_learning.LinearSystem((2 * P_sos,))

# Lipschitz "constant" of the candidate: element-wise |grad V| or its row-wise 1-norm.
# NOTE(review): the lambdas look up the global name `grad_lyapunov_function`
# at call time, and later cells rebind that name -- confirm this is harmless
# for how safe_learning.Lyapunov uses L_v.
if OPTIONS.use_lipschitz_scaling:
    L_v = lambda x: tf.abs(grad_lyapunov_function(x))
else:
    L_v = lambda x: tf.norm(grad_lyapunov_function(x), ord=1, axis=1, keepdims=True)

# Initialize class
lyapunov_sos = safe_learning.Lyapunov(grid, lyapunov_function, dynamics, L_dyn, L_v, tau, policy, initial_safe_set)
lyapunov_sos.update_values()
lyapunov_sos.update_safe_set()


## Pre-Training Lyapunov Candidate

In [None]:
if OPTIONS.pre_train:
    # Quadratic candidate
    # V(x) = x^T P x with P = I; used as the regression target when
    # pre-training the neural network.
    P = np.eye(state_dim)
    lyapunov_function      = safe_learning.QuadraticFunction(P)
    grad_lyapunov_function = safe_learning.LinearSystem((2 * P,))

    # Lipschitz "constant" of the candidate: element-wise |grad V| or its
    # row-wise 1-norm. `keepdims` matches the spelling used by every other
    # tf.norm call in this notebook (the original used `keep_dims` here).
    if OPTIONS.use_lipschitz_scaling:
        L_v = lambda x: tf.abs(grad_lyapunov_function(x))
    else:
        L_v = lambda x: tf.norm(grad_lyapunov_function(x), ord=1, axis=1, keepdims=True)

    # Initialize class
    lyapunov_pre = safe_learning.Lyapunov(grid, lyapunov_function, dynamics, L_dyn, L_v, tau, policy, initial_safe_set)
    lyapunov_pre.update_values()
    lyapunov_pre.update_safe_set()


## Neural Network Lyapunov Candidate

In [None]:
# Leaky-ReLU slope; only used by the (currently unused) leaky_relu activation.
# NOTE(review): `alpha` is looked up when leaky_relu is called, and a later
# plotting cell rebinds `alpha` -- relevant only if leaky_relu gets used.
alpha      = 0.2
leaky_relu = lambda x, name: tf.nn.leaky_relu(x, alpha, name)

# Two hidden layers of 64 units with tanh activations.
layer_dims  = [64, 64]
activations = [tf.tanh, tf.tanh]
# activations = [leaky_relu, leaky_relu, leaky_relu]

# Neural-network candidate; its gradient is obtained by differentiating the graph.
lyapunov_function      = LyapunovNetwork(state_dim, layer_dims, activations, OPTIONS.eps)
grad_lyapunov_function = lambda x: tf.gradients(lyapunov_function(x), x)[0]

# Lipschitz "constant" of the candidate: element-wise |grad V| or its row-wise 1-norm.
if OPTIONS.use_lipschitz_scaling:
    L_v = lambda x: tf.abs(grad_lyapunov_function(x))
else:
    L_v = lambda x: tf.norm(grad_lyapunov_function(x), ord=1, axis=1, keepdims=True)

# TODO need to use template before variables exist in the graph
# Call the network once on a throw-away placeholder so that its parameters
# exist and can be initialized.
temp = tf.placeholder(OPTIONS.tf_dtype, shape=[None, state_dim], name='states')
temp = lyapunov_function(temp)
session.run(tf.variables_initializer(lyapunov_function.parameters))

lyapunov_nn = safe_learning.Lyapunov(grid, lyapunov_function, dynamics, L_dyn, L_v, tau, policy, initial_safe_set)
lyapunov_nn.update_values()
lyapunov_nn.update_safe_set()


## TensorFlow Graph

In [None]:
# Dynamics
# Closed loop: states -> policy actions -> next states.
tf_states           = tf.placeholder(OPTIONS.tf_dtype, shape=[None, state_dim], name='states')
tf_actions          = policy(tf_states)
tf_future_states    = dynamics(tf_states, tf_actions)

# Neural network
# One-step change of the candidate, V(f(x)) - V(x), and the boolean test
# "change is below the threshold returned by lyapunov_nn.threshold".
tf_values_nn        = lyapunov_nn.lyapunov_function(tf_states)
tf_future_values_nn = lyapunov_nn.lyapunov_function(tf_future_states)
tf_dv_nn            = tf_future_values_nn - tf_values_nn
tf_threshold        = lyapunov_nn.threshold(tf_states, lyapunov_nn.tau)
tf_negative         = tf.squeeze(tf.less(tf_dv_nn, tf_threshold), axis=1)

# LQR
tf_values_lqr        = lyapunov_lqr.lyapunov_function(tf_states)
tf_future_values_lqr = lyapunov_lqr.lyapunov_function(tf_future_states)
tf_dv_lqr            = tf_future_values_lqr - tf_values_lqr

# SOS
# tf_values_sos        = lyapunov_sos.lyapunov_function(tf_states)
# tf_future_values_sos = lyapunov_sos.lyapunov_function(tf_future_states)
# tf_dv_sos            = tf_future_values_sos - tf_values_sos

# Pre-training
# These tensors only exist when OPTIONS.pre_train is set.
if OPTIONS.pre_train:
    tf_values_pre        = lyapunov_pre.lyapunov_function(tf_states)
    tf_future_values_pre = lyapunov_pre.lyapunov_function(tf_future_states)
    tf_dv_pre            = tf_future_values_pre - tf_values_pre


## True Region of Attraction

In [None]:
def compute_roa(grid, closed_loop_dynamics, horizon=250, tol=1e-3, equilibrium=None, no_traj=True):
    """Estimate the region of attraction by forward simulation.

    Parameters
    ----------
    grid : ndarray or grid object
        Either an array of initial states (one per row) or an object
        exposing ``all_points``, ``nindex`` and ``ndim``.
    closed_loop_dynamics : callable
        Maps an array of states to the array of successor states.
    horizon : int, optional
        Trajectory length including the initial state, so the dynamics are
        applied ``horizon - 1`` times.
    tol : float, optional
        A state counts as converged when its final 2-norm distance to the
        equilibrium is at most ``tol``.
    equilibrium : ndarray, optional
        Target state; defaults to the origin.
    no_traj : bool, optional
        If True only the final states are kept; otherwise the complete
        trajectories are stored and returned as well.

    Returns
    -------
    roa : ndarray of bool
        Convergence flag for every initial state.
    trajectories : ndarray, shape (nindex, ndim, horizon)
        Only returned when ``no_traj`` is False.
    """
    if isinstance(grid, np.ndarray):
        initial_states = grid
        num_states, num_dims = grid.shape[0], grid.shape[1]
    else:
        initial_states, num_states, num_dims = grid.all_points, grid.nindex, grid.ndim

    # Roll every initial state forward through the closed loop
    history = None
    if no_traj:
        final_states = initial_states
        for _ in range(horizon - 1):
            final_states = closed_loop_dynamics(final_states)
    else:
        history = np.empty((num_states, num_dims, horizon))
        history[:, :, 0] = initial_states
        for step in range(1, horizon):
            history[:, :, step] = closed_loop_dynamics(history[:, :, step - 1])
        final_states = history[:, :, -1]

    target = np.zeros((1, num_dims)) if equilibrium is None else equilibrium

    # Approximate ROA: every state that ends up within tol of the target
    distances = np.linalg.norm(final_states - target, ord=2, axis=1)
    converged = distances <= tol

    return converged if no_traj else (converged, history)


# NumPy-in / NumPy-out wrapper around the closed-loop graph.
closed_loop_dynamics = lambda x: tf_future_states.eval({tf_states: x})
# NOTE: `horizon` is reassigned later by the training cell.
horizon = 500
tol = 0.1
# Keep the full trajectories: the phase-portrait plots sub-sample them.
roa, trajectories = compute_roa(grid, closed_loop_dynamics, horizon, tol, no_traj=False)


### Neural Network: Pre-Training

In [None]:
# Defined unconditionally so the plotting cell below works even without pre-training.
obj = []
# Pre-training samples: the grid points inside the initial safe set.
level_states = grid.all_points[initial_safe_set]

if OPTIONS.pre_train:
    with tf.name_scope('lyapunov_pre_training'):
        # Relative absolute error between the network and the quadratic
        # candidate; no gradient flows through the denominator.
        tf_losses        = tf.abs(tf_values_nn - tf_values_pre) / tf.stop_gradient(tf_values_pre + OPTIONS.eps)
        tf_objective     = tf.reduce_mean(tf_losses, name='objective')
        tf_learning_rate = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='learning_rate')
        optimizer        = tf.train.GradientDescentOptimizer(tf_learning_rate)
        # Only the network parameters are updated.
        lyapunov_update  = optimizer.minimize(tf_objective, var_list=lyapunov_nn.lyapunov_function.parameters)
        
        # Random row indices into level_states, drawn with replacement.
        tf_batch_size = tf.placeholder(tf.int32, [], 'batch_size')
        tf_batch      = tf.random_uniform([tf_batch_size, ], 0, level_states.shape[0], dtype=tf.int32, name='batch_sample')

In [None]:
if OPTIONS.pre_train:
    # Test set
    # NOTE(review): test_size is assigned but the literal int(1e3) is used below.
    test_size = int(1e3)
    idx       = tf_batch.eval({tf_batch_size: int(1e3)})
    test_set  = level_states[idx, :]

    feed_dict = {
        tf_states:         level_states,
        tf_learning_rate:  1e-3,
        tf_batch_size:     int(1e3),
    }
    max_iters = 200

    for i in tqdm(range(max_iters)):
        # One SGD step on a freshly sampled mini-batch
        idx = tf_batch.eval(feed_dict)
        feed_dict[tf_states] = level_states[idx, :]
        session.run(lyapunov_update, feed_dict)

        # Track the objective on the fixed test set
        feed_dict[tf_states] = test_set
        obj.append(tf_objective.eval(feed_dict))
        
    # Refresh the stored values and safe set with the pre-trained network.
    lyapunov_nn.update_values()
    lyapunov_nn.update_safe_set()

In [None]:
# Pre-training objective per iteration (empty plot when pre-training is disabled).
fig, ax = plt.subplots(1, 1, figsize=(3, 2), dpi=OPTIONS.dpi)
ax.set_xlabel(r'iteration')
ax.set_ylabel(r'objective')
ax.plot(obj, '.-r')

plt.show()

In [None]:
# Phase portrait in degrees: true ROA (green), current safe level set (red),
# sub-sampled closed-loop trajectories and the one-step vector field.
fig, ax = plt.subplots(1, 1, figsize=(5, 5), dpi=OPTIONS.dpi)
ax.set_aspect(maxes[0] / maxes[1])
ax.set_xlim(plot_limits[0])
ax.set_ylim(plot_limits[1])
ax.set_xlabel(r'$\phi$ [deg]', fontproperties=OPTIONS.fontproperties)
ax.set_ylabel(r'$\dot{\phi}$ [deg/s]', fontproperties=OPTIONS.fontproperties)
for label in (ax.get_xticklabels() + ax.get_yticklabels()):
    label.set_fontproperties(OPTIONS.fontproperties)

# ROA
z = roa.reshape(grid.num_points)
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=maxes[0] / maxes[1], cmap=binary_cmap('green'), vmin=0)

# Estimated safe level set
c = lyapunov_nn.feed_dict[lyapunov_nn.c_max]
z = (lyapunov_nn.values <= c).reshape(grid.num_points)
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=maxes[0] / maxes[1], cmap=binary_cmap('red'), vmin=0)

# Sub-sample discretization for faster and clearer plotting
N_traj = 14
skip = int(grid.num_points[0] / N_traj)
sub_idx = np.arange(grid.nindex).reshape(grid.num_points)
sub_idx = sub_idx[::skip, ::skip].ravel()
sub_trajectories = trajectories[sub_idx, :, :]
sub_states = grid.all_points[sub_idx]

# Trajectories (normalized states -> degrees)
for n in range(sub_trajectories.shape[0]):
    theta = sub_trajectories[n, 0, :] * np.rad2deg(norms[0])
    omega = sub_trajectories[n, 1, :] * np.rad2deg(norms[1])
    ax.plot(theta, omega, 'k--', linewidth=0.6)

# Unit-length one-step directions in normalized coordinates
dx_dt = (tf_future_states.eval({tf_states: sub_states}) - sub_states) / dt
dx_dt = dx_dt / np.linalg.norm(dx_dt, ord=2, axis=1, keepdims=True)
# Arrow positions must be in degrees, like the axes, images and trajectories
# above; the original placed them in radians, clustering them near the origin.
ax.quiver(sub_states[:, 0] * np.rad2deg(norms[0]), sub_states[:, 1] * np.rad2deg(norms[1]),
          dx_dt[:, 0], dx_dt[:, 1],
          scale=None, pivot='mid', headwidth=4, headlength=8, color='k')

proxy = [plt.Rectangle((0,0), 1, 1, fc=c) for c in [(0., 1., 0., 1), (1., 0., 0., 1)]]
legend = ax.legend(proxy, [r'$\mathcal{R}$', r'$\mathcal{V}\!\ (c_0)$'], prop=OPTIONS.fontproperties, loc='upper right')
legend.get_frame().set_alpha(1.)

plt.show()

### Neural Network: Training

In [None]:
# Save checkpoint for neural net weights
# The checkpoint captures the (pre-trained) network so the training cell can
# restore it before each run.
saver = tf.train.Saver(var_list=lyapunov_nn.lyapunov_function.parameters)
ckpt_prefix = "/tmp/spencerr/pendulum_lyapunov"
# Saver.save does not create missing parent directories, so create it here.
os.makedirs(os.path.dirname(ckpt_prefix), exist_ok=True)
saver.save(session, ckpt_prefix)

In [None]:
# Training objective: the network value V(x) is used as a classifier for
# ROA membership (decision boundary V(x) = c_max), plus a penalty on
# violations of the Lyapunov decrease condition.
with tf.name_scope('roa_classification'):
    # Current maximum level set we want to push the ROA in to
    tf_c_max            = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='c_max')
    # NOTE: not used by any op in this cell; the training loop reads its fed
    # value back out of feed_dict as a plain Python number.
    tf_level_multiplier = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='level_multiplier')
    
    # True class labels, converted from ROA booleans {0, 1} to data labels {-1, 1}
    tf_roa     = tf.placeholder(OPTIONS.tf_dtype, shape=[None, 1], name='roa')
    tf_labels  = 2 * tf_roa - 1

    # Classifier output (signed distance to decision boundary c_max = c)
    tf_decision_dist = tf_c_max - tf_values_nn
    # Estimated label in {0, 0.5, 1}; only referenced by the commented-out loss below.
    tf_y_est         = 0.5 * (tf.sign(tf_decision_dist) + 1)
    
    # Use perceptron / hinge / logistic loss with class weights
    tf_weights         = tf.placeholder(OPTIONS.tf_dtype, shape=[None, 1], name='class_weights')
    tf_classifier_loss = tf_weights * tf.maximum(- tf_labels * tf_decision_dist, 0, name='perceptron_loss')
#     tf_classifier_loss = tf_weights * tf.maximum(1 - tf_labels * tf_decision_dist, 0, name='hinge_loss')
#     tf_classifier_loss = tf_weights * tf.log(1 + tf.exp(- tf_labels * tf_decision_dist), name='logistic_loss')
    
    # Enforce decrease constraint with Lagrangian relaxation
    # Penalize positive (dV - threshold), relative to V, for samples labelled as ROA.
    tf_lagrange_multiplier = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='lagrange_multiplier')
    tf_decrease_loss       = tf_roa * tf.maximum(tf_dv_nn - tf_threshold, 0) / (tf_values_nn + OPTIONS.eps)
#     tf_decrease_loss       = tf_y_est * tf.maximum(tf_dv_nn - tf_threshold, 0) / tf.stop_gradient(tf_values_nn + OPTIONS.eps)
    
    # Define update step
    # NOTE: tf_objective, tf_learning_rate and optimizer rebind the names
    # used by the pre-training cell.
    tf_objective     = tf.reduce_mean(tf_classifier_loss + tf_lagrange_multiplier * tf_decrease_loss, name='objective')
    tf_learning_rate = tf.placeholder(OPTIONS.tf_dtype, shape=[], name='learning_rate')
    optimizer        = tf.train.GradientDescentOptimizer(tf_learning_rate)
    training_update  = optimizer.minimize(tf_objective, var_list=lyapunov_nn.lyapunov_function.parameters)

# Random mini-batch indices in [0, tf_idx_range), drawn with replacement.
with tf.name_scope('sampling'):
    tf_batch_size = tf.placeholder(tf.int32, [], 'batch_size')
    tf_idx_range  = tf.placeholder(tf.int32, shape=[], name='indices_to_sample')
    tf_idx_batch  = tf.random_uniform([tf_batch_size, ], 0, tf_idx_range, dtype=tf.int32, name='batch_sample')


In [None]:
# Restore checkpoint
# Resets the network to the saved weights so this cell and the training
# loop can be re-run from the same starting point.
saver.restore(session, ckpt_prefix)
lyapunov_nn.update_values()
lyapunov_nn.update_safe_set()
# session.run(tf.variables_initializer(optimizer.variables()))

# Training history and running ROA estimate, seeded with the current safe set.
obj          = []
loss_class   = []
loss_dec     = []
roa_estimate = np.copy(lyapunov_nn.safe_set)
# Mask of grid points used as training targets so far (same dtype as safe_set).
idx_visited  = np.zeros_like(lyapunov_nn.safe_set)

# Per-iteration records: level c_max and safe-set size as a fraction of the grid.
c_max     = [lyapunov_nn.feed_dict[lyapunov_nn.c_max], ]
safe_size = [lyapunov_nn.safe_set.sum() / grid.nindex, ]


In [None]:
# Training hyper-parameters.
outer_iters         = 3     # safe-set update rounds
inner_iters         = 10    # SGD steps per round
horizon             = 50    # simulation steps when propagating gap states (rebinds the ROA horizon)
lagrange_multiplier = 1000  # weight of the decrease loss in the objective

feed_dict = {
    tf_states:               np.zeros((1, grid.ndim)), # placeholder
    tf_batch_size:           int(1e2),
    tf_c_max:                1,
    tf_lagrange_multiplier:  lagrange_multiplier,
    tf_idx_range:            grid.nindex,
    #
    tf_learning_rate:        1e-2,
    tf_level_multiplier:     2.,
}

# Defaults; the training loop overwrites both with the current target set.
test_set = grid.all_points
test_labels = roa.reshape([-1, 1])

In [None]:
# Report the starting point, then alternate between (1) growing the ROA
# estimate by simulating states just outside the current level set and
# (2) training the network to classify that estimate.
# NOTE(review): the percentages divide by roa.sum(); an empty ROA would
# divide by zero.
print('Current metrics ...')
c = lyapunov_nn.feed_dict[lyapunov_nn.c_max]
num_safe = lyapunov_nn.safe_set.sum()
print('c_max: {}'.format(c))
print('grid size: {}'.format(grid.nindex))
print('safe set size: {} ({:.2f}% of grid, {:.2f}% of ROA)\n'.format(int(num_safe), 100 * num_safe / grid.nindex, 100 * num_safe / roa.sum()))
print('')
time.sleep(0.5)

for _ in range(outer_iters):
    # Identify current safe set and gap states around it
    # Gap = points with c < V(x) <= level_multiplier * c.
    c         = lyapunov_nn.feed_dict[lyapunov_nn.c_max]
    idx_small = lyapunov_nn.values.ravel() <= c
    idx_big   = lyapunov_nn.values.ravel() <= feed_dict[tf_level_multiplier] * c
    idx_gap   = np.logical_and(idx_big, ~idx_small)
    
    # Update ROA estimate by propagating gap states forward
    # A gap state joins the estimate if it lies inside the level set V <= c
    # after `horizon` closed-loop steps.
    propagated_states = grid.all_points[idx_gap]
    for _ in range(horizon):
        propagated_states = tf_future_states.eval({tf_states: propagated_states})
#         np.clip(propagated_states, -1, 1, out=propagated_states)
    safe_in_future = (tf_values_nn.eval({tf_states: propagated_states}) <= c).ravel()
    roa_estimate[idx_gap] |= safe_in_future
    
    # Train classifier on current ROA estimate and any states from scaled level set
    target_idx              = np.logical_or(idx_big, roa_estimate)
    target_set              = grid.all_points[target_idx]
    target_labels           = roa_estimate[target_idx].astype(OPTIONS.np_dtype).reshape([-1, 1])
    feed_dict[tf_idx_range] = target_set.shape[0]
    idx_visited |= target_idx
    
    # Test set
    # The losses are recorded on the full target set, not on held-out data.
    test_set = target_set
    test_labels = target_labels
    
    # SGD for classification
    for _ in tqdm(range(inner_iters)):
        # Training step
        idx_batch                     = tf_idx_batch.eval(feed_dict)
        feed_dict[tf_states]          = target_set[idx_batch]
        feed_dict[tf_roa]             = target_labels[idx_batch]
#         feed_dict[tf_weights], counts = confusion_weights(tf_values.eval(feed_dict) <= feed_dict[tf_c_max], feed_dict[tf_roa].astype(bool))
        feed_dict[tf_weights], counts = class_weights(feed_dict[tf_roa].astype(bool))
        session.run(training_update, feed_dict=feed_dict)

        # Record objectives
        feed_dict[tf_states]          = test_set
        feed_dict[tf_roa]             = test_labels
#         feed_dict[tf_weights], counts = confusion_weights(tf_values.eval(feed_dict) <= feed_dict[tf_c_max], feed_dict[tf_roa].astype(bool))
        feed_dict[tf_weights], counts = class_weights(feed_dict[tf_roa].astype(bool))

        results = session.run([tf_classifier_loss, tf_decrease_loss], feed_dict)
        loss_class.append(results[0].mean())
        loss_dec.append(results[1].mean())
        obj.append(loss_class[-1] + feed_dict[tf_lagrange_multiplier] * loss_dec[-1])

    # Recompute values and safe set with the updated network, then fold the
    # new safe set into the ROA estimate.
    lyapunov_nn.update_values()
    lyapunov_nn.update_safe_set()
    roa_estimate |= lyapunov_nn.safe_set
    safe_size.append(lyapunov_nn.safe_set.sum() / grid.nindex)
    c_max.append(lyapunov_nn.feed_dict[lyapunov_nn.c_max])
    
    # `counts` holds the class counts of the last recorded step; it is only
    # defined if the inner loop ran at least once.
    print(counts)
    print('c_max: {}'.format(c_max[-1]))
    print('safe set size: {} ({:.2f}% of grid, {:.2f}% of ROA)\n'.format(int(safe_size[-1] * grid.nindex), 
                                                                         100 * safe_size[-1], 
                                                                         100 * safe_size[-1] * roa.size / roa.sum()))


### Neural Network: Results

In [None]:
# Classification loss and weighted decrease loss over all SGD steps.
fig, ax = plt.subplots(1, 1, figsize=(6, 3), dpi=OPTIONS.dpi)

ax.plot(loss_class, '.-r')
# Scaled by the Lagrange multiplier, i.e. as it enters the objective.
ax.plot(feed_dict[tf_lagrange_multiplier] * np.asarray(loss_dec), '.-b')

ax.set_xlabel(r'SGD iteration (accumulated)')
ax.set_ylabel(r'Training loss')
# ax.set_xticks(list(range(0, len(loss_class) + 1, inner_iters)))

# Coloured rectangles serve as legend handles for the two curves.
proxy = [plt.Rectangle((0,0), 1, 1, fc=c) for c in ['red', 'blue']]    
legend = ax.legend(proxy, ['Classification loss', 'Lyapunov decrease loss'], loc='upper right', fontsize=8)
legend.get_frame().set_alpha(0.5)

plt.show()


In [None]:
# Level c_k (left axis, red) and safe-set size relative to the true ROA
# (right axis, blue) per safe-set update.
fig, ax = plt.subplots(1, 1, figsize=(6, 3), dpi=OPTIONS.dpi)
# Fraction of grid points inside the true ROA.
roa_fraction = roa.sum() / roa.size

ax.plot(c_max, '.-r')
ax.set_ylabel(r'$c_k$')
ax.tick_params('y', colors='r')
# ax.set_ylim([0, 1])

ax.set_xlabel(r'Safe set update iteration $k$')
# ax.set_xticks(list(range(0, len(c_max) + 1, 1)))

# Second y-axis sharing the same x-axis.
ax = ax.twinx()
ax.plot(np.array(safe_size) / roa_fraction, '.-b')
ax.set_ylabel(r'$|\mathcal{V}(c_k) \cap \mathcal{X}_\tau|\ /\ |\mathcal{R} \cap \mathcal{X}_\tau|$')
ax.tick_params('y', colors='b')
ax.set_ylim([0, 1])

plt.show()

print(np.array(safe_size) / roa_fraction)

### Neural Network: Visualization with Phase Portrait and ROA

In [None]:
# Overview plot in degrees: true ROA, learned safe set, decrease region,
# network level sets, and the SOS / LQR safe sets for comparison.
fig, ax = plt.subplots(1, 1, figsize=(4, 4), dpi=OPTIONS.dpi)
ax.set_aspect(theta_max / omega_max)
ax.set_xlim(plot_limits[0])
ax.set_ylim(plot_limits[1])

# True ROA
z = roa.reshape(grid.num_points)
# im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=theta_max / omega_max, cmap=binary_cmap('blue', 0.5), vmin=0)
im = ax.contour(z.T, origin='lower', extent=plot_limits.ravel(), colors='darkgreen', linestyles='dashed')

# Safe set
z = lyapunov_nn.safe_set.reshape(grid.num_points)
# im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=theta_max / omega_max, cmap=binary_cmap('blue', 1), vmin=0)
im = ax.contour(z.T, origin='lower', extent=plot_limits.ravel(), colors='blue')

# Decrease region
z = tf_dv_nn.eval({tf_states: grid.all_points}).reshape(grid.num_points) < 0
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=theta_max / omega_max, cmap=binary_cmap('blue', 0.3), vmin=0)
# im = ax.contour(z.T, origin='lower', extent=plot_limits.ravel(), cmap=ListedColormap(['blue']), levels=0, linestyles='dashed')

# Estimated ROA
# NOTE(review): this rebinds the global `alpha` that the leaky_relu lambda
# reads; and `cmap` / this `z` are only used by the commented-out calls below.
alpha = 0.5
cmap = ListedColormap([(1., 1., 1., 0.), (1., 0., 0., alpha), (0., 1., 0., alpha)])
# z = tf_negative.eval({tf_states: grid.all_points}).reshape(grid.num_points)
z = roa_estimate.astype(int)
z[idx_visited] += 1
z = z.reshape(grid.num_points)
# im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=theta_max / omega_max, cmap=cmap, vmin=0)
# im = ax.contour(z.T, origin='lower', extent=plot_limits.ravel(), cmap=ListedColormap(['blue', 'red', 'green']))

# Neural-network level sets
# `levels` must be a sequence to select contour *values*: an integer is
# interpreted by matplotlib as a number of automatically chosen levels, and
# a bare float is not a valid level list. Draw V(x) = 1 (the classifier's
# decision boundary, tf_c_max is fed with 1) and V(x) = 2 * c_max.
z = tf_values_nn.eval({tf_states: grid.all_points}).reshape(grid.num_points)
im = ax.contour(z.T, origin='lower', extent=plot_limits.ravel(), colors='darkviolet', levels=[1], linestyles='dashed')
im = ax.contour(z.T, origin='lower', extent=plot_limits.ravel(), colors='darkviolet',
                levels=[2 * lyapunov_nn.feed_dict[lyapunov_nn.c_max]], linestyles='dashed')

# SOS Lyapunov function
z = lyapunov_sos.safe_set.reshape(grid.num_points)
im = ax.contour(z.T, origin='lower', extent=plot_limits.ravel(), colors='tomato', linestyles='dashed')

# LQR Lyapunov function
z = lyapunov_lqr.safe_set.reshape(grid.num_points)
im = ax.contour(z.T, origin='lower', extent=plot_limits.ravel(), colors='hotpink', linestyles='dashed')

# Trajectories (normalized states -> degrees); reuses the sub-sampled
# trajectories computed by the earlier phase-portrait cell.
for n in range(sub_trajectories.shape[0]):
    x = sub_trajectories[n, 0, :] * np.rad2deg(theta_max)
    y = sub_trajectories[n, 1, :] * np.rad2deg(omega_max)
    ax.plot(x, y, 'k--', linewidth=0.25)
# sub_states = grid.all_points[sub_idx]
# dx_dt = (dynamics(sub_states) - sub_states) / dt
# dx_dt = dx_dt / np.linalg.norm(dx_dt, ord=2, axis=1, keepdims=True)
# ax.quiver(sub_states[:, 0] * theta_max, sub_states[:, 1] * omega_max, dx_dt[:, 0], dx_dt[:, 1], 
#           scale=None, pivot='mid', headwidth=3, headlength=6, color='k')

plt.show()


In [None]:
# Region where the one-step change of the network candidate lies below the
# threshold computed for tau = state_dim / 2000.
fig, ax = plt.subplots(1, 1, figsize=(4, 4), dpi=OPTIONS.dpi)
ax.set_aspect(theta_max / omega_max)
ax.set_xlim(plot_limits[0])
ax.set_ylim(plot_limits[1])
    
# NOTE(review): `b` rebinds the global friction constant defined in the
# Dynamics cell; re-running earlier cells afterwards without re-running that
# cell would pick up this array instead.
# NOTE: lyapunov_nn.threshold(...) is called afresh on every run of this cell.
a = tf_dv_nn.eval({tf_states: grid.all_points}).reshape(grid.num_points)
b = lyapunov_nn.threshold(tf_states, state_dim / 2000).eval({tf_states: grid.all_points}).reshape(grid.num_points)

z = a - b < 0
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=theta_max / omega_max, cmap=binary_cmap('blue', 0.3), vmin=0)

## Save Model (FOR PAPER ONLY)

In [None]:
# saver       = tf.train.Saver(var_list=lyapunov_nn.lyapunov_function.parameters)
# ckpt_prefix = "./tf_checkpoints/pendulum"
# saver.save(session, ckpt_prefix)

# var_list = [grid, tau, initial_safe_set, roa, lagrange_multiplier, obj, loss_class, loss_dec, 
#             roa_estimate, idx_visited, c_max, safe_size, sub_trajectories, sub_states, dx_dt]

# # Saving the objects:
# with open(ckpt_prefix + '.pkl', 'wb') as file:
#     pickle.dump(var_list, file)
