# Learning a Lyapunov Function for an Inverted Pendulum

In [None]:
from __future__ import division, print_function

import numpy as np
import tensorflow as tf
import gpflow
import safe_learning
import matplotlib.pyplot as plt
import time
import pandas
import mosek
import cvxpy as cvx
import os

from scipy.linalg import block_diag
from utilities import InvertedPendulum, debug, LyapunovNetwork
from safe_learning.utilities import get_storage, set_storage
from tqdm import tqdm
from tensorflow.python.client import timeline
from matplotlib.colors import ListedColormap
from matplotlib.font_manager import FontProperties

%matplotlib inline

# TODO testing ****************************************#
class Options(object):
    """Plain attribute bag: each keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        super(Options, self).__init__()
        # Store every option directly in the instance dictionary
        for key, value in kwargs.items():
            self.__dict__[key] = value

# Notebook-wide options gathered in a single attribute bag.
#   eps                    -- small constant; passed to LyapunovNetwork and added to a loss denominator
#   dpi, fontproperties    -- figure resolution and label font used by the plotting cells
#   save_figs, fig_path    -- when save_figs is True, figures are written under fig_path
#   use_bad_lyapunov_start -- when True, the matrix P returned by dlqr is replaced by the identity
#   use_zero_threshold     -- when True, the discretization constant tau is set to 0
# NOTE(review): saturate and use_linear_dynamics are not read in the visible part of the
# notebook; the module-level SATURATE / USE_LINEAR_DYNAMICS flags are used instead.
OPTIONS = Options(np_dtype               = safe_learning.config.np_dtype,
                  tf_dtype               = safe_learning.config.dtype,
                  saturate               = True,
                  eps                    = 1e-8,
                  use_linear_dynamics    = False,
                  dpi                    = 150,
                  fontproperties         = FontProperties(size=10),
                  save_figs              = False,
                  use_bad_lyapunov_start = True,
                  use_zero_threshold     = True,
                  fig_path               = 'figures/pendulum_lyapunov/')
#******************************************************#
    
# NOTE(review): NP_DTYPE, TF_DTYPE and EPS duplicate OPTIONS.np_dtype, OPTIONS.tf_dtype and OPTIONS.eps.
NP_DTYPE = safe_learning.config.np_dtype
TF_DTYPE = safe_learning.config.dtype
# Dictionary handed to get_storage / set_storage when the graph tensors are built further down
_STORAGE = {}
EPS = 1e-8

# 'inferno' colormap; values above the color range are drawn white, values below it black
HEAT_MAP = plt.get_cmap('inferno', lut=None)
HEAT_MAP.set_over('white')
HEAT_MAP.set_under('black')

# 'viridis' colormap requested with lut=21; values above the range are gold, values below it white
LEVEL_MAP = plt.get_cmap('viridis', lut=21)
LEVEL_MAP.set_over('gold')
LEVEL_MAP.set_under('white')

# Two-entry colormap: fully transparent white, then opaque green
BINARY_MAP = ListedColormap([(1., 1., 1., 0.), (0., 1., 0., 1)])

def binary_cmap(color='red', alpha=1.):
    """Return a two-entry ListedColormap: transparent white, then `color`.

    The names 'red', 'green' and 'blue' map to the pure primary with opacity
    `alpha`. Any other `color` value is passed through to ListedColormap
    unchanged, in which case `alpha` is not applied.
    """
    primaries = (
        ('red', (1., 0., 0.)),
        ('green', (0., 1., 0.)),
        ('blue', (0., 0., 1.)),
    )
    for name, rgb in primaries:
        if color == name:
            opaque_entry = rgb + (alpha,)
            break
    else:
        # Unknown name or explicit color specification: use it as given
        opaque_entry = color
    return ListedColormap([(1., 1., 1., 0.), opaque_entry])

# Display settings: four-decimal floats in pandas and NumPy output, no wrapping of wide frames
pandas.options.display.float_format = '{:,.4f}'.format
pandas.set_option('expand_frame_repr', False)
np.set_printoptions(precision=4)

# Default font size for figures (individual plotting cells override it)
plt.rc('font', size=5)

## TensorFlow Session

In [None]:
# NOTE(review): os.cpu_count() can return None when the count is undetermined, and that value
# is passed straight into device_count below -- confirm this cannot happen on the target machine.
MAX_CPU_COUNT = os.cpu_count()
NUM_CORES = 8
NUM_SOCKETS = 2

# Threading-related environment variables; they are set before the session is created below
os.environ["KMP_BLOCKTIME"]    = str(0)
os.environ["KMP_SETTINGS"]     = str(1)
os.environ["KMP_AFFINITY"]     = 'granularity=fine,noverbose,compact,1,0'
os.environ["OMP_NUM_THREADS"]  = str(NUM_CORES)

# Session configuration: NUM_CORES intra-op threads, NUM_SOCKETS inter-op threads
config = tf.ConfigProto(intra_op_parallelism_threads  = NUM_CORES,
                        inter_op_parallelism_threads  = NUM_SOCKETS,
                        allow_soft_placement          = False,
#                         log_device_placement          = True,
                        device_count                  = {'CPU': MAX_CPU_COUNT},
                       )

# TODO manually for CPU-only?
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

# Close the session left over from a previous run of this cell; on the first run
# `session` is not defined yet, which raises the NameError that is ignored here
try:
    session.close()
except NameError:
    pass
session = tf.InteractiveSession(config=config)

# print('Found MAX_CPU_COUNT =', MAX_CPU_COUNT)
# for dev in session.list_devices():
#     print(dev)

## Debug Flags

In [None]:
# Saturate the action so that it lies in [-1, 1]
SATURATE = True

# Use the true physical parameters in the GP model
# NOTE(review): this flag is not read anywhere in the visible part of the notebook
USE_TRUE_PARAMETERS = False

# Use the linearized discrete-time model as the true underlying dynamics
USE_LINEAR_DYNAMICS = False

# If True, L_v is the element-wise absolute value of the Lyapunov function's gradient;
# otherwise L_v is the 1-norm of that gradient taken along axis 1
USE_LIPSCHITZ_SCALING = True


In [None]:
def tf_function_compose(tf_input, tf_function, num_compositions, output_name='function_composition', **kwargs):
    '''Apply a function multiple times to the input.

    Builds a `tf.while_loop` that repeatedly replaces the running value by
    `tf_function(value, **kwargs)`, starting from `tf_input`, for as long as
    the loop counter is below `num_compositions`.

    Parameters
    ----------
    tf_input : tensor
        Initial value. Its static shape is used as the loop's shape
        invariant, so `tf_function` must return a tensor compatible with it.
    tf_function : callable
        Called as `tf_function(value, **kwargs)` on every iteration.
    num_compositions : number or scalar tensor
        Upper bound compared against the loop counter.
    output_name : str
        Name given to the `tf.while_loop` op.
    **kwargs
        Extra keyword arguments forwarded to `tf_function`.

    Returns
    -------
    tensor
        The running value after the loop terminates.
    '''

    def body(intermediate, idx):
        return tf_function(intermediate, **kwargs), idx + 1

    # tf.while_loop calls the condition with the loop variables, and exactly two
    # are carried here; the previous three-argument signature would raise a TypeError.
    def condition(intermediate, idx):
        return idx < num_compositions

    initial_idx = tf.constant(0, dtype=TF_DTYPE)
    loop_vars = [tf_input, initial_idx]
    shape_invariants = [tf_input.get_shape(), initial_idx.get_shape()]
    tf_output, _ = tf.while_loop(condition, body, loop_vars, shape_invariants, name=output_name)

    return tf_output

## Dynamics

In [None]:
# Constants
dt = 0.01   # sampling time
g = 9.81    # gravity

# True system parameters
m = 0.15    # pendulum mass
L = 0.5     # pole length
b = 0.1     # rotational friction

# State and action normalizers
theta_max = np.deg2rad(120)
omega_max = np.sqrt(g / L)
u_max = g * m * L * np.sin(theta_max)

state_norm = (theta_max, omega_max)
action_norm = (u_max, )

# Constraints for initial 'safe' states
# NOTE(review): theta_safe and omega_safe are not used in the visible part of the notebook
theta_safe = 0.2 * theta_max
omega_safe = 0.5 * omega_max

# Dimensions and domains
# Limits are [-1, 1] per dimension, i.e. expressed in normalized coordinates
state_dim = 2
action_dim = 1
state_limits = np.array([[-1., 1.]]*state_dim)
action_limits = np.array([[-1., 1.]]*action_dim)

# True system
pendulum = InvertedPendulum(m, L, b, dt, [state_norm, action_norm])
A, B = pendulum.linearize()

# `dynamics` is later called as dynamics(states, actions); it is either the linear
# system built from (A, B) or the pendulum object's own __call__
if USE_LINEAR_DYNAMICS:
    dynamics = safe_learning.functions.LinearSystem((A, B), name='true_dynamics')
else:
    dynamics = pendulum.__call__

## State Discretization

In [None]:
# Number of states along each dimension
num_states = 501

# State grid
grid_limits = np.array([[-1., 1.], [-1., 1.]])
state_discretization = safe_learning.GridWorld(grid_limits, num_states)

# Discretization constant
# With use_zero_threshold the constant is forced to 0; otherwise it is half the
# sum of the grid's unit_maxes
if OPTIONS.use_zero_threshold:
    tau = 0.0
else:
    tau = np.sum(state_discretization.unit_maxes) / 2

print('Grid size: {}'.format(state_discretization.nindex))
print('Discretization constant: {}'.format(tau))

## Cost Function

In [None]:
# State cost matrix
Q = np.diag([0.1, 0.1])

# Action cost matrix
R = 0.1 * np.identity(action_dim)

# Normalize cost matrices
# Both are divided by the largest entry found in either matrix, so the largest
# entry overall becomes 1 (with the values above, Q and R both become identities)
cost_norm = np.amax([Q.max(), R.max()])
Q = Q / cost_norm
R = R / cost_norm

# Quadratic cost function
# Built from the block-diagonal matrix diag(Q, R)
cost_function = safe_learning.QuadraticFunction(block_diag(Q, R), name='cost_function')

## Policy

In [None]:
# Fix policy to the LQR solution for the true system
# dlqr is given the linearization (A, B) and the normalized costs; the policy uses the gain -K
K, P = safe_learning.utilities.dlqr(A, B, Q, R)
policy = safe_learning.LinearSystem(-K, name='policy')

# Wrap the linear policy in a Saturation with bounds -1 and 1
if SATURATE:
    policy = safe_learning.Saturation(policy, -1, 1)

    
def plot_policy(policy, discretization, state_norm=None):
    """Show the policy output over the whole discretization as a heat map.

    Parameters
    ----------
    policy : callable
        Called on `discretization.all_points`; the result must provide `.eval()`.
    discretization : object
        Must provide `all_points`, `num_points` and `limits`.
    state_norm : pair (theta_max, omega_max), optional
        If given, the axis limits are multiplied by these values converted
        from radians to degrees.
    """
    plt.rc('font', size=5)
    fig, ax = plt.subplots(1, 1, figsize=(3, 2), dpi=300)
    colorbar_ticks = np.linspace(-1., 1., 9)
    # Slightly inside [-1, 1], so values at the bounds fall outside the color range
    color_bound = 1. - 1e-10

    if state_norm is None:
        limits = discretization.limits
    else:
        theta_max, omega_max = state_norm
        to_degrees = np.array([np.rad2deg(theta_max), np.rad2deg(omega_max)]).reshape((-1, 1))
        limits = to_degrees * discretization.limits

    actions = policy(discretization.all_points).eval().reshape(discretization.num_points)
    image = ax.imshow(
        actions.T,
        origin='lower',
        extent=limits.ravel(),
        aspect=limits[0, 0] / limits[1, 0],
        cmap=HEAT_MAP,
        vmin=-color_bound,
        vmax=color_bound,
    )
    fig.colorbar(image, ax=ax, label=r'$u = \pi(x)$', ticks=colorbar_ticks)
    ax.set_xlabel(r'$\phi$ [deg]')
    ax.set_ylabel(r'$\dot{\phi}$ [deg/s]')
    plt.show()

    
# Visualize policy
# plot_policy(policy, state_discretization, state_norm)

## LQR Lyapunov Candidate

In [None]:
if OPTIONS.use_bad_lyapunov_start:
    # Deliberately start from the identity instead of the P returned by dlqr
    P = np.eye(state_dim)

# Define the Lyapunov function corresponding to the known policy
lyapunov_function = safe_learning.QuadraticFunction(P)
grad_lyapunov_function = safe_learning.LinearSystem((2*P,))

# initial_safe_set = np.all(state_discretization.all_points == 0.0, axis=1)
# Initial safe set: grid points whose value is at most 6% of the largest value on the grid
values = session.run(lyapunov_function(state_discretization.all_points))
cutoff = 6e-2 * np.max(values)
initial_safe_set = np.squeeze(values, axis=1) <= cutoff

# Scaling
# Rebuild the candidate and its gradient with P divided by the largest grid value
# found above (presumably so that the candidate's maximum over the grid is 1)
lyapunov_function = safe_learning.QuadraticFunction(P / np.max(values))
grad_lyapunov_function = safe_learning.LinearSystem((2 * P / np.max(values),))

# Lipschitz constants
# Both are state-independent here: induced 1-norms of the gain and of (A, B)
L_pol = lambda s: tf.constant(np.linalg.norm(-K, 1), dtype=TF_DTYPE)
L_dyn = lambda s: np.linalg.norm(A, 1) + np.linalg.norm(B, 1)*L_pol(s)

if USE_LIPSCHITZ_SCALING:
    L_v = lambda s: tf.abs(grad_lyapunov_function(s))
else:
    # `keepdims` (not the deprecated `keep_dims`) matches the neural-network cell below
    L_v = lambda s: tf.norm(grad_lyapunov_function(s), ord=1, axis=1, keepdims=True)

# Initialize class
lyapunov_lqr = safe_learning.Lyapunov(state_discretization, lyapunov_function, dynamics, L_dyn, L_v, tau, policy, initial_safe_set)

In [None]:
# Compare safe set before and after checking the decrease condition for the first time
# (only the "before" half is active; the "after" half is commented out below)
c_max = lyapunov_lqr.feed_dict[lyapunov_lqr.c_max]
init_safe_set_size = np.sum(lyapunov_lqr.safe_set)

print('Before update ...')
print('c_max: {}'.format(c_max))
print('Safe set size: {}\n'.format(init_safe_set_size))
debug(lyapunov_lqr, dynamics, state_norm, plot='pendulum')

# old_safe_set = np.copy(lyapunov_lqr.safe_set)
# lyapunov_lqr.update_safe_set()

# c_max = lyapunov_lqr.feed_dict[lyapunov_lqr.c_max]
# init_safe_set_size = np.sum(lyapunov_lqr.safe_set)

# print('After update ...')
# print('c_max: {}'.format(c_max))
# print('Safe set size: {}'.format(init_safe_set_size))
# debug(lyapunov_lqr, true_dynamics, state_norm, plot='pendulum')

In [None]:
# Refresh the stored values, then the safe set, of the LQR-based candidate
# (presumably update_safe_set also recomputes c_max, as the progress messages
# in the training loop suggest -- verify against safe_learning.Lyapunov)
lyapunov_lqr.update_values()
lyapunov_lqr.update_safe_set()

## Neural Network Lyapunov Candidate

In [None]:
# Two hidden layers of 64 units, each with tanh activation
layer_dims = [64, 64]
activations = [tf.tanh, tf.tanh]

# leaky = lambda x, name: tf.nn.leaky_relu(x, 0.3, name)
# activations = [leaky, leaky]

lyapunov_function = LyapunovNetwork(state_dim, layer_dims, activations, OPTIONS.eps)

# Gradient of the network output with respect to its input states
grad_lyapunov_function = lambda s: tf.gradients(lyapunov_function(s), s)[0]
if USE_LIPSCHITZ_SCALING:
    L_v = lambda s: tf.abs(grad_lyapunov_function(s))
else:
    L_v = lambda s: tf.norm(grad_lyapunov_function(s), ord=1, axis=1, keepdims=True)

# TODO need to use template before variables exist in the graph
# The network is called once on a placeholder before its parameters are initialized
tf_states = tf.placeholder(TF_DTYPE, shape=[None, state_dim], name='states')
temp = lyapunov_function(tf_states)
session.run(tf.variables_initializer(lyapunov_function.parameters))

# Same discretization, dynamics, Lipschitz terms, policy and initial safe set as lyapunov_lqr;
# only the candidate function (and its L_v) differ
lyapunov = safe_learning.Lyapunov(state_discretization, lyapunov_function, dynamics, L_dyn, L_v, tau, policy, initial_safe_set)

In [None]:
# count = 0
# for p in lyapunov_function.parameters:
#     count += p.shape[0].value * p.shape[1].value
# print(count)

## TensorFlow Graph

In [None]:
# Build the evaluation graph once; on later runs of this cell the tensors are
# recovered from _STORAGE instead of being created again
storage = get_storage(_STORAGE)
if storage is None:
    # Closed loop: states -> policy actions -> successor states
    tf_states = tf.placeholder(TF_DTYPE, shape=[None, state_dim], name='states')
    tf_actions = policy(tf_states)
    tf_future_states = dynamics(tf_states, tf_actions)
    
    # LQR candidate: value now, value after one step, and their difference
    tf_values_lqr = lyapunov_lqr.lyapunov_function(tf_states)
    tf_future_values_lqr = lyapunov_lqr.lyapunov_function(tf_future_states)
    tf_dv_lqr = tf_future_values_lqr - tf_values_lqr

    # Neural-network candidate: same three quantities
    tf_values = lyapunov.lyapunov_function(tf_states)
    tf_future_values = lyapunov.lyapunov_function(tf_future_states)
    tf_dv = tf_future_values - tf_values
    
    # Boolean per state: one-step change strictly below the threshold
    tf_threshold = lyapunov.threshold(tf_states, lyapunov.tau)
    tf_negative = tf.squeeze(tf.less(tf_dv, tf_threshold), axis=1)
    
    storage = [('states', tf_states), 
               ('future_states', tf_future_states), 
               ('values_lqr', tf_values_lqr), 
               ('values', tf_values), 
               ('future_values_lqr', tf_future_values_lqr), 
               ('future_values', tf_future_values),
               ('dv_lqr', tf_dv_lqr),
               ('dv', tf_dv),
               ('threshold', tf_threshold), 
               ('negative', tf_negative)]
    set_storage(_STORAGE, storage)
else:
    # NOTE(review): this unpacking assumes storage.values() yields the tensors in the
    # same order as the list above -- confirm get_storage returns an ordered mapping
    (tf_states, tf_future_states, tf_values_lqr, tf_values, tf_future_values_lqr, tf_future_values, 
     tf_dv_lqr, tf_dv, tf_threshold, tf_negative)  = storage.values()

## True Region of Attraction

In [None]:
def gridify(norms, maxes=None, num_points=25):
    """Build a GridWorld whose limits are +/- maxes / norms in every dimension.

    Parameters
    ----------
    norms : array-like
        Per-dimension normalizers (flattened before use).
    maxes : array-like, optional
        Per-dimension extents in the same units as `norms`; defaults to
        `norms`, which gives limits of [-1, 1].
    num_points : int or sequence
        An int is repeated for every dimension; anything else is passed on as is.
    """
    norms = np.asarray(norms).ravel()
    maxes = norms if maxes is None else np.asarray(maxes).ravel()
    limits = np.column_stack((- maxes / norms, maxes / norms))

    points_per_dim = num_points
    if isinstance(points_per_dim, int):
        points_per_dim = [points_per_dim, ] * len(norms)
    return safe_learning.GridWorld(limits, points_per_dim)


def compute_roa(grid, closed_loop_dynamics, horizon=250, tol=1e-3, equilibrium=None, no_traj=True):
    """Approximate the region of attraction by forward simulation.

    Every initial point is propagated with `closed_loop_dynamics` for
    `horizon - 1` steps; a point counts as inside the region of attraction
    when its final state lies within `tol` (Euclidean distance) of the
    equilibrium.

    Parameters
    ----------
    grid : ndarray or object
        Either an array with one initial state per row, or an object
        providing `all_points`, `nindex` and `ndim`.
    closed_loop_dynamics : callable
        Maps an array of states to the array of successor states.
    horizon : int
        Number of time indices, including the initial one.
    tol : float
        Distance threshold on the final states.
    equilibrium : array-like, optional
        Target state; defaults to the origin with shape (1, ndim).
    no_traj : bool
        If True only the end states are kept; otherwise the full
        trajectories are stored and returned as well.

    Returns
    -------
    (roa, dists) if `no_traj`, else (roa, dists, trajectories), where `roa`
    is a boolean array, `dists` the final distances and `trajectories` has
    shape (nindex, ndim, horizon).
    """
    if isinstance(grid, np.ndarray):
        all_points = grid
        nindex, ndim = grid.shape[0], grid.shape[1]
    else:
        all_points, nindex, ndim = grid.all_points, grid.nindex, grid.ndim

    # Forward-simulate all trajectories from initial points in the discretization
    trajectories = None
    if no_traj:
        end_states = all_points
        for _ in range(1, horizon):
            end_states = closed_loop_dynamics(end_states)
    else:
        trajectories = np.empty((nindex, ndim, horizon))
        trajectories[:, :, 0] = all_points
        for step in range(1, horizon):
            previous = trajectories[:, :, step - 1]
            trajectories[:, :, step] = closed_loop_dynamics(previous)
        end_states = trajectories[:, :, -1]

    target = np.zeros((1, ndim)) if equilibrium is None else equilibrium

    # Compute an approximate ROA as all states that end up "close" to the target
    offsets = end_states - target
    dists = np.linalg.norm(offsets, ord=2, axis=1, keepdims=True).ravel()
    roa = (dists <= tol)

    if no_traj:
        return roa, dists
    return roa, dists, trajectories


# Normalizers converted from radians to degrees; they double as the plot extents
norms = np.rad2deg(state_norm)
maxes = np.copy(norms)
plot_limits = np.column_stack((- maxes, maxes))
# N = 501
# grid = gridify(norms, maxes, N)
grid = lyapunov.discretization

# One closed-loop step, evaluated numerically through the default session
closed_loop_dynamics = lambda x: tf_future_states.eval({tf_states: x})
horizon = 500
tol = 0.1

# NOTE(review): no_traj=False stores an array of shape (grid.nindex, grid.ndim, horizon);
# with 501 points per dimension this is a large allocation -- confirm it fits in memory
roa, dists, trajectories = compute_roa(grid, closed_loop_dynamics, horizon, tol, no_traj=False)


In [None]:
# Figure: simulated region of attraction with sampled trajectories and a direction field
fig, ax = plt.subplots(1, 1, figsize=(5, 5), dpi=OPTIONS.dpi)
ax.set_aspect(maxes[0] / maxes[1])
ax.set_xlim(plot_limits[0])
ax.set_ylim(plot_limits[1])
ax.set_xlabel(r'$\phi$ [deg]', fontproperties=OPTIONS.fontproperties)
ax.set_ylabel(r'$\dot{\phi}$ [deg/s]', fontproperties=OPTIONS.fontproperties)
for label in (ax.get_xticklabels() + ax.get_yticklabels()):
    label.set_fontproperties(OPTIONS.fontproperties)
    
# ROA
z = roa.reshape(grid.num_points)
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=maxes[0] / maxes[1], cmap=binary_cmap('green'), vmin=0)

# Sub-sample discretization for faster and clearer plotting
# Every `skip`-th grid index along each of the two dimensions is kept
N_traj = 14
skip = int(lyapunov.discretization.num_points[0] / N_traj)
sub_idx = np.arange(grid.nindex).reshape(grid.num_points)
sub_idx = sub_idx[::skip, ::skip].ravel()
sub_trajectories = trajectories[sub_idx, :, :]

# Trajectories
# Stored states are multiplied by `norms` to plot them in degrees
for n in range(sub_trajectories.shape[0]):
    theta = sub_trajectories[n, 0, :] * norms[0]
    omega = sub_trajectories[n, 1, :] * norms[1]
    ax.plot(theta, omega, 'k--', linewidth=0.6)
# Direction field: finite-difference state change over one step, scaled to unit length
sub_states = grid.all_points[sub_idx]
dx_dt = (tf_future_states.eval({tf_states: sub_states}) - sub_states) / dt
dx_dt = dx_dt / np.linalg.norm(dx_dt, ord=2, axis=1, keepdims=True)
ax.quiver(sub_states[:, 0] * norms[0], sub_states[:, 1] * norms[1], dx_dt[:, 0], dx_dt[:, 1], 
          scale=None, pivot='mid', headwidth=4, headlength=8, color='k')

plt.show()

### Neural Network: Supervised Training with LQR Solution

In [None]:
with tf.name_scope('supervised_lyapunov_learning'):
    # Relative absolute error between the network and the LQR candidate; the
    # denominator is treated as a constant by the optimizer (stop_gradient)
    tf_costs = tf.abs(tf_values_lqr - tf_values) / tf.stop_gradient(tf_values_lqr + EPS)
    tf_objective = tf.reduce_mean(tf_costs, name='objective')
    
    # Plain gradient descent on the network parameters only
    tf_learning_rate = tf.placeholder(TF_DTYPE, shape=[], name='learning_rate')
    optimizer = tf.train.GradientDescentOptimizer(tf_learning_rate)
    lyapunov_update = optimizer.minimize(tf_objective, var_list=lyapunov.lyapunov_function.parameters)

# Re-initialize the network parameters, then refresh values / safe set and show diagnostics
session.run(tf.variables_initializer(lyapunov_function.parameters))
lyapunov.update_values()
lyapunov.update_safe_set()
debug(lyapunov, dynamics, state_norm, plot='pendulum')

# Objective history and the training pool: grid points inside the initial safe set
obj = []
level_states = lyapunov_lqr.discretization.all_points[lyapunov.initial_safe_set]

In [None]:
# Training batch from level set
# Random row indices into level_states, drawn uniformly with replacement
tf_batch_size = tf.placeholder(tf.int32, [], 'batch_size')
tf_batch = tf.random_uniform([tf_batch_size, ], 0, level_states.shape[0], dtype=tf.int32, name='batch_sample')

# Test set
# Fixed sample of test_size points, drawn once and reused to track the objective
test_size = int(1e3)
idx = tf_batch.eval({tf_batch_size: test_size})
test_set = level_states[idx, :]
    
feed_dict = {
    tf_states:         level_states,
    tf_learning_rate:  1e-3,
    tf_batch_size:     int(1e3),
}
max_iters = 200

for i in tqdm(range(max_iters)):
    # One gradient step on a freshly sampled batch
    idx = tf_batch.eval(feed_dict)
    feed_dict[tf_states] = level_states[idx, :]
    session.run(lyapunov_update, feed_dict)

    # Record the objective on the fixed test set
    feed_dict[tf_states] = test_set
    obj.append(tf_objective.eval(feed_dict))

In [None]:
# Figure: supervised-training objective recorded after every iteration
fig, ax = plt.subplots(1, 1, figsize=(3, 2), dpi=OPTIONS.dpi)
ax.set_xlabel(r'iteration')
ax.set_ylabel(r'objective')
ax.plot(obj, '.-r')

plt.show()

In [None]:
# Evaluate both candidates (values and one-step changes) on the full grid
values, values_lqr, dv, dv_lqr = session.run([tf_values, tf_values_lqr, tf_dv, tf_dv_lqr], {tf_states: grid.all_points})
# Shared color range so that the two value plots are directly comparable
value_max = np.amax([values.max(), values_lqr.max()])
value_min = 0.0

# 2x2 figure: rows are LQR / NN, columns are v(x) / v(f(x)) - v(x)
fig, axes = plt.subplots(2, 2, figsize=(7, 6), dpi=OPTIONS.dpi)
fig.subplots_adjust(wspace=0.3, hspace=0.2)
for ax in axes.ravel():
    ax.set_xlabel(r'$\phi$ [deg]')
    ax.set_ylabel(r'$\dot{\phi}$ [deg/s]')

z = values_lqr.reshape(grid.num_points)
ax = axes[0, 0]
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=maxes[0] / maxes[1], cmap=LEVEL_MAP, vmin=value_min, vmax=value_max)  
ax.set_title('LQR Lyapunov function')
cbar = fig.colorbar(im, ax=ax, label=r'$v(x)$')

z = values.reshape(grid.num_points)
ax = axes[1, 0]
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=maxes[0] / maxes[1], cmap=LEVEL_MAP, vmin=value_min, vmax=value_max)   
ax.set_title('NN Lyapunov function')
cbar = fig.colorbar(im, ax=ax, label=r'$v(x)$')

# vmax=0.0 puts every non-decreasing state (change > 0) in HEAT_MAP's "over" color
z = dv_lqr.reshape(grid.num_points)
ax = axes[0, 1]
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=maxes[0] / maxes[1], cmap=HEAT_MAP, vmax=0.0)   
ax.set_title('LQR Lyapunov function')
cbar = fig.colorbar(im, ax=ax, label=r'$v(f(x)) - v(x)$')

z = dv.reshape(grid.num_points)
ax = axes[1, 1]
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=maxes[0] / maxes[1], cmap=HEAT_MAP, vmax=0.0)   
ax.set_title('NN Lyapunov function')
cbar = fig.colorbar(im, ax=ax, label=r'$v(f(x)) - v(x)$')

plt.show()

In [None]:
# Refresh the network candidate after supervised training and report the result
lyapunov.update_values()
lyapunov.update_safe_set()

print('c_max: {}'.format(lyapunov.feed_dict[lyapunov.c_max]))
print('safe set size: {}'.format(lyapunov.safe_set.sum()))
debug(lyapunov, dynamics, state_norm, plot='pendulum')

In [None]:
# Figure: simulated region of attraction (green) with the LQR candidate's level set (red) on top
fig, ax = plt.subplots(1, 1, figsize=(5, 5), dpi=OPTIONS.dpi)
ax.set_aspect(maxes[0] / maxes[1])
ax.set_xlim(plot_limits[0])
ax.set_ylim(plot_limits[1])
ax.set_xlabel(r'$\phi$ [deg]', fontproperties=OPTIONS.fontproperties)
ax.set_ylabel(r'$\dot{\phi}$ [deg/s]', fontproperties=OPTIONS.fontproperties)
for label in (ax.get_xticklabels() + ax.get_yticklabels()):
    label.set_fontproperties(OPTIONS.fontproperties)
    
# ROA
z = roa.reshape(grid.num_points)
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=maxes[0] / maxes[1], cmap=binary_cmap('green'), vmin=0)

# Estimated safe level set
# All grid points whose LQR value does not exceed the LQR candidate's c_max
z = (values_lqr <= lyapunov_lqr.feed_dict[lyapunov_lqr.c_max]).reshape(grid.num_points)
im = ax.imshow(z.T, origin='lower', extent=plot_limits.ravel(), aspect=maxes[0] / maxes[1], cmap=binary_cmap('red'), vmin=0)   

# Sub-sample discretization for faster and clearer plotting
N_traj = 14
skip = int(lyapunov.discretization.num_points[0] / N_traj)
sub_idx = np.arange(grid.nindex).reshape(grid.num_points)
sub_idx = sub_idx[::skip, ::skip].ravel()
sub_trajectories = trajectories[sub_idx, :, :]

# Trajectories
for n in range(sub_trajectories.shape[0]):
    theta = sub_trajectories[n, 0, :] * norms[0]
    omega = sub_trajectories[n, 1, :] * norms[1]
    ax.plot(theta, omega, 'k--', linewidth=0.6)
# Direction field: finite-difference state change over one step, scaled to unit length
sub_states = grid.all_points[sub_idx]
dx_dt = (tf_future_states.eval({tf_states: sub_states}) - sub_states) / dt
dx_dt = dx_dt / np.linalg.norm(dx_dt, ord=2, axis=1, keepdims=True)
ax.quiver(sub_states[:, 0] * norms[0], sub_states[:, 1] * norms[1], dx_dt[:, 0], dx_dt[:, 1], 
          scale=None, pivot='mid', headwidth=4, headlength=8, color='k')

# Legend built from colored proxy rectangles, because imshow layers carry no legend handles
proxy = [plt.Rectangle((0,0), 1, 1, fc=c) for c in [(0., 1., 0., 1), (1., 0., 0., 1)]]    
legend = ax.legend(proxy, [r'$\mathcal{R}$', r'$\mathcal{V}\!\ (c_0)$'], prop=OPTIONS.fontproperties, loc='upper right')
legend.get_frame().set_alpha(1.)

plt.show()


# Optionally save the figure; the file name records whether the identity start was used
if OPTIONS.save_figs:
    if OPTIONS.use_bad_lyapunov_start:
        save_path = OPTIONS.fig_path + 'pendulum_initial_roa_badstart.pdf'
    else:
        save_path = OPTIONS.fig_path + 'pendulum_initial_roa.pdf'
    fig.savefig(save_path, bbox_inches='tight')

In [None]:
def balanced_confusion_weights(y, y_true, scale_by_total=True):
    """Weight each sample by the inverse size of its confusion-matrix cell.

    Parameters
    ----------
    y : array-like
        Predicted labels in {0, 1} (cast to bool).
    y_true : array-like
        True labels in {0, 1} (cast to bool), same shape as `y`.
    scale_by_total : bool
        If True, all weights are additionally multiplied by `y.size`.

    Returns
    -------
    weights : ndarray of float
        Same shape as `y`; each entry is 1 / (number of samples sharing its
        confusion-matrix cell), times `y.size` when `scale_by_total` is set.
    confusion_counts : ndarray
        The counts arranged as [[TN, FN], [FP, TP]].
    """
    # Use the builtin `bool`: the `np.bool` alias has been removed from NumPy
    y = np.asarray(y).astype(bool)
    y_true = np.asarray(y_true).astype(bool)

    # Assuming labels in {0, 1}, one boolean mask per confusion-matrix cell
    true_pos = y & y_true
    true_neg = ~y & ~y_true
    false_pos = y & ~y_true
    false_neg = ~y & y_true

    TP = true_pos.sum()
    TN = true_neg.sum()
    FP = false_pos.sum()
    FN = false_neg.sum()
    confusion_counts = np.array([[TN, FN], [FP, TP]])

    # Scale each sample by the inverse of its cell count; an empty cell selects
    # no samples, so its zero count is never actually divided by
    weights = np.ones_like(y, dtype=float)
    weights[true_pos] /= TP
    weights[true_neg] /= TN
    weights[false_pos] /= FP
    weights[false_neg] /= FN
    if scale_by_total:
        weights *= y.size

    return weights, confusion_counts


def balanced_class_weights(y_true, scale_by_total=True):
    """Weight each sample by the inverse size of its class.

    Parameters
    ----------
    y_true : array-like
        Class labels in {0, 1} (cast to bool).
    scale_by_total : bool
        If True, all weights are additionally multiplied by the number of samples.

    Returns
    -------
    weights : ndarray of float
        Same shape as `y_true`; each entry is 1 / (size of the sample's
        class), times the number of samples when `scale_by_total` is set.
    class_counts : ndarray
        The counts arranged as [number of negatives, number of positives].
    """
    # Use the builtin `bool`: the `np.bool` alias has been removed from NumPy
    y = np.asarray(y_true).astype(bool)
    nP = y.sum()
    nN = y.size - nP
    class_counts = np.array([nN, nP])

    weights = np.ones_like(y, dtype=float)
    weights[y] /= nP
    weights[~y] /= nN
    if scale_by_total:
        weights *= y.size

    return weights, class_counts
    

### Neural Network: Training

In [None]:
# Save checkpoint for neural net weights
# Only the Lyapunov network's parameters are saved; the checkpoint is restored
# before the classification-based training starts
saver = tf.train.Saver(var_list=lyapunov.lyapunov_function.parameters)
ckpt_path = saver.save(session, "/tmp/spencerr_pendulum_lyapunov.ckpt")

In [None]:
with tf.name_scope('roa_classification'):
    # Current maximum level set we want to push the ROA into
    # NOTE(review): tf_level_multiplier is only fed, never used inside the graph;
    # the training loop reads its value from feed_dict on the Python side
    tf_level_multiplier = tf.placeholder(TF_DTYPE, shape=[], name='level_multiplier')
    tf_c_max = tf.placeholder(TF_DTYPE, shape=[], name='c_max')
    
    # True class labels, converted from Boolean ROA labels {0, 1} to {-1, 1}
    tf_weights = tf.placeholder(TF_DTYPE, shape=[None, 1], name='class_weights')
    tf_roa = tf.placeholder(TF_DTYPE, shape=[None, 1], name='labels')
    tf_labels = 2 * tf_roa - 1

    # Construct classifier with output (-1, 1)
    # The active variant is the raw margin c_max - v(x): positive inside the level set
#     tf_classifier_output = tf.tanh(100 * (tf_c_max - tf_values))
#     tf_classifier_output = (tf_c_max - tf_values) / (tf.abs(tf_c_max - tf_values) + OPTIONS.eps)
    tf_classifier_output = tf_c_max - tf_values
    
    # Use hinge or perceptron loss for the classification performance
    # Active variant: weighted max(-label * margin, 0), i.e. zero whenever the sign is correct
    tf_classifier_loss = tf_weights * tf.maximum(- tf_labels * tf_classifier_output, 0, name='hinge_loss')
#     tf_classifier_loss = tf_weights * tf.log(1 + tf.exp(- tf_labels * tf_classifier_output))
    
        
    # Enforce decrease constraint with Lagrangian relaxation
    # Penalizes, only for states labeled as ROA, any one-step change above the threshold,
    # relative to the current value (the denominator is excluded from the gradient)
    tf_lagrange_multiplier = tf.placeholder(TF_DTYPE, shape=[], name='lagrange_multiplier')
    tf_decrease_loss = tf_roa * tf.maximum((tf_dv - tf_threshold) / tf.stop_gradient(tf_values + OPTIONS.eps), 0)
#     tf_decrease_loss = tf_roa * tf.log(1 + tf.exp(- tf_dv / tf.stop_gradient(tf_values + OPTIONS.eps)))
    
    # Construct objective and optimizer
    # tf_epsilon is only consumed by the commented-out Adam optimizer
    tf_objective = tf.reduce_mean(tf_classifier_loss + tf_lagrange_multiplier * tf_decrease_loss, name='objective')
    tf_learning_rate = tf.placeholder(TF_DTYPE, shape=[], name='learning_rate')
    tf_epsilon = tf.placeholder(TF_DTYPE, shape=[], name='adam_epsilon')
    optimizer = tf.train.GradientDescentOptimizer(tf_learning_rate)
#     optimizer = tf.train.AdamOptimizer(tf_learning_rate, epsilon=tf_epsilon)
    training_update = optimizer.minimize(tf_objective, var_list=lyapunov.lyapunov_function.parameters)
    

# Uniform random indices in [0, tf_idx_range), tf_batch_size of them per evaluation
with tf.name_scope('sampling'):
    tf_batch_size = tf.placeholder(tf.int32, [], 'batch_size')
    tf_idx_range = tf.placeholder(tf.int32, shape=[], name='indices_to_sample')
    tf_idx_batch = tf.random_uniform([tf_batch_size, ], 0, tf_idx_range, dtype=tf.int32, name='batch_sample')

In [None]:
# Restore checkpoint
# Reset the network to the saved weights and refresh the values and safe set derived from it
saver.restore(session, ckpt_path)
lyapunov.update_values()
lyapunov.update_safe_set()
session.run(tf.variables_initializer(optimizer.variables()))  # TODO

# Histories recorded by the training loop
obj          = []
loss_class   = []
loss_dec     = []
# Running ROA estimate, seeded with the current safe set
roa_estimate = np.copy(lyapunov.safe_set)

# c_max and safe-set fraction of the grid, one entry per safe-set update (index 0 = before training)
c_max = [lyapunov.feed_dict[lyapunov.c_max], ]
safe_size = [lyapunov.safe_set.sum() / lyapunov.discretization.nindex, ]
iters_to_converge = []
grid = lyapunov.discretization

In [None]:
# outer_iters: safe-set updates; inner_iters: SGD steps per update; tol: early-stopping
# bound on the objective; horizon: simulation steps used to label states in the gap
# NOTE(review): tol and horizon rebind the globals set in the true-ROA cell
outer_iters = 3
inner_iters = 15
tol         = 1e-8
horizon     = 50
batch_size  = int(1e2)
# NOTE(review): test_size is only referenced from commented-out code in the training loop
test_size   = int(1e4)

feed_dict = {
    tf_states:               np.zeros((1, lyapunov.discretization.ndim)), # placeholder
    tf_batch_size:           batch_size,
#     tf_c_max:                lyapunov.feed_dict[lyapunov.c_max],
    tf_c_max:                1.,
    tf_lagrange_multiplier:  1000,
    tf_idx_range:            1,
    # Optimizer settings and the level-set multiplier read by the training loop
    tf_learning_rate:        1e-2,
    tf_epsilon:              1e-1,
    tf_level_multiplier:     3.,
}

In [None]:
# Report the starting point before any training
print('Current metrics ...')
c = lyapunov.feed_dict[lyapunov.c_max]
num_safe = lyapunov.safe_set.sum()
print('c_max: {}'.format(c))
print('grid size: {}'.format(grid.nindex))
print('safe set size: {} ({:.2f}% of grid, {:.2f}% of ROA)\n'.format(int(num_safe), 100 * num_safe / grid.nindex, 100 * num_safe / roa.sum()))
print('')
time.sleep(0.5)

# Outer loop: grow the labeled set, train the network on it, then update the safe set
for i in range(outer_iters):
    
    c = lyapunov.feed_dict[lyapunov.c_max]
#     feed_dict[tf_c_max] = c
#     time.sleep(0.5)
    
    # Get states inside V(a * c_max), a > 1    
    # idx_gap selects the states between the level sets c and a * c
    idx_small = lyapunov.values.ravel() <= c
    idx_big = lyapunov.values.ravel() <= feed_dict[tf_level_multiplier] * c
    idx_gap = np.logical_and(idx_big, ~idx_small)
    
    # Forward-simulate the gap states for `horizon` steps; a state is labeled safe
    # when the value of its final state is at most c
    V_gap = grid.all_points[idx_gap]
    V_future = np.copy(V_gap)
    for _ in range(horizon):
        V_future = tf_future_states.eval({tf_states: V_future})
    V_future = tf_values.eval({tf_states: V_future})
    safe_in_future = (V_future <= c).ravel()
    
    roa_estimate[idx_gap] |= safe_in_future
    
    # Training pool: everything inside the enlarged level set plus everything already
    # labeled as ROA; the labels are the current ROA estimate as a column vector
    target_idx = np.logical_or(idx_big, roa_estimate)
    target_set = grid.all_points[target_idx]
    target_labels = roa_estimate[target_idx].astype(OPTIONS.np_dtype).reshape([-1, 1])
#     target_set = grid.all_points
#     target_labels = roa_estimate.astype(OPTIONS.np_dtype).reshape([-1, 1])
    feed_dict[tf_idx_range] = target_set.shape[0]
    
    # Test set
#     feed_dict[tf_batch_size] = test_size
#     idx_test = tf_idx_batch.eval(feed_dict)
#     test_set = target_set[idx_test]
#     test_labels = target_labels[idx_test]

    # NOTE(review): the next two assignments are overwritten immediately below, so the
    # objectives are recorded on the training pool, not on the full grid with true ROA labels
    test_set = grid.all_points
    test_labels = roa.reshape([-1, 1])
    
    test_set = target_set
    test_labels = target_labels
    
    # SGD for classification
    converged = False
    feed_dict[tf_batch_size] = batch_size

    for j in tqdm(range(inner_iters)):
        # Training step
        idx_batch = tf_idx_batch.eval(feed_dict)
        feed_dict[tf_states] = target_set[idx_batch]
        feed_dict[tf_roa] = target_labels[idx_batch]
#         feed_dict[tf_weights], counts = balanced_confusion_weights(tf_values.eval(feed_dict) <= feed_dict[tf_c_max], feed_dict[tf_roa].astype(bool))
        feed_dict[tf_weights], counts = balanced_class_weights(feed_dict[tf_roa].astype(bool))
        session.run(training_update, feed_dict=feed_dict)

        # Record objectives
        # Evaluated on the whole test set, with class weights recomputed for it
        feed_dict[tf_states] = test_set
        feed_dict[tf_roa] = test_labels
#         feed_dict[tf_weights], counts = balanced_confusion_weights(tf_values.eval(feed_dict) <= feed_dict[tf_c_max], feed_dict[tf_roa].astype(bool))
        feed_dict[tf_weights], counts = balanced_class_weights(feed_dict[tf_roa].astype(bool))
    
        results = session.run([tf_classifier_loss, tf_decrease_loss], feed_dict)
        loss_class.append(results[0].mean())
        loss_dec.append(results[1].mean())
        obj.append(loss_class[-1] + feed_dict[tf_lagrange_multiplier] * loss_dec[-1])

        # Stop early once the combined objective is below the tolerance
        if obj[-1] < tol:
            converged = True
            break

    iters_to_converge.append(j + 1)
    if converged:
        print('Converged in {} iterations!'.format(j + 1))
    else:
        print('Did not converge!')

    print('Updating values ...')
    lyapunov.update_values()

    print('Updating c_max ...')
    lyapunov.update_safe_set()
    # Everything in the new safe set is added to the ROA estimate
    roa_estimate |= lyapunov.safe_set

    c_max.append(lyapunov.feed_dict[lyapunov.c_max])
    safe_size.append(lyapunov.safe_set.sum() / grid.nindex)
    print('Done!')
#     print(class_ratio)
    # `counts` holds the class counts from the last objective evaluation of the inner loop
    print(counts)
    print('c_max: {}'.format(c_max[-1]))
    print('grid size: {}'.format(grid.nindex))
    print('safe set size: {} ({:.2f}% of grid, {:.2f}% of ROA)\n'.format(int(safe_size[-1] * grid.nindex), 
                                                                         100 * safe_size[-1], 
                                                                         100 * safe_size[-1] * roa.size / roa.sum()))
    time.sleep(0.5)


### Neural Network: Results

In [None]:
plt.rc('font', size=6)
# Fraction of grid points that belong to the simulated ROA
roa_fraction = roa.sum() / roa.size

# Figure: classification loss (red) and weighted decrease loss (blue) over all SGD steps
fig, ax = plt.subplots(1, 1, figsize=(3, 3), dpi=300)
ax.plot(loss_class, '.-r')
# ax.tick_params('y', colors='r')
# ax.set_ylim([None, 0.9])

# One tick every inner_iters steps
ax.set_xlabel(r'SGD iteration (accumulated)')
ax.set_xticks(list(range(0, len(loss_class) + 1, inner_iters)))

# ax = ax.twinx()
# The decrease loss is scaled by the Lagrange multiplier, as in the recorded objective
ax.plot(feed_dict[tf_lagrange_multiplier] * np.asarray(loss_dec), '.-b')
# ax.tick_params('y', colors='b')
# ax.set_ylim([None, 0.0016])

ax.set_ylabel(r'Training loss')


proxy = [plt.Rectangle((0,0), 1, 1, fc=c) for c in ['red', 'blue']]    
legend = ax.legend(proxy, ['Classification loss', 'Lyapunov decrease loss'], loc='upper right')
legend.get_frame().set_alpha(1.)

plt.show()

# Optionally save the figure; the file name records whether the identity start was used
if OPTIONS.save_figs:
    if OPTIONS.use_bad_lyapunov_start:
        save_path = OPTIONS.fig_path + 'pendulum_training_loss_badstart.pdf'
    else:
        save_path = OPTIONS.fig_path + 'pendulum_training_loss.pdf'
    fig.savefig(save_path, bbox_inches='tight')
    

In [None]:
# Figure: c_max (red, left axis) and safe-set size relative to the simulated ROA
# (blue, right axis), one point per safe-set update
plt.rc('font', size=6)
fig, ax = plt.subplots(1, 1, figsize=(3, 3), dpi=300)

ax.plot(c_max, '.-r')
ax.set_ylabel(r'$c_k$')
ax.tick_params('y', colors='r')
# ax.set_ylim([0, 1])

ax.set_xlabel(r'Safe set update iteration $k$')
ax.set_xticks(list(range(0, len(c_max) + 1, 1)))

# Second y-axis sharing the same x-axis
ax = ax.twinx()
# safe_size is a fraction of the grid; dividing by roa_fraction gives the fraction of the ROA
ax.plot(np.array(safe_size) / roa_fraction, '.-b')
ax.set_ylabel(r'$|\mathcal{V}(c_k) \cap \mathcal{X}_\tau|\ /\ |\mathcal{R} \cap \mathcal{X}_\tau|$')
ax.tick_params('y', colors='b')
# ax.set_ylim([0, 1])

plt.show()
    
# Optionally save the figure; the file name records whether the identity start was used
if OPTIONS.save_figs:
    if OPTIONS.use_bad_lyapunov_start:
        save_path = OPTIONS.fig_path + 'pendulum_training_roafrac_badstart.pdf'
    else:
        save_path = OPTIONS.fig_path + 'pendulum_training_roafrac.pdf'
    fig.savefig(save_path, bbox_inches='tight')


print(np.array(safe_size) / roa_fraction)

### Neural Network: Visualization with Phase Portrait and ROA

In [None]:
# Evaluate the Lyapunov candidate (tf_values) and its change (tf_dv) on every
# point of the discretization.
grid = lyapunov.discretization
values, dv = session.run([tf_values, tf_dv],
                         feed_dict={tf_states: grid.all_points})
value_min = 0.0
value_max = values.max()

plt.rc('font', size=14)
fig, axes = plt.subplots(1, 2, figsize=(12, 6), dpi=OPTIONS.dpi)
fig.subplots_adjust(wspace=0.6, hspace=0.2)
for ax in axes.flat:
    ax.set_xlabel(r'$\theta$ [deg]')
    ax.set_ylabel(r'$\omega$ [deg/s]')

# Keyword arguments common to both images.
image_kwargs = dict(origin='lower',
                    extent=plot_limits.ravel(),
                    aspect=maxes[0] / maxes[1])

# Left panel: change in v(x). The colour scale is capped at 0, so positive
# changes are drawn in HEAT_MAP's "over" colour.
ax = axes[0]
z = dv.reshape(grid.num_points)
im = ax.imshow(z.T, cmap=HEAT_MAP, vmax=0.0, **image_kwargs)
cbar = fig.colorbar(im, ax=ax, label=r'$v(f(x)) - v(x)$')

# Sign mask: +1 where dv < 0 and v(x) <= 1; -1 where v(x) > 1 or dv >= 0.
temp = ((dv < 0) & (values <= 1)).astype(int)
temp[(values > 1) | (dv >= 0)] = -1

# Right panel: v(x) multiplied by the sign mask. Negated entries fall below
# vmin and are drawn in LEVEL_MAP's "under" colour.
ax = axes[1]
z = values.reshape(grid.num_points) * temp.reshape(grid.num_points)
im = ax.imshow(z.T, cmap=LEVEL_MAP, vmin=value_min, vmax=value_max,
               **image_kwargs)
cbar = fig.colorbar(im, ax=ax, label=r'$v(x)$')

# Optional overlay of `roa` on the right panel (drawing is disabled).
z = roa.reshape(grid.num_points)
# im = ax.imshow(z.T, cmap=BINARY_MAP, alpha=0.2, **image_kwargs)


# ROAs: draw the binary region masks, in order, on a fresh figure.
fig, ax = plt.subplots(1, 1, figsize=(6, 6), dpi=OPTIONS.dpi)

# (mask, colour name passed to binary_cmap, whether the layer is drawn)
region_layers = [
    (roa, 'green', True),                   # true ROA
    (roa_estimate, 'yellow', False),        # estimated ROA (drawing disabled)
    (lyapunov.safe_set, 'blue', True),      # safe level set
    # Initial ROA; lyapunov.initial_safe_set was the earlier alternative.
    (lyapunov_lqr.safe_set, 'red', True),
]
for mask, color_name, is_drawn in region_layers:
    z = mask.reshape(lyapunov.discretization.num_points)
    if is_drawn:
        im = ax.imshow(z.T,
                       origin='lower',
                       extent=plot_limits.ravel(),
                       aspect=maxes[0] / maxes[1],
                       cmap=binary_cmap(color_name),
                       alpha=1.)

# Trajectories: scale both state components by `norms` and draw each one as
# a thin dashed black line.
for n, trajectory in enumerate(sub_trajectories):
    theta = trajectory[0, :] * norms[0]
    omega = trajectory[1, :] * norms[1]
    ax.plot(theta, omega, 'k--', linewidth=0.6)

# Flow field at the sub-sampled grid states: finite difference of the
# one-step map over dt, with each row rescaled to unit 2-norm.
sub_states = grid.all_points[sub_idx]
next_states = tf_future_states.eval({tf_states: sub_states})
dx_dt = (next_states - sub_states) / dt
row_norms = np.linalg.norm(dx_dt, ord=2, axis=1, keepdims=True)
dx_dt = dx_dt / row_norms

arrow_x = sub_states[:, 0] * norms[0]
arrow_y = sub_states[:, 1] * norms[1]
ax.quiver(arrow_x, arrow_y,
          dx_dt[:, 0], dx_dt[:, 1],
          scale=None, pivot='mid', headwidth=4, headlength=8, color='k')

ax.set_xlim(plot_limits[0])
ax.set_ylim(plot_limits[1])

# Legend from proxy patches: RGBA green, red and blue rectangles, paired
# with the region labels in the same order.
patch_colors = [(0, 1, 0, 1), (1, 0, 0, 1), (0, 0, 1, 1)]
patch_labels = [r'$\mathcal{R}$', r'$\mathcal{V}\!\ (c_0)$', r'$\mathcal{V}\!\ (c_k)$']
proxy = [plt.Rectangle((0, 0), 1, 1, fc=rgba) for rgba in patch_colors]
legend = ax.legend(proxy, patch_labels, loc='upper right')
legend.get_frame().set_alpha(1.)  # fully opaque legend frame

ax.set_xlabel(r'$\phi$ [deg]')
ax.set_ylabel(r'$\dot{\phi}$ [deg/s]')

plt.show()

# Optionally write the figure to disk; the file name records whether the
# run was started with OPTIONS.use_bad_lyapunov_start set.
if OPTIONS.save_figs:
    file_name = ('pendulum_final_roa_badstart.pdf'
                 if OPTIONS.use_bad_lyapunov_start
                 else 'pendulum_final_roa.pdf')
    save_path = OPTIONS.fig_path + file_name
    fig.savefig(save_path, bbox_inches='tight')


In [None]:
# Numerical summary: current c_max, grid size, and the sizes of the ROA and
# of the two safe sets (counts and percentages of the grid).
c = lyapunov.feed_dict[lyapunov.c_max]
num_grid = lyapunov.discretization.nindex
num_safe_lqr = lyapunov_lqr.safe_set.sum()
num_safe = lyapunov.safe_set.sum()

roa_count = roa.sum()
roa_percent = 100 * roa_count / roa.size
lqr_percent = 100 * num_safe_lqr / num_grid
nn_percent = 100 * num_safe / num_grid

print('c_max: {}'.format(c))
print('grid size: {}'.format(num_grid))
print('ROA size: {:.2f}%'.format(roa_percent))
print('lqr safe set size: {} ({:.2f}%)'.format(num_safe_lqr, lqr_percent))
print('nn safe set size: {} ({:.2f}%)'.format(num_safe, nn_percent))

# debug(lyapunov, dynamics, state_norm, plot='pendulum')
# debug(lyapunov_lqr, dynamics, state_norm, plot='pendulum')

# LQR safe-set count divided by the ROA count.
print(num_safe_lqr / roa_count)

## TODO: Fit a Lyapunov function by convex optimization (experimental; the cells below are commented out)

In [None]:
# lyapunov_lqr.update_safe_set()

# safe = lyapunov_lqr.safe_set
# # safe = np.logical_xor(lyapunov.safe_set, lyapunov.initial_safe_set)
# X = lyapunov_lqr.discretization.all_points[safe]
# Y = tf_future_states.eval({tf_states: X})

# w = cvx.Variable(state_dim, 1)
# c = cvx.Variable(1)

# obj = cvx.Maximize(c)
# constraints = [(Y - X) * w <= -EPS, X * w <= c]

# mosek_params = {mosek.dparam.ana_sol_infeas_tol:     1e-32,  # print if a constraint violates more than this, 1e-6
#                 mosek.dparam.intpnt_qo_tol_mu_red:   1e-12,  # relative complementarity gap feasibility tolerance, 1e-8
#                 mosek.iparam.ana_sol_print_violated: True,
#                }

# prob = cvx.Problem(obj, constraints)
# result = prob.solve(solver=cvx.MOSEK,
#                     verbose=True,
#                     warm_start=False,
#                     mosek_params=mosek_params)

# print("\nStatus:", prob.status)
# print("Optimal objective value:", prob.value)
# print("Optimal variable value:\n", w.value, c.value)

In [None]:
# # Identify shape matrix of suitable Lyapunov function
# print(P / lyapunov.feed_dict[lyapunov.c_max])

# def phi(X):
#     num_samples = X.shape[0]
#     Phi = np.hstack((np.ones([num_samples, 1]), X))
#     return Phi

# def phi(X):
#     Phi = X
#     return Phi

# # CVXPY 1.0
# # def quadratic(X, P, convention='cvx'):
# #     if convention=='cvx':
# #         linear_form = X * P
# #         quadratic = cvx.multiply(linear_form, X)
# #         result = cvx.sum(quadratic, axis=1, keepdims=True)
# #     else:
# #         linear_form = np.matmul(X, P)
# #         quadratic = linear_form * X
# #         result = np.sum(quadratic, axis=1, keepdims=True)
# #     return result

# # CVXPY 0.4
# def quadratic(X, P, convention='cvx'):
#     if convention=='cvx':
#         linear_form = X * P
#         quadratic = cvx.mul_elemwise(X, linear_form)
#         result = cvx.sum_entries(quadratic, axis=1)
#     else:
#         linear_form = np.matmul(X, P)
#         quadratic = np.multiply(linear_form, X)
#         result = np.sum(np.array(quadratic), axis=1, keepdims=True)
#     return result

# # Enforce decrease condition, excluding the initial safe set
# # safe = lyapunov.safe_set
# safe = np.logical_xor(lyapunov.safe_set, lyapunov.initial_safe_set)
# X = lyapunov.discretization.all_points[safe, :]

# # remove zero-state
# idx = ~np.all(X == 0, axis=1)
# X_full = X[idx]
# F_full = session.run(lyapunov.dynamics(tf_states, tf_actions), {tf_states: X_full})

# # Try only considering states near the boundary
# V = tf_values.eval({tf_states: X_full}).ravel()
# cutoff = 0.*V.max()
# X = X_full[V >= cutoff, :]
# F = F_full[V >= cutoff, :]

# print(X_full.shape)
# print(X.shape)

# X = phi(X)
# F = phi(F)
# n = X.shape[1]

# # M = cvx.Variable(n, n, PSD=True)  # CVXPY 1.0
# M = cvx.Semidef(n)                # CVXPY 0.4

# eps_mat = 1e-6
# eps_con = 1e-8

# obj = cvx.Minimize(cvx.trace(M))
# # obj = cvx.Maximize(- cvx.log(cvx.trace(M)))
# # obj = cvx.Maximize(- cvx.log_det(M))
# # obj = cvx.Minimize(- cvx.log_det(M))
# # print(obj.is_dcp())

# # constraints = [quadratic(F, M) - quadratic(X, M) <= -eps_con,
# #                M >> eps_mat]

# constraints = [quadratic(F, M) - quadratic(X, M) <= -eps_con,
#                cvx.lambda_min(M) >= eps_mat]

# mosek_params = {mosek.dparam.ana_sol_infeas_tol:     1e-32,  # print if a constraint violates more than this, 1e-6
#                 mosek.dparam.intpnt_qo_tol_mu_red:   1e-12,  # relative complementarity gap feasibility tolerance, 1e-8
#                 mosek.iparam.ana_sol_print_violated: True,
#                }

# prob = cvx.Problem(obj, constraints)
# result = prob.solve(solver=cvx.MOSEK,
#                     verbose=True,
#                     warm_start=False,
#                     mosek_params=mosek_params)

# print("\nStatus:", prob.status)
# print("Optimal objective value:", prob.value)
# print("Optimal variable value:\n", M.value)
# print('\nEigenvalues:', np.linalg.eigvals(M.value))

In [None]:
# prob.get_problem_data('MOSEK')

In [None]:
# X_full = phi(X_full)
# F_full = phi(F_full)

# z = quadratic(F_full, M.value, 'np') - quadratic(X_full, M.value, 'np') < 0
# idx = ~z.ravel()

# dV = quadratic(F_full[idx, :], M.value, 'np') - quadratic(X_full[idx, :], M.value, 'np')

# print('Previously safe states, now unsafe:')
# print(X_full[idx, :], '\n')

# print('Value change:')
# print(dV)

In [None]:
# # Scale new cost function to compare with the old one
# P_new = P.max() * M.value / M.value.max()
# print('Previous cost matrix:\n{}\n'.format(P))
# print('Computed cost matrix:\n{}\n'.format(P_new))
# print('Ratio:\n{}\n'.format(P_new / P))

# lyapunov_function = safe_learning.QuadraticFunction(P_new)
# grad_lyapunov_function = safe_learning.LinearSystem((2*P_new,))

# if USE_LIPSCHITZ_SCALING:
#     L_v = lambda s: tf.abs(grad_lyapunov_function(s))
# else:
#     L_v = lambda s: tf.norm(grad_lyapunov_function(s), ord=1, axis=1, keep_dims=True)

# initial_safe_set = np.all(state_discretization.all_points == 0.0, axis=1)
# new_lyapunov = safe_learning.Lyapunov(state_discretization, lyapunov_function, true_dynamics, 
#                                       L_dyn, L_v, tau, policy, initial_safe_set)
# new_lyapunov.update_safe_set()

In [None]:
# debug(lyapunov, true_dynamics, state_norm, plot='pendulum')
# debug(new_lyapunov, true_dynamics, state_norm, plot='pendulum')