# Optimizing for a Lyapunov Function

In [None]:
%%javascript
// Replace the OutputArea `_should_scroll` hook with one that always answers
// false, whatever line count it is given.
// NOTE(review): presumably this keeps long cell outputs fully expanded instead
// of collapsing them into a scroll box - confirm against the notebook front-end.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
from __future__ import division, print_function

import numpy as np
import tensorflow as tf
import gpflow
from scipy.linalg import block_diag
import cvxpy as cvx
import mosek
import matplotlib.pyplot as plt
%matplotlib inline

import safe_learning
from utilities import InvertedPendulum, debug

# Progress bars are optional: fall back to a no-op wrapper when tqdm is missing.
try:
    from tqdm import tqdm
except ImportError:
    def tqdm(iterable=None, *args, **kwargs):
        """Stand-in for tqdm: return the iterable untouched.

        Extra positional and keyword arguments (e.g. ``desc``, ``total``) are
        accepted and ignored so that call sites written for the real tqdm
        keep working without the package installed.
        """
        return iterable

# Floating point types shared with the safe_learning configuration
np_dtype = safe_learning.config.np_dtype
tf_dtype = safe_learning.config.dtype

# Re-running this cell must not leave an earlier session open: close it if the
# name already exists (NameError simply means this is the first run).
try:
    session.close()
except NameError:
    pass
session = tf.InteractiveSession()

# TODO debug flags ***************************************#

# Console formatting only: four decimals for pandas floats and NumPy arrays,
# and no wrapping of wide DataFrame representations.
import pandas
pandas.options.display.float_format = '{:,.4f}'.format
pandas.set_option('expand_frame_repr', False)
np.set_printoptions(precision=4)

# Saturate the action so that it lies in [-1, 1]
# (wraps the policy in safe_learning.Saturation(policy, -1, 1) when True)
SATURATE = True

# Use the true physical parameters in the GP model
# NOTE(review): this flag is not read by any of the cells visible here.
USE_TRUE_PARAMETERS = False

# Use the linearized discrete-time model as the true underlying dynamics
# (otherwise the InvertedPendulum object itself is used)
USE_LINEAR_DYNAMICS = False

# Use a threshold of zero when checking for stability
# (sets the discretization constant tau to 0.0 instead of deriving it from the grid)
USE_ZERO_THRESHOLD = True

# Lipschitz term of the Lyapunov function: when True use the element-wise
# absolute value of its gradient, otherwise the 1-norm of the gradient per state
USE_LIPSCHITZ_SCALING = True

#******************************************************#


## Dynamics

In [None]:
# Sampling time and gravity
dt, g = 0.01, 9.81

# True system parameters: pendulum mass, pole length, rotational friction
m, L, b = 0.15, 0.5, 0.1

# Scales used to normalize states and actions
theta_max = np.deg2rad(30)
omega_max = np.sqrt(g / L)
u_max = g * m * L * np.sin(theta_max)
state_norm, action_norm = (theta_max, omega_max), (u_max, )

# Constraints for initial 'safe' states
theta_safe = np.deg2rad(8)
omega_safe = 0.5*np.sqrt(g / L)

# Dimensions; each coordinate is limited to [-1, 1]
state_dim, action_dim = 2, 1
state_limits = np.tile([-1., 1.], (state_dim, 1))
action_limits = np.tile([-1., 1.], (action_dim, 1))

# True system and the (A, B) pair returned by its linearize() method
true_pendulum = InvertedPendulum(m, L, b, dt, [state_norm, action_norm])
A_true, B_true = true_pendulum.linearize()

# Either the linear system built from (A_true, B_true) or the pendulum itself
# acts as the "true" dynamics, depending on the debug flag.
true_dynamics = (
    safe_learning.functions.LinearSystem((A_true, B_true), name='true_dynamics')
    if USE_LINEAR_DYNAMICS
    else true_pendulum.__call__
)

## State Discretization

In [None]:
# Grid resolution along each dimension
# (coarser alternative for quick runs: num_states = [51,]*state_dim)
num_states = [1501, 1501]

# State grid over [-1, 1] in both dimensions
grid_limits = np.tile([-1., 1.], (2, 1))
state_discretization = safe_learning.GridWorld(grid_limits, num_states)

# Discretization constant: zero when requested, otherwise half the sum of the
# grid's unit_maxes
tau = 0.0 if USE_ZERO_THRESHOLD else np.sum(state_discretization.unit_maxes) / 2

grid_size = state_discretization.nindex
print('Grid size: {}'.format(grid_size))
print('Discretization constant: {}'.format(tau))

## Cost Function

In [None]:
# Cost matrices for the state (Q) and the action (R)
Q = np.diag([1., 2.])
R = 1.2*np.identity(action_dim)

# Normalize both by the largest entry found in either matrix
cost_norm = np.amax([Q.max(), R.max()])
Q, R = Q / cost_norm, R / cost_norm

# Quadratic cost function built on the block-diagonal matrix diag(Q, R)
cost_matrix = block_diag(Q, R)
cost_function = safe_learning.QuadraticFunction(cost_matrix, name='cost_function')

## Policy

In [None]:
# Fix policy to the LQR solution for the true system
# dlqr receives the linearization (A_true, B_true) and the normalized cost
# matrices; K becomes the feedback gain of the linear policy u = -K x, and P is
# reused further down as the matrix of the quadratic Lyapunov function.
K, P = safe_learning.utilities.dlqr(A_true, B_true, Q, R)
policy = safe_learning.LinearSystem(-K, name='policy')

# Optionally wrap the policy so its output is saturated with bounds -1 and 1
if SATURATE:
    policy = safe_learning.Saturation(policy, -1, 1)
    
# TensorFlow variables
# `tf_states` is a placeholder with one state per row (any number of rows);
# `tf_actions` is the policy applied to that placeholder.
tf_states = tf.placeholder(tf_dtype, shape=[None, state_dim], name='states')
tf_actions = policy(tf_states)

## Lyapunov Function

In [None]:
# Define the Lyapunov function corresponding to the known policy:
# the quadratic function with matrix P and, for its gradient, the linear map 2*P
lyapunov_function = safe_learning.QuadraticFunction(P)
grad_lyapunov_function = safe_learning.LinearSystem((2*P,))

# Lipschitz constants of the policy and of the closed-loop dynamics, based on
# the 1-norms of the LQR gain and of the linearization (A_true, B_true).
# Both ignore their argument `s` and return the same value for every state.
L_pol = lambda s: tf.constant(np.linalg.norm(-K, 1), dtype=tf_dtype)
L_dyn = lambda s: np.linalg.norm(A_true, 1) + np.linalg.norm(B_true, 1)*L_pol(s)


def make_lipschitz_lyapunov(grad_function, scaled):
    """Build the state-dependent Lipschitz term of a Lyapunov function.

    The gradient function is captured when this factory is called. A plain
    ``lambda s: ...grad_lyapunov_function(s)`` would instead look the global
    name up on every call, so rebinding ``grad_lyapunov_function`` later in
    the notebook would silently change the term used by a Lyapunov object
    that was created earlier.

    Parameters
    ----------
    grad_function : callable
        Maps a batch of states to the gradient of the Lyapunov function.
    scaled : bool
        If True, return the element-wise absolute gradient; otherwise return
        the 1-norm of the gradient along axis 1 (dimension kept).

    Returns
    -------
    callable
        Function of the states only.
    """
    if scaled:
        return lambda s: tf.abs(grad_function(s))
    return lambda s: tf.norm(grad_function(s), ord=1, axis=1, keep_dims=True)


L_v = make_lipschitz_lyapunov(grad_lyapunov_function, USE_LIPSCHITZ_SCALING)

# Initialize class
# The initial safe set is a boolean mask over all grid points that is True only
# where every coordinate is exactly zero.
initial_safe_set = np.all(state_discretization.all_points == 0.0, axis=1)
lyapunov = safe_learning.Lyapunov(state_discretization, lyapunov_function, true_dynamics,
                                  L_dyn, L_v, tau, policy, initial_safe_set)

In [None]:
def plot_policy(lyapunov, tf_states, state_norm=None, show=True):
    """Draw the policy's action over the whole state grid as a heat map.

    The policy is evaluated in the notebook-level ``session`` on every point of
    ``lyapunov.discretization``.

    Parameters
    ----------
    lyapunov : object
        Must expose ``policy`` (callable on ``tf_states``) and
        ``discretization`` with ``limits``, ``all_points`` and ``num_points``.
    tf_states : TensorFlow placeholder
        Fed with ``lyapunov.discretization.all_points``.
    state_norm : tuple (theta_max, omega_max), optional
        When given, the axis extents are the grid limits multiplied by these
        values converted with ``np.rad2deg``, and the axis labels carry degree
        units. When omitted, the raw grid limits are used and the labels carry
        no units.
    show : bool, optional
        Call ``plt.show()`` at the end.
    """
    import copy  # local import: only needed for the private colormap copy

    fig, ax = plt.subplots(1, 1, figsize=(6, 5), dpi=100)

    # set_under/set_over modify the colormap in place. Work on a copy so the
    # 'viridis' instance shared with other plots is left untouched.
    cmap = copy.copy(plt.get_cmap('viridis'))
    cmap.set_under('indigo')
    cmap.set_over('gold')
    ticks = np.linspace(-1., 1., 9)
    # Colour range ends just inside +/-1, so values at exactly +/-1 are drawn
    # with the dedicated under/over colours.
    cutoff = 1. - 1e-10

    discretization = lyapunov.discretization
    if state_norm is not None:
        theta_max, omega_max = state_norm
        scale = np.array([np.rad2deg(theta_max), np.rad2deg(omega_max)]).reshape((-1, 1))
        limits = scale * discretization.limits
        xlabel, ylabel = r'$\theta$ [deg]', r'$\omega$ [deg/s]'
    else:
        # Unscaled grid limits: do not claim physical units on the axes.
        limits = discretization.limits
        xlabel, ylabel = r'$\theta$', r'$\omega$'

    z = session.run(lyapunov.policy(tf_states), feed_dict={tf_states: discretization.all_points})
    z = z.reshape(discretization.num_points)
    im = ax.imshow(z.T,
                   origin='lower',
                   extent=limits.ravel(),
                   aspect=limits[0, 0] / limits[1, 0],
                   cmap=cmap,
                   vmin=-cutoff,
                   vmax=cutoff)
    fig.colorbar(im, ax=ax, label=r'$u = \pi(x)$', ticks=ticks)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    if show:
        plt.show()


# Visualize policy
plot_policy(lyapunov, tf_states, state_norm)

## Initial Safe Set Visualization

In [None]:
# Compare safe set before and after checking the decrease condition for the first time.
# Keep a copy of the current safe set, then read c_max and the safe set size.
old_safe_set = np.copy(lyapunov.safe_set)
c_max = lyapunov.feed_dict[lyapunov.c_max]
init_safe_set_size = np.sum(lyapunov.safe_set)

print('\n'.join(['Before update ...',
                 'c_max: {}'.format(c_max),
                 'Safe set size: {}\n'.format(init_safe_set_size)]))

lyapunov.update_safe_set()

# Same two quantities, read again after the update
c_max = lyapunov.feed_dict[lyapunov.c_max]
init_safe_set_size = np.sum(lyapunov.safe_set)

print('\n'.join(['After update ...',
                 'c_max: {}'.format(c_max),
                 'Safe set size: {}'.format(init_safe_set_size)]))

debug(lyapunov, true_dynamics, state_norm, plot='pendulum')

In [None]:
# Identify shape matrix of suitable Lyapunov function:
# P divided by the c_max value currently stored in the Lyapunov feed_dict.
# NOTE(review): presumably this rescales the safe level set to
# x^T (P / c_max) x <= 1 - confirm against the Lyapunov class.
print(P / lyapunov.feed_dict[lyapunov.c_max])

## Optimizing the Lyapunov Shape Matrix (TODO: add description)

In [None]:
# Decision variable: a 2x2 positive semidefinite matrix.
# The active line uses the CVXPY 0.4 API; the commented line is the intended
# CVXPY 1.0 replacement.
# NOTE(review): CVXPY 1.0 takes the shape as a tuple, i.e.
# cvx.Variable((2, 2), PSD=True) - fix before switching versions.
# M = cvx.Variable(2, 2, PSD=True)  # CVXPY 1.0
M = cvx.Semidef(2)                # CVXPY 0.4

# CVXPY 1.0 variant of `quadratic`, kept for reference only (not executed):
# def quadratic(X, P, convention='cvx'):
#     if convention=='cvx':
#         linear_form = X * P
#         quadratic = cvx.multiply(linear_form, X)
#         result = cvx.sum(quadratic, axis=1, keepdims=True)
#     else:
#         linear_form = np.matmul(X, P)
#         quadratic = linear_form * X
#         result = np.sum(quadratic, axis=1, keepdims=True)
#     return result

def quadratic(X, P, convention='cvx'):
    """Evaluate the quadratic form x^T P x for every row x of X.

    Parameters
    ----------
    X : 2-D array
        One point per row.
    P : matrix or CVXPY expression
        Matrix of the quadratic form.
    convention : str, optional
        'cvx' builds a CVXPY (0.4 API) expression; any other value computes
        the result numerically with NumPy.

    Returns
    -------
    CVXPY expression, or a NumPy array with one row per row of X and a single
    column (NumPy branch).
    """
    if convention != 'cvx':
        # Numerical path: (X P) multiplied element-wise by X, summed per row
        weighted = np.matmul(X, P)
        per_entry = np.array(np.multiply(weighted, X))
        return np.sum(per_entry, axis=1, keepdims=True)

    # Symbolic path: same computation expressed with CVXPY atoms
    weighted = X * P
    per_entry = cvx.mul_elemwise(X, weighted)
    return cvx.sum_entries(per_entry, axis=1)

# Enforce decrease condition, excluding the initial safe set
# (alternative that keeps the initial safe set: safe = lyapunov.safe_set)
# safe = lyapunov.safe_set
safe = np.logical_xor(lyapunov.safe_set, lyapunov.initial_safe_set)
X = lyapunov.discretization.all_points[safe, :]

# remove zero-state: drop every row whose coordinates are all exactly zero
idx = ~np.all(X == 0, axis=1)
X = X[idx]
# F: the dynamics evaluated at the points X under the policy's actions
F = session.run(lyapunov.dynamics(tf_states, tf_actions), {tf_states: X})

# eps_mat: lower bound imposed on the smallest eigenvalue of M
# eps_con: margin by which the quadratic form must decrease from X to F
eps_mat = 1e-32
eps_con = 1e-5

# Objective: minimize trace(M); the commented lines are alternatives that were tried
obj = cvx.Minimize(cvx.trace(M))
# obj = cvx.Maximize(- cvx.log(cvx.trace(M)))
# obj = cvx.Maximize(- cvx.log_det(M))
# obj = cvx.Minimize(- cvx.log_det(M))
# print(obj.is_dcp())

# Alternative formulation of the definiteness constraint via a matrix inequality:
# constraints = [quadratic(F, M) - quadratic(X, M) <= -eps_con,
#                M >> eps_mat]

# One decrease constraint per row of X, plus the eigenvalue bound on M
constraints = [quadratic(F, M) - quadratic(X, M) <= -eps_con,
               cvx.lambda_min(M) >= eps_mat]

# Solver tolerances; the trailing number in each comment is presumably the
# MOSEK default being overridden - TODO confirm
mosek_params = {mosek.dparam.ana_sol_infeas_tol:     1e-32,  # print if a constraint violates more than this, 1e-6
                mosek.dparam.intpnt_qo_tol_mu_red:   1e-12,  # relative complementarity gap feasibility tolerance, 1e-8
                mosek.iparam.ana_sol_print_violated: True,
               }

prob = cvx.Problem(obj, constraints)
result = prob.solve(solver=cvx.MOSEK,
                    verbose=True,
                    warm_start=False,
                    mosek_params=mosek_params)

# Report solver status, objective value, the matrix found and its eigenvalues
print("\nStatus:", prob.status)
print("Optimal objective value:", prob.value)
print("Optimal variable value:\n", M.value)
print('\nEigenvalues:', np.linalg.eigvals(M.value))

In [None]:
# Re-evaluate the decrease constraint numerically with the solved matrix
M_opt = M.value
decrease = quadratic(F, M_opt, 'np') - quadratic(X, M_opt, 'np')
z = decrease <= -eps_con

# Rows where the constraint does NOT hold
idx = np.logical_not(z.ravel())
X_violating, F_violating = X[idx, :], F[idx, :]

# Constraint value at the violating rows only
vals = quadratic(F_violating, M_opt, 'np') - quadratic(X_violating, M_opt, 'np')

print('Previously safe states, now unsafe:\n', X_violating)

print(vals)

In [None]:
# Scale new cost function to compare with the old one:
# rescale the solved matrix so that its largest entry matches the largest entry of P
P_new = P.max() * M.value / M.value.max()
print('Previous cost matrix:\n{}\n'.format(P))
print('Computed cost matrix:\n{}\n'.format(P_new))
print('Ratio:\n{}\n'.format(P_new / P))

# The new functions get their own names on purpose. The Lipschitz lambda handed
# to the first `lyapunov` object looks up the global `grad_lyapunov_function`
# each time it is called, so rebinding that name here would silently make the
# old Lyapunov object use the gradient of the new one.
new_lyapunov_function = safe_learning.QuadraticFunction(P_new)
new_grad_lyapunov_function = safe_learning.LinearSystem((2*P_new,))

# Bind the gradient as a default argument so this term is fixed at creation time
if USE_LIPSCHITZ_SCALING:
    new_L_v = lambda s, grad=new_grad_lyapunov_function: tf.abs(grad(s))
else:
    new_L_v = lambda s, grad=new_grad_lyapunov_function: tf.norm(grad(s), ord=1, axis=1, keep_dims=True)

# Same initial safe set as before: only grid points whose coordinates are all zero
initial_safe_set = np.all(state_discretization.all_points == 0.0, axis=1)
new_lyapunov = safe_learning.Lyapunov(state_discretization, new_lyapunov_function, true_dynamics,
                                      L_dyn, new_L_v, tau, policy, initial_safe_set)
new_lyapunov.update_safe_set()

In [None]:
# Run the same pendulum debug output for the original Lyapunov object and for
# the one built from the re-optimized matrix, so the two can be compared.
debug(lyapunov, true_dynamics, state_norm, plot='pendulum')
debug(new_lyapunov, true_dynamics, state_norm, plot='pendulum')