# Stability Verification for an Inverted Pendulum

In [None]:
from __future__ import division, print_function

import numpy as np
import tensorflow as tf
import gpflow
from scipy.linalg import block_diag
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline

import safe_learning
from utilities import InvertedPendulum

# Progress bars are optional: when tqdm is not installed, fall back to an
# identity wrapper so that `tqdm(iterable)` still yields the iterable.
try:
    from tqdm import tqdm
except ImportError:
    tqdm = lambda x: x
    
# NumPy / TensorFlow dtypes taken from the safe_learning configuration
np_dtype = safe_learning.config.np_dtype
tf_dtype = safe_learning.config.dtype

# When this cell is re-run, close the session created by the previous run.
# On the first run `session` does not exist yet, hence the NameError guard.
try:
    session.close()
except NameError:
    pass
session = tf.InteractiveSession()

# NOTE(review): `initialized` is not read anywhere in the visible code;
# presumably a leftover flag for variable initialization -- confirm.
initialized = False

# TODO debugging **************************************#

# Printing setup used by the debugging report: fixed 4-decimal floats and
# no wrapping of wide data frames.
import pandas
from collections import OrderedDict
pandas.options.display.float_format = '{:,.4f}'.format
pandas.set_option('expand_frame_repr', False)
np.set_printoptions(precision=4)

# Saturate the action so that it lies in [-1, 1]
saturate = True

# Use the true physical parameters in the GP model
# (the prior-mean matrices A, B are replaced by A_true, B_true)
use_true_parameters = True

# Use the linearized discrete-time model as the true underlying dynamics
use_linear_dynamics = True

# Ignore the non-zero threshold when checking for stability
# (sets the discretization constant tau to 0)
ignore_threshold = True

# Use purely linear ARD kernels for both GP outputs; when False, a
# Matern32 * Linear product term is added to each linear kernel
use_simplified_kernels = True

# Use the element-wise absolute gradient of the Lyapunov function as its
# Lipschitz constant L_v; when False, the 1-norm of the gradient is used
use_lipschitz_scaling = False

# Scaling factor for GP confidence intervals
beta = 2.

#******************************************************#


## Helper Functions

In [None]:
def plot_policy(tf_actions, tf_true_actions, n_points, colors=('r', 'b'), show=True):
    """Plot a policy as a 3D surface over the normalized state square [-1, 1]^2.

    Parameters
    ----------
    tf_actions : tensor
        Action tensor that is evaluated by feeding the notebook-global
        placeholder `tf_states`.
    tf_true_actions : tensor or None
        Optional second action tensor drawn on the same axes for comparison.
    n_points : sequence of two ints
        Number of grid points along the first and second state dimension.
    colors : sequence of two matplotlib colors, optional
        Surface colors for `tf_actions` and `tf_true_actions`, in that order.
    show : bool, optional
        Call `plt.show()` at the end when True.

    Notes
    -----
    Relies on the notebook globals `session` and `tf_states`.
    """
    fig = plt.figure(figsize=(3, 3), dpi=200)
    fig.subplots_adjust(wspace=0.1, hspace=0.2)

    # A purely imaginary step tells np.mgrid to interpret its magnitude as the
    # number of points (end point included). The builtin `complex` replaces
    # the deprecated `np.complex` alias.
    xx, yy = np.mgrid[-1:1:complex(0, n_points[0]), -1:1:complex(0, n_points[1])]

    grid = np.column_stack((xx.ravel(), yy.ravel()))
    learned_control = session.run(tf_actions, feed_dict={tf_states: grid}).reshape(n_points)
    ax = fig.add_subplot(1, 1, 1, projection='3d')
    ax.plot_surface(xx, yy, learned_control, color=colors[0], alpha=0.75)
    ax.set_title(r'Control', fontsize=16)
    # The pendulum state is (theta, omega), matching the labels in plot_safe_set
    ax.set_xlabel(r'$\theta$', fontsize=14)
    ax.set_ylabel(r'$\omega$', fontsize=14)
    ax.set_zlabel(r'$u$', fontsize=14)
#     ax.view_init(elev=20., azim=15.)
    if tf_true_actions is not None:
        true_control = session.run(tf_true_actions, feed_dict={tf_states: grid}).reshape(n_points)
        ax.plot_surface(xx, yy, true_control, color=colors[1], alpha=0.5)

    if show:
        plt.show()


def plot_safe_set(lyapunov, old_safe_set=None, show=True):
    """Draw the safe set of `lyapunov` as an image over the state grid.

    When `old_safe_set` is given it is added on top of the current safe set,
    so states contained in both sets get a higher value than states in only
    one of them. If the dynamics are an `UncertainFunction`, the inputs stored
    in its first component (`functions[0].X`) are overlaid as red crosses.
    Uses the notebook globals `num_states` and `np_dtype`.
    """
    def as_grid(flat_mask):
        # Reshape the flat mask to the grid shape and convert it to np_dtype
        return flat_mask.reshape(num_states).astype(np_dtype)

    image = as_grid(lyapunov.safe_set)
    if old_safe_set is not None:
        image = image + as_grid(old_safe_set)

    fig = plt.figure(figsize=(5, 5), dpi=100)
    fig.subplots_adjust(wspace=0.5, hspace=0.5)

    ax = fig.add_subplot(1, 1, 1)
    ax.set_title(r'Safe set')
    ax.set_xlabel(r'$\theta$')
    ax.set_ylabel(r'$\omega$')

    # Transposed so that the first state dimension runs along the x-axis
    grid_extent = lyapunov.discretization.limits.ravel()
    im = ax.imshow(image.T, origin='lower', extent=grid_extent)
    fig.colorbar(im)

    dynamics_model = lyapunov.dynamics
    if isinstance(dynamics_model, safe_learning.UncertainFunction):
        samples = dynamics_model.functions[0].X
        plt.plot(samples[:, 0], samples[:, 1], 'rx')

    if show:
        plt.show()


def debug(newly_safe_only=True):
    """Print a diagnostic report of the stability check on the current safe set.

    For every inspected state the report lists the Lyapunov value, the value
    at the true successor state, the model's confidence term, the resulting
    upper bound on the successor value, and whether the decrease condition
    holds. States that violate the decrease condition or whose upper bound
    is not below `c_max` are listed separately.

    Parameters
    ----------
    newly_safe_only : bool, optional
        If True (default), only inspect states that are in exactly one of
        `lyapunov.safe_set` and `lyapunov.initial_safe_set` (logical XOR).
        If False, inspect every state in `lyapunov.safe_set`.

    Notes
    -----
    Relies on the notebook globals `lyapunov`, `session`, `tf_states`,
    `tf_actions`, `true_dynamics`, `state_discretization`, `L_pol`, `L_dyn`,
    `beta` and `tau`.

    Side effects: every call adds new operations to the default TensorFlow
    graph, and `lyapunov.feed_dict` is updated in place so that `tf_states`
    maps to the inspected states.
    """
    print('Policy Lipschitz constant (L_pi): {}'.format(L_pol(0).eval()))
    print('Dynamics Lipschitz constant (L_f*(L_pi + 1)): {}'.format(L_dyn(0).eval()))
    print('beta: {}'.format(beta))
    print('tau: {}'.format(tau))
    print('c_n: {}'.format(lyapunov.feed_dict[lyapunov.c_max]))

    # True quantities
    tf_future_states = true_dynamics(tf_states, tf_actions)             # f(x,u)
    tf_vals = lyapunov.lyapunov_function(tf_states)                     # v(x)
    tf_true_future_vals = lyapunov.lyapunov_function(tf_future_states)  # v(f(x,u))
    # Reuse the tensor above rather than rebuilding it through the
    # notebook-global `lyapunov_function`.
    tf_true_decrease = tf_true_future_vals - tf_vals                    # v(f(x,u)) - v(x)

    # Only consider those states that become safe by updating the model
    if newly_safe_only:
        safe_set = np.logical_xor(lyapunov.safe_set, lyapunov.initial_safe_set)
    else:
        safe_set = lyapunov.safe_set

    # Current safe set
    safe_states = state_discretization.all_points[safe_set, :]      # D_n

    # Values and confidence intervals
    # NOTE(review): the second output is treated as the (beta-scaled) confidence
    # width per output dimension -- confirm against safe_learning.GaussianProcess.
    tf_mean, tf_var = lyapunov.dynamics(tf_states, tf_actions)      # mu_n(x,u), diag(Sigma_n(x,u))**1/2
    tf_mean_future_vals = lyapunov.lyapunov_function(tf_mean)       # v(mu_n(x,u))
    tf_lv = lyapunov.lipschitz_lyapunov(tf_mean)                    # L_v(mu_n(x,u))
    tf_bound = tf.reduce_sum(tf_var, axis=1, keep_dims=True)        # beta * sigma_n(x,u)

    # A single-column L_v is a scalar constant per state; otherwise it holds
    # one constant per output dimension and is weighted element-wise.
    if tf_lv.shape[1] == 1:
        tf_error = tf_lv * tf_bound                                 # L_v * beta * sigma_n(x,u)
    else:
        tf_error = tf.reduce_sum(tf_lv * tf_var, axis=1, keep_dims=True)

    # Check decrease condition for stability
    tf_upper_future_vals = tf_mean_future_vals + tf_error           # u_n(x,u)
    tf_decrease = tf_upper_future_vals - tf_vals                    # u_n(x,u) - v(x)
    tf_threshold = lyapunov.threshold(tf_states)                    # -L_dv * tau
    tf_stable = tf.less(tf_decrease, tf_threshold)                  # u_n(x,u) - v(x) < -L_dv * tau

    # Check if state-action pairs map back into safe set
    tf_maps_inside = tf.less(tf_upper_future_vals, lyapunov.c_max)  # S_n

    # Update feed dict and compute
    # (`bound` and `true_decrease` are fetched but only needed for the
    # optional report columns that are commented out below)
    lyapunov.feed_dict.update({tf_states: safe_states})
    (var, lv, bound, error, true_decrease, decrease, threshold, stable, upper_future_values, maps_inside,
     values, future_values) = session.run(
        [tf_var,
         tf_lv,
         tf_bound,
         tf_error,
         tf_true_decrease,
         tf_decrease,
         tf_threshold,
         tf_stable,
         tf_upper_future_vals,
         tf_maps_inside,
         tf_vals,
         tf_true_future_vals],
        lyapunov.feed_dict)

    # Report rows are sorted by increasing Lyapunov value
    order = np.argsort(values.ravel())

    # Use pandas frame for nice printing
    data = OrderedDict()
    data['v(x)'] = values[order].ravel()
    data['v(f(x,u))'] = future_values[order].ravel()
#     data['bound'] = bound[order].ravel()
    data['std'] = np.around(var[order, :], decimals=5).tolist()
    data['L_v'] = np.around(lv[order, :], decimals=2).tolist()
    data['err/beta'] = error[order].ravel() / beta
    data['u(x,u)'] = upper_future_values[order].ravel()
#     data['threshold'] = threshold[order].ravel()
    data['stable'] = stable[order].ravel()
#     data['maps in'] = maps_inside[order].ravel()

    if len(safe_states) == 0:
        print('\nNo new safe states! Try collecting more data to improve model.')
    else:
        total = len(values.ravel())

        max_decrease = np.max(decrease.ravel())
        min_decrease = np.min(decrease.ravel())
        print('\nMax decrease: {}'.format(max_decrease))
        print('Min decrease: {}\n'.format(min_decrease))

        frame = pandas.DataFrame(data)
        print(frame, '\n')

        # Builtin bool + np.all replaces the removed `np.bool` alias
        all_stable = bool(np.all(stable))
        num_stable = np.sum(stable)
        print('All stable?', all_stable)
        if not all_stable:
            unstable = np.logical_not(stable).ravel()
            print('Unstable: {}/{}'.format(total - num_stable, total))
            data = OrderedDict()
            data['x'] = np.around(safe_states[unstable, :], decimals=4).tolist()
            data['decrease'] = decrease[unstable, :].ravel()
            data['threshold'] = threshold[unstable, :].ravel()
            frame = pandas.DataFrame(data)
            print(frame)

        all_map_inside = bool(np.all(maps_inside))
        num_map_inside = np.sum(maps_inside)
        print('\nAll map inside?', all_map_inside)
        if not all_map_inside:
            outside = np.logical_not(maps_inside).ravel()
            print('Map outside: {}/{}'.format(total - num_map_inside, total))
            data = OrderedDict()
            data['x'] = np.around(safe_states[outside, :], decimals=4).tolist()
            data['v(x)'] = values[outside, :].ravel()
            data['v(f(x,u))'] = future_values[outside, :].ravel()
            data['u_n(x,u)'] = upper_future_values[outside, :].ravel()
            frame = pandas.DataFrame(data)
            print(frame)

## Dynamics

In [None]:
# Constants
dt = 0.01   # sampling time
g = 9.81    # gravity

# True system parameters
m = 0.15    # pendulum mass
L = 0.5     # pole length
b = 0.1     # rotational friction

# State and action normalizers
# (computed from the TRUE m and L; both names are rebound further below)
theta_max = np.deg2rad(30)
omega_max = np.sqrt(g / L)
u_max = g * m * L * np.sin(theta_max)

state_norm = (theta_max, omega_max)
action_norm = (u_max, )

# Dimensions and domains
state_dim = 2
action_dim = 1
state_limits = np.array([[-1., 1.]]*state_dim)
action_limits = np.array([[-1., 1.]]*action_dim)

# True system
true_pendulum = InvertedPendulum(m, L, b, dt, [state_norm, action_norm])
A_true, B_true = true_pendulum.linearize()

# Either the linearized model or the pendulum object itself acts as the
# "real" system that measurements are taken from.
# NOTE(review): this line uses `safe_learning.functions.LinearSystem` while the
# rest of the notebook uses `safe_learning.LinearSystem` -- presumably the same class.
if use_linear_dynamics:
    true_dynamics = safe_learning.functions.LinearSystem((A_true, B_true), name='true_dynamics')
else:
    true_dynamics = true_pendulum.__call__

# "Wrong" system
# (rebinds m, L, b; the normalizers above keep their true-system values)
m = 0.1     # pendulum mass
L = 0.5     # pole length
b = 0.0     # rotational friction
pendulum = InvertedPendulum(m, L, b, dt, [state_norm, action_norm])
A, B = pendulum.linearize()

# Optionally discard the "wrong" linearization and use the true one as prior mean
if use_true_parameters:
    A = A_true
    B = B_true
mean_dynamics = safe_learning.LinearSystem((A, B), name='mean_dynamics')

## GP Model

In [None]:
# Stack [A, B] into one parameter matrix per model; the squared parameter
# error is used as prior variance of the linear kernels.
# NOTE: `m` is rebound here and no longer holds the pendulum mass.
m_true = np.hstack((A_true, B_true))
m = np.hstack((A, B))
variances = (m_true - m) ** 2

# Make sure at least some non-zero prior variance is maintained
np.clip(variances, 1e-3, None, out=variances)

# Measurement noise
noise_var = 0.001 ** 2

# Input to GP is of the form (x,u)
full_dim = state_dim + action_dim

# Kernels
# One kernel per output dimension; row 0 of `variances` belongs to the
# first output (theta) and row 1 to the second output (omega).
if use_simplified_kernels:
    kernel_theta = gpflow.kernels.Linear(full_dim, variance=variances[0, :], ARD=True)

    kernel_omega = gpflow.kernels.Linear(full_dim, variance=variances[1, :], ARD=True)

else:
    # Linear kernel plus a Matern32 * Linear product term.
    # NOTE(review): the inner Linear(1, ...) has no explicit active_dims although
    # its variance is taken from column 1 -- confirm which input it should act on.
    kernel_theta = (gpflow.kernels.Linear(full_dim, variance=variances[0, :], ARD=True)
                    + gpflow.kernels.Matern32(1, lengthscales=1, active_dims=[0])
                    * gpflow.kernels.Linear(1, variance=variances[0, 1]))

    kernel_omega = (gpflow.kernels.Linear(full_dim, variance=variances[1, :], ARD=True)
                    + gpflow.kernels.Matern32(1, lengthscales=1, active_dims=[0])
                    * gpflow.kernels.Linear(1, variance=variances[1, 1]))

# Mean dynamics
# (one row of A and B per GP output; the list index keeps the slices 2-D)
mean_function_theta = safe_learning.LinearSystem((A[[0], :], B[[0], :]), name='mean_dynamics_theta')
mean_function_omega = safe_learning.LinearSystem((A[[1], :], B[[1], :]), name='mean_dynamics_omega')

# Define a GP model over the dynamics
# gpflow.gpr.GPR
# Both models start without data: zero rows of inputs and of targets.
gp_theta = safe_learning.GPRCached(np.empty((0, full_dim), dtype=np_dtype),
                                   np.empty((0, 1), dtype=np_dtype),
                                   kernel_theta,
                                   mean_function_theta)

gp_omega = safe_learning.GPRCached(np.empty((0, full_dim), dtype=np_dtype),
                                   np.empty((0, 1), dtype=np_dtype),
                                   kernel_omega,
                                   mean_function_omega)

gp_theta.likelihood.variance = noise_var
gp_omega.likelihood.variance = noise_var

# Wrap each GP model as a safe_learning function, passing the
# confidence-interval scaling factor beta
gp_theta_fun = safe_learning.GaussianProcess(gp_theta, beta)
gp_omega_fun = safe_learning.GaussianProcess(gp_omega, beta)

# Stack GP functions => block-diagonal kernel matrix
dynamics = safe_learning.FunctionStack((gp_theta_fun, gp_omega_fun))

## State Discretization

In [None]:
# Number of states along each dimension
# (the commented alternative below is a much coarser grid)
num_states = [2001, 1501]
# num_states = [51,]*state_dim

# State grid
# One [lower, upper] pair per state dimension
grid_limits = np.array([[-2., 2.], [-1.5, 1.5]])
state_discretization = safe_learning.GridWorld(grid_limits, num_states)

# Discretization constant
# tau = 0 disables the discretization-dependent threshold of the stability check
if ignore_threshold:
    tau = 0.0
else:
    tau = np.min(state_discretization.unit_maxes)

print('Grid size: {}'.format(state_discretization.nindex))
print('Discretization constant: {}'.format(tau))

## Cost Function

In [None]:
# State cost matrix
Q = np.diag([1., 2.])

# Action cost matrix
R = 1.2*np.identity(action_dim)

# Quadratic cost function
# (block-diagonal weight over the joint state-action vector: Q for states, R for actions)
cost_function = safe_learning.QuadraticFunction(block_diag(Q, R), name='cost_function')

## Lyapunov Function

In [None]:
# Fix policy to the LQR solution for the "wrong" system
# (A, B equal the true linearization when use_true_parameters is set)
K, P = safe_learning.utilities.dlqr(A, B, Q, R)
policy = safe_learning.LinearSystem(-K, name='policy')
if saturate:
    policy = safe_learning.Saturation(policy, -1, 1)

# TensorFlow variables
# `tf_states` is the shared placeholder that the plotting and debugging helpers feed
tf_states = tf.placeholder(tf_dtype, shape=[None, state_dim], name='states')
tf_actions = policy(tf_states)

# Define the Lyapunov function corresponding to the known policy
# (quadratic form in P from the LQR solution, with a linear map 2*P as its gradient)
lyapunov_function = safe_learning.QuadraticFunction(P)
grad_lyapunov_function = safe_learning.LinearSystem((2*P,))

# Lipschitz constants
# L_pol and L_dyn ignore their argument and return constants built from
# matrix 1-norms; L_v depends on the state through the gradient.
L_pol = lambda s: tf.constant(np.linalg.norm(-K, 1), dtype=tf_dtype)
L_dyn = lambda s: np.linalg.norm(A_true, 1) + np.linalg.norm(B_true, 1)*L_pol(s)
if use_lipschitz_scaling:
    # One constant per state dimension
    L_v = lambda s: tf.abs(grad_lyapunov_function(s))
else:
    # A single constant per state (1-norm of the gradient)
    L_v = lambda s: tf.norm(grad_lyapunov_function(s), ord=1, axis=1, keep_dims=True)
    
# Set initial safe set as a level set of the Lyapunov function
# (all grid points whose value is at most 0.5% of the largest value on the grid)
values = session.run(lyapunov_function(tf_states), {tf_states: state_discretization.all_points})
cutoff = 5e-3 * np.max(values)
initial_safe_set = np.squeeze(values, axis=1) <= cutoff

# Initialize class
lyapunov = safe_learning.Lyapunov(state_discretization, lyapunov_function, dynamics, 
                                  L_dyn, L_v, tau, policy, initial_safe_set)

# Visualize policy
n_points = [81, 81]
plot_policy(tf_actions, None, n_points)

## Initial Safe Set Visualization

In [None]:
# Compare safe set before and after checking the decrease condition for the first time
c_max = lyapunov.feed_dict[lyapunov.c_max]
init_safe_set_size = np.sum(lyapunov.safe_set)

print('Before update ...')
print('c_max: {}'.format(c_max))
print('Safe set size: {}\n'.format(init_safe_set_size))

# Keep a copy so the plot below can show both the old and the updated set
old_safe_set = np.copy(lyapunov.safe_set)
lyapunov.update_safe_set()

# `init_safe_set_size` is overwritten here: from now on it holds the size
# AFTER this first update and serves as the baseline for the growth reported later.
c_max = lyapunov.feed_dict[lyapunov.c_max]
init_safe_set_size = np.sum(lyapunov.safe_set)

print('After update ...')
print('c_max: {}'.format(c_max))
print('Safe set size: {}'.format(init_safe_set_size))

plot_safe_set(lyapunov, old_safe_set)

In [None]:
# Report on the states that differ between the current and the initial safe set
debug()

## Online Learning and Exploration

In [None]:
# Action perturbations handed to the safe-sample search (one per row);
# the commented lines are alternative choices.
# action_variation = np.array([-0.02, 0.0, 0.02], dtype=np_dtype).reshape((-1, 1))
action_variation = np.array([-0.01, -0.001, 0.0, 0.001, 0.01], dtype=np_dtype).reshape((-1, 1))
# action_variation = np.array([[0.]], dtype=np_dtype)

# Build the measurement op once: a single concatenated (state, action) row is
# fed through the true dynamics.
with tf.name_scope('add_new_measurement'):
    full_dim = state_dim + action_dim 
    tf_max_state_action = tf.placeholder(tf_dtype, shape=[1, full_dim])
    tf_measurement = true_dynamics(tf_max_state_action)
    
    
def update_gp():
    """Query one safe state-action pair, measure it, and add it to the GP.

    The pair is chosen by `safe_learning.get_safe_sample`, evaluated on the
    true dynamics through the pre-built `tf_measurement` op, and appended to
    the data of `lyapunov.dynamics`. `lyapunov.feed_dict` is updated in place
    with the queried pair.
    """
    # Pick the next state-action pair to evaluate (second return value unused)
    query_point, _ = safe_learning.get_safe_sample(
        lyapunov,
        action_variation,
        action_limits,
        positive=True,
        num_samples=1000,
    )

    # Evaluate the true dynamics at the selected pair
    lyapunov.feed_dict[tf_max_state_action] = query_point
    observation = tf_measurement.eval(feed_dict=lyapunov.feed_dict)

    # Extend the GP training data with the new observation
    lyapunov.dynamics.add_data_point(query_point, observation)

In [None]:
# Number of measurements collected before each safe-set update, and the
# number of such collect-then-update rounds
data_per_update = 10
safe_set_updates = 1


for i in range(safe_set_updates):
    
#     print('Iteration {} with c_max: {}'.format(i, lyapunov.feed_dict[lyapunov.c_max]))
    # Copy of the safe set before this round, for the comparison plot
    old_safe_set = np.copy(lyapunov.safe_set)

    for _ in tqdm(range(data_per_update)):
        update_gp()

    lyapunov.update_safe_set()
    plot_safe_set(lyapunov, old_safe_set)


# Growth is measured against the size stored in `init_safe_set_size`
current_safe_set_size = np.sum(lyapunov.safe_set)
print('Safe set size: {}'.format(current_safe_set_size))
print('Growth: {}'.format(current_safe_set_size - init_safe_set_size))

In [None]:
# Accepted "safe" state-action pairs
data = lyapunov.dynamics.functions[0].X
print("Data points collected: {}\n".format(data.shape[0]))
print(data)

In [None]:
# print(gp_theta.predict_f(data[0,:].reshape(1, -1)))
# print(gp_theta.likelihood.variance.value)

In [None]:
# Report again after learning: states that differ from the initial safe set
debug()