In [None]:
from __future__ import division, print_function

import GPy
import numpy as np
import matplotlib.pyplot as plt
from functools import partial
%matplotlib inline

# Try to import safe_learning from the system;
# if that fails, get it from the main folder directly instead.
import utilities
from plotting import plot_lyapunov_1d

# If library not installed, import it from '../'
safe_learning = utilities.import_from_directory('safe_learning', '../')

We start by defining a discretization of the space $[-1, 1]$ with discretization constant $\tau$.

In [None]:
# Step size of the grid (the discretization constant tau)
tau = 0.001

# Grid specification as [x_min, x_max, step]; the first two entries form the extent
grid_param = [-1., 1., tau]
extent = np.asarray(grid_param[:2])

# Column vector of grid points, one state per row.
# np.arange excludes the upper bound, so x_max itself is not a grid point.
lower, upper, step = grid_param
grid = np.arange(lower, upper, step).reshape(-1, 1)
num_samples = grid.shape[0]

print('Grid size: {0}'.format(num_samples))

### Draw system dynamics from a GP

We define a kernel, $k(x,x') = k_{\mathrm{linear}}(x, x') * k_{\mathrm{Matern}}(x, x')$, which models a nonlinear, once-differentiable function (the code below uses a Matérn-3/2 kernel) with linearly increasing amplitude. We draw a sample from this kernel in order to define the dynamics.

The following plot shows the kind of functions that this kernel implies

In [None]:
# Product of a Matern-3/2 kernel and a linear kernel over the 1-D state
kernel = GPy.kern.Matern32(1, lengthscale=0.2, variance=0.5**2) * GPy.kern.Linear(1)

# Keyword arguments shared by every draw
sample_options = dict(num_samples=100, noise_var=0.1, interpolation='kernel')

# Draw ten functions and plot their noise-free values on the grid
for draw in range(10):
    sampled_function = safe_learning.utilities.sample_gp_function(
        kernel, [extent], **sample_options)
    plt.plot(grid, sampled_function(grid, noise=False))

plt.title('Samples drawn from the GP model of the dynamics')
plt.xlabel('x')
plt.ylabel('$g(x)$')
plt.show()

We define the GP model using one particular sample of the GP, in addition to a stable, closed-loop, linear model.
$$x_{k+1} = 0.25 x_k + g_\pi(x_k).$$

The prior dynamics are locally asymptotically stable. Moreover, in the one-dimensional case, the dynamics are stable as long as $|x_{k+1}| \leq |x_{k}|$.

In [None]:
# Variance of the observation noise (standard deviation 0.01)
noise_var = 0.01 ** 2


def _linear_prior_mean(x):
    """Prior mean of the dynamics: the linear map 0.25 * x."""
    return 0.25 * x


def _skip_gradient_update(a, b):
    """No-op stand-in for the mapping's gradient update."""
    return None


# Mean function of the GP, expressed as a GPy mapping
mf = GPy.core.Mapping(1, 1)
mf.f = _linear_prior_mean
mf.update_gradients = _skip_gradient_update

# Seed NumPy's global RNG, then draw the one sample that serves as the true dynamics
np.random.seed(5)
true_dynamics = safe_learning.utilities.sample_gp_function(
    kernel, [extent],
    num_samples=100,
    noise_var=noise_var,
    interpolation='kernel',
    mean_function=mf.f)

# GP model over the dynamics, conditioned on a single observation at the origin
gp = GPy.models.GPRegression(
    np.array([[0]]), np.array([[0]]), kernel,
    noise_var=noise_var,
    mean_function=mf)

# Show the initial model together with the (noise-free) true dynamics
gp.plot_f(plot_limits=extent)
plt.plot(grid, true_dynamics(grid, noise=False), color='black', alpha=0.8)
plt.title('GP model of the dynamics')
plt.show()

Additionally, we define an arbitrary Lyapunov function. Unlike in multiple dimensions, in the one-dimensional case all radially increasing functions are equivalent. Here we pick
$$V(x) = x^2$$
The previous GP model defines a GP model over $\dot{V}(x) = \frac{\partial V(x)}{\partial x} f(x)$. In the following, we only consider the 2-$\sigma$ upper confidence bound of this model. Since the dynamics are Lipschitz continuous, $\dot{V}$ is Lipschitz continuous as well.

In particular, we use Lemma 5 to determine an appropriate Lipschitz constant. For the sample path of the GP, we use the high-probability Lipschitz constant encoded by the kernel.

In [None]:
# Triangulation-based Lyapunov candidate over the extent, initialized to V(x) = x^2
# by assigning the squared state to every vertex.
lyapunov_function = safe_learning.Triangulation(extent[None, :], 10)
vertex_states = lyapunov_function.index_to_state(np.arange(lyapunov_function.nindex))
lyapunov_function.vertex_values = vertex_states.squeeze() ** 2

# Wrap the GPy model; beta = 2 is the confidence-interval scaling
dynamics = safe_learning.GPyGaussianProcess(gp, beta=2.)

# Lipschitz constant of the dynamics: slope of the prior mean (0.25) plus a
# term built from the Matern hyperparameters and the largest |x| in the domain.
L_dyn = 0.25 + dynamics.beta(0) * np.sqrt(gp.kern.Mat32.variance) / gp.kern.Mat32.lengthscale * np.max(np.abs(extent))

# Lipschitz constant of V: the largest gradient *magnitude* on the grid.
# Taking the plain maximum would ignore steep negative slopes; it only gives
# the same number for the symmetric initialization V(x) = x^2.
L_V = np.max(np.abs(lyapunov_function.gradient(grid)))

lyapunov = safe_learning.LyapunovDiscrete(grid, lyapunov_function, dynamics, L_dyn, L_V, tau, initial_set=None)

# Specify the desired accuracy (relative to the largest Lyapunov value)
accuracy = np.max(lyapunov.V) / 1e10

# Initial safe set: all grid points with |x| < 0.1
lyapunov.initial_safe_set = np.abs(lyapunov.discretization.squeeze()) < 0.1

In [None]:
# Update the safe set for the initial (quadratic) Lyapunov function with the
# requested accuracy, then plot it alongside the true dynamics.
lyapunov.update_safe_set(accuracy=accuracy)
plot_lyapunov_1d(lyapunov, true_dynamics, legend=True)

In [None]:
# min sum_i xi_i
# s.t. xi_i >= 0
# l_v = max gradient
# for all discrete points:
#     V(mu_x) - V(x) + l_v * v_dot_error + l_v (1 + l_f) eps <= xi_i

import cvxpy

def optimize_lyapunov(lyapunov):
    """Re-fit the vertex values of the Lyapunov function with cvxpy.

    The problem minimizes the summed slack ``xi`` subject to, for every state
    ``x`` in ``lyapunov.discretization``::

        V(mu(x)) - V(x) + l_v * error(x) + l_v * (1 + L_dyn) * eps <= xi(x)
        xi(x) >= 0

    where ``mu`` and ``error`` are the two outputs of
    ``lyapunov.dynamics.evaluate``, ``l_v`` is the infinity norm of the
    per-simplex gradients of ``V``, ``L_dyn`` is
    ``lyapunov.lipschitz_dynamics`` and ``eps`` is ``lyapunov.epsilon``.
    In addition, every vertex value must be at least ``1e-4 * |x|`` and the
    value at the origin must be zero.

    Parameters
    ----------
    lyapunov : object
        Lyapunov object providing ``discretization``, ``dynamics``,
        ``lyapunov_function``, ``lipschitz_dynamics`` and ``epsilon``.
        All quantities are read from this argument rather than from
        notebook-level globals.

    Returns
    -------
    None
        If the solver reports an optimal solution, ``lyapunov`` is modified in
        place: the vertex values, ``lyapunov.V`` and
        ``lyapunov.lipschitz_lyapunov`` are overwritten. Otherwise the solver
        status is printed and ``lyapunov`` is left unchanged.
    """
    discretization = lyapunov.discretization
    lyapunov_function = lyapunov.lyapunov_function
    dynamics_mean, dynamics_error = lyapunov.dynamics.evaluate(discretization)

    # Optimization variables: one value per vertex, one slack per grid point
    values = cvxpy.Variable(lyapunov_function.nindex)
    slack = cvxpy.Variable(len(discretization))

    # Gradients on every simplex, as a linear function of the vertex values
    simplex_indices = np.arange(lyapunov_function.nsimplex)
    gradient_mat = lyapunov_function.gradient_constraint(indices=simplex_indices)
    gradient_mat = cvxpy.Constant(gradient_mat)
    gradients = gradient_mat * values
    # Largest gradient magnitude, used as the Lipschitz constant of V
    l_v = cvxpy.norm(gradients, 'inf')

    # V(mu(x))
    values_at_mu_mat = lyapunov_function.evaluate_constraint(dynamics_mean)
    values_at_mu_mat = cvxpy.Constant(values_at_mu_mat)
    values_at_mu = values_at_mu_mat * values

    # V(x)
    values_at_x_mat = lyapunov_function.evaluate_constraint(discretization)
    values_at_x_mat = cvxpy.Constant(values_at_x_mat)
    values_at_x = values_at_x_mat * values

    # Error terms added to the decrease condition
    interpolation_error = l_v * np.sum(dynamics_error, axis=1)
    discretization_error = l_v * (1 + lyapunov.lipschitz_dynamics) * lyapunov.epsilon
    value_function_diff = values_at_mu - values_at_x + interpolation_error + discretization_error

    # Value at the origin
    value_at_zero = lyapunov_function.evaluate_constraint(np.array([[0]]))
    value_at_zero = cvxpy.Constant(value_at_zero)
    value_at_zero = value_at_zero * values

    # Lower bound on the vertex values: a small multiple of the 1-norm of the
    # vertex state, which keeps V from collapsing to zero away from the origin.
    value_discretization = lyapunov_function.index_to_state(np.arange(lyapunov_function.nindex))
    lyapunov_lower_bound = 0.0001 * np.sum(np.abs(value_discretization), axis=1)

    # Create problem instance
    constraints = [slack >= 0,
                   value_function_diff <= slack,
                   values >= lyapunov_lower_bound,
                   value_at_zero == 0]

    objective = cvxpy.Minimize(cvxpy.sum_entries(slack))

    prob = cvxpy.Problem(objective, constraints)
    prob.solve()

    if prob.status != cvxpy.OPTIMAL:
        # Leave the Lyapunov function untouched and report why
        print(prob.status)
    else:
        # Update the lyapunov function's inherent properties
        lyapunov_function.vertex_values = np.array(values.value).squeeze()
        lyapunov.V = lyapunov_function.evaluate(lyapunov.discretization).squeeze()
        lyapunov.lipschitz_lyapunov = l_v.value
        
# Solve the optimization problem; on an optimal solution this overwrites the
# Lyapunov function stored in `lyapunov`, otherwise the solver status is printed.
optimize_lyapunov(lyapunov)

In [None]:
# Update the safe set again after the Lyapunov function has been optimized,
# then plot it alongside the true dynamics.
lyapunov.update_safe_set(accuracy=accuracy)
plot_lyapunov_1d(lyapunov, true_dynamics, legend=True)

In [None]:
def update_gp():
    """Add one actively selected measurement to the GP model.

    The safe set of the notebook-level ``lyapunov`` object is updated first.
    Among the safe grid points, the one with the largest second output of
    ``lyapunov.dynamics.evaluate`` (the model uncertainty) is selected, a
    noisy measurement of ``true_dynamics`` is taken there, and the pair is
    added to the dynamics model.

    Returns
    -------
    The safe set (``lyapunov.safe_set``) that the selection was based on.
    """
    lyapunov.update_safe_set(accuracy=accuracy)
    safe_mask = lyapunov.safe_set

    # Only the uncertainty is needed to pick the query point
    _, uncertainty = lyapunov.dynamics.evaluate(grid)
    safe_states = grid[safe_mask]
    most_uncertain = np.argmax(uncertainty[safe_mask])
    # Index with a list so the state keeps its 2-D (1, n) shape
    query_state = safe_states[[most_uncertain], :].copy()

    # Noisy observation at the query state; keep the first column as a 2-D array
    observation = true_dynamics(query_state, noise=True)[:, [0]]
    lyapunov.dynamics.add_data_point(query_state, observation)
    return safe_mask

In [None]:
# Collect ten actively selected measurements, one per call to update_gp
for i in range(10):
    update_gp()

In [None]:
# Update the safe set with the GP that now includes the new measurements,
# then plot it alongside the true dynamics.
lyapunov.update_safe_set(accuracy=accuracy)
plot_lyapunov_1d(lyapunov, true_dynamics, legend=True)