In [None]:
from __future__ import division, print_function

import GPflow
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from functools import partial
%matplotlib inline

# Try to import safe_learning from the system;
# if that fails, get it from the main folder directly instead.
import utilities
from plotting import plot_lyapunov_1d

# If library not installed, import it from '../'
safe_learning = utilities.import_from_directory('safe_learning', '../')


# (Re)create the TensorFlow session used by the rest of the notebook.
# If this cell is re-run, the previously created session is closed first;
# on a fresh kernel `session` does not exist yet, so the NameError is ignored.
try:
    session.close()
except NameError:
    pass
finally:
    # Always runs, whether or not an old session was closed: open a new
    # interactive session and run the global-variables initializer op in it.
    session = tf.InteractiveSession()
    session.run(tf.global_variables_initializer())

We start by defining a discretization of the space $[-1, 1]$ with discretization constant $\tau$.

In [None]:
# Discretization constant
# tau = 0.001

# x_min, x_max, discretization
# grid_param = [-1., 1., tau]
# extent = np.array(grid_param[:2])

# Grid over the state interval [-1, 1]; 1001 is passed as the second argument
# (presumably the number of grid points -- TODO confirm against GridWorld).
discretization = safe_learning.GridWorld([-1, 1], 1001)
# tau is the reciprocal of the number of grid indices (true division thanks to
# the `from __future__ import division` at the top of the notebook).
# NOTE(review): if nindex is 1001, neighbouring points on [-1, 1] would be
# 2 / 1000 = 0.002 apart, not 1 / 1001 -- confirm which value later code expects.
tau = 1 / discretization.nindex

# Report the number of grid indices.
print('Grid size: {0}'.format(discretization.nindex))

We define the GP model using one particular sample of the GP, in addition to a stable, closed-loop, linear model:
$$x_{k+1} = 0.25 x_k + g_\pi(x_k).$$

The prior dynamics are locally asymptotically stable. Moreover, in the one-dimensional case, the dynamics are stable as long as $|x_{k+1}| \leq |x_{k}|$.

In [None]:
# Observation noise
# NOTE(review): noise_var is not referenced by any other visible cell and is
# not passed to the GP below -- confirm whether it should set the likelihood.
noise_var = 0.01 ** 2

# Mean dynamics: the linear prior from the text above
# (presumably x_{k+1} = 0.25 * x_k with zero input gain -- TODO confirm the
# meaning of the two LinearSystem arguments).
mean_function = safe_learning.LinearSystem(0.25, 0.)

# Product of a Matern-3/2 and a linear kernel, both acting on input column 0.
kernel = (GPflow.kernels.Matern32(1, lengthscales=0.2, variance=0.2**2, active_dims=[0])
          * GPflow.kernels.Linear(1, active_dims=[0]))

# Start from an empty data set: zero rows, two input columns, one output column.
# `np.float` (an alias of the builtin float) was removed in NumPy 1.24;
# `np.float64` yields the same dtype and works on old and new NumPy alike.
gp = GPflow.gpr.GPR(np.empty((0, 2), dtype=np.float64),
                    np.empty((0, 1), dtype=np.float64),
                    kernel,
                    mean_function=mean_function)

gpfun = safe_learning.GaussianProcess(gp)

# Define one sample as the true dynamics
# Fix the NumPy seed so the drawn samples are reproducible across runs.
np.random.seed(5)

# Set up a discretization: 50 evenly spaced values in [-1, 1] in column 0 and
# zeros in column 1 (presumably the action input -- TODO confirm).
sample_disc = np.hstack((np.linspace(-1, 1, 50)[:, None],
                         np.zeros((50, 1))))

# Draw samples: ten function draws evaluated on sample_disc, plotted together.
fs = safe_learning.sample_gp_function(sample_disc, gpfun, number=10, return_function=False)
plt.plot(sample_disc[:, 0], fs.T)

plt.ylabel('$g(x)$')
plt.xlabel('x')
plt.title('Samples drawn from the GP model of the dynamics')
plt.show()


# Draw one more sample, this time as a callable, and use it as the
# ground-truth dynamics for the rest of the notebook.
true_dynamics = safe_learning.sample_gp_function(
    sample_disc,
    gpfun)[0]

# Evaluate the noise-free true dynamics on the sample grid; the call result is
# evaluated in the session with the feed dict carried by `true_dynamics`.
true_y = true_dynamics(sample_disc, noise=False).eval(feed_dict=true_dynamics.feed_dict)
plt.plot(sample_disc[:, 0], true_y, color='black', alpha=0.8)
plt.title('GP model of the dynamics')
plt.show()

In [None]:
# Earlier Lyapunov-function setups, kept commented out for reference:
# lyapunov_function = safe_learning.QuadraticFunction(np.array([[1]]))
# n_linear = 10
# lyapunov_function = safe_learning.Triangulation(extent[None, :], n_linear)
# points = lyapunov_function.index_to_state(np.arange(lyapunov_function.nindex))
# lyapunov_function.vertex_values = points.squeeze() ** 2

# Active setup: a triangulation over a 3-entry grid on [-1, 1] with the values
# [1, 0, 1] (presumably a piecewise-linear V(x) = |x| -- TODO confirm how
# Triangulation interprets its second argument).
lyapunov_disc = safe_learning.GridWorld([-1., 1.], 3)
lyapunov_function = safe_learning.Triangulation(lyapunov_disc, [1, 0, 1])

# The GP wrapper defined earlier serves as the dynamics model.
dynamics = gpfun

# NOTE(review): everything below is commented out, yet later cells use
# `lyapunov`, `accuracy` and `grid`. No uncommented definition of these names
# is visible in this notebook -- restore or replace these lines so that
# Restart & Run All works.

# Lipschitz constant
# L_dyn = 0.25 + dynamics.beta(0) * np.sqrt(gp.kern.Mat32.variance) / gp.kern.Mat32.lengthscale * np.max(np.abs(extent))
# L_V = np.max(lyapunov_function.gradient(grid))

# lyapunov = safe_learning.LyapunovDiscrete(grid, lyapunov_function, dynamics, L_dyn, L_V, tau, initial_set=None)

# Specify the desired accuracy
# accuracy = np.max(lyapunov.V) / 1e10

## Safety based on GP model

Let's start by plotting the prior over the dynamics and the associated prior over $\dot{V}(x)$.

In [None]:
# Recompute the safe set from the prior model (no data added yet), then hand
# the Lyapunov object and the true dynamics to the 1-D plotting helper.
# NOTE(review): `lyapunov` and `accuracy` have no uncommented definition in
# the cells above -- confirm they exist before this cell runs.
lyapunov.update_safe_set(accuracy=accuracy)
plot_lyapunov_1d(lyapunov, true_dynamics, legend=True)

Clearly the model does not allow us to classify any states as safe ($\dot{V} < -L \tau$). However, as a starting point, we assume that we know that the system is asymptotically stable within some initial set, $\mathcal{S}_0$:

$$\mathcal{S}_0 = \{ x \in \mathbb{R} \,|\, |x| < 0.2 \}$$

In [None]:
# Initial safe set S_0 = {x : |x| < 0.2}, stored as a boolean mask computed
# from the squeezed discretization
# (presumably the array of grid states -- TODO confirm).
lyapunov.initial_safe_set = np.abs(lyapunov.discretization.squeeze()) < 0.2

## Online learning
As we sample within this initial safe set, we gain more knowledge about the system. In particular, we iteratively select the state within the safe set, $\mathcal{S}_n$, where the dynamics are the most uncertain (highest variance).

In [None]:
def update_gp():
    """Run one active-learning step on the GP dynamics model.

    Refreshes the safe set, picks the safe grid state whose predicted
    dynamics have the largest standard deviation, queries the true dynamics
    there with noise, and adds that observation to the GP.

    Relies on the notebook globals ``lyapunov``, ``accuracy``, ``grid`` and
    ``true_dynamics``.

    Returns
    -------
    The safe set as it was before the new data point was added.
    """
    lyapunov.update_safe_set(accuracy=accuracy)
    current_safe = lyapunov.safe_set

    # Only the predictive standard deviation is needed; the mean is discarded.
    _, predicted_std = lyapunov.dynamics.evaluate(grid)

    # Index is relative to the safe subset, so index into that same subset.
    # Double brackets keep the selected state two-dimensional (one row).
    most_uncertain = np.argmax(predicted_std[current_safe])
    query_state = grid[current_safe][[most_uncertain], :].copy()

    # Noisy observation of the true dynamics at the chosen state; only the
    # first output column is kept.
    observation = true_dynamics(query_state, noise=True)[:, [0]]
    lyapunov.dynamics.add_data_point(query_state, observation)

    return current_safe

In [None]:
# Update the GP model a couple of times
for i in range(4):
    update_gp()

In [None]:
# Plot the new safe set
lyapunov.update_safe_set(accuracy=accuracy)
plot_lyapunov_1d(lyapunov, true_dynamics, legend=True)

We continue to sample like this, until we find the maximum safe set.

In [None]:
# Twenty further active-learning steps (one new data point per call).
for i in range(20):
    update_gp()

# Recompute the safe set with all collected data and plot the final result,
# this time without a legend.
lyapunov.update_safe_set(accuracy=accuracy)
plot_lyapunov_1d(lyapunov, true_dynamics, legend=False)