In [None]:
from __future__ import division, print_function

import GPy
import numpy as np
import matplotlib.pyplot as plt
from functools import partial
%matplotlib inline

# Try to import safe_learning from the system;
# if that fails, get it from the main folder directly instead.
import utilities
from plotting import plot_lyapunov_1d

# Load the `safe_learning` package through the local `utilities` helper,
# passing '../' (the parent folder) as the directory to look in.
# NOTE(review): presumably an installed copy takes precedence and '../' is only
# a fallback -- confirm against utilities.import_from_directory.
safe_learning = utilities.import_from_directory('safe_learning', '../')

In [None]:
# Number of grid points along the state axis and the action axis
num_states = 100
num_actions = 100

# [x_min, x_max] bounds of the state interval and the action interval
state_limits = np.array([-1.0, 1.0])
action_limits = np.array([-1.0, 1.0])

# Separate one-dimensional grids for the states and for the actions
state_space = safe_learning.GridWorld([state_limits], num_states).all_points
action_space = safe_learning.GridWorld([action_limits], num_actions).all_points

# Joint grid over (state, action) pairs
discretization = safe_learning.GridWorld(
    [state_limits, action_limits],
    [num_states, num_actions],
)
grid = discretization.all_points

In [None]:
# Noise variance handed to the sampler (i.e. a standard deviation of 0.01)
noise_var = 0.01 ** 2

# GP prior: product of a Matern-3/2 factor and a linear factor. Both factors
# are restricted to input column 1 through active_dims.
# NOTE(review): column 1 is presumably the action column of the state-action
# grid, so the prior ignores the state column -- confirm this is intended.
matern_factor = GPy.kern.Matern32(1, lengthscale=0.1, variance=0.1**2, active_dims=[1])
linear_factor = GPy.kern.Linear(1, active_dims=[1])
kernel = matern_factor * linear_factor


def _mean_dynamics(x):
    """Mean dynamics: 1.2 * column 0 of x plus 0.5 * column 1 of x (kept 2-D)."""
    return 1.2 * x[:, [0]] + 0.5 * x[:, [1]]


def _skip_gradient_update(a, b):
    """Do nothing; the mean mapping has no gradients to update."""
    return None


# Mean dynamics wrapped in a GPy mapping with 2 inputs and 1 output
mf = GPy.core.Mapping(2, 1)
mf.f = _mean_dynamics
mf.update_gradients = _skip_gradient_update

# Define one sample of the GP as the true dynamics
sampler_options = dict(
    num_samples=[20, 20],
    noise_var=noise_var,
    interpolation='kernel',
    mean_function=mf.f,
)
dynamics = safe_learning.sample_gp_function(
    kernel,
    [state_limits, action_limits],
    **sampler_options)

In [None]:
# Evaluate the sampled dynamics function on the joint state-action grid.
dyns = dynamics(grid)

In [None]:
# Show the sampled dynamics as an image with the state on the x-axis and the
# action on the y-axis. The flat evaluation is reshaped to
# (num_states, num_actions) and transposed because imshow draws rows along y.
# The extent is taken from the grid limits instead of being hard-coded, so the
# axis ticks stay correct if state_limits or action_limits are changed.
plot_extent = [state_limits[0], state_limits[1], action_limits[0], action_limits[1]]
img = plt.imshow(dyns.reshape(num_states, num_actions).T, origin='lower', extent=plot_extent)
plt.xlabel('state')
plt.ylabel('action')
plt.colorbar(img)

In [None]:
# Lipschitz constants, both set to zero, and tau.
# NOTE(review): presumably L_dyn belongs to the dynamics and L_V to the
# Lyapunov function -- confirm against LyapunovDiscrete.
L_dyn = 0.0
L_V = 0.0
tau = 0

# Lyapunov candidate: triangulation over the state interval with the three
# vertex values 1, 0, 1.
lyapunov_function = safe_learning.Triangulation(
    state_limits[np.newaxis, :],
    3,
    vertex_values=[1, 0, 1],
)

# Initial policy: a zero action for every entry of state_space
policy = np.zeros((len(state_space), 1))

lyapunov = safe_learning.LyapunovDiscrete(
    state_space,
    lyapunov_function,
    dynamics,
    L_dyn,
    L_V,
    tau,
    initial_set=None,
    policy=policy,
)


In [None]:
# Safety mask over the grid: column i of `safe` holds the result of the safety
# constraint at every state when action_space[i] is applied everywhere.
# The builtin bool/float are used instead of the np.bool/np.float aliases,
# which were deprecated in NumPy 1.20 and removed in NumPy 1.24.
safe = np.empty((num_states, num_actions), dtype=bool)

# `policy` is a one-element buffer and `policy_array` is a read-only broadcast
# view of it with shape (num_states, 1). Writing into `policy` therefore
# changes every row of `policy_array` without allocating a new array.
policy = np.array([0], dtype=float)
policy_array = np.broadcast_to(policy, (num_states, 1))

for i, u in enumerate(action_space):
    policy[:] = u  # constant policy: the same action u at every state
    safe[:, i] = lyapunov.safety_constraint(policy_array)

In [None]:
# Show the safety mask as an image with the state on the x-axis and the action
# on the y-axis; `safe` is indexed [state, action], so it is transposed because
# imshow draws rows along y.
# The extent is taken from the grid limits instead of being hard-coded, so the
# axis ticks stay correct if state_limits or action_limits are changed.
plot_extent = [state_limits[0], state_limits[1], action_limits[0], action_limits[1]]
img = plt.imshow(safe.T, origin='lower', extent=plot_extent)
plt.xlabel('state')
plt.ylabel('action')
plt.colorbar(img)