# Using SafeOpt

In [1]:
import GPy, safeopt

from RLBench.algo import SafeOptSwarm
from RLBench.envs import Quadrocopter, LinearCar
from RLBench.policy import NonLinearQuadrocopterController, LinearPolicy

from RLBench.measure import BestPerformance, SafetyMeasure

from RLBench import Bench

# set up logging
from RLBench import config

# Log at INFO level and attach a stream handler; the monitor's INFO messages
# then show up in the output of the cells that run a benchmark.
config.logger_set_level(config.INFO)
config.logger_add_stream_handler()
# NOTE(review): the meaning of verbosity level 2 is defined by RLBench.config
# and is not visible here — confirm against the library documentation.
config.monitor_set_verbosity(2)

#### Linear Car

In [2]:
# Variance handed to the Gaussian likelihood (0.05 squared).
noise_var = 0.05 ** 2

# Safety threshold. It is defined once and used both as the optimizer's
# 'fmin' and as the SafetyMeasure argument so the two values cannot drift
# apart (the Quadrocopter cell follows the same pattern).
fmin = -100

# One (lower, upper) pair per policy parameter; the kernel's input dimension
# is derived from the number of pairs.
bounds = [(-1., 0.), (-1., 0.), (0., 1.)]

# One SafeOptSwarm configuration per kernel standard deviation. The value is
# also stored under 'info' so it can be read back from the measure results.
algos = [(SafeOptSwarm, [{
    'policy': LinearPolicy(2, 1, par=[-1, 0, 1]),
    'kernel': GPy.kern.RBF(input_dim=len(bounds), variance=std**2, lengthscale=.4, ARD=True),
    'likelihood': GPy.likelihoods.gaussian.Gaussian(variance=noise_var),
    'max_it': 20,
    'avg_reward': -20,
    'window': 3,
    'fmin': fmin,
    'bounds': bounds,
    'info': std
} for std in [30, 35, 40, 45, 50]])]

# Environment class paired with its (empty) configuration.
envs = [(LinearCar, {})]

# measures[0] is BestPerformance, measures[1] is SafetyMeasure.
bench = Bench.make_bench(algos, envs, [BestPerformance(), SafetyMeasure(fmin)])

In [3]:
# Run the benchmark configured above; the monitor logs progress for each run.
bench()

1916 - 2018-05-11 01:28:06,375 - RLBench.monitor - INFO - Starting optimization of SafeOptSwarm...




1916 - 2018-05-11 01:28:19,830 - RLBench.monitor - INFO - Computing traces for SafeOptSwarm run...


1916 - 2018-05-11 01:28:20,123 - RLBench.monitor - INFO - Starting optimization of SafeOptSwarm...


Below we output the results of the safety measure. List comprehension is used to get a more readable format for the
tuples.
The first element shows the standard deviation used, the second the number of violations and the last one the sum over
all violations, just as documented in the `SafetyMeasure` class.

We can see that with a sufficiently large standard deviation (here 50) the safety constraint is no longer violated.

In [4]:
# Summarise each safety-measure entry as
# (configured 'info' value, second element, third element).
print([
    (entry[0].alg_conf['info'], entry[1], entry[2])
    for entry in bench.measures[1].result
])

[(30, 1, 1535.1940602232758), (35, 1, 1.2256673801121565), (40, 1, 2.252525021806804), (45, 1, 1.2200024660389488), (50, 0, 0)]


#### Quadrocopter

In [5]:
# Variance handed to the Gaussian likelihood (0.05 squared).
noise_var = 0.05 ** 2

# Set fixed Gaussian measurement noise
likelihood = GPy.likelihoods.gaussian.Gaussian(variance=noise_var)

# Bounds on the input variables
bounds = [(0., 1.), (0., 1.), (0., 1.), (0., 1.), (0., 1.)]

# Define Kernel
# The variance is the square of the standard deviation (1000), following the
# `variance=std**2` convention of the benchmark cells. The previous
# `1000.*2` multiplied by two instead of squaring.
kernel = GPy.kern.RBF(input_dim=len(bounds), variance=1000. ** 2, lengthscale=1.0, ARD=True)

In [6]:
# Variance handed to the Gaussian likelihood (0.05 squared).
noise_var = 0.05 ** 2

# Threshold shared by the optimizer configuration and the safety measure.
fmin = -2400

# All five input variables use the same (1e-2, 1.) bounds.
# Alternative per-dimension bounds, currently disabled:
# bounds = [(1e-2, .9), (1e-2, .9), (1e-1, .9), (.2, .7), (1e-2, .9)]
bounds = [(1e-2, 1.)] * 5

# One SafeOptSwarm configuration per kernel standard deviation
# (1000, 1250, 1500, 1750, 2000). The value is also stored under 'info'
# so it can be read back from the measure results.
algos = [(SafeOptSwarm, [
    {
        'policy': NonLinearQuadrocopterController(),
        'kernel': GPy.kern.RBF(
            input_dim=len(bounds),
            variance=kernel_std ** 2,
            lengthscale=0.2,
            ARD=True,
        ),
        'likelihood': GPy.likelihoods.gaussian.Gaussian(variance=noise_var),
        'max_it': 20,
        'avg_reward': -1500,
        'window': 3,
        'fmin': fmin,
        'bounds': bounds,
        'swarm_size': 1000,
        'info': kernel_std,
    }
    for kernel_std in range(1000, 2001, 250)
])]

# Environment class paired with its (empty) configuration.
envs = [(Quadrocopter, {})]

# measures[0] is BestPerformance, measures[1] is SafetyMeasure.
bench = Bench.make_bench(
    algos,
    envs,
    [BestPerformance(), SafetyMeasure(fmin)],
)

In [7]:
# Run the benchmark configured above; the monitor logs progress for each run.
bench()

5434 - 2018-05-10 23:58:32,249 - SafeRLBench.monitor - INFO - Starting optimization of SafeOptSwarm...
















5434 - 2018-05-10 23:58:55,870 - SafeRLBench.monitor - INFO - Computing traces for SafeOptSwarm run...


5434 - 2018-05-10 23:59:05,358 - SafeRLBench.monitor - INFO - Starting optimization of SafeOptSwarm...




5434 - 2018-05-10 23:59:28,485 - SafeRLBench.monitor - INFO - Computing traces for SafeOptSwarm run...


5434 - 2018-05-10 23:59:38,304 - SafeRLBench.monitor - INFO - Starting optimization of SafeOptSwarm...




5434 - 2018-05-11 00:00:01,495 - SafeRLBench.monitor - INFO - Computing traces for SafeOptSwarm run...


5434 - 2018-05-11 00:00:12,244 - SafeRLBench.monitor - INFO - Starting optimization of SafeOptSwarm...


5434 - 2018-05-11 00:00:37,912 - SafeRLBench.monitor - INFO - Computing traces for SafeOptSwarm run...


5434 - 2018-05-11 00:00:49,124 - SafeRLBench.monitor - INFO - Starting optimization of SafeOptSwarm...


5434 - 2018-05-11 00:01:11,105 - SafeRLBench.monitor - INFO - Computing traces for SafeOptSwarm run...


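Below we output the results of the safety measure and of the performance measure. List comprehensions are used to get a more readable format for the tuples.
For the safety measure, the first element shows the standard deviation used, the second the number of violations and the last one the sum over
all violations, just as documented in the `SafetyMeasure` class.
For the performance measure, each tuple holds the standard deviation used and the best performance converted to an integer.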

In [8]:
# Summarise each safety-measure entry as
# (configured 'info' value, second element, third element).
print([
    (entry[0].alg_conf['info'], entry[1], entry[2])
    for entry in bench.measures[1].result
])

In [9]:
# Summarise each performance-measure entry as
# (configured 'info' value, second element converted to int).
print([
    (entry[0].alg_conf['info'], int(entry[1]))
    for entry in bench.measures[0].result
])

[(1000, -1630), (1500, -1745), (1250, -1800), (1750, -1805), (2000, -1910)]
