In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pprint

import sys
import os

sys.path.append(os.path.abspath('..'))

from SafeRLBench import config
from SafeRLBench import Bench, BenchConfig, BestPerformance
from SafeRLBench.algo import PolicyGradient, SafeOpt
from SafeRLBench.envs import LinearCar, GeneralMountainCar
from SafeRLBench.policy import LinearPolicy, NoisyLinearPolicy
from SafeRLBench.spaces import BoundedSpace
from SafeRLBench import SRBConfig

import GPy
import safeopt

from copy import copy

import logging
    
# Print NumPy floats with five digits of precision so array output stays compact.
np.set_printoptions(precision=5)

In [None]:
# Global SafeRLBench settings for this notebook session.
# NOTE(review): the descriptions below are read off the method names only;
# confirm the exact semantics against the SafeRLBench configuration docs.

# Ask the library logger for DEBUG-level records.
config.logger_set_level(logging.DEBUG)
# Attach a stream handler (presumably so log records show up in the cell output).
config.logger_add_stream_handler()
# Optional: also log to a file. Left disabled; uncomment to enable.
# config.logger_add_file_handler('BenchTestLog.log')
# Monitor verbosity level 2 (meaning of the levels not visible here -- TODO confirm).
config.monitor_set_verbosity(2)
# Presumably the number of parallel jobs used when the benchmark runs -- TODO confirm.
config.jobs_set(4)

In [None]:
# --- Gaussian-process ingredients handed to SafeOpt further down ---

# Measurement noise is specified as a standard deviation and stored as a variance.
noise_std = 0.05
noise_var = noise_std ** 2

# Gaussian likelihood with that variance; constrain_fixed keeps it fixed
# (warning=False silences the accompanying warning).
likelihood = GPy.likelihoods.gaussian.Gaussian(variance=noise_var)
likelihood.constrain_fixed(warning=False)

# Box constraints on the inputs: the interval [-5, 5] in each of two dimensions.
bounds = [(-5., 5.)] * 2

# Candidate inputs generated from the bounds with a sample count of 100
# (presumably per dimension -- see the safeopt documentation).
parameter_set = safeopt.linearly_spaced_combinations(bounds, 100)

# RBF kernel with one input dimension per bound and ARD enabled.
kernel_options = dict(
    input_dim=len(bounds),
    variance=2.,
    lengthscale=1.0,
    ARD=True,
)
kernel = GPy.kern.RBF(**kernel_options)

In [None]:
# --- Policies ---
# One plain linear policy plus three independently constructed noisy linear
# policies (each gets its own BoundedSpace instance).
policy = LinearPolicy(2, 1)
np1, np2, np3 = (
    NoisyLinearPolicy(2, 1, sigma=1, par_space=BoundedSpace(-1, 1, (3,)))
    for _ in range(3)
)

# --- Algorithms ---
# Settings shared by the two PolicyGradient runs that use the 'reinforce' estimator.
reinforce_settings = {
    'max_it': 100,
    'eps': 1e-10,
    'estimator': 'reinforce',
    'rate': 4,
    'var': 0.5,
}

pg_configs = [
    dict(policy=np1, **reinforce_settings),
    dict(policy=np2, **reinforce_settings),
    # Third PolicyGradient run: central finite differences on the plain policy.
    {
        'policy': policy,
        'max_it': 100,
        'eps': 1e-7,
        'estimator': 'central_fd',
        'var': 5,
    },
]

# NOTE(review): parameter_set / kernel come from 2-D bounds defined in an
# earlier cell, while np3 is built with a 3-element par_space -- confirm the
# dimensions are meant to differ.
safeopt_configs = [{
    'policy': np3,
    'max_it': 200,
    'avg_reward': 20,
    'window': 4,
    'kernel': kernel,
    'likelihood': likelihood,
    'parameter_set': parameter_set,
    'fmin': 0,
    'threshold': 0.2,
}]

algs = [[
    (PolicyGradient, pg_configs),
    (SafeOpt, safeopt_configs),
]]

# --- Environment ---
env = [[(LinearCar, {'horizon': 100})]]

# Combine algorithm and environment configurations into one benchmark config.
test_config = BenchConfig(algs, env)

In [None]:
# Build the benchmark from the configuration with a single BestPerformance measure.
measures = [BestPerformance()]
benchmark = Bench(test_config, measures)

In [None]:
# Execute the benchmark (the Bench object is callable).
benchmark()

In [None]:
# Display the result of the first (and only) measure passed to Bench.
benchmark.measures[0].result

In [None]:
# Number of leading entries of the measure result to visualise.
N_PLOTTED_RESULTS = 3


def plot_best_trace(bench, rank):
    """Plot the maximum-reward trace recorded for one benchmark result entry.

    Parameters
    ----------
    bench :
        Executed benchmark object. ``bench.measures[0].result[rank][0]`` must
        provide ``get_alg_monitor()`` returning an object with parallel
        ``rewards`` and ``traces`` sequences.
    rank : int
        Index into ``bench.measures[0].result``.

    Raises
    ------
    ValueError
        If the monitor holds no rewards, so no best trace can be selected.
    """
    monitor = bench.measures[0].result[rank][0].get_alg_monitor()
    rewards = monitor.rewards
    if len(rewards) == 0:
        raise ValueError('no rewards recorded for result[%d]' % rank)

    # Pick the first trace whose reward equals the maximum reward.
    best_trace = monitor.traces[rewards.index(max(rewards))]

    # Each trace entry contributes its [1][0] element.
    # NOTE(review): presumably the first state component -- confirm against
    # the environment's trace layout.
    values = [entry[1][0] for entry in best_trace]

    _, ax = plt.subplots()
    ax.plot(range(len(values)), values)
    ax.set(
        title='result[%d]: trace with maximum reward' % rank,
        xlabel='index within trace',
        ylabel='trace entry [1][0]',
    )
    plt.show()


# One figure per result entry, replacing three copy-pasted cells that
# differed only in the result index.
for rank in range(N_PLOTTED_RESULTS):
    plot_best_trace(benchmark, rank)