In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pprint

import sys
import os

# Make the parent directory importable (presumably where the SafeRLBench
# package lives relative to this notebook -- confirm for your checkout).
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
if os.path.abspath('..') not in sys.path:
    sys.path.append(os.path.abspath('..'))

from SafeRLBench import config
from SafeRLBench import Bench, BenchConfig
from SafeRLBench.algo import PolicyGradient
from SafeRLBench.envs import LinearCar, GeneralMountainCar
from SafeRLBench import envs, tools, algo
    
# Limit NumPy's floating point display precision to 5 digits in printed output.
np.set_printoptions(
    precision=5,
)

In [None]:
# Configure SafeRLBench logging and monitoring for this notebook session.
# NOTE(review): the exact effect of each call is defined in SafeRLBench.config
# and is not visible here; the per-line notes below are assumptions to confirm.

# Called without arguments -- presumably applies the library's default log level.
config.loggerSetLevel()
# Presumably attaches a stream handler so log records appear in the cell output.
config.loggerAddStreamHandler()
# Monitor verbosity is set to 2; the meaning of the level is library-defined.
config.monitorSetVerbosity(2)

In [None]:
# Algorithm classes and environment classes that are handed to Bench further
# down in this notebook.
algorithms = [
    PolicyGradient,
]
environments = [
    LinearCar,
    GeneralMountainCar,
]

# Benchmark configuration object; entries are registered on it with
# addAlgConfig once the algorithm settings have been assembled.
test_config = BenchConfig()

# configure a policy
def policy_par(par):
    """Build the controller for a given parameter vector.

    Parameters
    ----------
    par : np.ndarray
        Parameter vector; par[0] is used as an offset and par[1:] is dotted
        with the state.

    Returns
    -------
    callable
        Function mapping a state x to ``exp(par[0] - par[1:].dot(x)) - 1.5``.
    """
    return lambda x: np.exp(par[0] - par[1:].dot(x)) - 1.5


# NOTE(review): `(3)` is the plain int 3, not a one-element tuple. It is passed
# through unchanged; confirm which form tools.Policy expects.
policy = tools.Policy(policy_par, (3))

# The REINFORCE variant uses exactly the same state -> action mapping.
policy_par_rif = policy_par


def policy_grad_rif(x, a, par):
    """Gradient of the controller output with respect to ``par``.

    With ``u = par[0] - par[1] * x[0] - par[2] * x[1]`` the controller is
    ``exp(u) - 1.5``, so its derivative with respect to ``par`` is
    ``exp(u) * [1, -x[0], -x[1]]``. The constant -1.5 offset must not enter
    the gradient (the previous version multiplied by ``exp(u) - 1.5``).

    Parameters
    ----------
    x : array-like
        State; only x[0] and x[1] are read.
    a : object
        Action. Unused here.
        NOTE(review): confirm whether tools.Policy expects the gradient of
        the mean (as computed here) or of the log-likelihood, which would
        depend on `a`.
    par : np.ndarray
        Parameter vector of length 3.

    Returns
    -------
    np.ndarray
        Length-3 gradient vector.
    """
    return np.array([1., -x[0], -x[1]]) * np.exp(par[0] - par[1:].dot(x))


policy_rif = tools.Policy(policy_par_rif, (3), gradient=policy_grad_rif, sigma=0.2)

# Settings for the PolicyGradient algorithm: one entry per estimator variant.
# The first entry uses the plain policy with the 'central_fd' estimator, the
# second uses the policy that carries an explicit gradient with 'reinforce'.
pg_config = [
    dict(policy=policy, max_it=1000, eps=1e-5, estimator='central_fd'),
    dict(policy=policy_rif, max_it=1000, eps=1e-5, estimator='reinforce'),
]

# Register the same list of algorithm settings for both environments; the
# trailing empty dict is passed as the last argument of addAlgConfig.
test_config.addAlgConfig(
    PolicyGradient,
    LinearCar,
    pg_config,
    {},
)
test_config.addAlgConfig(
    PolicyGradient,
    GeneralMountainCar,
    pg_config,
    {},
)

In [None]:
# Build the benchmark object from the algorithm list, the environment list and
# the configuration assembled in the previous cell.
benchmark = Bench(algorithms, environments, test_config)

In [None]:
# Invoke the Bench instance -- presumably this executes every configured
# algorithm/environment combination (behaviour defined by SafeRLBench; confirm).
benchmark()

In [None]:
# Pretty-print the attribute dictionary of the first entry in benchmark.tests,
# limiting nesting to two levels to keep the output readable.
pprint.pprint(
    vars(benchmark.tests[0]),
    depth=2,
)