In [1]:
import torch
import gym
from garage import wrap_experiment
from garage.envs import GymEnv
from garage.experiment.deterministic import set_seed
from garage.sampler import LocalSampler
from garage.torch.algos import TRPO

# from garage.torch.policies import GaussianMLPPolicy

from policies.gaussian_mlp_policy import GaussianMLPPolicy

from garage.torch.value_functions import GaussianMLPValueFunction
from garage.trainer import Trainer

from TRPO_DRSOM import TRPO_DRSOM

In [2]:
@wrap_experiment(log_dir='drsom_test')
def trpo_pendulum(ctxt=None,
                  seed=1,
                  env_name='MountainCarContinuous-v0',
                  n_epochs=500,
                  batch_size=1024):
    """Train a Gaussian MLP policy with TRPO_DRSOM on a Gym environment.

    Note: despite the function name, the default environment is
    ``MountainCarContinuous-v0`` (the name is kept so existing callers
    keep working).

    Args:
        ctxt: Experiment context passed through to ``Trainer``. Callers in
            this notebook do not pass it explicitly.
        seed (int): Seed handed to ``set_seed`` before anything is built.
        env_name (str): Gym environment id wrapped by ``GymEnv``.
        n_epochs (int): Number of epochs passed to ``trainer.train``.
        batch_size (int): Batch size passed to ``trainer.train``.

    """
    # Seed first so environment, policy and value-function construction
    # all happen after the seed is set.
    set_seed(seed)
    env = GymEnv(env_name)
    trainer = Trainer(ctxt)

    # Policy comes from the project-local `policies` package (imported at
    # the top of the notebook), not from garage.torch.policies.
    policy = GaussianMLPPolicy(env.spec,
                               hidden_sizes=[32, 32],
                               hidden_nonlinearity=torch.tanh,
                               output_nonlinearity=None)

    value_function = GaussianMLPValueFunction(env_spec=env.spec,
                                              hidden_sizes=(32, 32),
                                              hidden_nonlinearity=torch.tanh,
                                              output_nonlinearity=None)

    # Episodes are capped at the length declared by the environment spec.
    sampler = LocalSampler(agents=policy,
                           envs=env,
                           max_episode_length=env.spec.max_episode_length)

    # center_adv=False is kept from the original configuration.
    algo = TRPO_DRSOM(env_spec=env.spec,
                      policy=policy,
                      value_function=value_function,
                      sampler=sampler,
                      discount=0.99,
                      center_adv=False)

    trainer.setup(algo, env)
    trainer.train(n_epochs=n_epochs, batch_size=batch_size)

In [3]:
# Launch the experiment with a fixed seed. The captured output below shows
# the run logging to a `drsom_test*` directory and the per-iteration debug
# prints (g, Fg, m, Fm, G, step size, loss, constraint value).
trpo_pendulum(seed=1234)

2022-08-23 10:27:37 | [trpo_pendulum] Logging to drsom_test_37




2022-08-23 10:27:37 | [trpo_pendulum] Obtaining samples...
g vector is:
tensor([ 1.4871e-01, -1.1883e-02,  7.6975e-05,  ...,  5.0943e-02,
         2.5105e-02, -3.7175e-01])
Fg is: 
tensor([ 2.9742e-01, -6.3435e-02, -2.2587e-05,  ...,  2.7213e-01,
         1.3270e-01, -2.4280e+00])
m vector is:
tensor([ 4.8839e-02, -1.0417e-02, -3.7090e-06,  ...,  4.4686e-02,
         2.1791e-02, -3.9871e-01])
Fm is: 
tensor([ 9.7679e-02, -6.4000e-02, -2.1365e-05,  ...,  2.7456e-01,
         1.3388e-01, -2.4697e+00])
gFg is:
tensor(6.0309)
mg is:
tensor(0.9904)
G is:
tensor([[6.0309, 6.0897],
        [6.0897, 6.1719]])
eig is:
tensor([[1.1329e-02, 0.0000e+00],
        [1.2192e+01, 0.0000e+00]])
inverse is:
tensor([[ 44.6859, -44.0905],
        [-44.0905,  43.6650]])
x is:
tensor([ 1.0208, -0.8468])
xTGx is: 
tensor(0.1822)
step size is:
tensor(0.3313)
loss before mean is
tensor(-8.6596, grad_fn=<MeanBackward0>)
constraint value is: 
tensor(0.0093, grad_fn=<MeanBackward0>)
loss mean is
tensor(-8.6084, gr

  step_size = np.sqrt(2 * radius * (1. / ( torch.dot(x, G @ x) )))


g vector is:
tensor([-1.2074e-01, -6.8355e-03, -4.0551e-05,  ...,  4.5193e-02,
         9.4712e-03, -4.0050e-01])
Fg is: 
tensor([-2.4148e-01, -2.7755e+00,  1.4132e-04,  ...,  1.6872e+01,
         5.1642e+00, -1.4768e+02])
m vector is:
tensor([-6.4793e-04, -7.4472e-03,  3.7919e-07,  ...,  4.5269e-02,
         1.3856e-02, -3.9625e-01])
Fm is: 
tensor([-1.2959e-03, -2.7984e+00,  1.4121e-04,  ...,  1.7009e+01,
         5.2084e+00, -1.4887e+02])
gFg is:
tensor(369.7228)
mg is:
tensor(0.9920)
G is:
tensor([[369.7228, 372.6954],
        [372.6954, 375.7220]])
eig is:
tensor([[1.4923e-02, 0.0000e+00],
        [7.4543e+02, 0.0000e+00]])
inverse is:
tensor([[ 33.6954, -33.4239],
        [-33.4239,  33.1574]])
x is:
tensor([ 0.5380, -0.5310])
xTGx is: 
tensor(0.0112)
step size is:
tensor(1.3361)
loss before mean is
tensor(0.3697, grad_fn=<MeanBackward0>)
constraint value is: 
tensor(0.0108, grad_fn=<MeanBackward0>)
loss mean is
tensor(0.3702, grad_fn=<MeanBackward0>)
84
2022-08-23 10:28:27 | [tr