In [1]:
## Import All Needed Libraries

import os
import gym
import ray
from animalai.envs.arena_config import ArenaConfig
from animalai.envs.gym.environment import AnimalAIGym
from ray.rllib.agents import ppo

from ray.tune import register_env, tune

from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.models import ModelCatalog
from ray.tune.logger import pretty_print

from cache_model import *
from config import get_cfg
from custom_model import *

Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
## Reuse Wrapper for AnimalAI Environment

class UnityEnvWrapper(gym.Env):
    """Gym adapter exposing an ``AnimalAIGym`` environment to RLlib.

    The wrapper is built from the env-config object RLlib hands to the
    registered environment creator. It reads the ``worker_index`` and
    ``vector_index`` attributes plus the ``unity_worker_id`` and
    ``arena_to_train`` keys from that object.
    """

    def __init__(self, env_config):
        """Launch the wrapped AnimalAI environment.

        Args:
            env_config: Config object providing ``vector_index`` and
                ``worker_index`` attributes and the keys
                ``"unity_worker_id"`` (base id) and ``"arena_to_train"``
                (path passed to ``ArenaConfig``).
        """
        self.vector_index = env_config.vector_index
        self.worker_index = env_config.worker_index
        # Offset the base id by the rollout-worker index; presumably this keeps
        # the Unity instances of different workers apart -- TODO confirm.
        self.worker_id = env_config["unity_worker_id"] + env_config.worker_index
        self.env = AnimalAIGym(
            environment_filename="../examples/env/AnimalAI",
            worker_id=self.worker_id,
            flatten_branched=True,
            uint8_visual=True,
            arenas_configurations=ArenaConfig(env_config["arena_to_train"]),
        )
        # Mirror the wrapped environment's spaces so RLlib can size the model.
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space

    def reset(self):
        """Reset the wrapped environment and return whatever it returns."""
        return self.env.reset()

    def step(self, action):
        """Forward ``action`` to the wrapped environment and return its result."""
        return self.env.step(action)

    def close(self):
        """Shut down the wrapped environment.

        ``gym.Env.close`` does nothing by default, so without this override
        the wrapped environment is never closed when this wrapper is torn
        down. Safe to call more than once, and safe if ``__init__`` failed
        before ``self.env`` was assigned.
        """
        wrapped = getattr(self, "env", None)
        if wrapped is not None:
            wrapped.close()
            self.env = None

In [3]:
## Setup configuration to use

# Trainer configuration passed to PPOTrainer.
conf = {
    # Number of rollout workers (parallelism). The original literal listed
    # this key twice (0, then 1); Python keeps the last value, so 1 is what
    # was actually in effect.
    "num_workers": 1,
    # Forwarded to UnityEnvWrapper as its env_config.
    "env_config": {
        "unity_worker_id": 67,
        "arena_to_train": '../examples/configurations/curriculum/5.yml',
    },
    "model": {
        # Must match a name registered with ModelCatalog.
        "custom_model": 'my_cnn_rnn_model',
        "custom_model_config": {},
    },
    # Overridable through the RLLIB_NUM_GPUS environment variable.
    "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "2")),
    "framework": "torch",
    "train_batch_size": 500,
}
conf

{'num_workers': 1,
 'env_config': {'unity_worker_id': 67,
  'arena_to_train': '../examples/configurations/curriculum/5.yml'},
 'model': {'custom_model': 'my_cnn_rnn_model', 'custom_model_config': {}},
 'num_gpus': 2,
 'framework': 'torch',
 'train_batch_size': 500}

In [4]:
## Setup and register environment
# Stop any Ray runtime left over from an earlier run of this cell so the cell
# can be re-executed, then start a fresh one.
ray.shutdown()
# Advertise the same GPU count the trainer config requests: conf["num_gpus"]
# honours RLLIB_NUM_GPUS, so a hard-coded value here could disagree with it.
ray.init(num_cpus=4, num_gpus=conf["num_gpus"])

# Register custom models so that we can give the ID to the policy trainer
# ModelCatalog.register_custom_model("my_fc_model", MyFCForwardModel)
ModelCatalog.register_custom_model("my_rnn_model", MyRNNModel)
ModelCatalog.register_custom_model("my_convgru_model", MyConvGRUModel)  # NOTE: Only works with image observations.
ModelCatalog.register_custom_model("my_cnn_rnn_model", MyCNNRNNModel)

# Expose the wrapper under the name that is passed to the trainer as `env`.
register_env("unity_env", lambda config: UnityEnvWrapper(config))

2021-04-06 19:35:22,976	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m


In [5]:
# Build the PPO trainer on the registered "unity_env" using the settings in conf.
trainer = PPOTrainer(
    env="unity_env",
    config=conf,
)

2021-04-06 19:35:27,631	INFO trainer.py:643 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=18850)[0m Instructions for updating:
[2m[36m(pid=18850)[0m non-resource variables are not supported in the long term


[2m[36m(pid=18850)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=18850)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=18850)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=18850)[0m Preloaded 'lib_burst_generated.so'
[2m[36m(pid=18850)[0m Preloaded 'libgrpc_csharp_ext.x64.so'
[2m[36m(pid=18850)[0m Preloaded 'ScreenSelector.so'
[2m[36m(pid=18850)[0m Display 0 'C32F391 32"': 1920x1080 (primary device).
[2m[36m(pid=18850)[0m Logging to /home/azibit/.config/unity3d/Unity Technologies/UnityEnvironment/Player.log


[2m[36m(pid=18850)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=18850)[0m INFO:mlagents_envs:Connected new brain:
[2m[36m(pid=18850)[0m AnimalAI?team=0
[2m[36m(pid=18850)[0m INFO:gym_unity:1 agents within environment.


In [6]:
## Train the model
NUM_ITERATIONS = 1002    # total calls to trainer.train()
CHECKPOINT_EVERY = 200   # save whenever the loop index is a multiple of this

checkpoint = ""
for i in range(NUM_ITERATIONS):
    # Perform one iteration of training the policy with PPO
    result = trainer.train()
    print(pretty_print(result))

    if i % CHECKPOINT_EVERY == 0:
        checkpoint = trainer.save()
        print("checkpoint saved at", checkpoint)

# The periodic schedule only fires on multiples of CHECKPOINT_EVERY, so the
# weights from the last iteration would be lost whenever the final index is
# not such a multiple (as with 1002 / 200). Save them explicitly.
if NUM_ITERATIONS > 0 and (NUM_ITERATIONS - 1) % CHECKPOINT_EVERY != 0:
    checkpoint = trainer.save()
    print("checkpoint saved at", checkpoint)



custom_metrics: {}
date: 2021-04-06_19-36-16
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.2
      cur_lr: 5.0e-05
      entropy: 2.137380027770996
      entropy_coeff: 0.0
      kl: 0.025680503621697427
      policy_loss: -0.08258256614208222
      total_loss: -0.07367224097251893
      vf_explained_var: 0.6808925271034241
      vf_loss: 0.003774236887693405
  num_steps_sampled: 600
  num_steps_trained: 600
iterations_since_restore: 1
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 11.401666666666669
  ram_util_percent: 43.2
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:
  mean

custom_metrics: {}
date: 2021-04-06_19-36-54
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 16
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125000000000002
      cur_lr: 5.0e-05
      entropy: 2.098140776157379
      entropy_coeff: 0.0
      kl: 0.014817873015999794
      policy_loss: -0.35353048518300056
      total_loss: -0.3361012227833271
      vf_explained_var: 0.7000163793563843
      vf_loss: 0.002426172693958506
  num_steps_sampled: 4200
  num_steps_trained: 4200
iterations_since_restore: 7
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.31111111111111
  ram_util_percent: 43.7
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
samp

custom_metrics: {}
date: 2021-04-06_19-37-32
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 31
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 2.0195152282714846
      entropy_coeff: 0.0
      kl: 0.01767448764294386
      policy_loss: -0.04557156562805176
      total_loss: -0.016058385372161865
      vf_explained_var: 0.6289110779762268
      vf_loss: 0.002670068759471178
  num_steps_sampled: 7800
  num_steps_trained: 7800
iterations_since_restore: 13
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.288888888888888
  ram_util_percent: 43.51111111111111
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_re

custom_metrics: {}
date: 2021-04-06_19-38-10
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 45
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 2.0090863704681396
      entropy_coeff: 0.0
      kl: 0.015280828066170215
      policy_loss: -0.10137143731117249
      total_loss: -0.07410805821418762
      vf_explained_var: 0.7234721779823303
      vf_loss: 0.004055599588900805
  num_steps_sampled: 11400
  num_steps_trained: 11400
iterations_since_restore: 19
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.988888888888887
  ram_util_percent: 43.53333333333333
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_

custom_metrics: {}
date: 2021-04-06_19-38-48
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 60
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 1.9640355110168457
      entropy_coeff: 0.0
      kl: 0.015743762627243994
      policy_loss: -0.09148939847946166
      total_loss: -0.06465781331062317
      vf_explained_var: 0.7506099343299866
      vf_loss: 0.0029207180719822644
  num_steps_sampled: 15000
  num_steps_trained: 15000
iterations_since_restore: 25
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.822222222222223
  ram_util_percent: 43.54444444444445
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy

custom_metrics: {}
date: 2021-04-06_19-39-26
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 74
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 2.057873773574829
      entropy_coeff: 0.0
      kl: 0.0173580726608634
      policy_loss: -0.1436479866504669
      total_loss: -0.1139593929052353
      vf_explained_var: 0.7224838137626648
      vf_loss: 0.0033260118681937454
  num_steps_sampled: 18600
  num_steps_trained: 18600
iterations_since_restore: 31
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.577777777777776
  ram_util_percent: 43.52222222222222
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_rewa

custom_metrics: {}
date: 2021-04-06_19-40-03
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 88
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 1.9626084566116333
      entropy_coeff: 0.0
      kl: 0.01432687509804964
      policy_loss: -0.30095381289720535
      total_loss: -0.27462293580174446
      vf_explained_var: 0.6248574256896973
      vf_loss: 0.0045719298068434
  num_steps_sampled: 22200
  num_steps_trained: 22200
iterations_since_restore: 37
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.37777777777778
  ram_util_percent: 43.588888888888896
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_rew

custom_metrics: {}
date: 2021-04-06_19-40-41
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 103
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 1.9559794664382935
      entropy_coeff: 0.0
      kl: 0.013791496492922306
      policy_loss: -0.07738381624221802
      total_loss: -0.05288960337638855
      vf_explained_var: 0.5735092759132385
      vf_loss: 0.003548376215621829
  num_steps_sampled: 25800
  num_steps_trained: 25800
iterations_since_restore: 43
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.033333333333335
  ram_util_percent: 43.544444444444444
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
polic

custom_metrics: {}
date: 2021-04-06_19-41-19
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 117
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 1.9785435438156127
      entropy_coeff: 0.0
      kl: 0.014411423169076443
      policy_loss: -0.14539361149072647
      total_loss: -0.11912910342216491
      vf_explained_var: 0.604255199432373
      vf_loss: 0.0043771585915237665
  num_steps_sampled: 29400
  num_steps_trained: 29400
iterations_since_restore: 49
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.58888888888889
  ram_util_percent: 43.57777777777778
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_

custom_metrics: {}
date: 2021-04-06_19-41-58
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 132
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 1.9207234621047973
      entropy_coeff: 0.0
      kl: 0.012975145131349564
      policy_loss: -0.10386908426880836
      total_loss: -0.0793587438762188
      vf_explained_var: 0.43563681840896606
      vf_loss: 0.004804341122508049
  num_steps_sampled: 33000
  num_steps_trained: 33000
iterations_since_restore: 55
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.220000000000002
  ram_util_percent: 43.74
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min:

custom_metrics: {}
date: 2021-04-06_19-42-37
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 146
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 1.8808651685714721
      entropy_coeff: 0.0
      kl: 0.014189101941883563
      policy_loss: -0.12422436773777008
      total_loss: -0.09925947338342667
      vf_explained_var: 0.49766770005226135
      vf_loss: 0.0034151888452470303
  num_steps_sampled: 36600
  num_steps_trained: 36600
iterations_since_restore: 61
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.555555555555557
  ram_util_percent: 43.6
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min

custom_metrics: {}
date: 2021-04-06_19-43-16
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 160
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 1.8744187951087952
      entropy_coeff: 0.0
      kl: 0.012604044750332832
      policy_loss: -0.14436555840075016
      total_loss: -0.12155075743794441
      vf_explained_var: 0.6804580092430115
      vf_loss: 0.0036723993835039437
  num_steps_sampled: 40200
  num_steps_trained: 40200
iterations_since_restore: 67
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.855555555555554
  ram_util_percent: 43.611111111111114
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
poli

custom_metrics: {}
date: 2021-04-06_19-43-54
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 175
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 1.9132969617843627
      entropy_coeff: 0.0
      kl: 0.01493607573211193
      policy_loss: -0.13897525668144226
      total_loss: -0.11313604414463044
      vf_explained_var: 0.7276285290718079
      vf_loss: 0.003155038389377296
  num_steps_sampled: 43800
  num_steps_trained: 43800
iterations_since_restore: 73
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.133333333333333
  ram_util_percent: 43.6
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {

custom_metrics: {}
date: 2021-04-06_19-44-33
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 189
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 1.8556715726852417
      entropy_coeff: 0.0
      kl: 0.016490033455193044
      policy_loss: -0.08837213814258575
      total_loss: -0.05829399824142456
      vf_explained_var: 0.6194588541984558
      vf_loss: 0.005033883592113852
  num_steps_sampled: 47400
  num_steps_trained: 47400
iterations_since_restore: 79
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.4
  ram_util_percent: 43.63333333333334
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {

custom_metrics: {}
date: 2021-04-06_19-45-12
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 204
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 1.765511155128479
      entropy_coeff: 0.0
      kl: 0.01382423061877489
      policy_loss: -0.08525381386280059
      total_loss: -0.0613165944814682
      vf_explained_var: 0.6190167665481567
      vf_loss: 0.002941651176661253
  num_steps_sampled: 51000
  num_steps_trained: 51000
iterations_since_restore: 85
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.766666666666666
  ram_util_percent: 43.6
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}


custom_metrics: {}
date: 2021-04-06_19-45-51
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 218
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 1.6889704465866089
      entropy_coeff: 0.0
      kl: 0.017968325689435006
      policy_loss: -0.2352360725402832
      total_loss: -0.20435777306556702
      vf_explained_var: 0.5096589922904968
      vf_loss: 0.003588908677920699
  num_steps_sampled: 54600
  num_steps_trained: 54600
iterations_since_restore: 91
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.87777777777778
  ram_util_percent: 43.63333333333334
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_r

custom_metrics: {}
date: 2021-04-06_19-46-29
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 232
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 1.7861400842666626
      entropy_coeff: 0.0
      kl: 0.02015077043324709
      policy_loss: -0.08981500566005707
      total_loss: -0.056244418025016785
      vf_explained_var: 0.718684196472168
      vf_loss: 0.00296660017920658
  num_steps_sampled: 58200
  num_steps_trained: 58200
iterations_since_restore: 97
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 22.38888888888889
  ram_util_percent: 43.788888888888884
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_r

custom_metrics: {}
date: 2021-04-06_19-47-08
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 247
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.7523252964019775
      entropy_coeff: 0.0
      kl: 0.014123204909265042
      policy_loss: -0.12446635439991952
      total_loss: -0.08938439786434174
      vf_explained_var: 0.5618361234664917
      vf_loss: 0.002907524723559618
  num_steps_sampled: 61800
  num_steps_trained: 61800
iterations_since_restore: 103
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.800000000000004
  ram_util_percent: 43.76
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampl

custom_metrics: {}
date: 2021-04-06_19-47-47
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 261
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.7507063388824462
      entropy_coeff: 0.0
      kl: 0.013333377242088319
      policy_loss: -0.14250193238258363
      total_loss: -0.10898012518882752
      vf_explained_var: 0.6380909085273743
      vf_loss: 0.0031466851476579906
  num_steps_sampled: 65400
  num_steps_trained: 65400
iterations_since_restore: 109
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.755555555555556
  ram_util_percent: 43.68888888888889
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_

custom_metrics: {}
date: 2021-04-06_19-48-26
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 276
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.6648890495300293
      entropy_coeff: 0.0
      kl: 0.011478576622903348
      policy_loss: -0.02860870361328125
      total_loss: 0.0006927698850631714
      vf_explained_var: 0.626150906085968
      vf_loss: 0.003151835175231099
  num_steps_sampled: 69000
  num_steps_trained: 69000
iterations_since_restore: 115
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.688888888888886
  ram_util_percent: 43.7
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sample

custom_metrics: {}
date: 2021-04-06_19-49-04
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 290
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.6560429811477662
      entropy_coeff: 0.0
      kl: 0.015350187756121158
      policy_loss: -0.22819487303495406
      total_loss: -0.1895612120628357
      vf_explained_var: 0.5803403854370117
      vf_loss: 0.003664003987796605
  num_steps_sampled: 72600
  num_steps_trained: 72600
iterations_since_restore: 121
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.11
  ram_util_percent: 43.7
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:
  mean

custom_metrics: {}
date: 2021-04-06_19-49-43
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 304
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.6438140273094177
      entropy_coeff: 0.0
      kl: 0.012627571122720838
      policy_loss: -0.44397748447954655
      total_loss: -0.41188739612698555
      vf_explained_var: 0.6099023222923279
      vf_loss: 0.0033229085383936763
  num_steps_sampled: 76200
  num_steps_trained: 76200
iterations_since_restore: 127
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.8125
  ram_util_percent: 43.7
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:
  

custom_metrics: {}
date: 2021-04-06_19-50-22
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 319
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.6436578035354614
      entropy_coeff: 0.0
      kl: 0.013633421994745732
      policy_loss: -0.1663101613521576
      total_loss: -0.1319973886013031
      vf_explained_var: 0.5512118339538574
      vf_loss: 0.0032541206339374183
  num_steps_sampled: 79800
  num_steps_trained: 79800
iterations_since_restore: 133
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.43
  ram_util_percent: 43.7
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:
  mean

custom_metrics: {}
date: 2021-04-06_19-51-00
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 333
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.676300287246704
      entropy_coeff: 0.0
      kl: 0.012756672780960798
      policy_loss: -0.1202220469713211
      total_loss: -0.08783976584672928
      vf_explained_var: 0.7025639414787292
      vf_loss: 0.0033209854271262886
  num_steps_sampled: 83400
  num_steps_trained: 83400
iterations_since_restore: 139
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.77777777777778
  ram_util_percent: 43.72222222222222
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min

custom_metrics: {}
date: 2021-04-06_19-51-39
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 348
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.6451117992401123
      entropy_coeff: 0.0
      kl: 0.012784709222614765
      policy_loss: -0.06983385533094406
      total_loss: -0.03843375742435455
      vf_explained_var: 0.751315712928772
      vf_loss: 0.00227491888217628
  num_steps_sampled: 87000
  num_steps_trained: 87000
iterations_since_restore: 145
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.4
  ram_util_percent: 43.970000000000006
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_

custom_metrics: {}
date: 2021-04-06_19-52-18
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 362
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.706741952896118
      entropy_coeff: 0.0
      kl: 0.014682556502521038
      policy_loss: -0.21184605360031128
      total_loss: -0.1758398875594139
      vf_explained_var: 0.7612128853797913
      vf_loss: 0.002557482453994453
  num_steps_sampled: 90600
  num_steps_trained: 90600
iterations_since_restore: 151
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.555555555555557
  ram_util_percent: 44.02222222222222
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min

custom_metrics: {}
date: 2021-04-06_19-52-56
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 376
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.5796494781970978
      entropy_coeff: 0.0
      kl: 0.012940347427502275
      policy_loss: -0.2254367545247078
      total_loss: -0.19261863082647324
      vf_explained_var: 0.6937543749809265
      vf_loss: 0.003338400216307491
  num_steps_sampled: 94200
  num_steps_trained: 94200
iterations_since_restore: 157
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.525
  ram_util_percent: 43.8
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:
  mea

custom_metrics: {}
date: 2021-04-06_19-53-35
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 391
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.6724619150161744
      entropy_coeff: 0.0
      kl: 0.012965496443212033
      policy_loss: -0.14659760296344757
      total_loss: -0.11198788285255432
      vf_explained_var: 0.6991587281227112
      vf_loss: 0.005072676204144954
  num_steps_sampled: 97800
  num_steps_trained: 97800
iterations_since_restore: 163
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.522222222222226
  ram_util_percent: 43.73333333333333
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_m

custom_metrics: {}
date: 2021-04-06_19-54-12
done: false
episode_len_mean: 248.92
episode_reward_max: 0.43366669584065676
episode_reward_mean: -0.9856632648129016
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 406
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.5804501175880432
      entropy_coeff: 0.0
      kl: 0.012742665130645037
      policy_loss: 0.033539265394210815
      total_loss: 0.06929738074541092
      vf_explained_var: 0.3187621235847473
      vf_loss: 0.00672873726580292
  num_steps_sampled: 101400
  num_steps_trained: 101400
iterations_since_restore: 169
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.84444444444445
  ram_util_percent: 43.77777777777778
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_m

custom_metrics: {}
date: 2021-04-06_19-54-49
done: false
episode_len_mean: 248.92
episode_reward_max: 0.43366669584065676
episode_reward_mean: -0.9856632648129016
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 420
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.6420427560806274
      entropy_coeff: 0.0
      kl: 0.013913681730628014
      policy_loss: -0.17800115048885345
      total_loss: -0.14159391820430756
      vf_explained_var: 0.5623072385787964
      vf_loss: 0.004710138717200607
  num_steps_sampled: 105000
  num_steps_trained: 105000
iterations_since_restore: 175
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.57777777777778
  ram_util_percent: 43.74444444444444
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward

custom_metrics: {}
date: 2021-04-06_19-55-25
done: false
episode_len_mean: 248.92
episode_reward_max: 0.43366669584065676
episode_reward_mean: -0.9856632648129016
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 434
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.542280673980713
      entropy_coeff: 0.0
      kl: 0.014184980653226376
      policy_loss: -0.42970276717096567
      total_loss: -0.3934021946042776
      vf_explained_var: 0.7126952409744263
      vf_loss: 0.003985421732068062
  num_steps_sampled: 108600
  num_steps_trained: 108600
iterations_since_restore: 181
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.744444444444444
  ram_util_percent: 43.8
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampl

custom_metrics: {}
date: 2021-04-06_19-56-01
done: false
episode_len_mean: 248.92
episode_reward_max: 0.43366669584065676
episode_reward_mean: -0.9856632648129016
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 449
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.6150476336479187
      entropy_coeff: 0.0
      kl: 0.014024271629750729
      policy_loss: -0.04149797558784485
      total_loss: -0.00629013404250145
      vf_explained_var: 0.6136348247528076
      vf_loss: 0.0032587892783340067
  num_steps_sampled: 112200
  num_steps_trained: 112200
iterations_since_restore: 187
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.788888888888888
  ram_util_percent: 43.77777777777778
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_rewa

custom_metrics: {}
date: 2021-04-06_19-56-38
done: false
episode_len_mean: 248.92
episode_reward_max: 0.43366669584065676
episode_reward_mean: -0.9856632648129016
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 463
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.6069331765174866
      entropy_coeff: 0.0
      kl: 0.01490860665217042
      policy_loss: -0.28668075799942017
      total_loss: -0.24792621657252312
      vf_explained_var: 0.64641934633255
      vf_loss: 0.00479086849372834
  num_steps_sampled: 115800
  num_steps_trained: 115800
iterations_since_restore: 193
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.4125
  ram_util_percent: 43.975
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:
  

custom_metrics: {}
date: 2021-04-06_19-57-14
done: false
episode_len_mean: 248.47
episode_reward_max: 0.43366669584065676
episode_reward_mean: -0.9738599313329905
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 478
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.473493754863739
      entropy_coeff: 0.0
      kl: 0.01159685361199081
      policy_loss: 0.029388390481472015
      total_loss: 0.059294819831848145
      vf_explained_var: 0.7160578370094299
      vf_loss: 0.0034873418626375496
  num_steps_sampled: 119400
  num_steps_trained: 119400
iterations_since_restore: 199
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.65
  ram_util_percent: 43.8
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:
  m

custom_metrics: {}
date: 2021-04-06_19-57-51
done: false
episode_len_mean: 248.07
episode_reward_max: 0.43366669584065676
episode_reward_mean: -0.9622699313983322
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 492
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.5597800016403198
      entropy_coeff: 0.0
      kl: 0.015373860951513052
      policy_loss: -0.12426773458719254
      total_loss: -0.08376891165971756
      vf_explained_var: 0.5616538524627686
      vf_loss: 0.005475240061059594
  num_steps_sampled: 123000
  num_steps_trained: 123000
iterations_since_restore: 205
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.9125
  ram_util_percent: 43.8
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:


custom_metrics: {}
date: 2021-04-06_19-58-27
done: false
episode_len_mean: 249.15
episode_reward_max: 0.18033341690897942
episode_reward_mean: -0.97660659766756
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 507
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.6162545680999756
      entropy_coeff: 0.0
      kl: 0.013822040520608425
      policy_loss: 0.10784560488536954
      total_loss: 0.14245324302464724
      vf_explained_var: 0.6947664022445679
      vf_loss: 0.003119278117083013
  num_steps_sampled: 126600
  num_steps_trained: 126600
iterations_since_restore: 211
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.1125
  ram_util_percent: 43.8
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:
  me

custom_metrics: {}
date: 2021-04-06_19-59-03
done: false
episode_len_mean: 249.15
episode_reward_max: 0.18033341690897942
episode_reward_mean: -0.97660659766756
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 521
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.5158425569534302
      entropy_coeff: 0.0
      kl: 0.014636798528954387
      policy_loss: -0.2344856783747673
      total_loss: -0.19708005152642727
      vf_explained_var: 0.7137771248817444
      vf_loss: 0.004061156301759183
  num_steps_sampled: 130200
  num_steps_trained: 130200
iterations_since_restore: 217
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.477777777777778
  ram_util_percent: 43.8
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sample

custom_metrics: {}
date: 2021-04-06_19-59-39
done: false
episode_len_mean: 249.15
episode_reward_max: 0.18033341690897942
episode_reward_mean: -0.97660659766756
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 535
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.5346999764442444
      entropy_coeff: 0.0
      kl: 0.015667750732973218
      policy_loss: -0.21804585307836533
      total_loss: -0.1789664551615715
      vf_explained_var: 0.6680848002433777
      vf_loss: 0.0033863132703118026
  num_steps_sampled: 133800
  num_steps_trained: 133800
iterations_since_restore: 223
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.655555555555555
  ram_util_percent: 43.888888888888886
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward

custom_metrics: {}
date: 2021-04-06_20-00-16
done: false
episode_len_mean: 249.15
episode_reward_max: 0.18033341690897942
episode_reward_mean: -0.97660659766756
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 550
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.4666442275047302
      entropy_coeff: 0.0
      kl: 0.012766268104314804
      policy_loss: -0.265913805924356
      total_loss: -0.23274181690067053
      vf_explained_var: 0.6986173987388611
      vf_loss: 0.004088806745130569
  num_steps_sampled: 137400
  num_steps_trained: 137400
iterations_since_restore: 229
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.95
  ram_util_percent: 43.824999999999996
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sample

custom_metrics: {}
date: 2021-04-06_20-00-52
done: false
episode_len_mean: 249.15
episode_reward_max: 0.18033341690897942
episode_reward_mean: -0.97660659766756
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 564
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.54228937625885
      entropy_coeff: 0.0
      kl: 0.012788008200004697
      policy_loss: -0.34561020880937576
      total_loss: -0.3138560950756073
      vf_explained_var: 0.7388550043106079
      vf_loss: 0.0026214135286863893
  num_steps_sampled: 141000
  num_steps_trained: 141000
iterations_since_restore: 235
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.6875
  ram_util_percent: 43.8875
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:
 

custom_metrics: {}
date: 2021-04-06_20-01-28
done: false
episode_len_mean: 249.6
episode_reward_max: 0.15900006238371134
episode_reward_mean: -0.9884099311474711
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 579
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.4890411794185638
      entropy_coeff: 0.0
      kl: 0.015672652749344707
      policy_loss: -0.04196023941040039
      total_loss: -0.002602614462375641
      vf_explained_var: 0.6782193183898926
      vf_loss: 0.0036533831153064966
  num_steps_sampled: 144600
  num_steps_trained: 144600
iterations_since_restore: 241
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 20.9375
  ram_util_percent: 43.95
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf

custom_metrics: {}
date: 2021-04-06_20-02-05
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 593
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.571394681930542
      entropy_coeff: 0.0
      kl: 0.01669596927240491
      policy_loss: -0.03327985107898712
      total_loss: 0.008673014119267464
      vf_explained_var: 0.6771365404129028
      vf_loss: 0.003917321766493842
  num_steps_sampled: 148200
  num_steps_trained: 148200
iterations_since_restore: 247
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.1625
  ram_util_percent: 43.8125
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:


custom_metrics: {}
date: 2021-04-06_20-02-41
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 607
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.5782431662082672
      entropy_coeff: 0.0
      kl: 0.014473479939624667
      policy_loss: -0.3535441420972347
      total_loss: -0.31666689179837704
      vf_explained_var: 0.630192756652832
      vf_loss: 0.0039049084880389273
  num_steps_sampled: 151800
  num_steps_trained: 151800
iterations_since_restore: 253
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.666666666666668
  ram_util_percent: 43.8
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampl

custom_metrics: {}
date: 2021-04-06_20-03-17
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 622
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.5035883486270905
      entropy_coeff: 0.0
      kl: 0.013161833165213466
      policy_loss: -0.22836332768201828
      total_loss: -0.1912134401500225
      vf_explained_var: 0.5633014440536499
      vf_loss: 0.007165574585087597
  num_steps_sampled: 155400
  num_steps_trained: 155400
iterations_since_restore: 259
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.61111111111111
  ram_util_percent: 43.83333333333333
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_m

custom_metrics: {}
date: 2021-04-06_20-03-54
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 636
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.4665541648864746
      entropy_coeff: 0.0
      kl: 0.013125311816111207
      policy_loss: -0.20943444594740868
      total_loss: -0.17337781190872192
      vf_explained_var: 0.6237096786499023
      vf_loss: 0.006155538489110768
  num_steps_sampled: 159000
  num_steps_trained: 159000
iterations_since_restore: 265
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.833333333333332
  ram_util_percent: 43.84444444444444
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward

custom_metrics: {}
date: 2021-04-06_20-04-30
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 651
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.4987370669841766
      entropy_coeff: 0.0
      kl: 0.015168786980211735
      policy_loss: -0.14839368034154177
      total_loss: -0.11160353384912014
      vf_explained_var: 0.7542858123779297
      vf_loss: 0.0022337225382216275
  num_steps_sampled: 162600
  num_steps_trained: 162600
iterations_since_restore: 271
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.1875
  ram_util_percent: 43.9
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:


custom_metrics: {}
date: 2021-04-06_20-05-06
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 665
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.517563134431839
      entropy_coeff: 0.0
      kl: 0.01954572182148695
      policy_loss: -0.16855433210730553
      total_loss: -0.12123488634824753
      vf_explained_var: 0.742118775844574
      vf_loss: 0.0027918717823922634
  num_steps_sampled: 166200
  num_steps_trained: 166200
iterations_since_restore: 277
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.200000000000003
  ram_util_percent: 43.8875
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sam

custom_metrics: {}
date: 2021-04-06_20-05-43
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 679
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.4129957258701324
      entropy_coeff: 0.0
      kl: 0.015405331505462527
      policy_loss: -0.20881860423833132
      total_loss: -0.16921997629106045
      vf_explained_var: 0.5600215792655945
      vf_loss: 0.004503362433752045
  num_steps_sampled: 169800
  num_steps_trained: 169800
iterations_since_restore: 283
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.974999999999998
  ram_util_percent: 43.9
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
samp

custom_metrics: {}
date: 2021-04-06_20-06-19
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 694
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.4851202070713043
      entropy_coeff: 0.0
      kl: 0.014378592371940613
      policy_loss: -0.06857389956712723
      total_loss: -0.03210487216711044
      vf_explained_var: 0.7160192728042603
      vf_loss: 0.003712787583936006
  num_steps_sampled: 173400
  num_steps_trained: 173400
iterations_since_restore: 289
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.555555555555557
  ram_util_percent: 43.87777777777777
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward

custom_metrics: {}
date: 2021-04-06_20-06-56
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 708
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.5023502707481384
      entropy_coeff: 0.0
      kl: 0.01692064432427287
      policy_loss: -0.21418246626853943
      total_loss: -0.1710263192653656
      vf_explained_var: 0.6665444374084473
      vf_loss: 0.004608803312294185
  num_steps_sampled: 177000
  num_steps_trained: 177000
iterations_since_restore: 295
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.7125
  ram_util_percent: 44.125
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:
 

custom_metrics: {}
date: 2021-04-06_20-07-32
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 723
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.417286366224289
      entropy_coeff: 0.0
      kl: 0.01817540777847171
      policy_loss: -0.037164926528930664
      total_loss: 0.007683251053094864
      vf_explained_var: 0.6365442872047424
      vf_loss: 0.0034423256292939186
  num_steps_sampled: 180600
  num_steps_trained: 180600
iterations_since_restore: 301
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.4875
  ram_util_percent: 43.9
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:
 

custom_metrics: {}
date: 2021-04-06_20-08-08
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 737
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.331648975610733
      entropy_coeff: 0.0
      kl: 0.01798039861023426
      policy_loss: -0.20443806797266006
      total_loss: -0.15968404710292816
      vf_explained_var: 0.695279598236084
      vf_loss: 0.003792451345361769
  num_steps_sampled: 184200
  num_steps_trained: 184200
iterations_since_restore: 307
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.5875
  ram_util_percent: 43.9
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:
  me

custom_metrics: {}
date: 2021-04-06_20-08-45
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 751
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.5692394077777863
      entropy_coeff: 0.0
      kl: 0.015216301893815398
      policy_loss: -0.33868988417088985
      total_loss: -0.2963544875383377
      vf_explained_var: 0.48868173360824585
      vf_loss: 0.007670746883377433
  num_steps_sampled: 187800
  num_steps_trained: 187800
iterations_since_restore: 313
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.322222222222223
  ram_util_percent: 43.93333333333333
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward

custom_metrics: {}
date: 2021-04-06_20-09-21
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 766
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.3065628707408905
      entropy_coeff: 0.0
      kl: 0.015112994238734245
      policy_loss: -0.07711033523082733
      total_loss: -0.03588104993104935
      vf_explained_var: 0.5114762783050537
      vf_loss: 0.006799991009756923
  num_steps_sampled: 191400
  num_steps_trained: 191400
iterations_since_restore: 319
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.844444444444445
  ram_util_percent: 43.93333333333333
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward

custom_metrics: {}
date: 2021-04-06_20-09-57
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 780
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.3859401047229767
      entropy_coeff: 0.0
      kl: 0.019042791333049536
      policy_loss: -0.21094395965337753
      total_loss: -0.16247176378965378
      vf_explained_var: 0.6191245317459106
      vf_loss: 0.005090337013825774
  num_steps_sampled: 195000
  num_steps_trained: 195000
iterations_since_restore: 325
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.2875
  ram_util_percent: 43.9375
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf

custom_metrics: {}
date: 2021-04-06_20-10-34
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 795
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.3192643523216248
      entropy_coeff: 0.0
      kl: 0.017697835341095924
      policy_loss: 0.05701478756964207
      total_loss: 0.10354947112500668
      vf_explained_var: 0.6225041747093201
      vf_loss: 0.0062167854339350015
  num_steps_sampled: 198600
  num_steps_trained: 198600
iterations_since_restore: 331
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.9625
  ram_util_percent: 43.95
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:
 

custom_metrics: {}
date: 2021-04-06_20-11-10
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 809
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.4164075553417206
      entropy_coeff: 0.0
      kl: 0.016299822833389044
      policy_loss: -0.0849885493516922
      total_loss: -0.04497479647397995
      vf_explained_var: 0.6662552952766418
      vf_loss: 0.0028807055205106735
  num_steps_sampled: 202200
  num_steps_trained: 202200
iterations_since_restore: 337
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.15
  ram_util_percent: 43.9875
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_perf:


custom_metrics: {}
date: 2021-04-06_20-11-47
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 823
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.4026022851467133
      entropy_coeff: 0.0
      kl: 0.017383635276928544
      policy_loss: -0.2553796982392669
      total_loss: -0.21102794725447893
      vf_explained_var: 0.6633594632148743
      vf_loss: 0.0047496966435573995
  num_steps_sampled: 205800
  num_steps_trained: 205800
iterations_since_restore: 343
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.225
  ram_util_percent: 44.16250000000001
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sam

custom_metrics: {}
date: 2021-04-06_20-12-23
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 838
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.467744916677475
      entropy_coeff: 0.0
      kl: 0.012878959532827139
      policy_loss: -0.10445303842425346
      total_loss: -0.05606778711080551
      vf_explained_var: 0.6147524118423462
      vf_loss: 0.0043754231301136315
  num_steps_sampled: 209400
  num_steps_trained: 209400
iterations_since_restore: 349
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.424999999999997
  ram_util_percent: 44.0
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_mi

custom_metrics: {}
date: 2021-04-06_20-12-59
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 852
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.3704239428043365
      entropy_coeff: 0.0
      kl: 0.014871250605210662
      policy_loss: -0.11669187899678946
      total_loss: -0.05783285200595856
      vf_explained_var: 0.57737135887146
      vf_loss: 0.008041149121709168
  num_steps_sampled: 213000
  num_steps_trained: 213000
iterations_since_restore: 355
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.88888888888889
  ram_util_percent: 43.98888888888889
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy

custom_metrics: {}
date: 2021-04-06_20-13-36
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 867
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.4538161158561707
      entropy_coeff: 0.0
      kl: 0.00925852742511779
      policy_loss: 0.07091932417824864
      total_loss: 0.10631960537284613
      vf_explained_var: 0.6203499436378479
      vf_loss: 0.003762166335945949
  num_steps_sampled: 216600
  num_steps_trained: 216600
iterations_since_restore: 361
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.477777777777774
  ram_util_percent: 43.94444444444444
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy

custom_metrics: {}
date: 2021-04-06_20-14-12
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 881
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.4927839636802673
      entropy_coeff: 0.0
      kl: 0.011482711881399155
      policy_loss: -0.06032160855829716
      total_loss: -0.01773341652005911
      vf_explained_var: 0.7669752240180969
      vf_loss: 0.003349628415890038
  num_steps_sampled: 220200
  num_steps_trained: 220200
iterations_since_restore: 367
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.811111111111114
  ram_util_percent: 43.92222222222222
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
pol

custom_metrics: {}
date: 2021-04-06_20-14-48
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 895
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.5224160850048065
      entropy_coeff: 0.0
      kl: 0.011666105827316642
      policy_loss: -0.2918037548661232
      total_loss: -0.24832924455404282
      vf_explained_var: 0.7198032140731812
      vf_loss: 0.0036092207301408052
  num_steps_sampled: 223800
  num_steps_trained: 223800
iterations_since_restore: 373
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.288888888888888
  ram_util_percent: 44.0
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_mi

custom_metrics: {}
date: 2021-04-06_20-15-25
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 910
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.4694482684135437
      entropy_coeff: 0.0
      kl: 0.01110509317368269
      policy_loss: -0.09247913584113121
      total_loss: -0.05118591710925102
      vf_explained_var: 0.710502564907074
      vf_loss: 0.0033450539340265095
  num_steps_sampled: 227400
  num_steps_trained: 227400
iterations_since_restore: 379
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.82222222222222
  ram_util_percent: 43.98888888888889
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
polic

custom_metrics: {}
date: 2021-04-06_20-16-01
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 924
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.48496475815773
      entropy_coeff: 0.0
      kl: 0.01416966482065618
      policy_loss: -0.3057148903608322
      total_loss: -0.25395533442497253
      vf_explained_var: 0.8073484897613525
      vf_loss: 0.0033391453325748444
  num_steps_sampled: 231000
  num_steps_trained: 231000
iterations_since_restore: 385
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.7875
  ram_util_percent: 43.975
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sample

custom_metrics: {}
date: 2021-04-06_20-16-38
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 939
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.4662085175514221
      entropy_coeff: 0.0
      kl: 0.010764943668618798
      policy_loss: 0.06286296993494034
      total_loss: 0.10301584750413895
      vf_explained_var: 0.7480844259262085
      vf_loss: 0.0033670476404950023
  num_steps_sampled: 234600
  num_steps_trained: 234600
iterations_since_restore: 391
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.3875
  ram_util_percent: 44.212500000000006
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_

custom_metrics: {}
date: 2021-04-06_20-17-14
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 953
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.4161919057369232
      entropy_coeff: 0.0
      kl: 0.011383194127120078
      policy_loss: -0.10030404850840569
      total_loss: -0.058467328548431396
      vf_explained_var: 0.7081937789916992
      vf_loss: 0.0029382073844317347
  num_steps_sampled: 238200
  num_steps_trained: 238200
iterations_since_restore: 397
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.575000000000003
  ram_util_percent: 44.0
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_

custom_metrics: {}
date: 2021-04-06_20-17-50
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 967
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.4732467234134674
      entropy_coeff: 0.0
      kl: 0.010791649692691863
      policy_loss: -0.319497711956501
      total_loss: -0.2806574273854494
      vf_explained_var: 0.8119608759880066
      vf_loss: 0.001963198068551719
  num_steps_sampled: 241800
  num_steps_trained: 241800
iterations_since_restore: 403
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.0375
  ram_util_percent: 44.0375
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampl

custom_metrics: {}
date: 2021-04-06_20-18-27
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 982
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.348519742488861
      entropy_coeff: 0.0
      kl: 0.01191798597574234
      policy_loss: -0.19639165699481964
      total_loss: -0.15047748666256666
      vf_explained_var: 0.7198580503463745
      vf_loss: 0.0051881877006962895
  num_steps_sampled: 245400
  num_steps_trained: 245400
iterations_since_restore: 409
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.537499999999998
  ram_util_percent: 44.0
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min

custom_metrics: {}
date: 2021-04-06_20-19-03
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 996
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.4939108490943909
      entropy_coeff: 0.0
      kl: 0.014347477816045284
      policy_loss: -0.25540953129529953
      total_loss: -0.20164597779512405
      vf_explained_var: 0.697197437286377
      vf_loss: 0.004735559341497719
  num_steps_sampled: 249000
  num_steps_trained: 249000
iterations_since_restore: 415
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.4125
  ram_util_percent: 44.075
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
samp

custom_metrics: {}
date: 2021-04-06_20-19-40
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1011
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.3910482227802277
      entropy_coeff: 0.0
      kl: 0.010431096889078617
      policy_loss: -0.009538372978568077
      total_loss: 0.030206261202692986
      vf_explained_var: 0.6676706671714783
      vf_loss: 0.004099617130123079
  num_steps_sampled: 252600
  num_steps_trained: 252600
iterations_since_restore: 421
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.744444444444444
  ram_util_percent: 44.1
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_

custom_metrics: {}
date: 2021-04-06_20-20-16
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1025
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.413139969110489
      entropy_coeff: 0.0
      kl: 0.013098847353830934
      policy_loss: -0.01533670723438263
      total_loss: 0.033316198736429214
      vf_explained_var: 0.7451305985450745
      vf_loss: 0.003891669475706294
  num_steps_sampled: 256200
  num_steps_trained: 256200
iterations_since_restore: 427
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.01111111111111
  ram_util_percent: 44.07777777777778
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
poli

custom_metrics: {}
date: 2021-04-06_20-20-53
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1039
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.3261790573596954
      entropy_coeff: 0.0
      kl: 0.014537411509081721
      policy_loss: -0.19375504180788994
      total_loss: -0.1405739113688469
      vf_explained_var: 0.6422343850135803
      vf_loss: 0.003504090360365808
  num_steps_sampled: 259800
  num_steps_trained: 259800
iterations_since_restore: 433
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.825
  ram_util_percent: 44.1
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sample

custom_metrics: {}
date: 2021-04-06_20-21-29
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1054
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.4711729288101196
      entropy_coeff: 0.0
      kl: 0.012299993773922324
      policy_loss: -0.11617902666330338
      total_loss: -0.06889177858829498
      vf_explained_var: 0.6849279999732971
      vf_loss: 0.005255857307929546
  num_steps_sampled: 263400
  num_steps_trained: 263400
iterations_since_restore: 439
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.933333333333334
  ram_util_percent: 44.2
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_m

custom_metrics: {}
date: 2021-04-06_20-22-05
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1068
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.4799549281597137
      entropy_coeff: 0.0
      kl: 0.00968305068090558
      policy_loss: -0.26843031123280525
      total_loss: -0.23238113895058632
      vf_explained_var: 0.8265381455421448
      vf_loss: 0.002960392041131854
  num_steps_sampled: 267000
  num_steps_trained: 267000
iterations_since_restore: 445
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.2625
  ram_util_percent: 44.1
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampl

custom_metrics: {}
date: 2021-04-06_20-22-41
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1083
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.4400324523448944
      entropy_coeff: 0.0
      kl: 0.012007427983917296
      policy_loss: -0.02787527861073613
      total_loss: 0.017344280146062374
      vf_explained_var: 0.7637852430343628
      vf_loss: 0.004187930928310379
  num_steps_sampled: 270600
  num_steps_trained: 270600
iterations_since_restore: 451
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.037499999999998
  ram_util_percent: 44.1
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_m

custom_metrics: {}
date: 2021-04-06_20-23-18
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1097
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.373308539390564
      entropy_coeff: 0.0
      kl: 0.012393456418067217
      policy_loss: -0.05489920452237129
      total_loss: -0.009570896625518799
      vf_explained_var: 0.7413811087608337
      vf_loss: 0.0029775438306387514
  num_steps_sampled: 274200
  num_steps_trained: 274200
iterations_since_restore: 457
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.7
  ram_util_percent: 44.075
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
samp

custom_metrics: {}
date: 2021-04-06_20-23-54
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1111
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.3937362432479858
      entropy_coeff: 0.0
      kl: 0.012208011467009783
      policy_loss: -0.29517505317926407
      total_loss: -0.2505999132990837
      vf_explained_var: 0.7765116691589355
      vf_loss: 0.0028581013320945203
  num_steps_sampled: 277800
  num_steps_trained: 277800
iterations_since_restore: 463
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.32222222222222
  ram_util_percent: 44.1
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_mi

custom_metrics: {}
date: 2021-04-06_20-24-30
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1126
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.334972620010376
      entropy_coeff: 0.0
      kl: 0.012571965577080846
      policy_loss: -0.06465914659202099
      total_loss: -0.015442395582795143
      vf_explained_var: 0.6397714018821716
      vf_loss: 0.006255982705624774
  num_steps_sampled: 281400
  num_steps_trained: 281400
iterations_since_restore: 469
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.1625
  ram_util_percent: 44.1
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
samp

custom_metrics: {}
date: 2021-04-06_20-25-06
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1140
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.346297174692154
      entropy_coeff: 0.0
      kl: 0.015751238213852048
      policy_loss: -0.12354272603988647
      total_loss: -0.06759569048881531
      vf_explained_var: 0.8363919258117676
      vf_loss: 0.0021220804774202406
  num_steps_sampled: 285000
  num_steps_trained: 285000
iterations_since_restore: 475
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.15
  ram_util_percent: 44.1
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sample

custom_metrics: {}
date: 2021-04-06_20-25-43
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1155
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.5010643601417542
      entropy_coeff: 0.0
      kl: 0.011278582736849785
      policy_loss: -0.06664363853633404
      total_loss: -0.022023314610123634
      vf_explained_var: 0.6576405763626099
      vf_loss: 0.006079288781620562
  num_steps_sampled: 288600
  num_steps_trained: 288600
iterations_since_restore: 481
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.125
  ram_util_percent: 44.1
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
samp

custom_metrics: {}
date: 2021-04-06_20-26-19
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1169
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.4202414155006409
      entropy_coeff: 0.0
      kl: 0.011470265686511993
      policy_loss: -0.056205395609140396
      total_loss: -0.012225847691297531
      vf_explained_var: 0.7510093450546265
      vf_loss: 0.0047835318255238235
  num_steps_sampled: 292200
  num_steps_trained: 292200
iterations_since_restore: 487
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.744444444444444
  ram_util_percent: 44.111111111111114
pid: 18605
policy_reward_max: {}
policy_reward_mean: {

custom_metrics: {}
date: 2021-04-06_20-26-56
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1183
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.4482060074806213
      entropy_coeff: 0.0
      kl: 0.010939356638118625
      policy_loss: -0.3046083841472864
      total_loss: -0.26271916925907135
      vf_explained_var: 0.6918928623199463
      vf_loss: 0.0045073866785969585
  num_steps_sampled: 295800
  num_steps_trained: 295800
iterations_since_restore: 493
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.3
  ram_util_percent: 44.4
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler

custom_metrics: {}
date: 2021-04-06_20-27-32
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1198
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.4235668182373047
      entropy_coeff: 0.0
      kl: 0.012583894655108452
      policy_loss: -0.15109251253306866
      total_loss: -0.10525684058666229
      vf_explained_var: 0.772890031337738
      vf_loss: 0.0028341416618786752
  num_steps_sampled: 299400
  num_steps_trained: 299400
iterations_since_restore: 499
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.766666666666666
  ram_util_percent: 44.13333333333334
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
po

custom_metrics: {}
date: 2021-04-06_20-28-08
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1212
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.4664410948753357
      entropy_coeff: 0.0
      kl: 0.013184037990868092
      policy_loss: -0.20327620208263397
      total_loss: -0.15546276792883873
      vf_explained_var: 0.7753922939300537
      vf_loss: 0.002761111070867628
  num_steps_sampled: 303000
  num_steps_trained: 303000
iterations_since_restore: 505
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.633333333333333
  ram_util_percent: 44.17777777777778
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
po

custom_metrics: {}
date: 2021-04-06_20-28-44
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1227
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.300685167312622
      entropy_coeff: 0.0
      kl: 0.010537387104704976
      policy_loss: 0.022155314683914185
      total_loss: 0.06190745532512665
      vf_explained_var: 0.7527147531509399
      vf_loss: 0.0037439310108311474
  num_steps_sampled: 306600
  num_steps_trained: 306600
iterations_since_restore: 511
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.45555555555556
  ram_util_percent: 44.2
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min

custom_metrics: {}
date: 2021-04-06_20-29-20
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1241
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.3194470405578613
      entropy_coeff: 0.0
      kl: 0.013481060974299908
      policy_loss: 0.0036445632576942444
      total_loss: 0.0537073127925396
      vf_explained_var: 0.7295528650283813
      vf_loss: 0.003995438804849982
  num_steps_sampled: 310200
  num_steps_trained: 310200
iterations_since_restore: 517
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.022222222222226
  ram_util_percent: 44.2
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_mi

custom_metrics: {}
date: 2021-04-06_20-29-56
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1255
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.4116520881652832
      entropy_coeff: 0.0
      kl: 0.012531058047898114
      policy_loss: -0.2770935744047165
      total_loss: -0.22712518274784088
      vf_explained_var: 0.6136295795440674
      vf_loss: 0.007147429860197008
  num_steps_sampled: 313800
  num_steps_trained: 313800
iterations_since_restore: 523
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.225
  ram_util_percent: 44.2375
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sam

custom_metrics: {}
date: 2021-04-06_20-30-32
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1270
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.3461616039276123
      entropy_coeff: 0.0
      kl: 0.011490442790091038
      policy_loss: -0.21497228741645813
      total_loss: -0.17297524958848953
      vf_explained_var: 0.7297242283821106
      vf_loss: 0.002732013090280816
  num_steps_sampled: 317400
  num_steps_trained: 317400
iterations_since_restore: 529
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.644444444444446
  ram_util_percent: 44.2
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_m

custom_metrics: {}
date: 2021-04-06_20-31-09
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1284
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.303331345319748
      entropy_coeff: 0.0
      kl: 0.01075581251643598
      policy_loss: -0.1452077515423298
      total_loss: -0.10593690723180771
      vf_explained_var: 0.8098437786102295
      vf_loss: 0.0025162137462757528
  num_steps_sampled: 321000
  num_steps_trained: 321000
iterations_since_restore: 535
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.912499999999998
  ram_util_percent: 44.1875
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_

custom_metrics: {}
date: 2021-04-06_20-31-45
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1299
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.339493304491043
      entropy_coeff: 0.0
      kl: 0.01135491207242012
      policy_loss: -0.050281114876270294
      total_loss: -0.008702792227268219
      vf_explained_var: 0.8104245662689209
      vf_loss: 0.0027764610713347793
  num_steps_sampled: 324600
  num_steps_trained: 324600
iterations_since_restore: 541
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.855555555555558
  ram_util_percent: 44.355555555555554
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}


custom_metrics: {}
date: 2021-04-06_20-32-21
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1313
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.2573761940002441
      entropy_coeff: 0.0
      kl: 0.00975967338308692
      policy_loss: -0.14478617906570435
      total_loss: -0.10788779705762863
      vf_explained_var: 0.7955139875411987
      vf_loss: 0.0035477516357786953
  num_steps_sampled: 328200
  num_steps_trained: 328200
iterations_since_restore: 547
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.566666666666666
  ram_util_percent: 44.2
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_m

custom_metrics: {}
date: 2021-04-06_20-32-57
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1327
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.3312739133834839
      entropy_coeff: 0.0
      kl: 0.011655121576040983
      policy_loss: -0.35654666274785995
      total_loss: -0.3100080182775855
      vf_explained_var: 0.6251799464225769
      vf_loss: 0.0067109259543940425
  num_steps_sampled: 331800
  num_steps_trained: 331800
iterations_since_restore: 553
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.744444444444444
  ram_util_percent: 44.25555555555556
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
po

custom_metrics: {}
date: 2021-04-06_20-33-34
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1342
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.2044063806533813
      entropy_coeff: 0.0
      kl: 0.011283689877018332
      policy_loss: -0.08615650050342083
      total_loss: -0.04357392527163029
      vf_explained_var: 0.6536006331443787
      vf_loss: 0.004024071851745248
  num_steps_sampled: 335400
  num_steps_trained: 335400
iterations_since_restore: 559
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.3375
  ram_util_percent: 44.2
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
samp

custom_metrics: {}
date: 2021-04-06_20-34-10
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1356
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.2743399143218994
      entropy_coeff: 0.0
      kl: 0.01125992788001895
      policy_loss: -0.28980543464422226
      total_loss: -0.24840137362480164
      vf_explained_var: 0.7522156238555908
      vf_loss: 0.0029267786303535104
  num_steps_sampled: 339000
  num_steps_trained: 339000
iterations_since_restore: 565
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.355555555555558
  ram_util_percent: 44.2
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_m

custom_metrics: {}
date: 2021-04-06_20-34-46
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1371
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.3689219951629639
      entropy_coeff: 0.0
      kl: 0.013047769665718079
      policy_loss: 0.0044833943247795105
      total_loss: 0.054471977055072784
      vf_explained_var: 0.6535180807113647
      vf_loss: 0.0054019097005948424
  num_steps_sampled: 342600
  num_steps_trained: 342600
iterations_since_restore: 571
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.1375
  ram_util_percent: 44.25
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
s

custom_metrics: {}
date: 2021-04-06_20-35-22
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1385
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1587393134832382
      entropy_coeff: 0.0
      kl: 0.012456059688702226
      policy_loss: -0.029635459184646606
      total_loss: 0.018020257353782654
      vf_explained_var: 0.6585568189620972
      vf_loss: 0.005091027182061225
  num_steps_sampled: 346200
  num_steps_trained: 346200
iterations_since_restore: 577
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.2875
  ram_util_percent: 44.275
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
s

custom_metrics: {}
date: 2021-04-06_20-35-58
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1399
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.2717063128948212
      entropy_coeff: 0.0
      kl: 0.013386750360950828
      policy_loss: -0.14820683002471924
      total_loss: -0.09910555183887482
      vf_explained_var: 0.6528580188751221
      vf_loss: 0.003356214292580262
  num_steps_sampled: 349800
  num_steps_trained: 349800
iterations_since_restore: 583
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.666666666666668
  ram_util_percent: 44.3
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_m

custom_metrics: {}
date: 2021-04-06_20-36-35
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1414
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.2948842644691467
      entropy_coeff: 0.0
      kl: 0.011806308990344405
      policy_loss: -0.15024740248918533
      total_loss: -0.10444025695323944
      vf_explained_var: 0.710594892501831
      vf_loss: 0.005462770699523389
  num_steps_sampled: 353400
  num_steps_trained: 353400
iterations_since_restore: 589
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.58888888888889
  ram_util_percent: 44.67777777777778
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
poli

custom_metrics: {}
date: 2021-04-06_20-37-11
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1428
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.2838866412639618
      entropy_coeff: 0.0
      kl: 0.012268793187104166
      policy_loss: -0.1829531192779541
      total_loss: -0.13677259534597397
      vf_explained_var: 0.7561231255531311
      vf_loss: 0.004255775245837867
  num_steps_sampled: 357000
  num_steps_trained: 357000
iterations_since_restore: 595
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.677777777777774
  ram_util_percent: 44.3
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_mi

custom_metrics: {}
date: 2021-04-06_20-37-47
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1443
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1762857139110565
      entropy_coeff: 0.0
      kl: 0.013385547907091677
      policy_loss: -0.13362254668027163
      total_loss: -0.08556284941732883
      vf_explained_var: 0.7109008431434631
      vf_loss: 0.0023187664919532835
  num_steps_sampled: 360600
  num_steps_trained: 360600
iterations_since_restore: 601
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.1375
  ram_util_percent: 44.3
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sam

custom_metrics: {}
date: 2021-04-06_20-38-24
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1457
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.2239395678043365
      entropy_coeff: 0.0
      kl: 0.01382243586704135
      policy_loss: -0.09423666074872017
      total_loss: -0.043200522661209106
      vf_explained_var: 0.7164734601974487
      vf_loss: 0.003802297287620604
  num_steps_sampled: 364200
  num_steps_trained: 364200
iterations_since_restore: 607
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.674999999999997
  ram_util_percent: 44.3125
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_rewar

custom_metrics: {}
date: 2021-04-06_20-39-00
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1471
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.2080467343330383
      entropy_coeff: 0.0
      kl: 0.011668547056615353
      policy_loss: -0.23299970477819443
      total_loss: -0.187723308801651
      vf_explained_var: 0.5865342617034912
      vf_loss: 0.00540278508560732
  num_steps_sampled: 367800
  num_steps_trained: 367800
iterations_since_restore: 613
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.96666666666667
  ram_util_percent: 44.3
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: 

custom_metrics: {}
date: 2021-04-06_20-39-36
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1486
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1615112721920013
      entropy_coeff: 0.0
      kl: 0.014773102477192879
      policy_loss: -0.09953167289495468
      total_loss: -0.04303842782974243
      vf_explained_var: 0.6596517562866211
      vf_loss: 0.006010789307765663
  num_steps_sampled: 371400
  num_steps_trained: 371400
iterations_since_restore: 619
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.755555555555553
  ram_util_percent: 44.27777777777778
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
po

custom_metrics: {}
date: 2021-04-06_20-40-13
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1500
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1359829604625702
      entropy_coeff: 0.0
      kl: 0.012455626856535673
      policy_loss: 0.004649311304092407
      total_loss: 0.050006017088890076
      vf_explained_var: 0.7269190549850464
      vf_loss: 0.002793516847305
  num_steps_sampled: 375000
  num_steps_trained: 375000
iterations_since_restore: 625
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.11111111111111
  ram_util_percent: 44.288888888888884
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
polic

custom_metrics: {}
date: 2021-04-06_20-40-54
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1515
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.2658680081367493
      entropy_coeff: 0.0
      kl: 0.011211997363716364
      policy_loss: -0.062124088406562805
      total_loss: -0.018648971803486347
      vf_explained_var: 0.6955935955047607
      vf_loss: 0.005161629058420658
  num_steps_sampled: 378600
  num_steps_trained: 378600
iterations_since_restore: 631
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 39.040000000000006
  ram_util_percent: 46.39
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_rewar

custom_metrics: {}
date: 2021-04-06_20-41-36
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1529
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1778365075588226
      entropy_coeff: 0.0
      kl: 0.011832328978925943
      policy_loss: -0.2006860300898552
      total_loss: -0.1565102357417345
      vf_explained_var: 0.6658357381820679
      vf_loss: 0.0037425139162223786
  num_steps_sampled: 382200
  num_steps_trained: 382200
iterations_since_restore: 637
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 29.5
  ram_util_percent: 49.13333333333333
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min

custom_metrics: {}
date: 2021-04-06_20-42-17
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1543
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.144524723291397
      entropy_coeff: 0.0
      kl: 0.012974390992894769
      policy_loss: -0.29225563630461693
      total_loss: -0.2444891706109047
      vf_explained_var: 0.69318026304245
      vf_loss: 0.00343053403776139
  num_steps_sampled: 385800
  num_steps_trained: 385800
iterations_since_restore: 643
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 32.13
  ram_util_percent: 48.71
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_pe

custom_metrics: {}
date: 2021-04-06_20-43-00
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1558
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1548174917697906
      entropy_coeff: 0.0
      kl: 0.01018126925919205
      policy_loss: -0.12501254677772522
      total_loss: -0.08681285008788109
      vf_explained_var: 0.7956368327140808
      vf_loss: 0.0034084089857060462
  num_steps_sampled: 389400
  num_steps_trained: 389400
iterations_since_restore: 649
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 44.01
  ram_util_percent: 49.14
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampl

custom_metrics: {}
date: 2021-04-06_20-43-42
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1572
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1615125834941864
      entropy_coeff: 0.0
      kl: 0.014918056316673756
      policy_loss: -0.2910820674151182
      total_loss: -0.23782407492399216
      vf_explained_var: 0.7714565992355347
      vf_loss: 0.002280206375871785
  num_steps_sampled: 393000
  num_steps_trained: 393000
iterations_since_restore: 655
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 33.14
  ram_util_percent: 48.64000000000001
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_mi

custom_metrics: {}
date: 2021-04-06_20-44-24
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1587
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.2162724733352661
      entropy_coeff: 0.0
      kl: 0.014331361977383494
      policy_loss: -0.02287914603948593
      total_loss: 0.028532058000564575
      vf_explained_var: 0.7368516325950623
      vf_loss: 0.0024382799747399986
  num_steps_sampled: 396600
  num_steps_trained: 396600
iterations_since_restore: 661
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 33.16
  ram_util_percent: 48.5
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampl

custom_metrics: {}
date: 2021-04-06_20-45-05
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1601
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 0.9931304901838303
      entropy_coeff: 0.0
      kl: 0.01069885166361928
      policy_loss: -0.031163468956947327
      total_loss: 0.01031544804573059
      vf_explained_var: 0.5428647994995117
      vf_loss: 0.004918980877846479
  num_steps_sampled: 400200
  num_steps_trained: 400200
iterations_since_restore: 667
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 29.82
  ram_util_percent: 48.83
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sample

custom_metrics: {}
date: 2021-04-06_20-45-47
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1615
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.2348806858062744
      entropy_coeff: 0.0
      kl: 0.011999281123280525
      policy_loss: -0.24871570616960526
      total_loss: -0.20408813655376434
      vf_explained_var: 0.6546629667282104
      vf_loss: 0.0036238289321772754
  num_steps_sampled: 403800
  num_steps_trained: 403800
iterations_since_restore: 673
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 34.14
  ram_util_percent: 49.13
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
samp

custom_metrics: {}
date: 2021-04-06_20-46-29
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1630
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.2126458287239075
      entropy_coeff: 0.0
      kl: 0.012849674094468355
      policy_loss: -0.12135005742311478
      total_loss: -0.07315204292535782
      vf_explained_var: 0.7131237387657166
      vf_loss: 0.0042882541893050075
  num_steps_sampled: 407400
  num_steps_trained: 407400
iterations_since_restore: 679
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 38.68
  ram_util_percent: 49.220000000000006
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward

custom_metrics: {}
date: 2021-04-06_20-47-14
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1644
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.025572881102562
      entropy_coeff: 0.0
      kl: 0.010079228784888983
      policy_loss: -0.2016915986314416
      total_loss: -0.16426550038158894
      vf_explained_var: 0.6992321014404297
      vf_loss: 0.0029834755696356297
  num_steps_sampled: 411000
  num_steps_trained: 411000
iterations_since_restore: 685
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 59.583333333333336
  ram_util_percent: 50.25833333333333
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
pol

custom_metrics: {}
date: 2021-04-06_20-48-02
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1659
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.117802917957306
      entropy_coeff: 0.0
      kl: 0.010494395974092185
      policy_loss: -0.010371780022978783
      total_loss: 0.02797337807714939
      vf_explained_var: 0.8246642351150513
      vf_loss: 0.002483814430888742
  num_steps_sampled: 414600
  num_steps_trained: 414600
iterations_since_restore: 691
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 37.21818181818182
  ram_util_percent: 53.618181818181824
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
pol

custom_metrics: {}
date: 2021-04-06_20-48-43
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1673
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1705556213855743
      entropy_coeff: 0.0
      kl: 0.0108073391020298
      policy_loss: -0.25594808906316757
      total_loss: -0.21696429699659348
      vf_explained_var: 0.8117344379425049
      vf_loss: 0.0020530772890197113
  num_steps_sampled: 418200
  num_steps_trained: 418200
iterations_since_restore: 697
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 37.230000000000004
  ram_util_percent: 55.33
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_m

custom_metrics: {}
date: 2021-04-06_20-49-26
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1687
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0233851224184036
      entropy_coeff: 0.0
      kl: 0.008780871517956257
      policy_loss: -0.2540842089802027
      total_loss: -0.2154233418405056
      vf_explained_var: 0.6031400561332703
      vf_loss: 0.008654937890241854
  num_steps_sampled: 421800
  num_steps_trained: 421800
iterations_since_restore: 703
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 36.345454545454544
  ram_util_percent: 56.93636363636363
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
poli

custom_metrics: {}
date: 2021-04-06_20-50-08
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1702
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 0.9692590236663818
      entropy_coeff: 0.0
      kl: 0.010991333285346627
      policy_loss: -0.08291570469737053
      total_loss: -0.04085448011755943
      vf_explained_var: 0.7913979291915894
      vf_loss: 0.004501787596382201
  num_steps_sampled: 425400
  num_steps_trained: 425400
iterations_since_restore: 709
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 35.690000000000005
  ram_util_percent: 56.879999999999995
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
p

custom_metrics: {}
date: 2021-04-06_20-50-51
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1716
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1305232346057892
      entropy_coeff: 0.0
      kl: 0.012542253592982888
      policy_loss: -0.29088009893894196
      total_loss: -0.2448338121175766
      vf_explained_var: 0.7625800371170044
      vf_loss: 0.003187049936968833
  num_steps_sampled: 429000
  num_steps_trained: 429000
iterations_since_restore: 715
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 35.059999999999995
  ram_util_percent: 56.71
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_m

custom_metrics: {}
date: 2021-04-06_20-51-33
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1731
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1134734153747559
      entropy_coeff: 0.0
      kl: 0.01312025892548263
      policy_loss: 0.10872031189501286
      total_loss: 0.15631810016930103
      vf_explained_var: 0.6957452297210693
      vf_loss: 0.0027634265134111047
  num_steps_sampled: 432600
  num_steps_trained: 432600
iterations_since_restore: 721
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 36.8
  ram_util_percent: 56.92999999999999
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min:

custom_metrics: {}
date: 2021-04-06_20-52-16
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1745
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1107999980449677
      entropy_coeff: 0.0
      kl: 0.013760528527200222
      policy_loss: -0.021724633872509003
      total_loss: 0.028599608689546585
      vf_explained_var: 0.6651033163070679
      vf_loss: 0.003301935241324827
  num_steps_sampled: 436200
  num_steps_trained: 436200
iterations_since_restore: 727
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 39.44
  ram_util_percent: 56.620000000000005
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward

custom_metrics: {}
date: 2021-04-06_20-52-58
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1759
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 0.8902564942836761
      entropy_coeff: 0.0
      kl: 0.008139075594954193
      policy_loss: -0.18805156648159027
      total_loss: -0.15630098432302475
      vf_explained_var: 0.6424538493156433
      vf_loss: 0.003937875502742827
  num_steps_sampled: 439800
  num_steps_trained: 439800
iterations_since_restore: 733
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 36.0
  ram_util_percent: 56.81
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sample

custom_metrics: {}
date: 2021-04-06_20-53-40
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1774
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1469028145074844
      entropy_coeff: 0.0
      kl: 0.013565929373726249
      policy_loss: -0.30227847024798393
      total_loss: -0.25271706096827984
      vf_explained_var: 0.6913077235221863
      vf_loss: 0.0032040877267718315
  num_steps_sampled: 443400
  num_steps_trained: 443400
iterations_since_restore: 739
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 37.489999999999995
  ram_util_percent: 56.86999999999999
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
p

custom_metrics: {}
date: 2021-04-06_20-54-22
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1788
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0868467539548874
      entropy_coeff: 0.0
      kl: 0.014838616829365492
      policy_loss: -0.03664700873196125
      total_loss: 0.016074007377028465
      vf_explained_var: 0.7646348476409912
      vf_loss: 0.0020146957831457257
  num_steps_sampled: 447000
  num_steps_trained: 447000
iterations_since_restore: 745
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 34.62222222222222
  ram_util_percent: 56.77777777777778
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
po

custom_metrics: {}
date: 2021-04-06_20-55-04
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1803
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 0.9898395538330078
      entropy_coeff: 0.0
      kl: 0.011277584242634475
      policy_loss: 0.08414331078529358
      total_loss: 0.12632424384355545
      vf_explained_var: 0.6830425262451172
      vf_loss: 0.003643316449597478
  num_steps_sampled: 450600
  num_steps_trained: 450600
iterations_since_restore: 751
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 34.43333333333334
  ram_util_percent: 56.81111111111111
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
polic

custom_metrics: {}
date: 2021-04-06_20-55-46
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1817
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 0.9675430953502655
      entropy_coeff: 0.0
      kl: 0.011787238530814648
      policy_loss: -0.06057679280638695
      total_loss: -0.017082085832953453
      vf_explained_var: 0.7262235879898071
      vf_loss: 0.0032155380758922547
  num_steps_sampled: 454200
  num_steps_trained: 454200
iterations_since_restore: 757
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 34.5
  ram_util_percent: 56.80999999999999
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_

custom_metrics: {}
date: 2021-04-06_20-56-28
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1831
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1780258119106293
      entropy_coeff: 0.0
      kl: 0.011018951772712171
      policy_loss: -0.31538705714046955
      total_loss: -0.27475740388035774
      vf_explained_var: 0.6685783863067627
      vf_loss: 0.0029758334276266396
  num_steps_sampled: 457800
  num_steps_trained: 457800
iterations_since_restore: 763
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 38.25000000000001
  ram_util_percent: 56.970000000000006
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
p

custom_metrics: {}
date: 2021-04-06_20-57-10
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1846
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0190515220165253
      entropy_coeff: 0.0
      kl: 0.009518576669506729
      policy_loss: -0.06180429086089134
      total_loss: -0.024779390543699265
      vf_explained_var: 0.618851900100708
      vf_loss: 0.004498130292631686
  num_steps_sampled: 461400
  num_steps_trained: 461400
iterations_since_restore: 769
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 35.71
  ram_util_percent: 56.95
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampl

custom_metrics: {}
date: 2021-04-06_20-57-52
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1860
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 0.9151801764965057
      entropy_coeff: 0.0
      kl: 0.010035353247076273
      policy_loss: -0.08639740012586117
      total_loss: -0.049416616559028625
      vf_explained_var: 0.6885983943939209
      vf_loss: 0.0026881108642555773
  num_steps_sampled: 465000
  num_steps_trained: 465000
iterations_since_restore: 775
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 35.379999999999995
  ram_util_percent: 56.910000000000004
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}

custom_metrics: {}
date: 2021-04-06_20-58-33
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1875
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1132052093744278
      entropy_coeff: 0.0
      kl: 0.009888026281259954
      policy_loss: 0.04762124642729759
      total_loss: 0.08510037139058113
      vf_explained_var: 0.6677896976470947
      vf_loss: 0.0036898875259794295
  num_steps_sampled: 468600
  num_steps_trained: 468600
iterations_since_restore: 781
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 36.830000000000005
  ram_util_percent: 56.9
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_mi

custom_metrics: {}
date: 2021-04-06_20-59-16
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1889
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.2778251469135284
      entropy_coeff: 0.0
      kl: 0.014132959535345435
      policy_loss: -0.06303991749882698
      total_loss: -0.010268591344356537
      vf_explained_var: 0.6164509654045105
      vf_loss: 0.004476371104829013
  num_steps_sampled: 472200
  num_steps_trained: 472200
iterations_since_restore: 787
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 33.71
  ram_util_percent: 56.919999999999995
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward

custom_metrics: {}
date: 2021-04-06_20-59-57
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1903
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1188148856163025
      entropy_coeff: 0.0
      kl: 0.010027088923379779
      policy_loss: -0.25319023057818413
      total_loss: -0.21580374240875244
      vf_explained_var: 0.6586350202560425
      vf_loss: 0.0031220122473314404
  num_steps_sampled: 475800
  num_steps_trained: 475800
iterations_since_restore: 793
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 33.9
  ram_util_percent: 56.94
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampl

custom_metrics: {}
date: 2021-04-06_21-00-39
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1918
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.001237079501152
      entropy_coeff: 0.0
      kl: 0.008675100049003959
      policy_loss: -0.18723568134009838
      total_loss: -0.15316095389425755
      vf_explained_var: 0.6304782629013062
      vf_loss: 0.004430294269695878
  num_steps_sampled: 479400
  num_steps_trained: 479400
iterations_since_restore: 799
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 35.08
  ram_util_percent: 57.02
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sample

custom_metrics: {}
date: 2021-04-06_21-01-21
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1932
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1869699656963348
      entropy_coeff: 0.0
      kl: 0.009953516302630305
      policy_loss: -0.09366515092551708
      total_loss: -0.056605017744004726
      vf_explained_var: 0.7304076552391052
      vf_loss: 0.0030471076897811145
  num_steps_sampled: 483000
  num_steps_trained: 483000
iterations_since_restore: 805
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 36.58
  ram_util_percent: 56.989999999999995
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_rewar

custom_metrics: {}
date: 2021-04-06_21-02-03
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1947
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.112460732460022
      entropy_coeff: 0.0
      kl: 0.013023493229411542
      policy_loss: -0.1125631807371974
      total_loss: -0.06528495997190475
      vf_explained_var: 0.6504982113838196
      vf_loss: 0.0027745145198423415
  num_steps_sampled: 486600
  num_steps_trained: 486600
iterations_since_restore: 811
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 35.800000000000004
  ram_util_percent: 56.95
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_m

custom_metrics: {}
date: 2021-04-06_21-02-45
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1961
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 0.991956353187561
      entropy_coeff: 0.0
      kl: 0.008970339084044099
      policy_loss: 0.028672680258750916
      total_loss: 0.062046151608228683
      vf_explained_var: 0.7806333303451538
      vf_loss: 0.0027201594784855843
  num_steps_sampled: 490200
  num_steps_trained: 490200
iterations_since_restore: 817
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 37.46
  ram_util_percent: 57.010000000000005
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_

custom_metrics: {}
date: 2021-04-06_21-03-27
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1975
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0005146265029907
      entropy_coeff: 0.0
      kl: 0.011153699597343802
      policy_loss: -0.2523651793599129
      total_loss: -0.2117840051651001
      vf_explained_var: 0.633355975151062
      vf_loss: 0.002466913778334856
  num_steps_sampled: 493800
  num_steps_trained: 493800
iterations_since_restore: 823
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 35.589999999999996
  ram_util_percent: 57.0
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min:

custom_metrics: {}
date: 2021-04-06_21-04-09
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 1990
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0345950722694397
      entropy_coeff: 0.0
      kl: 0.007067598053254187
      policy_loss: -0.07773356139659882
      total_loss: -0.04822029545903206
      vf_explained_var: 0.669995903968811
      vf_loss: 0.005361975287087262
  num_steps_sampled: 497400
  num_steps_trained: 497400
iterations_since_restore: 829
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 37.03333333333334
  ram_util_percent: 56.94444444444444
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
poli

custom_metrics: {}
date: 2021-04-06_21-04-51
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2004
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 0.9770112037658691
      entropy_coeff: 0.0
      kl: 0.009406826342456043
      policy_loss: -0.16982046514749527
      total_loss: -0.13363008946180344
      vf_explained_var: 0.710192084312439
      vf_loss: 0.004045509587740526
  num_steps_sampled: 501000
  num_steps_trained: 501000
iterations_since_restore: 835
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 33.43
  ram_util_percent: 56.98
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sample

custom_metrics: {}
date: 2021-04-06_21-05-33
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 2019
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.138754278421402
      entropy_coeff: 0.0
      kl: 0.012458546087145805
      policy_loss: -0.05639335513114929
      total_loss: -0.01119905710220337
      vf_explained_var: 0.7622905969619751
      vf_loss: 0.002621097635710612
  num_steps_sampled: 504600
  num_steps_trained: 504600
iterations_since_restore: 841
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 34.760000000000005
  ram_util_percent: 57.05
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_m

custom_metrics: {}
date: 2021-04-06_21-06-15
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2033
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0863163471221924
      entropy_coeff: 0.0
      kl: 0.010400028666481376
      policy_loss: -0.25463568791747093
      total_loss: -0.21619081124663353
      vf_explained_var: 0.7076416015625
      vf_loss: 0.0029060191300231963
  num_steps_sampled: 508200
  num_steps_trained: 508200
iterations_since_restore: 847
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 35.760000000000005
  ram_util_percent: 57.010000000000005
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
pol

custom_metrics: {}
date: 2021-04-06_21-06-57
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2047
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0496982336044312
      entropy_coeff: 0.0
      kl: 0.013541081920266151
      policy_loss: -0.17034069448709488
      total_loss: -0.1215650886297226
      vf_explained_var: 0.754484236240387
      vf_loss: 0.0025032042467501014
  num_steps_sampled: 511800
  num_steps_trained: 511800
iterations_since_restore: 853
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 36.260000000000005
  ram_util_percent: 57.23
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_m

custom_metrics: {}
date: 2021-04-06_21-07-39
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 2062
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0845462083816528
      entropy_coeff: 0.0
      kl: 0.01115648658014834
      policy_loss: -0.06302767805755138
      total_loss: -0.02286907285451889
      vf_explained_var: 0.7791808843612671
      vf_loss: 0.0020348012330941856
  num_steps_sampled: 515400
  num_steps_trained: 515400
iterations_since_restore: 859
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 37.3
  ram_util_percent: 57.01818181818182
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_mi

custom_metrics: {}
date: 2021-04-06_21-08-21
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2076
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.052353411912918
      entropy_coeff: 0.0
      kl: 0.01146256597712636
      policy_loss: -0.27533167973160744
      total_loss: -0.2324594408273697
      vf_explained_var: 0.740077018737793
      vf_loss: 0.0037024993798695505
  num_steps_sampled: 519000
  num_steps_trained: 519000
iterations_since_restore: 865
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 34.92
  ram_util_percent: 56.96
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampler_

custom_metrics: {}
date: 2021-04-06_21-09-04
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 2091
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.062776505947113
      entropy_coeff: 0.0
      kl: 0.011821368243545294
      policy_loss: 0.051612332463264465
      total_loss: 0.09697385877370834
      vf_explained_var: 0.7284699082374573
      vf_loss: 0.00496569590177387
  num_steps_sampled: 522600
  num_steps_trained: 522600
iterations_since_restore: 871
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 38.059999999999995
  ram_util_percent: 57.0
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min:

custom_metrics: {}
date: 2021-04-06_21-09-46
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2105
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0480608642101288
      entropy_coeff: 0.0
      kl: 0.009268325986340642
      policy_loss: -0.18691833317279816
      total_loss: -0.15312568843364716
      vf_explained_var: 0.779531717300415
      vf_loss: 0.002121036231983453
  num_steps_sampled: 526200
  num_steps_trained: 526200
iterations_since_restore: 877
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 36.51
  ram_util_percent: 57.02
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sample

custom_metrics: {}
date: 2021-04-06_21-10-27
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2119
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 0.9779396057128906
      entropy_coeff: 0.0
      kl: 0.010597081622108817
      policy_loss: -0.3040211908519268
      total_loss: -0.2629527747631073
      vf_explained_var: 0.5345268249511719
      vf_loss: 0.004856189130805433
  num_steps_sampled: 529800
  num_steps_trained: 529800
iterations_since_restore: 883
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 32.35
  ram_util_percent: 57.18000000000001
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min

custom_metrics: {}
date: 2021-04-06_21-11-09
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 2134
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0404359698295593
      entropy_coeff: 0.0
      kl: 0.011945204227231443
      policy_loss: -0.027001861482858658
      total_loss: 0.016141604632139206
      vf_explained_var: 0.6900938749313354
      vf_loss: 0.0023244655458256602
  num_steps_sampled: 533400
  num_steps_trained: 533400
iterations_since_restore: 889
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 31.05
  ram_util_percent: 56.77
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sam

custom_metrics: {}
date: 2021-04-06_21-11-50
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2148
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1327328085899353
      entropy_coeff: 0.0
      kl: 0.014359395718201995
      policy_loss: -0.1982490010559559
      total_loss: -0.14649203419685364
      vf_explained_var: 0.7564132213592529
      vf_loss: 0.0026882284582825378
  num_steps_sampled: 537000
  num_steps_trained: 537000
iterations_since_restore: 895
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 30.41111111111111
  ram_util_percent: 56.777777777777786
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
po

custom_metrics: {}
date: 2021-04-06_21-12-32
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 2163
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 0.9798244088888168
      entropy_coeff: 0.0
      kl: 0.009187135030515492
      policy_loss: 0.06394666247069836
      total_loss: 0.0982865784317255
      vf_explained_var: 0.7233926653862
      vf_loss: 0.002945768675999716
  num_steps_sampled: 540600
  num_steps_trained: 540600
iterations_since_restore: 901
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 31.889999999999997
  ram_util_percent: 56.73
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {

custom_metrics: {}
date: 2021-04-06_21-13-13
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2177
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0164920538663864
      entropy_coeff: 0.0
      kl: 0.01084054820239544
      policy_loss: -0.005174417048692703
      total_loss: 0.037562236189842224
      vf_explained_var: 0.6486032009124756
      vf_loss: 0.005692448699846864
  num_steps_sampled: 544200
  num_steps_trained: 544200
iterations_since_restore: 907
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 29.71
  ram_util_percent: 56.779999999999994
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_

custom_metrics: {}
date: 2021-04-06_21-13-55
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2191
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0663462579250336
      entropy_coeff: 0.0
      kl: 0.009988546604290605
      policy_loss: -0.2812548503279686
      total_loss: -0.24273563921451569
      vf_explained_var: 0.6933867931365967
      vf_loss: 0.004386497632367536
  num_steps_sampled: 547800
  num_steps_trained: 547800
iterations_since_restore: 913
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 41.029999999999994
  ram_util_percent: 57.36999999999999
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
pol

custom_metrics: {}
date: 2021-04-06_21-14-37
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 2206
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1439683139324188
      entropy_coeff: 0.0
      kl: 0.014394477941095829
      policy_loss: -0.17179210484027863
      total_loss: -0.11695148050785065
      vf_explained_var: 0.6843777894973755
      vf_loss: 0.005652001011185348
  num_steps_sampled: 551400
  num_steps_trained: 551400
iterations_since_restore: 919
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 34.519999999999996
  ram_util_percent: 57.18000000000001
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
po

custom_metrics: {}
date: 2021-04-06_21-15-19
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2220
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0228112041950226
      entropy_coeff: 0.0
      kl: 0.012915380881167948
      policy_loss: -0.2211966272443533
      total_loss: -0.1744764093309641
      vf_explained_var: 0.7930611968040466
      vf_loss: 0.002585961949080229
  num_steps_sampled: 555000
  num_steps_trained: 555000
iterations_since_restore: 925
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 35.82000000000001
  ram_util_percent: 57.20000000000001
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
polic

custom_metrics: {}
date: 2021-04-06_21-16-01
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 2235
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.107427567243576
      entropy_coeff: 0.0
      kl: 0.013312178663909435
      policy_loss: -0.02046915888786316
      total_loss: 0.029717624187469482
      vf_explained_var: 0.7195718288421631
      vf_loss: 0.004696563584730029
  num_steps_sampled: 558600
  num_steps_trained: 558600
iterations_since_restore: 931
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 36.82
  ram_util_percent: 57.11
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sample

custom_metrics: {}
date: 2021-04-06_21-16-43
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2249
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1112549602985382
      entropy_coeff: 0.0
      kl: 0.012719109887257218
      policy_loss: -0.09727691859006882
      total_loss: -0.05125392973423004
      vf_explained_var: 0.8059637546539307
      vf_loss: 0.0025594170729164034
  num_steps_sampled: 562200
  num_steps_trained: 562200
iterations_since_restore: 937
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 37.9
  ram_util_percent: 57.39
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampl

custom_metrics: {}
date: 2021-04-06_21-17-25
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2263
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.015331745147705
      entropy_coeff: 0.0
      kl: 0.01031196117401123
      policy_loss: -0.29417772218585014
      total_loss: -0.2556425407528877
      vf_explained_var: 0.7531814575195312
      vf_loss: 0.003297218310763128
  num_steps_sampled: 565800
  num_steps_trained: 565800
iterations_since_restore: 943
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 36.37
  ram_util_percent: 57.170000000000016
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min

custom_metrics: {}
date: 2021-04-06_21-18-07
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 2278
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 0.9797922074794769
      entropy_coeff: 0.0
      kl: 0.01026049128267914
      policy_loss: 0.0029033198952674866
      total_loss: 0.041266292333602905
      vf_explained_var: 0.6977111101150513
      vf_loss: 0.003300949465483427
  num_steps_sampled: 569400
  num_steps_trained: 569400
iterations_since_restore: 949
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 33.11
  ram_util_percent: 57.17
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sampl

custom_metrics: {}
date: 2021-04-06_21-18-50
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2292
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 0.9539212584495544
      entropy_coeff: 0.0
      kl: 0.011002460843883455
      policy_loss: -0.23416417092084885
      total_loss: -0.19292326271533966
      vf_explained_var: 0.7090137004852295
      vf_loss: 0.0036434229914448224
  num_steps_sampled: 573000
  num_steps_trained: 573000
iterations_since_restore: 955
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 35.66
  ram_util_percent: 57.120000000000005
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward

custom_metrics: {}
date: 2021-04-06_21-19-32
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 2307
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0656912624835968
      entropy_coeff: 0.0
      kl: 0.011802632827311754
      policy_loss: 0.002764858305454254
      total_loss: 0.04552238807082176
      vf_explained_var: 0.7440524101257324
      vf_loss: 0.002425722239422612
  num_steps_sampled: 576600
  num_steps_trained: 576600
iterations_since_restore: 961
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 38.7
  ram_util_percent: 57.210000000000015
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_mi

custom_metrics: {}
date: 2021-04-06_21-20-15
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2321
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.1593629121780396
      entropy_coeff: 0.0
      kl: 0.012410279363393784
      policy_loss: -0.09591908007860184
      total_loss: -0.04942671023309231
      vf_explained_var: 0.726704478263855
      vf_loss: 0.00408408889779821
  num_steps_sampled: 580200
  num_steps_trained: 580200
iterations_since_restore: 967
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 38.870000000000005
  ram_util_percent: 57.19000000000001
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
poli

custom_metrics: {}
date: 2021-04-06_21-20-57
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2335
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0305641889572144
      entropy_coeff: 0.0
      kl: 0.009988307603634894
      policy_loss: -0.23704037815332413
      total_loss: -0.20021551847457886
      vf_explained_var: 0.7355868816375732
      vf_loss: 0.0026929265295621008
  num_steps_sampled: 583800
  num_steps_trained: 583800
iterations_since_restore: 973
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 38.17
  ram_util_percent: 57.120000000000005
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward

custom_metrics: {}
date: 2021-04-06_21-21-39
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 2350
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.033822476863861
      entropy_coeff: 0.0
      kl: 0.011285018175840378
      policy_loss: -0.1602741777896881
      total_loss: -0.11915672197937965
      vf_explained_var: 0.7852039337158203
      vf_loss: 0.0025544499803800136
  num_steps_sampled: 587400
  num_steps_trained: 587400
iterations_since_restore: 979
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 30.35
  ram_util_percent: 57.279999999999994
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_m

custom_metrics: {}
date: 2021-04-06_21-22-21
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2364
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 1.0565763115882874
      entropy_coeff: 0.0
      kl: 0.014111665077507496
      policy_loss: -0.061953261494636536
      total_loss: -0.010068334639072418
      vf_explained_var: 0.730987548828125
      vf_loss: 0.0036627230001613498
  num_steps_sampled: 591000
  num_steps_trained: 591000
iterations_since_restore: 985
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 34.76
  ram_util_percent: 57.06
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sam

custom_metrics: {}
date: 2021-04-06_21-23-03
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 3
episodes_total: 2379
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 0.9793567359447479
      entropy_coeff: 0.0
      kl: 0.01008391275536269
      policy_loss: -0.046229854226112366
      total_loss: -0.009114093147218227
      vf_explained_var: 0.8027640581130981
      vf_loss: 0.0026571328344289213
  num_steps_sampled: 594600
  num_steps_trained: 594600
iterations_since_restore: 991
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 37.17999999999999
  ram_util_percent: 57.089999999999996
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}


custom_metrics: {}
date: 2021-04-06_21-23-45
done: false
episode_len_mean: 250.0
episode_reward_max: -0.9999999310821295
episode_reward_mean: -0.9999999310821295
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 2393
experiment_id: a88c6111ff18408aad4cfbb03e696fbe
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 3.4171875000000003
      cur_lr: 5.0e-05
      entropy: 0.9850463718175888
      entropy_coeff: 0.0
      kl: 0.012543221702799201
      policy_loss: -0.0697830319404602
      total_loss: -0.02242763340473175
      vf_explained_var: 0.7762529850006104
      vf_loss: 0.004492853186093271
  num_steps_sampled: 598200
  num_steps_trained: 598200
iterations_since_restore: 997
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 36.51
  ram_util_percent: 57.17
pid: 18605
policy_reward_max: {}
policy_reward_mean: {}
policy_reward_min: {}
sample

In [23]:
# Show the trainer object's repr as this cell's output.
trainer

<ray.rllib.agents.trainer_template.PPO at 0x7feae7ff4f50>

In [10]:
## Build a standalone AnimalAI environment for trying out the trained agent

# Arena layout loaded for this evaluation run.
# NOTE(review): the training config at the top of the notebook points at
# curriculum/5.yml, while this cell loads curriculum/0.yml -- confirm that
# evaluating on a different arena file is intended.
eval_arena_config = ArenaConfig('../examples/configurations/curriculum/0.yml')

env = AnimalAIGym(
    environment_filename="../examples/env/AnimalAI",
    # The training config uses base worker id 67; presumably a distinct id is
    # needed here so the two Unity processes do not clash -- TODO confirm.
    worker_id=150,
    flatten_branched=True,
    uint8_visual=True,
    arenas_configurations=eval_arena_config,
)

INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
INFO:mlagents_envs:Connected new brain:
AnimalAI?team=0
INFO:gym_unity:1 agents within environment.


In [11]:
# Roll out a single episode with the trained policy (until the environment
# reports `done`) and accumulate the episode return in `episode_reward`.
episode_reward = 0
done = False

# The recurrent state starts at the model's initial state and is replaced on
# every step by the state handed back from `compute_action`.
rnn_state = trainer.get_policy().model.get_initial_state()
obs = env.reset()

while not done:
    # With a state argument, the result is indexable: item 0 is the action and
    # item 1 is the next recurrent state.
    policy_out = trainer.compute_action(obs, rnn_state)
    action = policy_out[0]
    rnn_state = policy_out[1]
    print("ACTION: ", action)

    obs, reward, done, info = env.step(action)
    print("REWARD: ", reward)

    episode_reward += reward

ACTION:  5
REWARD:  -0.0039999997
ACTION:  7
REWARD:  -0.0039999997
ACTION:  8
REWARD:  -0.0039999997
ACTION:  5
REWARD:  -0.0039999997
ACTION:  4
REWARD:  -0.0039999997
ACTION:  4
REWARD:  -0.0039999997
ACTION:  1
REWARD:  -0.0039999997
ACTION:  2
REWARD:  -0.0039999997
ACTION:  0
REWARD:  -0.0039999997
ACTION:  0
REWARD:  -0.0039999997
ACTION:  7
REWARD:  -0.0039999997
ACTION:  0
REWARD:  -0.0039999997
ACTION:  8
REWARD:  -0.0039999997
ACTION:  8
REWARD:  -0.0039999997
ACTION:  4
REWARD:  -0.0039999997
ACTION:  1
REWARD:  -0.0039999997
ACTION:  4
REWARD:  -0.0039999997
ACTION:  8
REWARD:  -0.0039999997
ACTION:  0
REWARD:  -0.0039999997
ACTION:  8
REWARD:  -0.0039999997
ACTION:  8
REWARD:  -0.0039999997
ACTION:  8
REWARD:  -0.0039999997
ACTION:  6
REWARD:  -0.0039999997
ACTION:  6
REWARD:  -0.0039999997
ACTION:  0
REWARD:  -0.0039999997
ACTION:  0
REWARD:  -0.0039999997
ACTION:  3
REWARD:  -0.0039999997
ACTION:  2
REWARD:  -0.0039999997
ACTION:  6
REWARD:  -0.0039999997
ACTION:  3
REW

In [12]:
# Display the total reward accumulated by the rollout loop in the previous cell.
episode_reward

-0.9999999310821295

In [13]:
# Shut down the evaluation environment once the rollout is finished.
env.close()

INFO:mlagents_envs:Environment shut down with return code 0.
