# WasteNet

## Setup

In [None]:
# Colab
!rm -r smart-cities-drl
!git clone https://github.com/eescriba/smart-cities-drl
!cd smart-cities-drl/ && pip install -e .

import sys
sys.path.insert(0,'./smart-cities-drl/src/')

# Local
# !pip install -e ..
# import sys
# sys.path.insert(0,'../src/')

In [2]:
import json
import shutil
import random

import gym
import ray
from ray.tune import run, choice
from core.rl import PPOAgent
from core.pbt import PbtOptimizer
from wastenet.env import WasteNetEnv



Instructions for updating:
experimental_compile is deprecated, use jit_compile instead


## Proximal Policy Optimization (PPO)

### Tune hyperparameters

In [3]:
# Agent handle used by the tuning cells below.
# NOTE(review): PPOAgent is defined in core.rl, outside this notebook. The
# string looks like a run/experiment name and the empty dict like config
# overrides -- confirm against the class signature.
ppo = PPOAgent(
    "wastenet_ppo_tune",
    WasteNetEnv,
    {},
)

2021-06-25 06:54:32,717	INFO services.py:1274 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2021-06-25 06:54:35,580	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2021-06-25 06:54:35,582	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=256)[0m Instructions for updating:
[2m[36m(pid=256)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=257)[0m Instructions for updating:
[2m[36m(pid=257)[0m experimental_compile is deprecated, use jit_compile instead
2021-06-25 06:54:52,453	INFO trainable.py:104 -- Trainable.setup took 16.874 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [4]:
# Hyperparameter search setup: initial trainer config, stop condition and the
# mutation space handed to the PBT optimizer.

# Single seed shared by the trainer config and the driver-side RNG.
SEED = 123

# The mutation samplers below draw from the global `random` module; seed it so
# the values they produce come from a fixed generator state.
random.seed(SEED)

tune_config = {
    "env": WasteNetEnv,
    # NOTE(review): every trial receives the same seed, so trials that sample
    # identical hyperparameters repeat each other until they are perturbed (the
    # recorded output shows trials 00000 and 00001 reporting identical metrics).
    "seed": SEED,
    "num_gpus": 1,
    "num_workers": 1,
    "observation_filter": "MeanStdFilter",
    "model": {
        "free_log_std": True
    },
    "lambda": 0.95,
    "clip_param": 0.3,
    "lr": 5e-5,
    # Initial values for these three entries are sampled per trial.
    "num_sgd_iter": choice([10, 20, 30]),
    "sgd_minibatch_size": choice([128, 256, 512]),
    "train_batch_size": choice([8000, 16000, 32000])
}

# Stop condition passed to ppo.tune(): a budget of total timesteps.
stop_criteria = {
    "timesteps_total": 2000000
}

# Per-hyperparameter mutation space: callables are sampled, lists enumerate the
# allowed values.
# NOTE(review): the sgd_minibatch_size range (up to 16384) overlaps the lower
# end of the train_batch_size range (from 2000), so a mutation can pick a
# minibatch larger than the batch. RLlib's PPO config validation is expected to
# reject that combination -- verify that PbtOptimizer clamps it (e.g. through a
# custom explore function).
hyperparam_mutations = {
    "lambda": lambda: random.uniform(0.7, 1.0),
    "clip_param": lambda: random.uniform(0.1, 0.5),
    "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
    "num_sgd_iter": lambda: random.randint(1, 30),
    "sgd_minibatch_size": lambda: random.randint(128, 16384),
    "train_batch_size": lambda: random.randint(2000, 160000),
}
pbt = PbtOptimizer(hyperparam_mutations)

In [None]:
# Run the PBT-scheduled search and report the best configuration.
# NOTE(review): restart() lives in core.rl; presumably it re-initialises the
# Ray session before tuning -- confirm.
ppo.restart()
analysis = ppo.tune(
    tune_config,
    stop_criteria,
    scheduler=pbt.scheduler,
)

# Pick the config of the trial that maximises the mean episode reward.
best_config = analysis.get_best_config(
    mode="max",
    metric="episode_reward_mean",
)
print("Best hyperparameters found: ", best_config)

2021-06-25 06:57:02,354	INFO services.py:1274 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000


[2m[36m(pid=463)[0m Instructions for updating:
[2m[36m(pid=463)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=463)[0m 2021-06-25 06:57:07,532	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=463)[0m 2021-06-25 06:57:07,532	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size
PPO_WasteNetEnv_8c324_00000,RUNNING,,20,512,20000
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000


[2m[36m(pid=464)[0m Instructions for updating:
[2m[36m(pid=464)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=464)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=464)[0m are not present in its tracked objects:
[2m[36m(pid=464)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=464)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=464)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=464)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=463)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=463)[0m are not present in its tracked objects:
[2m[36m(pid=463)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=463)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=4

Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-06-25_06-57-38
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -1032.0
  episode_reward_mean: -1373.6060606060605
  episode_reward_min: -1726.0
  episodes_this_iter: 66
  episodes_total: 66
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.6655796766281128
          entropy_coeff: 0.0
          kl: 0.028304753825068474
          model: {}
          policy_loss: -0.04645070433616638
          total_loss: 1542.11474609375
          vf_explained_var: 0.19557024538516998
          vf_loss: 1542.1556396484375
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,172.28.0.2:463,20,512,20000,1.0,23.011,20000.0,-1373.61,-1032.0,-1726.0,300.0
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-06-25_06-58-01
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -722.0
  episode_reward_mean: -1084.58
  episode_reward_min: -1644.0
  episodes_this_iter: 67
  episodes_total: 133
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30000001192092896
          cur_lr: 4.999999873689376e-05
          entropy: 0.6084969639778137
          entropy_coeff: 0.0
          kl: 0.025190914049744606
          model: {}
          policy_loss: -0.05230894312262535
          total_loss: 735.4019775390625
          vf_explained_var: 0.19240757822990417
          vf_loss: 735.4467163085938
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 2
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,172.28.0.2:463,20,512,20000,2.0,45.5579,40000.0,-1084.58,-722.0,-1644.0,300.0
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-06-25_06-58-23
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -425.0
  episode_reward_mean: -752.55
  episode_reward_min: -1208.0
  episodes_this_iter: 67
  episodes_total: 200
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.5499230623245239
          entropy_coeff: 0.0
          kl: 0.014201642945408821
          model: {}
          policy_loss: -0.03488915041089058
          total_loss: 453.30194091796875
          vf_explained_var: 0.11838731914758682
          vf_loss: 453.3304748535156
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 3
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,172.28.0.2:463,20,512,20000,3.0,68.0613,60000.0,-752.55,-425.0,-1208.0,300.0
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-06-25_06-58-46
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -214.0
  episode_reward_mean: -512.29
  episode_reward_min: -979.0
  episodes_this_iter: 66
  episodes_total: 266
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.4962548613548279
          entropy_coeff: 0.0
          kl: 0.011470436118543148
          model: {}
          policy_loss: -0.0264006145298481
          total_loss: 310.26300048828125
          vf_explained_var: 0.10318458080291748
          vf_loss: 310.2842712402344
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 4
  node_ip:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,172.28.0.2:463,20,512,20000,4.0,90.6838,80000.0,-512.29,-214.0,-979.0,300.0
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-06-25_06-59-08
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -156.0
  episode_reward_mean: -342.2
  episode_reward_min: -647.0
  episodes_this_iter: 67
  episodes_total: 333
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.44390439987182617
          entropy_coeff: 0.0
          kl: 0.0088455555960536
          model: {}
          policy_loss: -0.0211667250841856
          total_loss: 234.36465454101562
          vf_explained_var: 0.1540573388338089
          vf_loss: 234.3818359375
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 5
  node_ip: 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,172.28.0.2:463,20,512,20000,5.0,112.926,100000.0,-342.2,-156.0,-647.0,300.0
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-06-25_06-59-31
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -94.0
  episode_reward_mean: -242.99
  episode_reward_min: -429.0
  episodes_this_iter: 67
  episodes_total: 400
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.40162527561187744
          entropy_coeff: 0.0
          kl: 0.006970599759370089
          model: {}
          policy_loss: -0.017293790355324745
          total_loss: 171.5852813720703
          vf_explained_var: 0.26304712891578674
          vf_loss: 171.5994415283203
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 6
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


[2m[36m(pid=591)[0m Instructions for updating:
[2m[36m(pid=591)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=591)[0m 2021-06-25 06:59:35,804	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=591)[0m 2021-06-25 06:59:35,804	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=592)[0m Instructions for updating:
[2m[36m(pid=592)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=592)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=592)[0m are not present in its tracked objects:
[2m[36m(pid=592)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=592)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=592)[0m an omission. This is a strong indication

Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-06-25_07-00-06
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -1032.0
  episode_reward_mean: -1373.6060606060605
  episode_reward_min: -1726.0
  episodes_this_iter: 66
  episodes_total: 66
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.6655796766281128
          entropy_coeff: 0.0
          kl: 0.028304753825068474
          model: {}
          policy_loss: -0.04645070433616638
          total_loss: 1542.11474609375
          vf_explained_var: 0.19557027518749237
          vf_loss: 1542.1556396484375
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,172.28.0.2:591,20,512,20000,1.0,22.5153,20000.0,-1373.61,-1032.0,-1726.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-06-25_07-00-28
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -722.0
  episode_reward_mean: -1084.58
  episode_reward_min: -1644.0
  episodes_this_iter: 67
  episodes_total: 133
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30000001192092896
          cur_lr: 4.999999873689376e-05
          entropy: 0.6084969639778137
          entropy_coeff: 0.0
          kl: 0.02519093081355095
          model: {}
          policy_loss: -0.05230914056301117
          total_loss: 735.4019775390625
          vf_explained_var: 0.19240757822990417
          vf_loss: 735.4467163085938
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 2
  node_ip

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,172.28.0.2:591,20,512,20000,2.0,45.0062,40000.0,-1084.58,-722.0,-1644.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-06-25_07-00-51
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -425.0
  episode_reward_mean: -752.55
  episode_reward_min: -1208.0
  episodes_this_iter: 67
  episodes_total: 200
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.5499230623245239
          entropy_coeff: 0.0
          kl: 0.014201642945408821
          model: {}
          policy_loss: -0.03488915041089058
          total_loss: 453.30194091796875
          vf_explained_var: 0.11838731914758682
          vf_loss: 453.3304748535156
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 3
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,172.28.0.2:591,20,512,20000,3.0,67.5181,60000.0,-752.55,-425.0,-1208.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-06-25_07-01-13
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -214.0
  episode_reward_mean: -512.29
  episode_reward_min: -979.0
  episodes_this_iter: 66
  episodes_total: 266
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.49625474214553833
          entropy_coeff: 0.0
          kl: 0.011470436118543148
          model: {}
          policy_loss: -0.026400618255138397
          total_loss: 310.26300048828125
          vf_explained_var: 0.10318458080291748
          vf_loss: 310.2842712402344
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 4
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,172.28.0.2:591,20,512,20000,4.0,89.9532,80000.0,-512.29,-214.0,-979.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-06-25_07-01-36
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -156.0
  episode_reward_mean: -342.2
  episode_reward_min: -647.0
  episodes_this_iter: 67
  episodes_total: 333
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.4439043402671814
          entropy_coeff: 0.0
          kl: 0.008845553733408451
          model: {}
          policy_loss: -0.021166754886507988
          total_loss: 234.36465454101562
          vf_explained_var: 0.1540573388338089
          vf_loss: 234.3818359375
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 5
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,172.28.0.2:591,20,512,20000,5.0,112.757,100000.0,-342.2,-156.0,-647.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-06-25_07-01-59
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -94.0
  episode_reward_mean: -242.99
  episode_reward_min: -429.0
  episodes_this_iter: 67
  episodes_total: 400
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.40162527561187744
          entropy_coeff: 0.0
          kl: 0.006970594171434641
          model: {}
          policy_loss: -0.0172937773168087
          total_loss: 171.5852813720703
          vf_explained_var: 0.26304706931114197
          vf_loss: 171.59945678710938
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 6
  nod

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


[2m[36m(pid=689)[0m Instructions for updating:
[2m[36m(pid=689)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=689)[0m 2021-06-25 07:02:04,027	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=689)[0m 2021-06-25 07:02:04,028	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=688)[0m Instructions for updating:
[2m[36m(pid=688)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=688)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=688)[0m are not present in its tracked objects:
[2m[36m(pid=688)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=688)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=688)[0m an omission. This is a strong indication

Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-06-25_07-02-22
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -1174.0
  episode_reward_mean: -1416.878787878788
  episode_reward_min: -1726.0
  episodes_this_iter: 33
  episodes_total: 33
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.6891652941703796
          entropy_coeff: 0.0
          kl: 0.003971072845160961
          model: {}
          policy_loss: -0.007431670557707548
          total_loss: 2194.807373046875
          vf_explained_var: 0.009591028094291687
          vf_loss: 2194.81396484375
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since_restore:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:689,20,2048,10000,1.0,10.2644,10000.0,-1416.88,-1174.0,-1726.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-06-25_07-02-32
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -904.0
  episode_reward_mean: -1313.0151515151515
  episode_reward_min: -1726.0
  episodes_this_iter: 33
  episodes_total: 66
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 4.999999873689376e-05
          entropy: 0.6501545310020447
          entropy_coeff: 0.0
          kl: 0.022569987922906876
          model: {}
          policy_loss: -0.035024985671043396
          total_loss: 1653.03759765625
          vf_explained_var: 0.036851853132247925
          vf_loss: 1653.070556640625
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:689,20,2048,10000,2.0,20.6065,20000.0,-1313.02,-904.0,-1726.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-06-25_07-02-42
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -720.0
  episode_reward_mean: -1206.25
  episode_reward_min: -1726.0
  episodes_this_iter: 34
  episodes_total: 100
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15000000596046448
          cur_lr: 4.999999873689376e-05
          entropy: 0.5845321416854858
          entropy_coeff: 0.0
          kl: 0.019719747826457024
          model: {}
          policy_loss: -0.03321979194879532
          total_loss: 1224.773681640625
          vf_explained_var: 0.07767835259437561
          vf_loss: 1224.803955078125
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iterations_since_restore: 3
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:689,20,2048,10000,3.0,30.6812,30000.0,-1206.25,-720.0,-1726.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-06-25_07-02-52
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -600.0
  episode_reward_mean: -994.48
  episode_reward_min: -1543.0
  episodes_this_iter: 33
  episodes_total: 133
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15000000596046448
          cur_lr: 4.999999873689376e-05
          entropy: 0.5243884325027466
          entropy_coeff: 0.0
          kl: 0.012888263911008835
          model: {}
          policy_loss: -0.023995308205485344
          total_loss: 789.994873046875
          vf_explained_var: 0.14112257957458496
          vf_loss: 790.0169677734375
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 4
  node_ip

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:689,20,2048,10000,4.0,40.7156,40000.0,-994.48,-600.0,-1543.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-06-25_07-03-02
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -362.0
  episode_reward_mean: -804.63
  episode_reward_min: -1282.0
  episodes_this_iter: 33
  episodes_total: 166
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15000000596046448
          cur_lr: 4.999999873689376e-05
          entropy: 0.46906232833862305
          entropy_coeff: 0.0
          kl: 0.010832453146576881
          model: {}
          policy_loss: -0.022409094497561455
          total_loss: 555.7473754882812
          vf_explained_var: 0.2050563544034958
          vf_loss: 555.7681884765625
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_restore: 5
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:689,20,2048,10000,5.0,50.7287,50000.0,-804.63,-362.0,-1282.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-06-25_07-03-12
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -362.0
  episode_reward_mean: -649.76
  episode_reward_min: -1118.0
  episodes_this_iter: 34
  episodes_total: 200
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15000000596046448
          cur_lr: 4.999999873689376e-05
          entropy: 0.4204593300819397
          entropy_coeff: 0.0
          kl: 0.007903369143605232
          model: {}
          policy_loss: -0.014680784195661545
          total_loss: 414.50555419921875
          vf_explained_var: 0.2806284427642822
          vf_loss: 414.51904296875
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 6
  node_ip:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:689,20,2048,10000,6.0,60.8195,60000.0,-649.76,-362.0,-1118.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-06-25_07-03-22
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -203.0
  episode_reward_mean: -529.63
  episode_reward_min: -967.0
  episodes_this_iter: 33
  episodes_total: 233
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15000000596046448
          cur_lr: 4.999999873689376e-05
          entropy: 0.380587637424469
          entropy_coeff: 0.0
          kl: 0.006405563559383154
          model: {}
          policy_loss: -0.008524668402969837
          total_loss: 273.2839660644531
          vf_explained_var: 0.3432725965976715
          vf_loss: 273.2914733886719
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_since_restore: 7
  node_ip: 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:689,20,2048,10000,7.0,70.8437,70000.0,-529.63,-203.0,-967.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-06-25_07-03-33
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -203.0
  episode_reward_mean: -442.52
  episode_reward_min: -902.0
  episodes_this_iter: 33
  episodes_total: 266
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15000000596046448
          cur_lr: 4.999999873689376e-05
          entropy: 0.34902626276016235
          entropy_coeff: 0.0
          kl: 0.006177704781293869
          model: {}
          policy_loss: -0.018750693649053574
          total_loss: 219.2126007080078
          vf_explained_var: 0.3847576975822449
          vf_loss: 219.23045349121094
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 8
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:689,20,2048,10000,8.0,81.2568,80000.0,-442.52,-203.0,-902.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-06-25_07-03-43
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -181.0
  episode_reward_mean: -360.39
  episode_reward_min: -666.0
  episodes_this_iter: 34
  episodes_total: 300
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15000000596046448
          cur_lr: 4.999999873689376e-05
          entropy: 0.3082418441772461
          entropy_coeff: 0.0
          kl: 0.0041052973829209805
          model: {}
          policy_loss: -0.014482850208878517
          total_loss: 168.8410186767578
          vf_explained_var: 0.4288322329521179
          vf_loss: 168.85488891601562
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since_restore: 9
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:689,20,2048,10000,9.0,91.4796,90000.0,-360.39,-181.0,-666.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-06-25_07-03-53
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -180.0
  episode_reward_mean: -313.37
  episode_reward_min: -557.0
  episodes_this_iter: 33
  episodes_total: 333
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07500000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.27720609307289124
          entropy_coeff: 0.0
          kl: 0.004781362600624561
          model: {}
          policy_loss: -0.013201327063143253
          total_loss: 141.3533935546875
          vf_explained_var: 0.4548262655735016
          vf_loss: 141.36624145507812
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 10
  

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:689,20,2048,10000,10.0,101.575,100000.0,-313.37,-180.0,-557.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-06-25_07-04-03
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -127.0
  episode_reward_mean: -268.0
  episode_reward_min: -460.0
  episodes_this_iter: 33
  episodes_total: 366
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03750000149011612
          cur_lr: 4.999999873689376e-05
          entropy: 0.2656960189342499
          entropy_coeff: 0.0
          kl: 0.0036249980330467224
          model: {}
          policy_loss: -0.013403034768998623
          total_loss: 116.32189178466797
          vf_explained_var: 0.5051747560501099
          vf_loss: 116.33516693115234
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iterations_since_restore: 11
  

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:689,20,2048,10000,11.0,111.634,110000.0,-268.0,-127.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-06-25_07-04-13
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -95.0
  episode_reward_mean: -236.79
  episode_reward_min: -460.0
  episodes_this_iter: 34
  episodes_total: 400
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01875000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.2356177270412445
          entropy_coeff: 0.0
          kl: 0.004195921588689089
          model: {}
          policy_loss: -0.014059938490390778
          total_loss: 95.55453491210938
          vf_explained_var: 0.541361927986145
          vf_loss: 95.56851959228516
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 12
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PENDING,,10,2048,10000,,,,,,,
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


[2m[36m(pid=801)[0m Instructions for updating:
[2m[36m(pid=801)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=801)[0m 2021-06-25 07:04:18,569	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=801)[0m 2021-06-25 07:04:18,569	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=802)[0m Instructions for updating:
[2m[36m(pid=802)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=802)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=802)[0m are not present in its tracked objects:
[2m[36m(pid=802)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=802)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=802)[0m an omission. This is a strong indication

Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-06-25_07-04-36
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -1174.0
  episode_reward_mean: -1416.878787878788
  episode_reward_min: -1726.0
  episodes_this_iter: 33
  episodes_total: 33
  experiment_id: 4f83619180b0476882125d5947c2b2ad
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.6927469968795776
          entropy_coeff: 0.0
          kl: 0.00039088804624043405
          model: {}
          policy_loss: 0.004110022448003292
          total_loss: 2200.55029296875
          vf_explained_var: 0.002796560525894165
          vf_loss: 2200.546142578125
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since_restore

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:801,10,2048,10000,1.0,10.183,10000.0,-1416.88,-1174.0,-1726.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-06-25_07-04-46
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -1013.0
  episode_reward_mean: -1355.060606060606
  episode_reward_min: -1726.0
  episodes_this_iter: 33
  episodes_total: 66
  experiment_id: 4f83619180b0476882125d5947c2b2ad
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 4.999999873689376e-05
          entropy: 0.6896038055419922
          entropy_coeff: 0.0
          kl: 0.001505479565821588
          model: {}
          policy_loss: -0.010102611035108566
          total_loss: 1863.615966796875
          vf_explained_var: 0.009043216705322266
          vf_loss: 1863.6260986328125
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restor

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:801,10,2048,10000,2.0,20.0848,20000.0,-1355.06,-1013.0,-1726.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-06-25_07-04-56
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -919.0
  episode_reward_mean: -1328.14
  episode_reward_min: -1726.0
  episodes_this_iter: 34
  episodes_total: 100
  experiment_id: 4f83619180b0476882125d5947c2b2ad
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.6736770868301392
          entropy_coeff: 0.0
          kl: 0.0061995298601686954
          model: {}
          policy_loss: -0.027044573798775673
          total_loss: 1873.464111328125
          vf_explained_var: 0.01943381130695343
          vf_loss: 1873.4908447265625
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iterations_since_restore: 3
  nod

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:801,10,2048,10000,3.0,29.9039,30000.0,-1328.14,-919.0,-1726.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-06-25_07-05-06
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -817.0
  episode_reward_mean: -1218.33
  episode_reward_min: -1626.0
  episodes_this_iter: 33
  episodes_total: 133
  experiment_id: 4f83619180b0476882125d5947c2b2ad
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.6309508085250854
          entropy_coeff: 0.0
          kl: 0.01213205885142088
          model: {}
          policy_loss: -0.029525931924581528
          total_loss: 1424.11669921875
          vf_explained_var: 0.03843072056770325
          vf_loss: 1424.1456298828125
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 4
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:801,10,2048,10000,4.0,39.7217,40000.0,-1218.33,-817.0,-1626.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-06-25_07-05-16
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -631.0
  episode_reward_mean: -1095.86
  episode_reward_min: -1606.0
  episodes_this_iter: 33
  episodes_total: 166
  experiment_id: 4f83619180b0476882125d5947c2b2ad
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.5767536163330078
          entropy_coeff: 0.0
          kl: 0.011188002303242683
          model: {}
          policy_loss: -0.030287116765975952
          total_loss: 1084.8250732421875
          vf_explained_var: 0.06470693647861481
          vf_loss: 1084.854736328125
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_restore: 5
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:801,10,2048,10000,5.0,49.6924,50000.0,-1095.86,-631.0,-1606.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-06-25_07-05-26
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -579.0
  episode_reward_mean: -935.98
  episode_reward_min: -1413.0
  episodes_this_iter: 34
  episodes_total: 200
  experiment_id: 4f83619180b0476882125d5947c2b2ad
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.5236678123474121
          entropy_coeff: 0.0
          kl: 0.008859435096383095
          model: {}
          policy_loss: -0.018057474866509438
          total_loss: 864.7879028320312
          vf_explained_var: 0.09373229742050171
          vf_loss: 864.8054809570312
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 6
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:801,10,2048,10000,6.0,59.5875,60000.0,-935.98,-579.0,-1413.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-06-25_07-05-36
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -433.0
  episode_reward_mean: -780.95
  episode_reward_min: -1158.0
  episodes_this_iter: 33
  episodes_total: 233
  experiment_id: 4f83619180b0476882125d5947c2b2ad
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.4825896620750427
          entropy_coeff: 0.0
          kl: 0.007136927451938391
          model: {}
          policy_loss: -0.012750966474413872
          total_loss: 571.5410766601562
          vf_explained_var: 0.1377474069595337
          vf_loss: 571.553466796875
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_since_restore: 7
  node_ip:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:801,10,2048,10000,7.0,69.5984,70000.0,-780.95,-433.0,-1158.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-06-25_07-05-46
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -375.0
  episode_reward_mean: -664.78
  episode_reward_min: -1055.0
  episodes_this_iter: 33
  episodes_total: 266
  experiment_id: 4f83619180b0476882125d5947c2b2ad
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.4393034875392914
          entropy_coeff: 0.0
          kl: 0.004457156639546156
          model: {}
          policy_loss: -0.013328619301319122
          total_loss: 483.5673522949219
          vf_explained_var: 0.17526942491531372
          vf_loss: 483.58050537109375
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 8
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:801,10,2048,10000,8.0,79.6273,80000.0,-664.78,-375.0,-1055.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-06-25_07-05-56
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -246.0
  episode_reward_mean: -543.33
  episode_reward_min: -1015.0
  episodes_this_iter: 34
  episodes_total: 300
  experiment_id: 4f83619180b0476882125d5947c2b2ad
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 4.999999873689376e-05
          entropy: 0.4051678776741028
          entropy_coeff: 0.0
          kl: 0.004481499083340168
          model: {}
          policy_loss: -0.018244072794914246
          total_loss: 332.79632568359375
          vf_explained_var: 0.2404259294271469
          vf_loss: 332.814453125
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since_restore: 9
  node_ip: 1

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:801,10,2048,10000,9.0,89.5967,90000.0,-543.33,-246.0,-1015.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-06-25_07-06-06
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -246.0
  episode_reward_mean: -476.77
  episode_reward_min: -791.0
  episodes_this_iter: 33
  episodes_total: 333
  experiment_id: 4f83619180b0476882125d5947c2b2ad
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.37401846051216125
          entropy_coeff: 0.0
          kl: 0.0035456756595522165
          model: {}
          policy_loss: -0.0141130480915308
          total_loss: 301.3857727050781
          vf_explained_var: 0.25573405623435974
          vf_loss: 301.39984130859375
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 10
 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:801,10,2048,10000,10.0,99.4498,100000.0,-476.77,-246.0,-791.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-06-25_07-06-16
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -207.0
  episode_reward_mean: -406.55
  episode_reward_min: -791.0
  episodes_this_iter: 33
  episodes_total: 366
  experiment_id: 4f83619180b0476882125d5947c2b2ad
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0062500000931322575
          cur_lr: 4.999999873689376e-05
          entropy: 0.3529828190803528
          entropy_coeff: 0.0
          kl: 0.004030855838209391
          model: {}
          policy_loss: -0.00950455479323864
          total_loss: 247.55001831054688
          vf_explained_var: 0.30598127841949463
          vf_loss: 247.55950927734375
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iterations_since_restore: 11


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:801,10,2048,10000,11.0,109.438,110000.0,-406.55,-207.0,-791.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-06-25_07-06-26
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -182.0
  episode_reward_mean: -378.65
  episode_reward_min: -645.0
  episodes_this_iter: 34
  episodes_total: 400
  experiment_id: 4f83619180b0476882125d5947c2b2ad
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031250000465661287
          cur_lr: 4.999999873689376e-05
          entropy: 0.32571420073509216
          entropy_coeff: 0.0
          kl: 0.002876883838325739
          model: {}
          policy_loss: -0.007882369682192802
          total_loss: 241.93124389648438
          vf_explained_var: 0.32456982135772705
          vf_loss: 241.93910217285156
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 1

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:801,10,2048,10000,12.0,119.402,120000.0,-378.65,-182.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


2021-06-25 07:06:36,188	INFO pbt.py:543 -- [exploit] transferring weights from trial PPO_WasteNetEnv_8c324_00002 (score -236.79) -> PPO_WasteNetEnv_8c324_00003 (score -345.19)
2021-06-25 07:06:36,189	INFO pbt.py:558 -- [explore] perturbed config from {'lambda': 0.9, 'clip_param': 0.3, 'lr': 5e-05, 'num_sgd_iter': 20, 'sgd_minibatch_size': 2048, 'train_batch_size': 10000} -> {'lambda': 0.9615230483514837, 'clip_param': 0.48677423988167945, 'lr': 0.0001, 'num_sgd_iter': 24, 'sgd_minibatch_size': 14043, 'train_batch_size': 28086}


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-06-25_07-06-36
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -168.0
  episode_reward_mean: -345.19
  episode_reward_min: -645.0
  episodes_this_iter: 33
  episodes_total: 433
  experiment_id: 4f83619180b0476882125d5947c2b2ad
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015625000232830644
          cur_lr: 4.999999873689376e-05
          entropy: 0.30633240938186646
          entropy_coeff: 0.0
          kl: 0.0029428531415760517
          model: {}
          policy_loss: -0.004626805428415537
          total_loss: 187.84048461914062
          vf_explained_var: 0.3756711483001709
          vf_loss: 187.84512329101562
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  iterations_since_restore: 1

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,,,,,,,
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


[2m[36m(pid=902)[0m Instructions for updating:
[2m[36m(pid=902)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=902)[0m 2021-06-25 07:06:40,792	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=902)[0m 2021-06-25 07:06:40,792	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=927)[0m Instructions for updating:
[2m[36m(pid=927)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=927)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=927)[0m are not present in its tracked objects:
[2m[36m(pid=927)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=927)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=927)[0m an omission. This is a strong indication

Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-06-25_07-07-14
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -1174.0
  episode_reward_mean: -1416.878787878788
  episode_reward_min: -1726.0
  episodes_this_iter: 33
  episodes_total: 33
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.6627295017242432
          entropy_coeff: 0.0
          kl: 0.031285326927900314
          model: {}
          policy_loss: -0.049077264964580536
          total_loss: 1671.9827880859375
          vf_explained_var: 0.21256378293037415
          vf_loss: 1672.0255126953125
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since_restor

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:994,10,128,10000,1.0,13.0194,10000.0,-1416.88,-1174.0,-1726.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-06-25_07-07-27
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -625.0
  episode_reward_mean: -1199.2878787878788
  episode_reward_min: -1726.0
  episodes_this_iter: 33
  episodes_total: 66
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30000001192092896
          cur_lr: 4.999999873689376e-05
          entropy: 0.6059379577636719
          entropy_coeff: 0.0
          kl: 0.025192486122250557
          model: {}
          policy_loss: -0.046799976378679276
          total_loss: 730.176513671875
          vf_explained_var: 0.20895221829414368
          vf_loss: 730.2158203125
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 2
 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:994,10,128,10000,2.0,25.7768,20000.0,-1199.29,-625.0,-1726.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-06-25_07-07-40
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -510.0
  episode_reward_mean: -1033.44
  episode_reward_min: -1726.0
  episodes_this_iter: 34
  episodes_total: 100
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.5509965419769287
          entropy_coeff: 0.0
          kl: 0.013448664918541908
          model: {}
          policy_loss: -0.03423546254634857
          total_loss: 517.3688354492188
          vf_explained_var: 0.10981020331382751
          vf_loss: 517.39697265625
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iterations_since_restore: 3
  node_ip:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:994,10,128,10000,3.0,38.45,30000.0,-1033.44,-510.0,-1726.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-06-25_07-07-52
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -327.0
  episode_reward_mean: -731.39
  episode_reward_min: -1282.0
  episodes_this_iter: 33
  episodes_total: 133
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.509957492351532
          entropy_coeff: 0.0
          kl: 0.009681403636932373
          model: {}
          policy_loss: -0.024498404935002327
          total_loss: 384.5679016113281
          vf_explained_var: 0.06320042908191681
          vf_loss: 384.5880432128906
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 4
  node_ip

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:994,10,128,10000,4.0,51.0557,40000.0,-731.39,-327.0,-1282.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-06-25_07-08-05
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -248.0
  episode_reward_mean: -532.95
  episode_reward_min: -987.0
  episodes_this_iter: 33
  episodes_total: 166
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.4717772603034973
          entropy_coeff: 0.0
          kl: 0.006468078121542931
          model: {}
          policy_loss: -0.016951052471995354
          total_loss: 282.1720886230469
          vf_explained_var: 0.10445982217788696
          vf_loss: 282.18609619140625
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_restore: 5
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:994,10,128,10000,5.0,63.7446,50000.0,-532.95,-248.0,-987.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-06-25_07-08-18
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -120.0
  episode_reward_mean: -398.02
  episode_reward_min: -691.0
  episodes_this_iter: 34
  episodes_total: 200
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.432300329208374
          entropy_coeff: 0.0
          kl: 0.005995919927954674
          model: {}
          policy_loss: -0.016992248594760895
          total_loss: 212.5089874267578
          vf_explained_var: 0.18808868527412415
          vf_loss: 212.52328491210938
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 6
  node_ip

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:994,10,128,10000,6.0,76.2968,60000.0,-398.02,-120.0,-691.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-06-25_07-08-30
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -82.0
  episode_reward_mean: -305.91
  episode_reward_min: -544.0
  episodes_this_iter: 33
  episodes_total: 233
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.39641761779785156
          entropy_coeff: 0.0
          kl: 0.005048434250056744
          model: {}
          policy_loss: -0.0124360928311944
          total_loss: 133.85414123535156
          vf_explained_var: 0.37232574820518494
          vf_loss: 133.8643035888672
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_since_restore: 7
  node_ip:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:994,10,128,10000,7.0,88.8571,70000.0,-305.91,-82.0,-544.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-06-25_07-08-43
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -82.0
  episode_reward_mean: -245.32
  episode_reward_min: -534.0
  episodes_this_iter: 33
  episodes_total: 266
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.36987873911857605
          entropy_coeff: 0.0
          kl: 0.005278170574456453
          model: {}
          policy_loss: -0.014762068167328835
          total_loss: 94.73658752441406
          vf_explained_var: 0.49395185708999634
          vf_loss: 94.74897766113281
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 8
  node_ip

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:994,10,128,10000,8.0,101.634,80000.0,-245.32,-82.0,-534.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-06-25_07-08-56
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -79.0
  episode_reward_mean: -191.17
  episode_reward_min: -345.0
  episodes_this_iter: 34
  episodes_total: 300
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.3402855098247528
          entropy_coeff: 0.0
          kl: 0.005262432619929314
          model: {}
          policy_loss: -0.013767989352345467
          total_loss: 70.9498291015625
          vf_explained_var: 0.5799325704574585
          vf_loss: 70.96123504638672
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since_restore: 9
  node_ip: 1

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:994,10,128,10000,9.0,114.165,90000.0,-191.17,-79.0,-345.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-06-25_07-09-08
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -53.0
  episode_reward_mean: -159.6
  episode_reward_min: -303.0
  episodes_this_iter: 33
  episodes_total: 333
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.3191593885421753
          entropy_coeff: 0.0
          kl: 0.005901410710066557
          model: {}
          policy_loss: -0.014780621975660324
          total_loss: 51.30914306640625
          vf_explained_var: 0.6594942808151245
          vf_loss: 51.32126235961914
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 10
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10.0,126.704,100000.0,-159.6,-53.0,-303.0,300.0
PPO_WasteNetEnv_8c324_00005,PENDING,,10,512,20000,,,,,,,
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


[2m[36m(pid=1088)[0m Instructions for updating:
[2m[36m(pid=1088)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1088)[0m 2021-06-25 07:09:13,489	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=1088)[0m 2021-06-25 07:09:13,489	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=1089)[0m Instructions for updating:
[2m[36m(pid=1089)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1089)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=1089)[0m are not present in its tracked objects:
[2m[36m(pid=1089)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=1089)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=1089)[0m an omission. This is a strong

Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-06-25_07-09-42
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -1032.0
  episode_reward_mean: -1373.6060606060605
  episode_reward_min: -1726.0
  episodes_this_iter: 66
  episodes_total: 66
  experiment_id: 49cd2efcff114fd2b183aa8151c33e02
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.665286660194397
          entropy_coeff: 0.0
          kl: 0.028585556894540787
          model: {}
          policy_loss: -0.04261205345392227
          total_loss: 1885.9422607421875
          vf_explained_var: 0.15075308084487915
          vf_loss: 1885.9793701171875
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:1088,10,512,20000,1.0,21.2672,20000.0,-1373.61,-1032.0,-1726.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10.0,126.704,100000.0,-159.6,-53.0,-303.0,300.0
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-06-25_07-10-03
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -751.0
  episode_reward_mean: -1113.44
  episode_reward_min: -1644.0
  episodes_this_iter: 67
  episodes_total: 133
  experiment_id: 49cd2efcff114fd2b183aa8151c33e02
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30000001192092896
          cur_lr: 4.999999873689376e-05
          entropy: 0.6048493385314941
          entropy_coeff: 0.0
          kl: 0.02298310399055481
          model: {}
          policy_loss: -0.045292552560567856
          total_loss: 940.0050048828125
          vf_explained_var: 0.22567039728164673
          vf_loss: 940.0432739257812
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 2
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:1088,10,512,20000,2.0,41.9104,40000.0,-1113.44,-751.0,-1644.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10.0,126.704,100000.0,-159.6,-53.0,-303.0,300.0
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-06-25_07-10-24
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -469.0
  episode_reward_mean: -815.05
  episode_reward_min: -1323.0
  episodes_this_iter: 67
  episodes_total: 200
  experiment_id: 49cd2efcff114fd2b183aa8151c33e02
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.5541078448295593
          entropy_coeff: 0.0
          kl: 0.011688734404742718
          model: {}
          policy_loss: -0.033035848289728165
          total_loss: 542.08544921875
          vf_explained_var: 0.24967074394226074
          vf_loss: 542.1133422851562
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 3
  node_ip:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:1088,10,512,20000,3.0,62.7773,60000.0,-815.05,-469.0,-1323.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10.0,126.704,100000.0,-159.6,-53.0,-303.0,300.0
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-06-25_07-10-45
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -304.0
  episode_reward_mean: -594.15
  episode_reward_min: -956.0
  episodes_this_iter: 66
  episodes_total: 266
  experiment_id: 49cd2efcff114fd2b183aa8151c33e02
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.5061208009719849
          entropy_coeff: 0.0
          kl: 0.009503154084086418
          model: {}
          policy_loss: -0.026360278949141502
          total_loss: 311.44281005859375
          vf_explained_var: 0.28212249279022217
          vf_loss: 311.4648742675781
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 4
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:1088,10,512,20000,4.0,83.7881,80000.0,-594.15,-304.0,-956.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10.0,126.704,100000.0,-159.6,-53.0,-303.0,300.0
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-06-25_07-11-05
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -177.0
  episode_reward_mean: -406.25
  episode_reward_min: -758.0
  episodes_this_iter: 67
  episodes_total: 333
  experiment_id: 49cd2efcff114fd2b183aa8151c33e02
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.45912986993789673
          entropy_coeff: 0.0
          kl: 0.008163008838891983
          model: {}
          policy_loss: -0.021989993751049042
          total_loss: 208.7262420654297
          vf_explained_var: 0.32460838556289673
          vf_loss: 208.74456787109375
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 5
  

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:1088,10,512,20000,5.0,104.546,100000.0,-406.25,-177.0,-758.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10.0,126.704,100000.0,-159.6,-53.0,-303.0,300.0
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


2021-06-25 07:11:26,787	INFO pbt.py:543 -- [exploit] transferring weights from trial PPO_WasteNetEnv_8c324_00004 (score -159.6) -> PPO_WasteNetEnv_8c324_00005 (score -307.72)
2021-06-25 07:11:26,788	INFO pbt.py:558 -- [explore] perturbed config from {'lambda': 0.9, 'clip_param': 0.3, 'lr': 5e-05, 'num_sgd_iter': 10, 'sgd_minibatch_size': 128, 'train_batch_size': 10000} -> {'lambda': 0.972751454205281, 'clip_param': 0.36, 'lr': 1e-05, 'num_sgd_iter': 12, 'sgd_minibatch_size': 153, 'train_batch_size': 12000}


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-06-25_07-11-26
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -152.0
  episode_reward_mean: -307.72
  episode_reward_min: -492.0
  episodes_this_iter: 67
  episodes_total: 400
  experiment_id: 49cd2efcff114fd2b183aa8151c33e02
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.4213167428970337
          entropy_coeff: 0.0
          kl: 0.005602619145065546
          model: {}
          policy_loss: -0.017071641981601715
          total_loss: 167.0237579345703
          vf_explained_var: 0.3529297113418579
          vf_loss: 167.03831481933594
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 6
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10.0,126.704,100000.0,-159.6,-53.0,-303.0,300.0
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6.0,125.33,120000.0,-307.72,-152.0,-492.0,300.0
PPO_WasteNetEnv_8c324_00006,PENDING,,30,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


[2m[36m(pid=1165)[0m Instructions for updating:
[2m[36m(pid=1165)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1165)[0m 2021-06-25 07:11:31,522	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=1165)[0m 2021-06-25 07:11:31,522	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=1166)[0m Instructions for updating:
[2m[36m(pid=1166)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1166)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=1166)[0m are not present in its tracked objects:
[2m[36m(pid=1166)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=1166)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=1166)[0m an omission. This is a strong

Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-06-25_07-12-29
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -1032.0
  episode_reward_mean: -1373.6060606060605
  episode_reward_min: -1726.0
  episodes_this_iter: 66
  episodes_total: 66
  experiment_id: 1b2163214974428da8e226cb6893ea49
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.6679884791374207
          entropy_coeff: 0.0
          kl: 0.025824937969446182
          model: {}
          policy_loss: -0.05307336896657944
          total_loss: 1433.4151611328125
          vf_explained_var: 0.11215400695800781
          vf_loss: 1433.462890625
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 1

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1254,30,128,20000,1.0,37.2192,20000.0,-1373.61,-1032.0,-1726.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10.0,126.704,100000.0,-159.6,-53.0,-303.0,300.0
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6.0,125.33,120000.0,-307.72,-152.0,-492.0,300.0
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-06-25_07-13-06
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -700.0
  episode_reward_mean: -1060.34
  episode_reward_min: -1644.0
  episodes_this_iter: 67
  episodes_total: 133
  experiment_id: 1b2163214974428da8e226cb6893ea49
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30000001192092896
          cur_lr: 4.999999873689376e-05
          entropy: 0.6020565629005432
          entropy_coeff: 0.0
          kl: 0.030081061646342278
          model: {}
          policy_loss: -0.05843007192015648
          total_loss: 662.431396484375
          vf_explained_var: 0.1851769983768463
          vf_loss: 662.4807739257812
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 2
  node_ip:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1254,30,128,20000,2.0,74.2243,40000.0,-1060.34,-700.0,-1644.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10.0,126.704,100000.0,-159.6,-53.0,-303.0,300.0
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6.0,125.33,120000.0,-307.72,-152.0,-492.0,300.0
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-06-25_07-13-43
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -354.0
  episode_reward_mean: -693.0
  episode_reward_min: -1151.0
  episodes_this_iter: 67
  episodes_total: 200
  experiment_id: 1b2163214974428da8e226cb6893ea49
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.5362263917922974
          entropy_coeff: 0.0
          kl: 0.016204049810767174
          model: {}
          policy_loss: -0.04064571484923363
          total_loss: 274.5264892578125
          vf_explained_var: 0.3391059339046478
          vf_loss: 274.5598449707031
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 3
  node_ip: 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1254,30,128,20000,3.0,111.298,60000.0,-693.0,-354.0,-1151.0,300.0
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10.0,126.704,100000.0,-159.6,-53.0,-303.0,300.0
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6.0,125.33,120000.0,-307.72,-152.0,-492.0,300.0
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


2021-06-25 07:14:20,716	INFO pbt.py:543 -- [exploit] transferring weights from trial PPO_WasteNetEnv_8c324_00002 (score -236.79) -> PPO_WasteNetEnv_8c324_00006 (score -450.23)
2021-06-25 07:14:20,719	INFO pbt.py:558 -- [explore] perturbed config from {'lambda': 0.9, 'clip_param': 0.3, 'lr': 5e-05, 'num_sgd_iter': 20, 'sgd_minibatch_size': 2048, 'train_batch_size': 10000} -> {'lambda': 1.08, 'clip_param': 0.24, 'lr': 0.0001, 'num_sgd_iter': 16, 'sgd_minibatch_size': 1638, 'train_batch_size': 8000}


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-06-25_07-14-20
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -130.0
  episode_reward_mean: -450.23
  episode_reward_min: -880.0
  episodes_this_iter: 66
  episodes_total: 266
  experiment_id: 1b2163214974428da8e226cb6893ea49
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.47291505336761475
          entropy_coeff: 0.0
          kl: 0.011693961918354034
          model: {}
          policy_loss: -0.03296218812465668
          total_loss: 145.34429931640625
          vf_explained_var: 0.46804770827293396
          vf_loss: 145.37200927734375
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 4
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10.0,126.704,100000.0,-159.6,-53.0,-303.0,300.0
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6.0,125.33,120000.0,-307.72,-152.0,-492.0,300.0
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4.0,148.335,80000.0,-450.23,-130.0,-880.0,300.0
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,,,,,,,


[2m[36m(pid=1331)[0m Instructions for updating:
[2m[36m(pid=1331)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1331)[0m 2021-06-25 07:14:25,082	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=1331)[0m 2021-06-25 07:14:25,082	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,,10,128,20000,,,,,,,
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6.0,135.451,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6.0,135.228,120000.0,-242.99,-94.0,-429.0,300.0
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13.0,129.506,130000.0,-345.19,-168.0,-645.0,300.0
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10.0,126.704,100000.0,-159.6,-53.0,-303.0,300.0
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6.0,125.33,120000.0,-307.72,-152.0,-492.0,300.0
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4.0,148.335,80000.0,-450.23,-130.0,-880.0,300.0
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,12.0,121.744,120000.0,-236.79,-95.0,-460.0,300.0


[2m[36m(pid=1330)[0m Instructions for updating:
[2m[36m(pid=1330)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1330)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=1330)[0m are not present in its tracked objects:
[2m[36m(pid=1330)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=1330)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=1330)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=1330)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=1331)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=1331)[0m are not present in its tracked objects:
[2m[36m(pid=1331)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=1331)[0m It is possible that this is intended behavior, but it is more likely
[2

Result for PPO_WasteNetEnv_8c324_00007:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-06-25_07-15-10
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -1032.0
  episode_reward_mean: -1373.6060606060605
  episode_reward_min: -1726.0
  episodes_this_iter: 66
  episodes_total: 66
  experiment_id: 3f76d40dd1a742148c2f454fc19b1738
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.6663699746131897
          entropy_coeff: 0.0
          kl: 0.027503179386258125
          model: {}
          policy_loss: -0.046776168048381805
          total_loss: 1480.823486328125
          vf_explained_var: 0.16069796681404114
          vf_loss: 1480.864990234375
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,172.28.0.2:1423,10,128,20000,1,24.9429,20000,-1373.61,-1032,-1726,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,12,121.744,120000,-236.79,-95,-460,300


Result for PPO_WasteNetEnv_8c324_00007:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-06-25_07-15-35
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -725.0
  episode_reward_mean: -1079.71
  episode_reward_min: -1644.0
  episodes_this_iter: 67
  episodes_total: 133
  experiment_id: 3f76d40dd1a742148c2f454fc19b1738
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30000001192092896
          cur_lr: 4.999999873689376e-05
          entropy: 0.6088528037071228
          entropy_coeff: 0.0
          kl: 0.026200085878372192
          model: {}
          policy_loss: -0.05122913047671318
          total_loss: 723.8287963867188
          vf_explained_var: 0.15237842500209808
          vf_loss: 723.8721923828125
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 2
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,172.28.0.2:1423,10,128,20000,2,49.6984,40000,-1079.71,-725,-1644,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,12,121.744,120000,-236.79,-95,-460,300


Result for PPO_WasteNetEnv_8c324_00007:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-06-25_07-16-00
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -424.0
  episode_reward_mean: -741.04
  episode_reward_min: -1225.0
  episodes_this_iter: 67
  episodes_total: 200
  experiment_id: 3f76d40dd1a742148c2f454fc19b1738
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.551002025604248
          entropy_coeff: 0.0
          kl: 0.013578479178249836
          model: {}
          policy_loss: -0.0327347069978714
          total_loss: 397.4873046875
          vf_explained_var: 0.16420698165893555
          vf_loss: 397.513916015625
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 3
  node_ip: 172.

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,172.28.0.2:1423,10,128,20000,3,74.71,60000,-741.04,-424,-1225,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,12,121.744,120000,-236.79,-95,-460,300


Result for PPO_WasteNetEnv_8c324_00007:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-06-25_07-16-25
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -226.0
  episode_reward_mean: -511.14
  episode_reward_min: -986.0
  episodes_this_iter: 66
  episodes_total: 266
  experiment_id: 3f76d40dd1a742148c2f454fc19b1738
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.4982525706291199
          entropy_coeff: 0.0
          kl: 0.010778131894767284
          model: {}
          policy_loss: -0.025807080790400505
          total_loss: 262.82080078125
          vf_explained_var: 0.18554630875587463
          vf_loss: 262.84173583984375
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 4
  node_ip:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,172.28.0.2:1423,10,128,20000,4,99.837,80000,-511.14,-226,-986,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,12,121.744,120000,-236.79,-95,-460,300


Result for PPO_WasteNetEnv_8c324_00007:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-06-25_07-16-51
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -164.0
  episode_reward_mean: -344.32
  episode_reward_min: -651.0
  episodes_this_iter: 67
  episodes_total: 333
  experiment_id: 3f76d40dd1a742148c2f454fc19b1738
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 0.44437769055366516
          entropy_coeff: 0.0
          kl: 0.010217576287686825
          model: {}
          policy_loss: -0.021472444757819176
          total_loss: 185.7744140625
          vf_explained_var: 0.29356545209884644
          vf_loss: 185.7913055419922
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 5
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,12,121.744,120000,-236.79,-95,-460,300


[2m[36m(pid=1512)[0m Instructions for updating:
[2m[36m(pid=1512)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1512)[0m 2021-06-25 07:16:55,825	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=1512)[0m 2021-06-25 07:16:55,825	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,,20,2048,10000,12,121.744,120000,-236.79,-95,-460,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300


[2m[36m(pid=1513)[0m Instructions for updating:
[2m[36m(pid=1513)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1513)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=1513)[0m are not present in its tracked objects:
[2m[36m(pid=1513)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=1513)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=1513)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=1513)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=1512)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=1512)[0m are not present in its tracked objects:
[2m[36m(pid=1512)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=1512)[0m It is possible that this is intended behavior, but it is more likely
[2

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,,20,2048,10000,12,121.744,120000,-236.79,-95,-460,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300


[2m[36m(pid=1512)[0m 2021-06-25 07:17:04,017	INFO trainable.py:378 -- Restored on 172.28.0.2 from checkpoint: /content/ray_results/wastenet_ppo_tune/PPO_WasteNetEnv_8c324_00002_2_num_sgd_iter=20,sgd_minibatch_size=2048,train_batch_size=10000_2021-06-25_06-59-31/tmpe5kkahysrestore_from_object/checkpoint-12
[2m[36m(pid=1512)[0m 2021-06-25 07:17:04,017	INFO trainable.py:385 -- Current state after restoring: {'_iteration': 12, '_timesteps_total': None, '_time_total': 121.74419379234314, '_episodes_total': 400}


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-06-25_07-17-14
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -84.0
  episode_reward_mean: -190.96969696969697
  episode_reward_min: -284.0
  episodes_this_iter: 33
  episodes_total: 433
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.22841772437095642
          entropy_coeff: 0.0
          kl: 0.0020630108192563057
          model: {}
          policy_loss: -0.005116707645356655
          total_loss: 83.31045532226562
          vf_explained_var: 0.5816450715065002
          vf_loss: 83.3151626586914
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  iterations_since_rest

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:1512,20,2048,10000,13,131.961,130000,-190.97,-84,-284,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-06-25_07-17-24
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -84.0
  episode_reward_mean: -182.0
  episode_reward_min: -284.0
  episodes_this_iter: 33
  episodes_total: 466
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 4.999999873689376e-05
          entropy: 0.21455983817577362
          entropy_coeff: 0.0
          kl: 0.00410229479894042
          model: {}
          policy_loss: -0.009061901830136776
          total_loss: 71.266357421875
          vf_explained_var: 0.6140012741088867
          vf_loss: 71.2750015258789
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_restore: 2
  node_ip:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:1512,20,2048,10000,14,142.136,140000,-182.0,-84,-284,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-06-25_07-17-34
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -63.0
  episode_reward_mean: -170.85
  episode_reward_min: -284.0
  episodes_this_iter: 34
  episodes_total: 500
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.2031804472208023
          entropy_coeff: 0.0
          kl: 0.0032109154853969812
          model: {}
          policy_loss: -0.004656615667045116
          total_loss: 60.6142578125
          vf_explained_var: 0.6377132534980774
          vf_loss: 60.618751525878906
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000
  iterations_since_restore: 3
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:1512,20,2048,10000,15,152.223,150000,-170.85,-63,-284,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-06-25_07-17-44
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -63.0
  episode_reward_mean: -153.08
  episode_reward_min: -278.0
  episodes_this_iter: 33
  episodes_total: 533
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 4.999999873689376e-05
          entropy: 0.19741661846637726
          entropy_coeff: 0.0
          kl: 0.0021626686211675406
          model: {}
          policy_loss: -0.004663567058742046
          total_loss: 52.2779655456543
          vf_explained_var: 0.6712217926979065
          vf_loss: 52.28257751464844
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 4
  nod

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:1512,20,2048,10000,16,162.173,160000,-153.08,-63,-278,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-06-25_07-17-54
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -58.0
  episode_reward_mean: -141.71
  episode_reward_min: -259.0
  episodes_this_iter: 33
  episodes_total: 566
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.18856289982795715
          entropy_coeff: 0.0
          kl: 0.002975242445245385
          model: {}
          policy_loss: -0.006895768456161022
          total_loss: 54.15354919433594
          vf_explained_var: 0.6618754863739014
          vf_loss: 54.16040802001953
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000
  iterations_since_restore: 5
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:1512,20,2048,10000,17,172.127,170000,-141.71,-58,-259,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-06-25_07-18-04
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -49.0
  episode_reward_mean: -134.46
  episode_reward_min: -258.0
  episodes_this_iter: 34
  episodes_total: 600
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0062500000931322575
          cur_lr: 4.999999873689376e-05
          entropy: 0.17559663951396942
          entropy_coeff: 0.0
          kl: 0.0024773222394287586
          model: {}
          policy_loss: -0.008409330621361732
          total_loss: 50.07133483886719
          vf_explained_var: 0.6779707074165344
          vf_loss: 50.07973098754883
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  iterations_since_restore: 6
  

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:1512,20,2048,10000,18,182.127,180000,-134.46,-49,-258,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-06-25_07-18-14
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -49.0
  episode_reward_mean: -126.43
  episode_reward_min: -258.0
  episodes_this_iter: 33
  episodes_total: 633
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031250000465661287
          cur_lr: 4.999999873689376e-05
          entropy: 0.16974413394927979
          entropy_coeff: 0.0
          kl: 0.0018804057035595179
          model: {}
          policy_loss: -0.013743339106440544
          total_loss: 41.23089599609375
          vf_explained_var: 0.7143781185150146
          vf_loss: 41.24463653564453
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 190000
  iterations_since_restore: 7
  

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:1512,20,2048,10000,19,192.127,190000,-126.43,-49,-258,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-06-25_07-18-24
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -49.0
  episode_reward_mean: -115.65
  episode_reward_min: -256.0
  episodes_this_iter: 33
  episodes_total: 666
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015625000232830644
          cur_lr: 4.999999873689376e-05
          entropy: 0.16137608885765076
          entropy_coeff: 0.0
          kl: 0.002574753947556019
          model: {}
          policy_loss: -0.0077271610498428345
          total_loss: 37.5389404296875
          vf_explained_var: 0.7342050075531006
          vf_loss: 37.54666519165039
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  iterations_since_restore: 8
  n

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:1512,20,2048,10000,20,202.324,200000,-115.65,-49,-256,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-06-25_07-18-34
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -39.0
  episode_reward_mean: -100.52
  episode_reward_min: -212.0
  episodes_this_iter: 34
  episodes_total: 700
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812500116415322
          cur_lr: 4.999999873689376e-05
          entropy: 0.1508558690547943
          entropy_coeff: 0.0
          kl: 0.002821437083184719
          model: {}
          policy_loss: -0.006460774689912796
          total_loss: 30.92892074584961
          vf_explained_var: 0.7613314390182495
          vf_loss: 30.93537712097168
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000
  iterations_since_restore: 9
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:1512,20,2048,10000,21,212.397,210000,-100.52,-39,-212,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-06-25_07-18-45
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -39.0
  episode_reward_mean: -95.7
  episode_reward_min: -201.0
  episodes_this_iter: 33
  episodes_total: 733
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003906250058207661
          cur_lr: 4.999999873689376e-05
          entropy: 0.14833581447601318
          entropy_coeff: 0.0
          kl: 0.0017231854144483805
          model: {}
          policy_loss: -0.0018881261348724365
          total_loss: 36.830745697021484
          vf_explained_var: 0.7323019504547119
          vf_loss: 36.83263397216797
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterations_since_restore: 10
 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:1512,20,2048,10000,22,222.609,220000,-95.7,-39,-201,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-06-25_07-18-55
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -39.0
  episode_reward_mean: -88.58
  episode_reward_min: -173.0
  episodes_this_iter: 33
  episodes_total: 766
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019531250291038305
          cur_lr: 4.999999873689376e-05
          entropy: 0.14538441598415375
          entropy_coeff: 0.0
          kl: 0.0012867687037214637
          model: {}
          policy_loss: -0.003390165511518717
          total_loss: 30.674976348876953
          vf_explained_var: 0.7654518485069275
          vf_loss: 30.67836570739746
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000
  iterations_since_restore: 11


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:1512,20,2048,10000,23,232.633,230000,-88.58,-39,-173,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-06-25_07-19-05
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -6.0
  episode_reward_mean: -87.62
  episode_reward_min: -173.0
  episodes_this_iter: 34
  episodes_total: 800
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.765625145519152e-05
          cur_lr: 4.999999873689376e-05
          entropy: 0.14148232340812683
          entropy_coeff: 0.0
          kl: 0.0016443373169749975
          model: {}
          policy_loss: -0.008004935458302498
          total_loss: 28.370498657226562
          vf_explained_var: 0.7734156847000122
          vf_loss: 28.378501892089844
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  iterations_since_restore: 12
 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300


[2m[36m(pid=1631)[0m Instructions for updating:
[2m[36m(pid=1631)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1631)[0m 2021-06-25 07:19:10,265	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=1631)[0m 2021-06-25 07:19:10,265	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300


[2m[36m(pid=1632)[0m Instructions for updating:
[2m[36m(pid=1632)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1632)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=1632)[0m are not present in its tracked objects:
[2m[36m(pid=1632)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=1632)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=1632)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=1632)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=1631)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=1631)[0m are not present in its tracked objects:
[2m[36m(pid=1631)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=1631)[0m It is possible that this is intended behavior, but it is more likely
[2

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,,24,14043,28086,13,129.506,130000,-345.19,-168,-645,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300


[2m[36m(pid=1631)[0m 2021-06-25 07:19:18,297	INFO trainable.py:378 -- Restored on 172.28.0.2 from checkpoint: /content/ray_results/wastenet_ppo_tune/PPO_WasteNetEnv_8c324_00003_3_num_sgd_iter=10,sgd_minibatch_size=2048,train_batch_size=10000_2021-06-25_07-01-59/tmp4v4g8rhjrestore_from_object/checkpoint-12
[2m[36m(pid=1631)[0m 2021-06-25 07:19:18,299	INFO trainable.py:385 -- Current state after restoring: {'_iteration': 12, '_timesteps_total': None, '_time_total': 121.74419379234314, '_episodes_total': 400}


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 148086
  custom_metrics: {}
  date: 2021-06-25_07-19-46
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -84.0
  episode_reward_mean: -193.55913978494624
  episode_reward_min: -359.0
  episodes_this_iter: 93
  episodes_total: 493
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 9.999999747378752e-05
          entropy: 0.22488367557525635
          entropy_coeff: 0.0
          kl: 0.0069376155734062195
          model: {}
          policy_loss: -0.009609299711883068
          total_loss: 242.08221435546875
          vf_explained_var: 0.33607518672943115
          vf_loss: 242.09042358398438
    num_agent_steps_sampled: 148086
    num_agent_steps_trained: 148086
    num_steps_sampled: 148086
    num_steps_trained: 148086
  iterations_since_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:1631,24,14043,28086,13,149.717,148086,-193.559,-84,-359,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 176172
  custom_metrics: {}
  date: 2021-06-25_07-20-13
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -73.0
  episode_reward_mean: -171.01
  episode_reward_min: -302.0
  episodes_this_iter: 94
  episodes_total: 587
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 9.999999747378752e-05
          entropy: 0.20951500535011292
          entropy_coeff: 0.0
          kl: 0.0016541721997782588
          model: {}
          policy_loss: -0.005497604608535767
          total_loss: 180.60693359375
          vf_explained_var: 0.3760477602481842
          vf_loss: 180.61209106445312
    num_agent_steps_sampled: 176172
    num_agent_steps_trained: 176172
    num_steps_sampled: 176172
    num_steps_trained: 176172
  iterations_since_restore: 2
  nod

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:1631,24,14043,28086,14,176.902,176172,-171.01,-73,-302,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 204258
  custom_metrics: {}
  date: 2021-06-25_07-20-41
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -67.0
  episode_reward_mean: -151.04
  episode_reward_min: -283.0
  episodes_this_iter: 93
  episodes_total: 680
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 9.999999747378752e-05
          entropy: 0.1931506097316742
          entropy_coeff: 0.0
          kl: 0.004730207845568657
          model: {}
          policy_loss: -0.006278883665800095
          total_loss: 143.814697265625
          vf_explained_var: 0.41053760051727295
          vf_loss: 143.82049560546875
    num_agent_steps_sampled: 204258
    num_agent_steps_trained: 204258
    num_steps_sampled: 204258
    num_steps_trained: 204258
  iterations_since_restore: 3
  nod

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:1631,24,14043,28086,15,204.668,204258,-151.04,-67,-283,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 232344
  custom_metrics: {}
  date: 2021-06-25_07-21-08
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -60.0
  episode_reward_mean: -137.38
  episode_reward_min: -263.0
  episodes_this_iter: 94
  episodes_total: 774
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 9.999999747378752e-05
          entropy: 0.18027007579803467
          entropy_coeff: 0.0
          kl: 0.0035629530902951956
          model: {}
          policy_loss: -0.005978889763355255
          total_loss: 133.95123291015625
          vf_explained_var: 0.41725942492485046
          vf_loss: 133.95704650878906
    num_agent_steps_sampled: 232344
    num_agent_steps_trained: 232344
    num_steps_sampled: 232344
    num_steps_trained: 232344
  iterations_since_restore: 4
 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:1631,24,14043,28086,16,232.101,232344,-137.38,-60,-263,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 260430
  custom_metrics: {}
  date: 2021-06-25_07-21-36
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -61.0
  episode_reward_mean: -125.53
  episode_reward_min: -243.0
  episodes_this_iter: 94
  episodes_total: 868
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 9.999999747378752e-05
          entropy: 0.1680763214826584
          entropy_coeff: 0.0
          kl: 0.00541779724881053
          model: {}
          policy_loss: -0.005131586920469999
          total_loss: 112.92448425292969
          vf_explained_var: 0.4568294882774353
          vf_loss: 112.92948913574219
    num_agent_steps_sampled: 260430
    num_agent_steps_trained: 260430
    num_steps_sampled: 260430
    num_steps_trained: 260430
  iterations_since_restore: 5
  nod

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300


[2m[36m(pid=1762)[0m Instructions for updating:
[2m[36m(pid=1762)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1762)[0m 2021-06-25 07:21:41,210	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=1762)[0m 2021-06-25 07:21:41,210	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


[2m[36m(pid=1789)[0m Instructions for updating:
[2m[36m(pid=1789)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1789)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=1789)[0m are not present in its tracked objects:
[2m[36m(pid=1789)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=1789)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=1789)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=1789)[0m formulated as a subclassed Layer rather than a Lambda layer.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,,16,1638,8000,4,148.335,80000,-450.23,-130,-880,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


[2m[36m(pid=1762)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=1762)[0m are not present in its tracked objects:
[2m[36m(pid=1762)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=1762)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=1762)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=1762)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=1762)[0m 2021-06-25 07:21:49,724	INFO trainable.py:378 -- Restored on 172.28.0.2 from checkpoint: /content/ray_results/wastenet_ppo_tune/PPO_WasteNetEnv_8c324_00006_6_num_sgd_iter=30,sgd_minibatch_size=128,train_batch_size=20000_2021-06-25_07-09-08/tmpursncko_restore_from_object/checkpoint-12
[2m[36m(pid=1762)[0m 2021-06-25 07:21:49,725	INFO trainable.py:385 -- Current state after restoring: {'_iteration': 12, '_timesteps_total': None, '_time_total': 121.7441

Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-06-25_07-21-57
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -84.0
  episode_reward_mean: -180.84615384615384
  episode_reward_min: -281.0
  episodes_this_iter: 26
  episodes_total: 426
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 9.999999747378752e-05
          entropy: 0.2394810914993286
          entropy_coeff: 0.0
          kl: 0.0022196995560079813
          model: {}
          policy_loss: -0.0027264789678156376
          total_loss: 1061153669120.0
          vf_explained_var: 8.344650268554688e-07
          vf_loss: 1061153669120.0
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  iterations_since_rest

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1762,16,1638,8000,13,130.012,128000,-180.846,-84,-281,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-06-25_07-22-06
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -84.0
  episode_reward_mean: -190.03773584905662
  episode_reward_min: -281.0
  episodes_this_iter: 27
  episodes_total: 453
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 9.999999747378752e-05
          entropy: 0.22679118812084198
          entropy_coeff: 0.0
          kl: 0.003398965112864971
          model: {}
          policy_loss: -0.013376733288168907
          total_loss: 1424123101184.0
          vf_explained_var: -7.450580596923828e-08
          vf_loss: 1424123101184.0
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  iterations_since_rest

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1762,16,1638,8000,14,138.117,136000,-190.038,-84,-281,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-06-25_07-22-14
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -84.0
  episode_reward_mean: -185.9625
  episode_reward_min: -284.0
  episodes_this_iter: 27
  episodes_total: 480
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 9.999999747378752e-05
          entropy: 0.22994427382946014
          entropy_coeff: 0.0
          kl: 0.0032333757262676954
          model: {}
          policy_loss: -0.0013539292849600315
          total_loss: 898732916736.0
          vf_explained_var: 4.3213367462158203e-07
          vf_loss: 898732916736.0
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000
  iterations_since_restore: 3
  n

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1762,16,1638,8000,15,146.192,144000,-185.963,-84,-284,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-06-25_07-22-22
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -101.0
  episode_reward_mean: -186.49
  episode_reward_min: -327.0
  episodes_this_iter: 26
  episodes_total: 506
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 9.999999747378752e-05
          entropy: 0.22416025400161743
          entropy_coeff: 0.0
          kl: 0.004791107960045338
          model: {}
          policy_loss: 0.0017299670726060867
          total_loss: 818657296384.0
          vf_explained_var: 6.854534149169922e-07
          vf_loss: 818657296384.0
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
  iterations_since_restore: 4
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1762,16,1638,8000,16,154.279,152000,-186.49,-101,-327,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-06-25_07-22-30
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -101.0
  episode_reward_mean: -191.74
  episode_reward_min: -327.0
  episodes_this_iter: 27
  episodes_total: 533
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 9.999999747378752e-05
          entropy: 0.22534765303134918
          entropy_coeff: 0.0
          kl: 0.004903239198029041
          model: {}
          policy_loss: 0.00679819704964757
          total_loss: 700365537280.0
          vf_explained_var: -4.470348358154297e-07
          vf_loss: 700365537280.0
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 5
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1762,16,1638,8000,17,162.269,160000,-191.74,-101,-327,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-06-25_07-22-38
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -101.0
  episode_reward_mean: -197.85
  episode_reward_min: -327.0
  episodes_this_iter: 27
  episodes_total: 560
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0062500000931322575
          cur_lr: 9.999999747378752e-05
          entropy: 0.24075214564800262
          entropy_coeff: 0.0
          kl: 0.004139234311878681
          model: {}
          policy_loss: -0.008000276982784271
          total_loss: 1703120207872.0
          vf_explained_var: 9.238719940185547e-07
          vf_loss: 1703120207872.0
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000
  iterations_since_restore: 6
  n

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1762,16,1638,8000,18,170.298,168000,-197.85,-101,-327,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-06-25_07-22-46
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -101.0
  episode_reward_mean: -212.91
  episode_reward_min: -384.0
  episodes_this_iter: 26
  episodes_total: 586
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031250000465661287
          cur_lr: 9.999999747378752e-05
          entropy: 0.24016031622886658
          entropy_coeff: 0.0
          kl: 0.00395078444853425
          model: {}
          policy_loss: 0.0017269635573029518
          total_loss: 1836946423808.0
          vf_explained_var: 1.296401023864746e-06
          vf_loss: 1836946423808.0
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000
  iterations_since_restore: 7
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1762,16,1638,8000,19,178.395,176000,-212.91,-101,-384,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-06-25_07-22-54
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -101.0
  episode_reward_mean: -224.38
  episode_reward_min: -384.0
  episodes_this_iter: 27
  episodes_total: 613
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015625000232830644
          cur_lr: 9.999999747378752e-05
          entropy: 0.24324819445610046
          entropy_coeff: 0.0
          kl: 0.002292770193889737
          model: {}
          policy_loss: -0.014943230897188187
          total_loss: 1536985661440.0
          vf_explained_var: 2.4437904357910156e-06
          vf_loss: 1536985661440.0
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184000
  iterations_since_restore: 8
  

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1762,16,1638,8000,20,186.505,184000,-224.38,-101,-384,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-06-25_07-23-02
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -96.0
  episode_reward_mean: -236.17
  episode_reward_min: -488.0
  episodes_this_iter: 27
  episodes_total: 640
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812500116415322
          cur_lr: 9.999999747378752e-05
          entropy: 0.26489052176475525
          entropy_coeff: 0.0
          kl: 0.00581652345135808
          model: {}
          policy_loss: -0.0065928734838962555
          total_loss: 2687568969728.0
          vf_explained_var: 1.0132789611816406e-06
          vf_loss: 2687568969728.0
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000
  iterations_since_restore: 9
  n

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1762,16,1638,8000,21,194.546,192000,-236.17,-96,-488,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-06-25_07-23-10
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -96.0
  episode_reward_mean: -243.05
  episode_reward_min: -488.0
  episodes_this_iter: 26
  episodes_total: 666
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812500116415322
          cur_lr: 9.999999747378752e-05
          entropy: 0.24736058712005615
          entropy_coeff: 0.0
          kl: 0.00565243698656559
          model: {}
          policy_loss: -0.008309797383844852
          total_loss: 2174823170048.0
          vf_explained_var: 1.6391277313232422e-06
          vf_loss: 2174823170048.0
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  iterations_since_restore: 10
  n

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1762,16,1638,8000,22,202.667,200000,-243.05,-96,-488,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-06-25_07-23-18
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -96.0
  episode_reward_mean: -245.75
  episode_reward_min: -488.0
  episodes_this_iter: 27
  episodes_total: 693
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812500116415322
          cur_lr: 9.999999747378752e-05
          entropy: 0.24257655441761017
          entropy_coeff: 0.0
          kl: 0.003274376969784498
          model: {}
          policy_loss: -0.0031044003553688526
          total_loss: 4021945368576.0
          vf_explained_var: 7.450580596923828e-07
          vf_loss: 4021945368576.0
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000
  iterations_since_restore: 11
  

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1762,16,1638,8000,23,210.705,208000,-245.75,-96,-488,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-06-25_07-23-27
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -96.0
  episode_reward_mean: -243.42
  episode_reward_min: -424.0
  episodes_this_iter: 27
  episodes_total: 720
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003906250058207661
          cur_lr: 9.999999747378752e-05
          entropy: 0.24460580945014954
          entropy_coeff: 0.0
          kl: 0.0044989679008722305
          model: {}
          policy_loss: -0.00012564333155751228
          total_loss: 4153617678336.0
          vf_explained_var: 1.3709068298339844e-06
          vf_loss: 4153617678336.0
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216000
  iterations_since_restore: 12

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1762,16,1638,8000,24,218.838,216000,-243.42,-96,-424,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-06-25_07-23-35
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -96.0
  episode_reward_mean: -241.56
  episode_reward_min: -424.0
  episodes_this_iter: 26
  episodes_total: 746
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019531250291038305
          cur_lr: 9.999999747378752e-05
          entropy: 0.24026519060134888
          entropy_coeff: 0.0
          kl: 0.006170427892357111
          model: {}
          policy_loss: -0.004706155974417925
          total_loss: 4850148442112.0
          vf_explained_var: 1.9669532775878906e-06
          vf_loss: 4850148442112.0
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000
  iterations_since_restore: 13
 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1762,16,1638,8000,25,226.905,224000,-241.56,-96,-424,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-06-25_07-23-43
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -96.0
  episode_reward_mean: -265.3
  episode_reward_min: -523.0
  episodes_this_iter: 27
  episodes_total: 773
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019531250291038305
          cur_lr: 9.999999747378752e-05
          entropy: 0.24845707416534424
          entropy_coeff: 0.0
          kl: 0.003992302343249321
          model: {}
          policy_loss: -0.0031059742905199528
          total_loss: 7282701107200.0
          vf_explained_var: 1.1771917343139648e-06
          vf_loss: 7282701107200.0
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 232000
  iterations_since_restore: 14
 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:1762,16,1638,8000,26,234.953,232000,-265.3,-96,-523,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-06-25_07-23-51
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -96.0
  episode_reward_mean: -281.3
  episode_reward_min: -523.0
  episodes_this_iter: 27
  episodes_total: 800
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.765625145519152e-05
          cur_lr: 9.999999747378752e-05
          entropy: 0.23891928791999817
          entropy_coeff: 0.0
          kl: 0.003462265944108367
          model: {}
          policy_loss: -0.01304149255156517
          total_loss: 5398075015168.0
          vf_explained_var: -7.450580596923828e-08
          vf_loss: 5398075015168.0
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  iterations_since_restore: 15
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300


[2m[36m(pid=1855)[0m Instructions for updating:
[2m[36m(pid=1855)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1855)[0m 2021-06-25 07:23:55,827	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=1855)[0m 2021-06-25 07:23:55,827	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300


[2m[36m(pid=1857)[0m Instructions for updating:
[2m[36m(pid=1857)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1857)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=1857)[0m are not present in its tracked objects:
[2m[36m(pid=1857)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=1857)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=1857)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=1857)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=1855)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=1855)[0m are not present in its tracked objects:
[2m[36m(pid=1855)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=1855)[0m It is possible that this is intended behavior, but it is more likely
[2

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,,10,128,20000,5,125.017,100000,-344.32,-164,-651,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300


[2m[36m(pid=1855)[0m 2021-06-25 07:24:03,814	INFO trainable.py:378 -- Restored on 172.28.0.2 from checkpoint: /content/ray_results/wastenet_ppo_tune/PPO_WasteNetEnv_8c324_00007_7_num_sgd_iter=10,sgd_minibatch_size=128,train_batch_size=20000_2021-06-25_07-11-27/tmpdu15xh9arestore_from_object/checkpoint-5
[2m[36m(pid=1855)[0m 2021-06-25 07:24:03,814	INFO trainable.py:385 -- Current state after restoring: {'_iteration': 5, '_timesteps_total': None, '_time_total': 125.01652479171753, '_episodes_total': 333}


Result for PPO_WasteNetEnv_8c324_00007:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-06-25_07-24-29
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -113.0
  episode_reward_mean: -216.28787878787878
  episode_reward_min: -375.0
  episodes_this_iter: 66
  episodes_total: 399
  experiment_id: 3f76d40dd1a742148c2f454fc19b1738
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.39242398738861084
          entropy_coeff: 0.0
          kl: 0.010910212993621826
          model: {}
          policy_loss: -0.017498651519417763
          total_loss: 117.23509216308594
          vf_explained_var: 0.45312491059303284
          vf_loss: 117.25041198730469
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,172.28.0.2:1855,10,128,20000,6,151.013,120000,-216.288,-113,-375,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300


Result for PPO_WasteNetEnv_8c324_00007:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-06-25_07-24-55
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -61.0
  episode_reward_mean: -171.28
  episode_reward_min: -375.0
  episodes_this_iter: 67
  episodes_total: 466
  experiment_id: 3f76d40dd1a742148c2f454fc19b1738
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.3564613163471222
          entropy_coeff: 0.0
          kl: 0.008178019896149635
          model: {}
          policy_loss: -0.014999444596469402
          total_loss: 71.95939636230469
          vf_explained_var: 0.5811004042625427
          vf_loss: 71.97277069091797
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_restore: 2
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,172.28.0.2:1855,10,128,20000,7,176.632,140000,-171.28,-61,-375,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300


Result for PPO_WasteNetEnv_8c324_00007:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-06-25_07-25-21
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -48.0
  episode_reward_mean: -133.41
  episode_reward_min: -231.0
  episodes_this_iter: 67
  episodes_total: 533
  experiment_id: 3f76d40dd1a742148c2f454fc19b1738
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.3197552561759949
          entropy_coeff: 0.0
          kl: 0.007844640873372555
          model: {}
          policy_loss: -0.016054557636380196
          total_loss: 53.830116271972656
          vf_explained_var: 0.644597053527832
          vf_loss: 53.844608306884766
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 3
  nod

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,172.28.0.2:1855,10,128,20000,8,202.271,160000,-133.41,-48,-231,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300


Result for PPO_WasteNetEnv_8c324_00007:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-06-25_07-25-46
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -17.0
  episode_reward_mean: -98.02
  episode_reward_min: -231.0
  episodes_this_iter: 66
  episodes_total: 599
  experiment_id: 3f76d40dd1a742148c2f454fc19b1738
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.28968262672424316
          entropy_coeff: 0.0
          kl: 0.005811099428683519
          model: {}
          policy_loss: -0.012223848141729832
          total_loss: 40.29069137573242
          vf_explained_var: 0.7042778134346008
          vf_loss: 40.3017463684082
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  iterations_since_restore: 4
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,172.28.0.2:1855,10,128,20000,9,227.863,180000,-98.02,-17,-231,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300


Result for PPO_WasteNetEnv_8c324_00007:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-06-25_07-26-11
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -8.0
  episode_reward_mean: -81.81
  episode_reward_min: -176.0
  episodes_this_iter: 67
  episodes_total: 666
  experiment_id: 3f76d40dd1a742148c2f454fc19b1738
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.2687990963459015
          entropy_coeff: 0.0
          kl: 0.005007986910641193
          model: {}
          policy_loss: -0.011725394055247307
          total_loss: 42.30851364135742
          vf_explained_var: 0.7000027894973755
          vf_loss: 42.319236755371094
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  iterations_since_restore: 5
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300


[2m[36m(pid=1975)[0m Instructions for updating:
[2m[36m(pid=1975)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1975)[0m 2021-06-25 07:26:16,845	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=1975)[0m 2021-06-25 07:26:16,845	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,,10,128,10000,10,126.704,100000,-159.6,-53,-303,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300


[2m[36m(pid=1976)[0m Instructions for updating:
[2m[36m(pid=1976)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=1976)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=1976)[0m are not present in its tracked objects:
[2m[36m(pid=1976)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=1976)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=1976)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=1976)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=1975)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=1975)[0m are not present in its tracked objects:
[2m[36m(pid=1975)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=1975)[0m It is possible that this is intended behavior, but it is more likely
[2

Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-06-25_07-26-37
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -32.0
  episode_reward_mean: -103.81818181818181
  episode_reward_min: -152.0
  episodes_this_iter: 33
  episodes_total: 366
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.29841941595077515
          entropy_coeff: 0.0
          kl: 0.0053154644556343555
          model: {}
          policy_loss: -0.013919632881879807
          total_loss: 40.85951614379883
          vf_explained_var: 0.7052722573280334
          vf_loss: 40.87236785888672
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iterations_since_res

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:1975,10,128,10000,11,139.779,110000,-103.818,-32,-152,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-06-25_07-26-50
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -32.0
  episode_reward_mean: -105.89393939393939
  episode_reward_min: -184.0
  episodes_this_iter: 33
  episodes_total: 399
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.2789742052555084
          entropy_coeff: 0.0
          kl: 0.004217824898660183
          model: {}
          policy_loss: -0.012584006413817406
          total_loss: 41.16709518432617
          vf_explained_var: 0.7017930746078491
          vf_loss: 41.1788330078125
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restor

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:1975,10,128,10000,12,152.77,120000,-105.894,-32,-184,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-06-25_07-27-04
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -17.0
  episode_reward_mean: -97.86
  episode_reward_min: -184.0
  episodes_this_iter: 34
  episodes_total: 433
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 4.999999873689376e-05
          entropy: 0.27740421891212463
          entropy_coeff: 0.0
          kl: 0.0033147954382002354
          model: {}
          policy_loss: -0.010174863040447235
          total_loss: 32.06338119506836
          vf_explained_var: 0.75136798620224
          vf_loss: 32.07322311401367
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  iterations_since_restore: 3
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:1975,10,128,10000,13,166.014,130000,-97.86,-17,-184,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-06-25_07-27-17
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -10.0
  episode_reward_mean: -89.04
  episode_reward_min: -184.0
  episodes_this_iter: 33
  episodes_total: 466
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.25949448347091675
          entropy_coeff: 0.0
          kl: 0.004838019143790007
          model: {}
          policy_loss: -0.01126557681709528
          total_loss: 35.45766830444336
          vf_explained_var: 0.7342808842658997
          vf_loss: 35.46869659423828
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_restore: 4
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:1975,10,128,10000,14,178.827,140000,-89.04,-10,-184,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-06-25_07-27-29
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -10.0
  episode_reward_mean: -79.38
  episode_reward_min: -154.0
  episodes_this_iter: 33
  episodes_total: 499
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 4.999999873689376e-05
          entropy: 0.25349706411361694
          entropy_coeff: 0.0
          kl: 0.004782970063388348
          model: {}
          policy_loss: -0.011881797574460506
          total_loss: 34.85804748535156
          vf_explained_var: 0.7360741496086121
          vf_loss: 34.86981201171875
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000
  iterations_since_restore: 5
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:1975,10,128,10000,15,191.618,150000,-79.38,-10,-154,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-06-25_07-27-42
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -10.0
  episode_reward_mean: -76.69
  episode_reward_min: -163.0
  episodes_this_iter: 34
  episodes_total: 533
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.23657169938087463
          entropy_coeff: 0.0
          kl: 0.0052495114505290985
          model: {}
          policy_loss: -0.010619543492794037
          total_loss: 34.38517379760742
          vf_explained_var: 0.7404647469520569
          vf_loss: 34.39572525024414
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 6
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:1975,10,128,10000,16,204.59,160000,-76.69,-10,-163,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-06-25_07-27-55
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -11.0
  episode_reward_mean: -69.46
  episode_reward_min: -163.0
  episodes_this_iter: 33
  episodes_total: 566
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.22934894263744354
          entropy_coeff: 0.0
          kl: 0.00505759147927165
          model: {}
          policy_loss: -0.009483314119279385
          total_loss: 28.33096694946289
          vf_explained_var: 0.7782337665557861
          vf_loss: 28.34038543701172
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000
  iterations_since_restore: 7
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:1975,10,128,10000,17,217.584,170000,-69.46,-11,-163,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-06-25_07-28-08
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -1.0
  episode_reward_mean: -61.71
  episode_reward_min: -163.0
  episodes_this_iter: 33
  episodes_total: 599
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.21950794756412506
          entropy_coeff: 0.0
          kl: 0.0035772453993558884
          model: {}
          policy_loss: -0.006957650184631348
          total_loss: 31.0656795501709
          vf_explained_var: 0.7575615048408508
          vf_loss: 31.07259750366211
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  iterations_since_restore: 8
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:1975,10,128,10000,18,230.415,180000,-61.71,-1,-163,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-06-25_07-28-21
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 18.0
  episode_reward_mean: -48.65
  episode_reward_min: -138.0
  episodes_this_iter: 34
  episodes_total: 633
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0062500000931322575
          cur_lr: 4.999999873689376e-05
          entropy: 0.20799225568771362
          entropy_coeff: 0.0
          kl: 0.006544445641338825
          model: {}
          policy_loss: -0.008671785704791546
          total_loss: 25.387096405029297
          vf_explained_var: 0.7950728535652161
          vf_loss: 25.39573097229004
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 190000
  iterations_since_restore: 9
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:1975,10,128,10000,19,243.48,190000,-48.65,18,-138,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-06-25_07-28-34
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 18.0
  episode_reward_mean: -44.21
  episode_reward_min: -137.0
  episodes_this_iter: 33
  episodes_total: 666
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0062500000931322575
          cur_lr: 4.999999873689376e-05
          entropy: 0.20133914053440094
          entropy_coeff: 0.0
          kl: 0.004135518334805965
          model: {}
          policy_loss: -0.010117597877979279
          total_loss: 29.62833595275879
          vf_explained_var: 0.767234206199646
          vf_loss: 29.638427734375
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  iterations_since_restore: 10
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300


[2m[36m(pid=2079)[0m Instructions for updating:
[2m[36m(pid=2079)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2079)[0m 2021-06-25 07:28:39,493	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=2079)[0m 2021-06-25 07:28:39,493	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,,12,153,12000,6,125.33,120000,-307.72,-152,-492,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300


[2m[36m(pid=2080)[0m Instructions for updating:
[2m[36m(pid=2080)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2080)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2080)[0m are not present in its tracked objects:
[2m[36m(pid=2080)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2080)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=2080)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=2080)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=2079)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2079)[0m are not present in its tracked objects:
[2m[36m(pid=2079)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2079)[0m It is possible that this is intended behavior, but it is more likely
[2

Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-06-25_07-29-03
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -32.0
  episode_reward_mean: -107.9
  episode_reward_min: -166.0
  episodes_this_iter: 40
  episodes_total: 373
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 9.999999747378752e-06
          entropy: 0.3163384795188904
          entropy_coeff: 0.0
          kl: 0.003866669489070773
          model: {}
          policy_loss: -0.006007173098623753
          total_loss: 107.63541412353516
          vf_explained_var: 0.46383729577064514
          vf_loss: 107.64066314697266
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  iterations_since_restore: 1
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:2079,12,153,12000,11,142.328,112000,-107.9,-32,-166,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-06-25_07-29-18
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -32.0
  episode_reward_mean: -108.1875
  episode_reward_min: -208.0
  episodes_this_iter: 40
  episodes_total: 413
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 9.999999747378752e-06
          entropy: 0.3145381212234497
          entropy_coeff: 0.0
          kl: 0.003838192205876112
          model: {}
          policy_loss: -0.004922294057905674
          total_loss: 110.84732055664062
          vf_explained_var: 0.46151068806648254
          vf_loss: 110.85185241699219
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  iterations_since_restore: 2
 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:2079,12,153,12000,12,157.568,124000,-108.188,-32,-208,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-06-25_07-29-34
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -32.0
  episode_reward_mean: -105.05
  episode_reward_min: -208.0
  episodes_this_iter: 40
  episodes_total: 453
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 9.999999747378752e-06
          entropy: 0.30442139506340027
          entropy_coeff: 0.0
          kl: 0.0046865143813192844
          model: {}
          policy_loss: -0.004919514525681734
          total_loss: 99.73453521728516
          vf_explained_var: 0.4831465184688568
          vf_loss: 99.73920440673828
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  iterations_since_restore: 3
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:2079,12,153,12000,13,173.283,136000,-105.05,-32,-208,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-06-25_07-29-49
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -31.0
  episode_reward_mean: -101.72
  episode_reward_min: -208.0
  episodes_this_iter: 40
  episodes_total: 493
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 9.999999747378752e-06
          entropy: 0.2817140221595764
          entropy_coeff: 0.0
          kl: 0.005714858416467905
          model: {}
          policy_loss: -0.0066103506833314896
          total_loss: 100.61406707763672
          vf_explained_var: 0.48000413179397583
          vf_loss: 100.62054443359375
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000
  iterations_since_restore: 4
  

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:2079,12,153,12000,14,188.686,148000,-101.72,-31,-208,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-06-25_07-30-04
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -31.0
  episode_reward_mean: -103.05
  episode_reward_min: -213.0
  episodes_this_iter: 40
  episodes_total: 533
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 9.999999747378752e-06
          entropy: 0.2771603763103485
          entropy_coeff: 0.0
          kl: 0.0045773861929774284
          model: {}
          policy_loss: -0.0049094450660049915
          total_loss: 111.94918060302734
          vf_explained_var: 0.45275169610977173
          vf_loss: 111.9539794921875
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 5
  

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:2079,12,153,12000,15,203.988,160000,-103.05,-31,-213,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-06-25_07-30-20
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -31.0
  episode_reward_mean: -101.35
  episode_reward_min: -213.0
  episodes_this_iter: 40
  episodes_total: 573
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 9.999999747378752e-06
          entropy: 0.2725399434566498
          entropy_coeff: 0.0
          kl: 0.004334769211709499
          model: {}
          policy_loss: -0.004290630109608173
          total_loss: 90.79936218261719
          vf_explained_var: 0.5057303309440613
          vf_loss: 90.8035888671875
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172000
  iterations_since_restore: 6
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:2079,12,153,12000,16,219.361,172000,-101.35,-31,-213,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-06-25_07-30-35
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -26.0
  episode_reward_mean: -94.72
  episode_reward_min: -193.0
  episodes_this_iter: 40
  episodes_total: 613
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0062500000931322575
          cur_lr: 9.999999747378752e-06
          entropy: 0.26325488090515137
          entropy_coeff: 0.0
          kl: 0.004125627223402262
          model: {}
          policy_loss: -0.004671833012253046
          total_loss: 95.46041107177734
          vf_explained_var: 0.49332013726234436
          vf_loss: 95.46507263183594
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184000
  iterations_since_restore: 7
  n

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:2079,12,153,12000,17,234.783,184000,-94.72,-26,-193,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-06-25_07-30-51
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -26.0
  episode_reward_mean: -89.49
  episode_reward_min: -193.0
  episodes_this_iter: 40
  episodes_total: 653
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031250000465661287
          cur_lr: 9.999999747378752e-06
          entropy: 0.266845703125
          entropy_coeff: 0.0
          kl: 0.004130963236093521
          model: {}
          policy_loss: -0.004160761833190918
          total_loss: 101.9156494140625
          vf_explained_var: 0.47834229469299316
          vf_loss: 101.91979217529297
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000
  iterations_since_restore: 8
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300


[2m[36m(pid=2184)[0m Instructions for updating:
[2m[36m(pid=2184)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2184)[0m 2021-06-25 07:30:55,878	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=2184)[0m 2021-06-25 07:30:55,878	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300


[2m[36m(pid=2185)[0m Instructions for updating:
[2m[36m(pid=2185)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2185)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2185)[0m are not present in its tracked objects:
[2m[36m(pid=2185)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2185)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=2185)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=2185)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=2184)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2184)[0m are not present in its tracked objects:
[2m[36m(pid=2184)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2184)[0m It is possible that this is intended behavior, but it is more likely
[2

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,,20,512,20000,6,135.228,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300


[2m[36m(pid=2184)[0m 2021-06-25 07:31:03,963	INFO trainable.py:378 -- Restored on 172.28.0.2 from checkpoint: /content/ray_results/wastenet_ppo_tune/PPO_WasteNetEnv_8c324_00001_1_num_sgd_iter=20,sgd_minibatch_size=512,train_batch_size=20000_2021-06-25_06-57-04/tmpslr726n5restore_from_object/checkpoint-6
[2m[36m(pid=2184)[0m 2021-06-25 07:31:03,963	INFO trainable.py:385 -- Current state after restoring: {'_iteration': 6, '_timesteps_total': None, '_time_total': 135.227956533432, '_episodes_total': 400}


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-06-25_07-31-26
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -80.0
  episode_reward_mean: -165.43939393939394
  episode_reward_min: -308.0
  episodes_this_iter: 66
  episodes_total: 466
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.35633939504623413
          entropy_coeff: 0.0
          kl: 0.009675310924649239
          model: {}
          policy_loss: -0.0155715923756361
          total_loss: 110.21192932128906
          vf_explained_var: 0.4331495463848114
          vf_loss: 110.2255630493164
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_resto

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,172.28.0.2:2184,20,512,20000,7,158.239,140000,-165.439,-80,-308,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-06-25_07-31-50
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -31.0
  episode_reward_mean: -136.03
  episode_reward_min: -308.0
  episodes_this_iter: 67
  episodes_total: 533
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.3297766149044037
          entropy_coeff: 0.0
          kl: 0.007255179807543755
          model: {}
          policy_loss: -0.013120661489665508
          total_loss: 67.21910858154297
          vf_explained_var: 0.5915749669075012
          vf_loss: 67.23078918457031
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 2
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,172.28.0.2:2184,20,512,20000,8,181.378,160000,-136.03,-31,-308,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-06-25_07-32-13
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -29.0
  episode_reward_mean: -107.41
  episode_reward_min: -201.0
  episodes_this_iter: 67
  episodes_total: 600
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.29656505584716797
          entropy_coeff: 0.0
          kl: 0.006221689283847809
          model: {}
          policy_loss: -0.015213260427117348
          total_loss: 48.532318115234375
          vf_explained_var: 0.6707590818405151
          vf_loss: 48.54628372192383
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  iterations_since_restore: 3
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,172.28.0.2:2184,20,512,20000,9,204.266,180000,-107.41,-29,-201,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-06-25_07-32-35
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -16.0
  episode_reward_mean: -84.16
  episode_reward_min: -183.0
  episodes_this_iter: 66
  episodes_total: 666
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.26887765526771545
          entropy_coeff: 0.0
          kl: 0.006008352153003216
          model: {}
          policy_loss: -0.013141925446689129
          total_loss: 35.31950759887695
          vf_explained_var: 0.7365210056304932
          vf_loss: 35.33145523071289
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  iterations_since_restore: 4
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,172.28.0.2:2184,20,512,20000,10,226.991,200000,-84.16,-16,-183,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-06-25_07-32-58
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -11.0
  episode_reward_mean: -65.65
  episode_reward_min: -154.0
  episodes_this_iter: 67
  episodes_total: 733
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.24573077261447906
          entropy_coeff: 0.0
          kl: 0.004903010092675686
          model: {}
          policy_loss: -0.012848054990172386
          total_loss: 30.85257339477539
          vf_explained_var: 0.7590559720993042
          vf_loss: 30.864439010620117
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterations_since_restore: 5
  nod

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,172.28.0.2:2184,20,512,20000,11,249.885,220000,-65.65,-11,-154,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-06-25_07-33-21
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 24.0
  episode_reward_mean: -55.76
  episode_reward_min: -151.0
  episodes_this_iter: 67
  episodes_total: 800
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 4.999999873689376e-05
          entropy: 0.23244930803775787
          entropy_coeff: 0.0
          kl: 0.00422082981094718
          model: {}
          policy_loss: -0.01173129491508007
          total_loss: 28.252437591552734
          vf_explained_var: 0.7740146517753601
          vf_loss: 28.263744354248047
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  iterations_since_restore: 6
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300


[2m[36m(pid=2283)[0m Instructions for updating:
[2m[36m(pid=2283)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2283)[0m 2021-06-25 07:33:26,151	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=2283)[0m 2021-06-25 07:33:26,151	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300


[2m[36m(pid=2284)[0m Instructions for updating:
[2m[36m(pid=2284)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2284)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2284)[0m are not present in its tracked objects:
[2m[36m(pid=2284)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2284)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=2284)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=2284)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=2283)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2283)[0m are not present in its tracked objects:
[2m[36m(pid=2283)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2283)[0m It is possible that this is intended behavior, but it is more likely
[2

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,,20,512,20000,6,135.451,120000,-242.99,-94,-429,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300


[2m[36m(pid=2283)[0m 2021-06-25 07:33:34,314	INFO trainable.py:378 -- Restored on 172.28.0.2 from checkpoint: /content/ray_results/wastenet_ppo_tune/PPO_WasteNetEnv_8c324_00000_0_num_sgd_iter=20,sgd_minibatch_size=512,train_batch_size=20000_2021-06-25_06-57-04/tmpnlvaf5nzrestore_from_object/checkpoint-6
[2m[36m(pid=2283)[0m 2021-06-25 07:33:34,314	INFO trainable.py:385 -- Current state after restoring: {'_iteration': 6, '_timesteps_total': None, '_time_total': 135.45145726203918, '_episodes_total': 400}


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-06-25_07-33-57
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -80.0
  episode_reward_mean: -165.43939393939394
  episode_reward_min: -308.0
  episodes_this_iter: 66
  episodes_total: 466
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.35622867941856384
          entropy_coeff: 0.0
          kl: 0.009669340215623379
          model: {}
          policy_loss: -0.01559100579470396
          total_loss: 110.2119140625
          vf_explained_var: 0.43314942717552185
          vf_loss: 110.2255630493164
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_restore

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,172.28.0.2:2283,20,512,20000,7,158.767,140000,-165.439,-80,-308,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-06-25_07-34-20
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -31.0
  episode_reward_mean: -136.22
  episode_reward_min: -308.0
  episodes_this_iter: 67
  episodes_total: 533
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.32879868149757385
          entropy_coeff: 0.0
          kl: 0.006993843242526054
          model: {}
          policy_loss: -0.013087136670947075
          total_loss: 67.7788314819336
          vf_explained_var: 0.5894916653633118
          vf_loss: 67.79053497314453
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 2
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,172.28.0.2:2283,20,512,20000,8,181.559,160000,-136.22,-31,-308,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-06-25_07-34-43
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -29.0
  episode_reward_mean: -108.06
  episode_reward_min: -201.0
  episodes_this_iter: 67
  episodes_total: 600
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.2958180606365204
          entropy_coeff: 0.0
          kl: 0.005932517349720001
          model: {}
          policy_loss: -0.014749256893992424
          total_loss: 48.959068298339844
          vf_explained_var: 0.6683332324028015
          vf_loss: 48.972625732421875
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  iterations_since_restore: 3
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,172.28.0.2:2283,20,512,20000,9,204.794,180000,-108.06,-29,-201,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-06-25_07-35-06
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -15.0
  episode_reward_mean: -84.14
  episode_reward_min: -183.0
  episodes_this_iter: 66
  episodes_total: 666
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.2702684998512268
          entropy_coeff: 0.0
          kl: 0.005728780757635832
          model: {}
          policy_loss: -0.012354478240013123
          total_loss: 35.25431823730469
          vf_explained_var: 0.7361574172973633
          vf_loss: 35.26552963256836
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  iterations_since_restore: 4
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,172.28.0.2:2283,20,512,20000,10,227.909,200000,-84.14,-15,-183,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-06-25_07-35-29
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -9.0
  episode_reward_mean: -65.3
  episode_reward_min: -177.0
  episodes_this_iter: 67
  episodes_total: 733
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.24580924212932587
          entropy_coeff: 0.0
          kl: 0.0051273261196911335
          model: {}
          policy_loss: -0.012953677214682102
          total_loss: 31.09867286682129
          vf_explained_var: 0.7574815154075623
          vf_loss: 31.1106014251709
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterations_since_restore: 5
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,172.28.0.2:2283,20,512,20000,11,250.813,220000,-65.3,-9,-177,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-06-25_07-35-52
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 31.0
  episode_reward_mean: -55.85
  episode_reward_min: -167.0
  episodes_this_iter: 67
  episodes_total: 800
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.23263119161128998
          entropy_coeff: 0.0
          kl: 0.003800179809331894
          model: {}
          policy_loss: -0.01153645571321249
          total_loss: 27.912904739379883
          vf_explained_var: 0.7767757773399353
          vf_loss: 27.923681259155273
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  iterations_since_restore: 6
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300


[2m[36m(pid=2400)[0m Instructions for updating:
[2m[36m(pid=2400)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2400)[0m 2021-06-25 07:35:57,177	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=2400)[0m 2021-06-25 07:35:57,177	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300


[2m[36m(pid=2425)[0m Instructions for updating:
[2m[36m(pid=2425)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2425)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2425)[0m are not present in its tracked objects:
[2m[36m(pid=2425)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2425)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=2425)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=2425)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=2400)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2400)[0m are not present in its tracked objects:
[2m[36m(pid=2400)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2400)[0m It is possible that this is intended behavior, but it is more likely
[2

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,,20,2048,10000,24,242.876,240000,-87.62,-6,-173,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300


[2m[36m(pid=2400)[0m 2021-06-25 07:36:05,781	INFO trainable.py:378 -- Restored on 172.28.0.2 from checkpoint: /content/ray_results/wastenet_ppo_tune/PPO_WasteNetEnv_8c324_00002_2_num_sgd_iter=20,sgd_minibatch_size=2048,train_batch_size=10000_2021-06-25_06-59-31/tmpkgm3kh5lrestore_from_object/checkpoint-24
[2m[36m(pid=2400)[0m 2021-06-25 07:36:05,781	INFO trainable.py:385 -- Current state after restoring: {'_iteration': 24, '_timesteps_total': None, '_time_total': 242.8761625289917, '_episodes_total': 800}


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-06-25_07-36-16
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -31.0
  episode_reward_mean: -76.84848484848484
  episode_reward_min: -173.0
  episodes_this_iter: 33
  episodes_total: 833
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.13963595032691956
          entropy_coeff: 0.0
          kl: 0.0018149102106690407
          model: {}
          policy_loss: -0.0064972140826284885
          total_loss: 30.37747573852539
          vf_explained_var: 0.7618035078048706
          vf_loss: 30.383609771728516
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 250000
  iterations_since_re

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:2400,20,2048,10000,25,253.286,250000,-76.8485,-31,-173,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-06-25_07-36-26
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -8.0
  episode_reward_mean: -80.18181818181819
  episode_reward_min: -195.0
  episodes_this_iter: 33
  episodes_total: 866
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 4.999999873689376e-05
          entropy: 0.1391456127166748
          entropy_coeff: 0.0
          kl: 0.0015134075656533241
          model: {}
          policy_loss: -0.0037233331240713596
          total_loss: 33.22745132446289
          vf_explained_var: 0.7470126152038574
          vf_loss: 33.231021881103516
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
  iterations_since_rest

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:2400,20,2048,10000,26,263.434,260000,-80.1818,-8,-195,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-06-25_07-36-36
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -8.0
  episode_reward_mean: -76.85
  episode_reward_min: -196.0
  episodes_this_iter: 34
  episodes_total: 900
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.13255774974822998
          entropy_coeff: 0.0
          kl: 0.0024776258505880833
          model: {}
          policy_loss: 0.001820229459553957
          total_loss: 30.460723876953125
          vf_explained_var: 0.7587670683860779
          vf_loss: 30.458782196044922
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 270000
  iterations_since_restore: 3
  nod

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:2400,20,2048,10000,27,273.852,270000,-76.85,-8,-196,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-06-25_07-36-47
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -8.0
  episode_reward_mean: -72.17
  episode_reward_min: -196.0
  episodes_this_iter: 33
  episodes_total: 933
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 4.999999873689376e-05
          entropy: 0.1273556649684906
          entropy_coeff: 0.0
          kl: 0.002125740284100175
          model: {}
          policy_loss: -0.005678740330040455
          total_loss: 25.02476692199707
          vf_explained_var: 0.79246985912323
          vf_loss: 25.030391693115234
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  iterations_since_restore: 4
  node_ip

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:2400,20,2048,10000,28,284.057,280000,-72.17,-8,-196,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-06-25_07-36-57
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -9.0
  episode_reward_mean: -69.57
  episode_reward_min: -196.0
  episodes_this_iter: 33
  episodes_total: 966
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.1294427514076233
          entropy_coeff: 0.0
          kl: 0.0018513877876102924
          model: {}
          policy_loss: -0.0063585760071873665
          total_loss: 31.147064208984375
          vf_explained_var: 0.7555639743804932
          vf_loss: 31.153400421142578
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 290000
  iterations_since_restore: 5
  n

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:2400,20,2048,10000,29,294.107,290000,-69.57,-9,-196,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-06-25_07-37-07
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -8.0
  episode_reward_mean: -70.98
  episode_reward_min: -180.0
  episodes_this_iter: 34
  episodes_total: 1000
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0062500000931322575
          cur_lr: 4.999999873689376e-05
          entropy: 0.12353160977363586
          entropy_coeff: 0.0
          kl: 0.0014358980115503073
          model: {}
          policy_loss: -0.005845922511070967
          total_loss: 31.2447566986084
          vf_explained_var: 0.7534288167953491
          vf_loss: 31.250593185424805
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000
  iterations_since_restore: 6
  n

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:2400,20,2048,10000,30,304.663,300000,-70.98,-8,-180,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-06-25_07-37-17
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -8.0
  episode_reward_mean: -71.56
  episode_reward_min: -180.0
  episodes_this_iter: 33
  episodes_total: 1033
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031250000465661287
          cur_lr: 4.999999873689376e-05
          entropy: 0.12135932594537735
          entropy_coeff: 0.0
          kl: 0.0021090335212647915
          model: {}
          policy_loss: -0.005861436948180199
          total_loss: 24.632434844970703
          vf_explained_var: 0.7966188192367554
          vf_loss: 24.638290405273438
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_trained: 310000
  iterations_since_restore: 7
 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:2400,20,2048,10000,31,314.759,310000,-71.56,-8,-180,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-06-25_07-37-27
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -66.84
  episode_reward_min: -180.0
  episodes_this_iter: 33
  episodes_total: 1066
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015625000232830644
          cur_lr: 4.999999873689376e-05
          entropy: 0.1168479472398758
          entropy_coeff: 0.0
          kl: 0.0036720677744597197
          model: {}
          policy_loss: -0.010630277916789055
          total_loss: 27.088930130004883
          vf_explained_var: 0.7775009274482727
          vf_loss: 27.09955406188965
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000
  iterations_since_restore: 8
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:2400,20,2048,10000,32,324.714,320000,-66.84,2,-180,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-06-25_07-37-37
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 18.0
  episode_reward_mean: -55.64
  episode_reward_min: -130.0
  episodes_this_iter: 34
  episodes_total: 1100
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812500116415322
          cur_lr: 4.999999873689376e-05
          entropy: 0.11113771796226501
          entropy_coeff: 0.0
          kl: 0.003372343024238944
          model: {}
          policy_loss: -0.010838380083441734
          total_loss: 23.136363983154297
          vf_explained_var: 0.8022021055221558
          vf_loss: 23.147197723388672
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained: 330000
  iterations_since_restore: 9
  

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:2400,20,2048,10000,33,334.828,330000,-55.64,18,-130,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-06-25_07-37-48
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 18.0
  episode_reward_mean: -57.64
  episode_reward_min: -137.0
  episodes_this_iter: 33
  episodes_total: 1133
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003906250058207661
          cur_lr: 4.999999873689376e-05
          entropy: 0.11619820445775986
          entropy_coeff: 0.0
          kl: 0.0016131876036524773
          model: {}
          policy_loss: -0.006752708461135626
          total_loss: 35.417964935302734
          vf_explained_var: 0.7319352626800537
          vf_loss: 35.42471694946289
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 340000
  iterations_since_restore: 10
 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:2400,20,2048,10000,34,345.014,340000,-57.64,18,-137,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-06-25_07-37-58
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 18.0
  episode_reward_mean: -54.07
  episode_reward_min: -137.0
  episodes_this_iter: 33
  episodes_total: 1166
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019531250291038305
          cur_lr: 4.999999873689376e-05
          entropy: 0.11615869402885437
          entropy_coeff: 0.0
          kl: 0.0016592692118138075
          model: {}
          policy_loss: 0.0005344606470316648
          total_loss: 28.695999145507812
          vf_explained_var: 0.7656974792480469
          vf_loss: 28.695466995239258
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained: 350000
  iterations_since_restore: 11

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,172.28.0.2:2400,20,2048,10000,35,355.082,350000,-54.07,18,-137,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300


Result for PPO_WasteNetEnv_8c324_00002:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-06-25_07-38-08
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: -55.74
  episode_reward_min: -137.0
  episodes_this_iter: 34
  episodes_total: 1200
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.765625145519152e-05
          cur_lr: 4.999999873689376e-05
          entropy: 0.11244623363018036
          entropy_coeff: 0.0
          kl: 0.0027910054195672274
          model: {}
          policy_loss: -0.0068937670439481735
          total_loss: 28.04749870300293
          vf_explained_var: 0.7671847343444824
          vf_loss: 28.054393768310547
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 360000
  iterations_since_restore: 12
 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00006,PENDING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300


[2m[36m(pid=2500)[0m Instructions for updating:
[2m[36m(pid=2500)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2500)[0m 2021-06-25 07:38:13,496	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=2500)[0m 2021-06-25 07:38:13,496	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


[2m[36m(pid=2501)[0m Instructions for updating:
[2m[36m(pid=2501)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2501)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2501)[0m are not present in its tracked objects:
[2m[36m(pid=2501)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2501)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=2501)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=2501)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=2500)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2500)[0m are not present in its tracked objects:
[2m[36m(pid=2500)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2500)[0m It is possible that this is intended behavior, but it is more likely
[2

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,,16,1638,8000,27,243.01,240000,-281.3,-96,-523,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


[2m[36m(pid=2500)[0m 2021-06-25 07:38:21,624	INFO trainable.py:378 -- Restored on 172.28.0.2 from checkpoint: /content/ray_results/wastenet_ppo_tune/PPO_WasteNetEnv_8c324_00006_6_num_sgd_iter=30,sgd_minibatch_size=128,train_batch_size=20000_2021-06-25_07-09-08/tmp787whu2grestore_from_object/checkpoint-27
[2m[36m(pid=2500)[0m 2021-06-25 07:38:21,624	INFO trainable.py:385 -- Current state after restoring: {'_iteration': 27, '_timesteps_total': None, '_time_total': 243.00979471206665, '_episodes_total': 800}


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-06-25_07-38-29
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -178.0
  episode_reward_mean: -323.9230769230769
  episode_reward_min: -589.0
  episodes_this_iter: 26
  episodes_total: 826
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 9.999999747378752e-05
          entropy: 0.24603581428527832
          entropy_coeff: 0.0
          kl: 0.0030827687587589025
          model: {}
          policy_loss: 0.0042757438495755196
          total_loss: 7558387990528.0
          vf_explained_var: 2.6673078536987305e-06
          vf_loss: 7558387990528.0
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 248000
  iterations_since_res

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:2500,16,1638,8000,28,251.283,248000,-323.923,-178,-589,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-06-25_07-38-37
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -122.0
  episode_reward_mean: -321.64150943396226
  episode_reward_min: -589.0
  episodes_this_iter: 27
  episodes_total: 853
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 9.999999747378752e-05
          entropy: 0.25397413969039917
          entropy_coeff: 0.0
          kl: 0.0030501126311719418
          model: {}
          policy_loss: -0.012168316170573235
          total_loss: 7558077087744.0
          vf_explained_var: 1.2069940567016602e-06
          vf_loss: 7558077087744.0
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 256000
  iterations_since_re

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:2500,16,1638,8000,29,259.221,256000,-321.642,-122,-589,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-06-25_07-38-46
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -122.0
  episode_reward_mean: -329.325
  episode_reward_min: -589.0
  episodes_this_iter: 27
  episodes_total: 880
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 9.999999747378752e-05
          entropy: 0.2532358169555664
          entropy_coeff: 0.0
          kl: 0.003296754788607359
          model: {}
          policy_loss: -0.015973877161741257
          total_loss: 16155378974720.0
          vf_explained_var: 1.296401023864746e-06
          vf_loss: 16155378974720.0
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_trained: 264000
  iterations_since_restore: 3
  n

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:2500,16,1638,8000,30,267.621,264000,-329.325,-122,-589,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-06-25_07-38-54
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -122.0
  episode_reward_mean: -321.5
  episode_reward_min: -589.0
  episodes_this_iter: 26
  episodes_total: 906
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 9.999999747378752e-05
          entropy: 0.24545469880104065
          entropy_coeff: 0.0
          kl: 0.004658743739128113
          model: {}
          policy_loss: -0.00553049286827445
          total_loss: 10857616506880.0
          vf_explained_var: 1.2069940567016602e-06
          vf_loss: 10857616506880.0
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 272000
  iterations_since_restore: 4
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:2500,16,1638,8000,31,275.73,272000,-321.5,-122,-589,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-06-25_07-39-02
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -148.0
  episode_reward_mean: -332.28
  episode_reward_min: -587.0
  episodes_this_iter: 27
  episodes_total: 933
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 9.999999747378752e-05
          entropy: 0.24302060902118683
          entropy_coeff: 0.0
          kl: 0.0044419365003705025
          model: {}
          policy_loss: -0.002400647848844528
          total_loss: 12052877803520.0
          vf_explained_var: 1.9818544387817383e-06
          vf_loss: 12052877803520.0
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  iterations_since_restore: 5


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:2500,16,1638,8000,32,283.942,280000,-332.28,-148,-587,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-06-25_07-39-10
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -148.0
  episode_reward_mean: -328.46
  episode_reward_min: -530.0
  episodes_this_iter: 27
  episodes_total: 960
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0062500000931322575
          cur_lr: 9.999999747378752e-05
          entropy: 0.24308818578720093
          entropy_coeff: 0.0
          kl: 0.004132123664021492
          model: {}
          policy_loss: -0.007561863865703344
          total_loss: 17899295080448.0
          vf_explained_var: 5.662441253662109e-07
          vf_loss: 17899295080448.0
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained: 288000
  iterations_since_restore: 6
 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:2500,16,1638,8000,33,292.081,288000,-328.46,-148,-530,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-06-25_07-39-19
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -148.0
  episode_reward_mean: -331.59
  episode_reward_min: -581.0
  episodes_this_iter: 26
  episodes_total: 986
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031250000465661287
          cur_lr: 9.999999747378752e-05
          entropy: 0.23755212128162384
          entropy_coeff: 0.0
          kl: 0.0032711492385715246
          model: {}
          policy_loss: -0.0027957013808190823
          total_loss: 18931160973312.0
          vf_explained_var: 1.7881393432617188e-07
          vf_loss: 18931160973312.0
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 296000
  iterations_since_restore: 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:2500,16,1638,8000,34,300.341,296000,-331.59,-148,-581,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-06-25_07-39-27
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -172.0
  episode_reward_mean: -351.75
  episode_reward_min: -612.0
  episodes_this_iter: 27
  episodes_total: 1013
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015625000232830644
          cur_lr: 9.999999747378752e-05
          entropy: 0.2519265413284302
          entropy_coeff: 0.0
          kl: 0.003724359441548586
          model: {}
          policy_loss: -0.00370788830332458
          total_loss: 21666384052224.0
          vf_explained_var: 1.2814998626708984e-06
          vf_loss: 21666384052224.0
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 304000
  iterations_since_restore: 8
 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:2500,16,1638,8000,35,308.538,304000,-351.75,-172,-612,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-06-25_07-39-35
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -180.0
  episode_reward_mean: -390.48
  episode_reward_min: -797.0
  episodes_this_iter: 27
  episodes_total: 1040
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812500116415322
          cur_lr: 9.999999747378752e-05
          entropy: 0.2535555958747864
          entropy_coeff: 0.0
          kl: 0.0030607995577156544
          model: {}
          policy_loss: -0.002031183335930109
          total_loss: 16781013942272.0
          vf_explained_var: 1.1920928955078125e-06
          vf_loss: 16781013942272.0
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 312000
  iterations_since_restore: 9

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:2500,16,1638,8000,36,316.647,312000,-390.48,-180,-797,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-06-25_07-39-43
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -166.0
  episode_reward_mean: -404.38
  episode_reward_min: -797.0
  episodes_this_iter: 26
  episodes_total: 1066
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003906250058207661
          cur_lr: 9.999999747378752e-05
          entropy: 0.23516197502613068
          entropy_coeff: 0.0
          kl: 0.0049521648325026035
          model: {}
          policy_loss: -0.011434557847678661
          total_loss: 21285690146816.0
          vf_explained_var: -2.2351741790771484e-07
          vf_loss: 21285690146816.0
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000
  iterations_since_restore:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:2500,16,1638,8000,37,324.95,320000,-404.38,-166,-797,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-06-25_07-39-52
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -135.0
  episode_reward_mean: -408.76
  episode_reward_min: -797.0
  episodes_this_iter: 27
  episodes_total: 1093
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019531250291038305
          cur_lr: 9.999999747378752e-05
          entropy: 0.23335197567939758
          entropy_coeff: 0.0
          kl: 0.0069267963990569115
          model: {}
          policy_loss: -0.01365822646766901
          total_loss: 22569293971456.0
          vf_explained_var: -2.384185791015625e-07
          vf_loss: 22569293971456.0
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained: 328000
  iterations_since_restore: 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:2500,16,1638,8000,38,333.146,328000,-408.76,-135,-797,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-06-25_07-40-00
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -135.0
  episode_reward_mean: -396.12
  episode_reward_min: -797.0
  episodes_this_iter: 27
  episodes_total: 1120
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019531250291038305
          cur_lr: 9.999999747378752e-05
          entropy: 0.23466116189956665
          entropy_coeff: 0.0
          kl: 0.005699086003005505
          model: {}
          policy_loss: -0.009571928530931473
          total_loss: 16395219763200.0
          vf_explained_var: 1.043081283569336e-07
          vf_loss: 16395219763200.0
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trained: 336000
  iterations_since_restore: 1

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:2500,16,1638,8000,39,341.221,336000,-396.12,-135,-797,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-06-25_07-40-08
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -135.0
  episode_reward_mean: -370.93
  episode_reward_min: -632.0
  episodes_this_iter: 26
  episodes_total: 1146
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019531250291038305
          cur_lr: 9.999999747378752e-05
          entropy: 0.22789278626441956
          entropy_coeff: 0.0
          kl: 0.003230142407119274
          model: {}
          policy_loss: -0.001526630250737071
          total_loss: 17785769951232.0
          vf_explained_var: -3.5762786865234375e-07
          vf_loss: 17785769951232.0
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trained: 344000
  iterations_since_restore:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:2500,16,1638,8000,40,349.27,344000,-370.93,-135,-632,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-06-25_07-40-16
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -135.0
  episode_reward_mean: -365.01
  episode_reward_min: -632.0
  episodes_this_iter: 27
  episodes_total: 1173
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.765625145519152e-05
          cur_lr: 9.999999747378752e-05
          entropy: 0.2331620454788208
          entropy_coeff: 0.0
          kl: 0.007295326795428991
          model: {}
          policy_loss: -0.00673207500949502
          total_loss: 12566824747008.0
          vf_explained_var: 2.130866050720215e-06
          vf_loss: 12566824747008.0
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained: 352000
  iterations_since_restore: 14
 

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:2500,16,1638,8000,41,357.383,352000,-365.01,-135,-632,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


2021-06-25 07:40:24,548	INFO pbt.py:543 -- [exploit] transferring weights from trial PPO_WasteNetEnv_8c324_00004 (score -44.21) -> PPO_WasteNetEnv_8c324_00006 (score -357.29)
2021-06-25 07:40:24,550	INFO pbt.py:558 -- [explore] perturbed config from {'lambda': 0.9, 'clip_param': 0.3, 'lr': 5e-05, 'num_sgd_iter': 10, 'sgd_minibatch_size': 128, 'train_batch_size': 10000} -> {'lambda': 0.7200000000000001, 'clip_param': 0.35738641637099167, 'lr': 0.0001, 'num_sgd_iter': 8, 'sgd_minibatch_size': 153, 'train_batch_size': 12000}


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-06-25_07-40-24
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -188.0
  episode_reward_mean: -357.29
  episode_reward_min: -550.0
  episodes_this_iter: 27
  episodes_total: 1200
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.765625145519152e-05
          cur_lr: 9.999999747378752e-05
          entropy: 0.2153930813074112
          entropy_coeff: 0.0
          kl: 0.006840641610324383
          model: {}
          policy_loss: -0.006671345792710781
          total_loss: 15853088145408.0
          vf_explained_var: 5.960464477539063e-08
          vf_loss: 15853088145408.0
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 360000
  iterations_since_restore: 15


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300


[2m[36m(pid=2590)[0m Instructions for updating:
[2m[36m(pid=2590)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2590)[0m 2021-06-25 07:40:29,200	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=2590)[0m 2021-06-25 07:40:29,200	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,,12,153,12000,18,250.06,196000,-89.49,-26,-193,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300


[2m[36m(pid=2589)[0m Instructions for updating:
[2m[36m(pid=2589)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2589)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2589)[0m are not present in its tracked objects:
[2m[36m(pid=2589)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2589)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=2589)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=2589)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=2590)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2590)[0m are not present in its tracked objects:
[2m[36m(pid=2590)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2590)[0m It is possible that this is intended behavior, but it is more likely
[2

Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-06-25_07-41-05
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -18.0
  episode_reward_mean: -84.225
  episode_reward_min: -183.0
  episodes_this_iter: 40
  episodes_total: 693
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 9.999999747378752e-06
          entropy: 0.2637111246585846
          entropy_coeff: 0.0
          kl: 0.0024498605635017157
          model: {}
          policy_loss: -0.004683133214712143
          total_loss: 91.53360748291016
          vf_explained_var: 0.510103702545166
          vf_loss: 91.53778839111328
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000
  iterations_since_restore: 1
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:2682,12,153,12000,19,265.617,208000,-84.225,-18,-183,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-06-25_07-41-20
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -16.0
  episode_reward_mean: -84.6
  episode_reward_min: -183.0
  episodes_this_iter: 40
  episodes_total: 733
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 9.999999747378752e-06
          entropy: 0.2663273215293884
          entropy_coeff: 0.0
          kl: 0.00283845909871161
          model: {}
          policy_loss: -0.0036030621267855167
          total_loss: 87.67797088623047
          vf_explained_var: 0.5211643576622009
          vf_loss: 87.68128967285156
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterations_since_restore: 2
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:2682,12,153,12000,20,280.884,220000,-84.6,-16,-183,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-06-25_07-41-36
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -16.0
  episode_reward_mean: -78.99
  episode_reward_min: -183.0
  episodes_this_iter: 40
  episodes_total: 773
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 9.999999747378752e-06
          entropy: 0.2640016973018646
          entropy_coeff: 0.0
          kl: 0.0040636686608195305
          model: {}
          policy_loss: -0.004085289314389229
          total_loss: 86.08482360839844
          vf_explained_var: 0.5220287442207336
          vf_loss: 86.08870697021484
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 232000
  iterations_since_restore: 3
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:2682,12,153,12000,21,296.297,232000,-78.99,-16,-183,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-06-25_07-41-51
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -16.0
  episode_reward_mean: -77.44
  episode_reward_min: -192.0
  episodes_this_iter: 40
  episodes_total: 813
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 9.999999747378752e-06
          entropy: 0.2559605538845062
          entropy_coeff: 0.0
          kl: 0.0031575553584843874
          model: {}
          policy_loss: -0.004790177568793297
          total_loss: 95.31621551513672
          vf_explained_var: 0.49904969334602356
          vf_loss: 95.3209228515625
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 244000
  iterations_since_restore: 4
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:2682,12,153,12000,22,311.615,244000,-77.44,-16,-192,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-06-25_07-42-06
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -18.0
  episode_reward_mean: -81.38
  episode_reward_min: -192.0
  episodes_this_iter: 40
  episodes_total: 853
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 9.999999747378752e-06
          entropy: 0.24306797981262207
          entropy_coeff: 0.0
          kl: 0.0029917436186224222
          model: {}
          policy_loss: -0.004172564949840307
          total_loss: 95.38359832763672
          vf_explained_var: 0.4966568946838379
          vf_loss: 95.38773345947266
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 256000
  iterations_since_restore: 5
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:2682,12,153,12000,23,326.874,256000,-81.38,-18,-192,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-06-25_07-42-22
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -78.34
  episode_reward_min: -192.0
  episodes_this_iter: 40
  episodes_total: 893
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0062500000931322575
          cur_lr: 9.999999747378752e-06
          entropy: 0.24203728139400482
          entropy_coeff: 0.0
          kl: 0.0035891712177544832
          model: {}
          policy_loss: -0.003958388697355986
          total_loss: 77.91348266601562
          vf_explained_var: 0.5467802882194519
          vf_loss: 77.91741180419922
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained: 268000
  iterations_since_restore: 6
  nod

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:2682,12,153,12000,24,342.193,268000,-78.34,1,-192,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-06-25_07-42-37
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -72.01
  episode_reward_min: -152.0
  episodes_this_iter: 40
  episodes_total: 933
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031250000465661287
          cur_lr: 9.999999747378752e-06
          entropy: 0.23015770316123962
          entropy_coeff: 0.0
          kl: 0.004265558440238237
          model: {}
          policy_loss: -0.003908710088580847
          total_loss: 78.2353515625
          vf_explained_var: 0.5475552678108215
          vf_loss: 78.23924255371094
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  iterations_since_restore: 7
  node_ip:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00005,RUNNING,172.28.0.2:2682,12,153,12000,25,357.47,280000,-72.01,1,-152,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300


Result for PPO_WasteNetEnv_8c324_00005:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-06-25_07-42-52
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -70.83
  episode_reward_min: -161.0
  episodes_this_iter: 40
  episodes_total: 973
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015625000232830644
          cur_lr: 9.999999747378752e-06
          entropy: 0.2336738258600235
          entropy_coeff: 0.0
          kl: 0.0039175087586045265
          model: {}
          policy_loss: -0.003086480777710676
          total_loss: 99.79649353027344
          vf_explained_var: 0.48704788088798523
          vf_loss: 99.79956817626953
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_trained: 292000
  iterations_since_restore: 8
  nod

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00007,PENDING,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300


[2m[36m(pid=2786)[0m Instructions for updating:
[2m[36m(pid=2786)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2786)[0m 2021-06-25 07:42:57,745	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=2786)[0m 2021-06-25 07:42:57,745	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,20,256.257,200000,-44.21,18,-137,300


[2m[36m(pid=2787)[0m Instructions for updating:
[2m[36m(pid=2787)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2787)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2787)[0m are not present in its tracked objects:
[2m[36m(pid=2787)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2787)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=2787)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=2787)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=2786)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2786)[0m are not present in its tracked objects:
[2m[36m(pid=2786)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2786)[0m It is possible that this is intended behavior, but it is more likely
[2

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,,10,128,20000,10,253.054,200000,-81.81,-8,-176,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,20,256.257,200000,-44.21,18,-137,300


[2m[36m(pid=2786)[0m 2021-06-25 07:43:05,914	INFO trainable.py:378 -- Restored on 172.28.0.2 from checkpoint: /content/ray_results/wastenet_ppo_tune/PPO_WasteNetEnv_8c324_00007_7_num_sgd_iter=10,sgd_minibatch_size=128,train_batch_size=20000_2021-06-25_07-11-27/tmplcgm3xb4restore_from_object/checkpoint-10
[2m[36m(pid=2786)[0m 2021-06-25 07:43:05,914	INFO trainable.py:385 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 253.05354356765747, '_episodes_total': 666}


Result for PPO_WasteNetEnv_8c324_00007:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-06-25_07-43-32
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 15.0
  episode_reward_mean: -59.621212121212125
  episode_reward_min: -160.0
  episodes_this_iter: 66
  episodes_total: 732
  experiment_id: 3f76d40dd1a742148c2f454fc19b1738
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.24601790308952332
          entropy_coeff: 0.0
          kl: 0.0048147751949727535
          model: {}
          policy_loss: -0.010642724111676216
          total_loss: 38.07937240600586
          vf_explained_var: 0.7237065434455872
          vf_loss: 38.08905029296875
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterations_since_rest

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,172.28.0.2:2786,10,128,20000,11,279.176,220000,-59.6212,15,-160,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,20,256.257,200000,-44.21,18,-137,300


Result for PPO_WasteNetEnv_8c324_00007:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-06-25_07-43-57
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: -56.73
  episode_reward_min: -160.0
  episodes_this_iter: 67
  episodes_total: 799
  experiment_id: 3f76d40dd1a742148c2f454fc19b1738
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 4.999999873689376e-05
          entropy: 0.22959434986114502
          entropy_coeff: 0.0
          kl: 0.004059096332639456
          model: {}
          policy_loss: -0.009662024676799774
          total_loss: 38.229713439941406
          vf_explained_var: 0.722580075263977
          vf_loss: 38.23896789550781
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  iterations_since_restore: 2
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,172.28.0.2:2786,10,128,20000,12,304.702,240000,-56.73,8,-160,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,20,256.257,200000,-44.21,18,-137,300


Result for PPO_WasteNetEnv_8c324_00007:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-06-25_07-44-23
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: -48.67
  episode_reward_min: -171.0
  episodes_this_iter: 67
  episodes_total: 866
  experiment_id: 3f76d40dd1a742148c2f454fc19b1738
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.21051636338233948
          entropy_coeff: 0.0
          kl: 0.003421811619773507
          model: {}
          policy_loss: -0.009081044234335423
          total_loss: 32.54161834716797
          vf_explained_var: 0.7532615661621094
          vf_loss: 32.55052947998047
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
  iterations_since_restore: 3
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,172.28.0.2:2786,10,128,20000,13,330.191,260000,-48.67,8,-171,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,20,256.257,200000,-44.21,18,-137,300


Result for PPO_WasteNetEnv_8c324_00007:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-06-25_07-44-48
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 17.0
  episode_reward_mean: -39.95
  episode_reward_min: -171.0
  episodes_this_iter: 66
  episodes_total: 932
  experiment_id: 3f76d40dd1a742148c2f454fc19b1738
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 4.999999873689376e-05
          entropy: 0.19347266852855682
          entropy_coeff: 0.0
          kl: 0.004058466292917728
          model: {}
          policy_loss: -0.0077718510292470455
          total_loss: 28.225360870361328
          vf_explained_var: 0.7766689658164978
          vf_loss: 28.233028411865234
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  iterations_since_restore: 4
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00007,RUNNING,172.28.0.2:2786,10,128,20000,14,355.638,280000,-39.95,17,-171,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,20,256.257,200000,-44.21,18,-137,300


Result for PPO_WasteNetEnv_8c324_00007:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-06-25_07-45-14
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 27.0
  episode_reward_mean: -28.27
  episode_reward_min: -112.0
  episodes_this_iter: 67
  episodes_total: 999
  experiment_id: 3f76d40dd1a742148c2f454fc19b1738
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.17786677181720734
          entropy_coeff: 0.0
          kl: 0.0045411051250994205
          model: {}
          policy_loss: -0.006938084494322538
          total_loss: 26.015443801879883
          vf_explained_var: 0.7914947867393494
          vf_loss: 26.02232551574707
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000
  iterations_since_restore: 5
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00004,PENDING,,10,128,10000,20,256.257,200000,-44.21,18,-137,300


[2m[36m(pid=2893)[0m Instructions for updating:
[2m[36m(pid=2893)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2893)[0m 2021-06-25 07:45:18,966	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=2893)[0m 2021-06-25 07:45:18,967	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00006,PENDING,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300


[2m[36m(pid=2894)[0m Instructions for updating:
[2m[36m(pid=2894)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=2894)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2894)[0m are not present in its tracked objects:
[2m[36m(pid=2894)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2894)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=2894)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=2894)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=2893)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=2893)[0m are not present in its tracked objects:
[2m[36m(pid=2893)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=2893)[0m It is possible that this is intended behavior, but it is more likely
[2

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,,10,128,10000,20,256.257,200000,-44.21,18,-137,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00006,PENDING,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300


[2m[36m(pid=2893)[0m 2021-06-25 07:45:27,165	INFO trainable.py:378 -- Restored on 172.28.0.2 from checkpoint: /content/ray_results/wastenet_ppo_tune/PPO_WasteNetEnv_8c324_00004_4_num_sgd_iter=10,sgd_minibatch_size=128,train_batch_size=10000_2021-06-25_07-04-14/tmpd3i8p_45restore_from_object/checkpoint-20
[2m[36m(pid=2893)[0m 2021-06-25 07:45:27,166	INFO trainable.py:385 -- Current state after restoring: {'_iteration': 20, '_timesteps_total': None, '_time_total': 256.2574031352997, '_episodes_total': 666}


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-06-25_07-45-40
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: -43.696969696969695
  episode_reward_min: -111.0
  episodes_this_iter: 33
  episodes_total: 699
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.19153904914855957
          entropy_coeff: 0.0
          kl: 0.00361314183101058
          model: {}
          policy_loss: -0.010286861099302769
          total_loss: 30.43012237548828
          vf_explained_var: 0.7634334564208984
          vf_loss: 30.43968391418457
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000
  iterations_since_restore

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:2893,10,128,10000,21,269.307,210000,-43.697,9,-111,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00006,PENDING,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-06-25_07-45-53
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 15.0
  episode_reward_mean: -42.303030303030305
  episode_reward_min: -185.0
  episodes_this_iter: 33
  episodes_total: 732
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 4.999999873689376e-05
          entropy: 0.18513628840446472
          entropy_coeff: 0.0
          kl: 0.00294350553303957
          model: {}
          policy_loss: -0.009525533765554428
          total_loss: 29.14851188659668
          vf_explained_var: 0.7715215682983398
          vf_loss: 29.157745361328125
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterations_since_resto

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:2893,10,128,10000,22,282.171,220000,-42.303,15,-185,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00006,PENDING,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-06-25_07-46-06
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 20.0
  episode_reward_mean: -37.79
  episode_reward_min: -185.0
  episodes_this_iter: 34
  episodes_total: 766
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.18119406700134277
          entropy_coeff: 0.0
          kl: 0.0038007439579814672
          model: {}
          policy_loss: -0.008175271563231945
          total_loss: 23.048377990722656
          vf_explained_var: 0.8105949759483337
          vf_loss: 23.056364059448242
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000
  iterations_since_restore: 3
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:2893,10,128,10000,23,295.028,230000,-37.79,20,-185,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00006,PENDING,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-06-25_07-46-18
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 20.0
  episode_reward_mean: -30.33
  episode_reward_min: -185.0
  episodes_this_iter: 33
  episodes_total: 799
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 4.999999873689376e-05
          entropy: 0.1702214926481247
          entropy_coeff: 0.0
          kl: 0.002113469410687685
          model: {}
          policy_loss: -0.005730117671191692
          total_loss: 22.19415283203125
          vf_explained_var: 0.8150312304496765
          vf_loss: 22.199832916259766
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  iterations_since_restore: 4
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:2893,10,128,10000,24,307.694,240000,-30.33,20,-185,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00006,PENDING,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-06-25_07-46-31
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 25.0
  episode_reward_mean: -28.16
  episode_reward_min: -105.0
  episodes_this_iter: 33
  episodes_total: 832
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.163206547498703
          entropy_coeff: 0.0
          kl: 0.0029078759253025055
          model: {}
          policy_loss: -0.008103658445179462
          total_loss: 25.529541015625
          vf_explained_var: 0.7951465249061584
          vf_loss: 25.53761100769043
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 250000
  iterations_since_restore: 5
  node_ip

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:2893,10,128,10000,25,320.558,250000,-28.16,25,-105,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00006,PENDING,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-06-25_07-46-44
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 29.0
  episode_reward_mean: -30.02
  episode_reward_min: -140.0
  episodes_this_iter: 34
  episodes_total: 866
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0062500000931322575
          cur_lr: 4.999999873689376e-05
          entropy: 0.15913569927215576
          entropy_coeff: 0.0
          kl: 0.0032146996818482876
          model: {}
          policy_loss: -0.007030483800917864
          total_loss: 24.587663650512695
          vf_explained_var: 0.7994334697723389
          vf_loss: 24.594675064086914
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
  iterations_since_restore: 6
  

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:2893,10,128,10000,26,333.315,260000,-30.02,29,-140,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00006,PENDING,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-06-25_07-46-57
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 29.0
  episode_reward_mean: -28.47
  episode_reward_min: -140.0
  episodes_this_iter: 33
  episodes_total: 899
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031250000465661287
          cur_lr: 4.999999873689376e-05
          entropy: 0.15152128040790558
          entropy_coeff: 0.0
          kl: 0.004560107830911875
          model: {}
          policy_loss: -0.006715354043990374
          total_loss: 18.593738555908203
          vf_explained_var: 0.840807318687439
          vf_loss: 18.600440979003906
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 270000
  iterations_since_restore: 7
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:2893,10,128,10000,27,345.954,270000,-28.47,29,-140,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00006,PENDING,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-06-25_07-47-10
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 29.0
  episode_reward_mean: -26.43
  episode_reward_min: -162.0
  episodes_this_iter: 33
  episodes_total: 932
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015625000232830644
          cur_lr: 4.999999873689376e-05
          entropy: 0.14547719061374664
          entropy_coeff: 0.0
          kl: 0.0037600381765514612
          model: {}
          policy_loss: -0.007014420349150896
          total_loss: 22.34916877746582
          vf_explained_var: 0.8115224838256836
          vf_loss: 22.35618019104004
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  iterations_since_restore: 8
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:2893,10,128,10000,28,359.067,280000,-26.43,29,-162,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00006,PENDING,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-06-25_07-47-23
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 26.0
  episode_reward_mean: -22.59
  episode_reward_min: -162.0
  episodes_this_iter: 34
  episodes_total: 966
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812500116415322
          cur_lr: 4.999999873689376e-05
          entropy: 0.13708697259426117
          entropy_coeff: 0.0
          kl: 0.003427869640290737
          model: {}
          policy_loss: -0.008383032865822315
          total_loss: 22.906333923339844
          vf_explained_var: 0.8094704151153564
          vf_loss: 22.914714813232422
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 290000
  iterations_since_restore: 9
  n

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00004,RUNNING,172.28.0.2:2893,10,128,10000,29,371.92,290000,-22.59,26,-162,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00006,PENDING,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300


Result for PPO_WasteNetEnv_8c324_00004:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-06-25_07-47-35
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 34.0
  episode_reward_mean: -22.91
  episode_reward_min: -162.0
  episodes_this_iter: 33
  episodes_total: 999
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003906250058207661
          cur_lr: 4.999999873689376e-05
          entropy: 0.1383640468120575
          entropy_coeff: 0.0
          kl: 0.0038483100943267345
          model: {}
          policy_loss: -0.007691402453929186
          total_loss: 21.305761337280273
          vf_explained_var: 0.8198278546333313
          vf_loss: 21.31344985961914
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000
  iterations_since_restore: 10
  n

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00006,PENDING,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300


[2m[36m(pid=3008)[0m Instructions for updating:
[2m[36m(pid=3008)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=3008)[0m 2021-06-25 07:47:41,277	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=3008)[0m 2021-06-25 07:47:41,277	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,,8,153,12000,42,365.523,360000,-357.29,-188,-550,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300


[2m[36m(pid=3009)[0m Instructions for updating:
[2m[36m(pid=3009)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=3009)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=3009)[0m are not present in its tracked objects:
[2m[36m(pid=3009)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=3009)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=3009)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=3009)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=3008)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=3008)[0m are not present in its tracked objects:
[2m[36m(pid=3008)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=3008)[0m It is possible that this is intended behavior, but it is more likely
[2

Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-06-25_07-48-03
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: -41.65
  episode_reward_min: -111.0
  episodes_this_iter: 40
  episodes_total: 706
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 9.999999747378752e-05
          entropy: 0.18569055199623108
          entropy_coeff: 0.0
          kl: 0.007160291541367769
          model: {}
          policy_loss: -0.017464809119701385
          total_loss: 12.712672233581543
          vf_explained_var: 0.8839251399040222
          vf_loss: 12.728704452514648
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 212000
  iterations_since_restore: 1
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:3008,8,153,12000,21,270.454,212000,-41.65,9,-111,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-06-25_07-48-17
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 27.0
  episode_reward_mean: -40.6625
  episode_reward_min: -156.0
  episodes_this_iter: 40
  episodes_total: 746
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 9.999999747378752e-05
          entropy: 0.17149735987186432
          entropy_coeff: 0.0
          kl: 0.004865134134888649
          model: {}
          policy_loss: -0.01293201744556427
          total_loss: 12.832085609436035
          vf_explained_var: 0.8845498561859131
          vf_loss: 12.84404468536377
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000
  iterations_since_restore: 2
  nod

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:3008,8,153,12000,22,284.68,224000,-40.6625,27,-156,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-06-25_07-48-32
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 27.0
  episode_reward_mean: -31.49
  episode_reward_min: -156.0
  episodes_this_iter: 40
  episodes_total: 786
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 9.999999747378752e-05
          entropy: 0.1576613485813141
          entropy_coeff: 0.0
          kl: 0.006038513500243425
          model: {}
          policy_loss: -0.011503388173878193
          total_loss: 10.721055030822754
          vf_explained_var: 0.9005508422851562
          vf_loss: 10.731955528259277
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 236000
  iterations_since_restore: 3
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:3008,8,153,12000,23,298.798,236000,-31.49,27,-156,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-06-25_07-48-46
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 37.0
  episode_reward_mean: -29.76
  episode_reward_min: -156.0
  episodes_this_iter: 40
  episodes_total: 826
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 9.999999747378752e-05
          entropy: 0.1497696191072464
          entropy_coeff: 0.0
          kl: 0.005287510342895985
          model: {}
          policy_loss: -0.014539270661771297
          total_loss: 13.041147232055664
          vf_explained_var: 0.8856202363967896
          vf_loss: 13.055155754089355
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 248000
  iterations_since_restore: 4
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:3008,8,153,12000,24,313.079,248000,-29.76,37,-156,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-06-25_07-49-00
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 37.0
  episode_reward_mean: -28.21
  episode_reward_min: -151.0
  episodes_this_iter: 40
  episodes_total: 866
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 9.999999747378752e-05
          entropy: 0.1370258629322052
          entropy_coeff: 0.0
          kl: 0.004610737320035696
          model: {}
          policy_loss: -0.012086671777069569
          total_loss: 11.235773086547852
          vf_explained_var: 0.897825300693512
          vf_loss: 11.24739933013916
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
  iterations_since_restore: 5
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:3008,8,153,12000,25,327.106,260000,-28.21,37,-151,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-06-25_07-49-14
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 34.0
  episode_reward_mean: -28.32
  episode_reward_min: -151.0
  episodes_this_iter: 40
  episodes_total: 906
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 9.999999747378752e-05
          entropy: 0.13142737746238708
          entropy_coeff: 0.0
          kl: 0.005982597824186087
          model: {}
          policy_loss: -0.008572033606469631
          total_loss: 10.692255973815918
          vf_explained_var: 0.901116669178009
          vf_loss: 10.700529098510742
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 272000
  iterations_since_restore: 6
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:3008,8,153,12000,26,341.167,272000,-28.32,34,-151,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-06-25_07-49-28
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 38.0
  episode_reward_mean: -23.38
  episode_reward_min: -118.0
  episodes_this_iter: 40
  episodes_total: 946
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 9.999999747378752e-05
          entropy: 0.11703968048095703
          entropy_coeff: 0.0
          kl: 0.006948629394173622
          model: {}
          policy_loss: -0.009688643738627434
          total_loss: 10.029151916503906
          vf_explained_var: 0.9070084691047668
          vf_loss: 10.038494110107422
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained: 284000
  iterations_since_restore: 7
  nod

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:3008,8,153,12000,27,355.251,284000,-23.38,38,-118,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-06-25_07-49-42
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 38.0
  episode_reward_mean: -19.58
  episode_reward_min: -97.0
  episodes_this_iter: 40
  episodes_total: 986
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 9.999999747378752e-05
          entropy: 0.11398895829916
          entropy_coeff: 0.0
          kl: 0.00599390035495162
          model: {}
          policy_loss: -0.010935189202427864
          total_loss: 10.020110130310059
          vf_explained_var: 0.9087626338005066
          vf_loss: 10.030744552612305
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 296000
  iterations_since_restore: 8
  node_ip:

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00006,RUNNING,172.28.0.2:3008,8,153,12000,28,369.385,296000,-19.58,38,-97,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300


Result for PPO_WasteNetEnv_8c324_00006:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-06-25_07-49-57
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 38.0
  episode_reward_mean: -16.52
  episode_reward_min: -85.0
  episodes_this_iter: 40
  episodes_total: 1026
  experiment_id: 15e9dba8cc4d491bb0c6ac701a911b06
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 9.999999747378752e-05
          entropy: 0.11062435060739517
          entropy_coeff: 0.0
          kl: 0.0064040543511509895
          model: {}
          policy_loss: -0.012051939964294434
          total_loss: 9.677984237670898
          vf_explained_var: 0.9103075861930847
          vf_loss: 9.689715385437012
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_trained: 308000
  iterations_since_restore: 9
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00003,PENDING,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300


[2m[36m(pid=3118)[0m Instructions for updating:
[2m[36m(pid=3118)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=3118)[0m 2021-06-25 07:50:01,826	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=3118)[0m 2021-06-25 07:50:01,826	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,,24,14043,28086,17,259.495,260430,-125.53,-61,-243,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,12,272.614,240000,-55.76,24,-151,300


[2m[36m(pid=3117)[0m Instructions for updating:
[2m[36m(pid=3117)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=3117)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=3117)[0m are not present in its tracked objects:
[2m[36m(pid=3117)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=3117)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=3117)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=3117)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=3118)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=3118)[0m are not present in its tracked objects:
[2m[36m(pid=3118)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=3118)[0m It is possible that this is intended behavior, but it is more likely
[2

Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 288516
  custom_metrics: {}
  date: 2021-06-25_07-50-37
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -63.0
  episode_reward_mean: -116.26881720430107
  episode_reward_min: -199.0
  episodes_this_iter: 93
  episodes_total: 961
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 9.999999747378752e-05
          entropy: 0.17094029486179352
          entropy_coeff: 0.0
          kl: 0.0036215768195688725
          model: {}
          policy_loss: -0.00594819150865078
          total_loss: 99.448486328125
          vf_explained_var: 0.48912596702575684
          vf_loss: 99.45370483398438
    num_agent_steps_sampled: 288516
    num_agent_steps_trained: 288516
    num_steps_sampled: 288516
    num_steps_trained: 288516
  iterations_since_resto

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:3118,24,14043,28086,18,287.313,288516,-116.269,-63,-199,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,12,272.614,240000,-55.76,24,-151,300


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 316602
  custom_metrics: {}
  date: 2021-06-25_07-51-05
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -46.0
  episode_reward_mean: -114.7
  episode_reward_min: -209.0
  episodes_this_iter: 94
  episodes_total: 1055
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 9.999999747378752e-05
          entropy: 0.16293972730636597
          entropy_coeff: 0.0
          kl: 0.0028604513499885798
          model: {}
          policy_loss: -0.004366498440504074
          total_loss: 95.93584442138672
          vf_explained_var: 0.4981040060520172
          vf_loss: 95.93992614746094
    num_agent_steps_sampled: 316602
    num_agent_steps_trained: 316602
    num_steps_sampled: 316602
    num_steps_trained: 316602
  iterations_since_restore: 2
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:3118,24,14043,28086,19,314.711,316602,-114.7,-46,-209,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,12,272.614,240000,-55.76,24,-151,300


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 344688
  custom_metrics: {}
  date: 2021-06-25_07-51-32
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -43.0
  episode_reward_mean: -100.73
  episode_reward_min: -170.0
  episodes_this_iter: 93
  episodes_total: 1148
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 9.999999747378752e-05
          entropy: 0.1579618752002716
          entropy_coeff: 0.0
          kl: 0.0030286009423434734
          model: {}
          policy_loss: -0.0044959294609725475
          total_loss: 81.97824096679688
          vf_explained_var: 0.5288386344909668
          vf_loss: 81.98257446289062
    num_agent_steps_sampled: 344688
    num_agent_steps_trained: 344688
    num_steps_sampled: 344688
    num_steps_trained: 344688
  iterations_since_restore: 3
  n

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:3118,24,14043,28086,20,342.386,344688,-100.73,-43,-170,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,12,272.614,240000,-55.76,24,-151,300


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 372774
  custom_metrics: {}
  date: 2021-06-25_07-52-00
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -37.0
  episode_reward_mean: -94.23
  episode_reward_min: -197.0
  episodes_this_iter: 94
  episodes_total: 1242
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 9.999999747378752e-05
          entropy: 0.14906863868236542
          entropy_coeff: 0.0
          kl: 0.003601935226470232
          model: {}
          policy_loss: -0.005031559616327286
          total_loss: 81.82023620605469
          vf_explained_var: 0.5288276672363281
          vf_loss: 81.82518005371094
    num_agent_steps_sampled: 372774
    num_agent_steps_trained: 372774
    num_steps_sampled: 372774
    num_steps_trained: 372774
  iterations_since_restore: 4
  nod

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00003,RUNNING,172.28.0.2:3118,24,14043,28086,21,369.738,372774,-94.23,-37,-197,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,12,272.614,240000,-55.76,24,-151,300


2021-06-25 07:52:27,990	INFO pbt.py:543 -- [exploit] transferring weights from trial PPO_WasteNetEnv_8c324_00006 (score -16.52) -> PPO_WasteNetEnv_8c324_00003 (score -89.58)
2021-06-25 07:52:27,992	INFO pbt.py:558 -- [explore] perturbed config from {'lambda': 0.7200000000000001, 'clip_param': 0.35738641637099167, 'lr': 0.0001, 'num_sgd_iter': 8, 'sgd_minibatch_size': 153, 'train_batch_size': 12000} -> {'lambda': 0.5760000000000001, 'clip_param': 0.26925647901362854, 'lr': 5e-05, 'num_sgd_iter': 6, 'sgd_minibatch_size': 122, 'train_batch_size': 14400}


Result for PPO_WasteNetEnv_8c324_00003:
  agent_timesteps_total: 400860
  custom_metrics: {}
  date: 2021-06-25_07-52-27
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -29.0
  episode_reward_mean: -89.58
  episode_reward_min: -175.0
  episodes_this_iter: 94
  episodes_total: 1336
  experiment_id: d836959d0f864d29893a693a78425543
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 9.999999747378752e-05
          entropy: 0.14309294521808624
          entropy_coeff: 0.0
          kl: 0.0033971548546105623
          model: {}
          policy_loss: -0.0039706360548734665
          total_loss: 75.17327880859375
          vf_explained_var: 0.5503721237182617
          vf_loss: 75.17721557617188
    num_agent_steps_sampled: 400860
    num_agent_steps_trained: 400860
    num_steps_sampled: 400860
    num_steps_trained: 400860
  iterations_since_restore: 5
  

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00001,PENDING,,20,512,20000,12,272.614,240000,-55.76,24,-151,300


[2m[36m(pid=3257)[0m Instructions for updating:
[2m[36m(pid=3257)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=3257)[0m 2021-06-25 07:52:32,898	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=3257)[0m 2021-06-25 07:52:32,898	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,12,273.685,240000,-55.85,31,-167,300


[2m[36m(pid=3261)[0m Instructions for updating:
[2m[36m(pid=3261)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=3261)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=3261)[0m are not present in its tracked objects:
[2m[36m(pid=3261)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=3261)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=3261)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=3261)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=3257)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=3257)[0m are not present in its tracked objects:
[2m[36m(pid=3257)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=3257)[0m It is possible that this is intended behavior, but it is more likely
[2

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,,20,512,20000,12,272.614,240000,-55.76,24,-151,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,12,273.685,240000,-55.85,31,-167,300


[2m[36m(pid=3257)[0m 2021-06-25 07:52:41,117	INFO trainable.py:378 -- Restored on 172.28.0.2 from checkpoint: /content/ray_results/wastenet_ppo_tune/PPO_WasteNetEnv_8c324_00003_3_num_sgd_iter=10,sgd_minibatch_size=2048,train_batch_size=10000_2021-06-25_07-01-59/tmpivd5xd4krestore_from_object/checkpoint-29
[2m[36m(pid=3257)[0m 2021-06-25 07:52:41,118	INFO trainable.py:385 -- Current state after restoring: {'_iteration': 29, '_timesteps_total': None, '_time_total': 383.74520325660706, '_episodes_total': 1026}
[2m[36m(pid=3347)[0m Instructions for updating:
[2m[36m(pid=3347)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=3347)[0m 2021-06-25 07:52:45,839	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=3347)[0m 2021-06-25 07:52:45,840	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pi

Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-06-25_07-53-16
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 19.0
  episode_reward_mean: -54.13636363636363
  episode_reward_min: -159.0
  episodes_this_iter: 66
  episodes_total: 866
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.22176547348499298
          entropy_coeff: 0.0
          kl: 0.0033552406821399927
          model: {}
          policy_loss: -0.01111614890396595
          total_loss: 31.901126861572266
          vf_explained_var: 0.756003201007843
          vf_loss: 31.911571502685547
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
  iterations_since_resto

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,172.28.0.2:3347,20,512,20000,13,295.415,260000,-54.1364,19,-159,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,12,273.685,240000,-55.85,31,-167,300


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-06-25_07-53-39
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 19.0
  episode_reward_mean: -40.91
  episode_reward_min: -143.0
  episodes_this_iter: 67
  episodes_total: 933
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 4.999999873689376e-05
          entropy: 0.20842640101909637
          entropy_coeff: 0.0
          kl: 0.0032525998540222645
          model: {}
          policy_loss: -0.009586693719029427
          total_loss: 24.48183822631836
          vf_explained_var: 0.8004802465438843
          vf_loss: 24.4910945892334
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  iterations_since_restore: 2
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,172.28.0.2:3347,20,512,20000,14,317.911,280000,-40.91,19,-143,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,12,273.685,240000,-55.85,31,-167,300


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-06-25_07-54-01
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 25.0
  episode_reward_mean: -36.7
  episode_reward_min: -178.0
  episodes_this_iter: 67
  episodes_total: 1000
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.19259105622768402
          entropy_coeff: 0.0
          kl: 0.0034473431296646595
          model: {}
          policy_loss: -0.009896846488118172
          total_loss: 24.02016258239746
          vf_explained_var: 0.802767813205719
          vf_loss: 24.029888153076172
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000
  iterations_since_restore: 3
  node

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,172.28.0.2:3347,20,512,20000,15,340.329,300000,-36.7,25,-178,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,12,273.685,240000,-55.85,31,-167,300


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-06-25_07-54-24
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 33.0
  episode_reward_mean: -34.03
  episode_reward_min: -178.0
  episodes_this_iter: 66
  episodes_total: 1066
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 4.999999873689376e-05
          entropy: 0.1825794130563736
          entropy_coeff: 0.0
          kl: 0.0027918173000216484
          model: {}
          policy_loss: -0.0078194048255682
          total_loss: 23.333370208740234
          vf_explained_var: 0.8065547943115234
          vf_loss: 23.34111976623535
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000
  iterations_since_restore: 4
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,172.28.0.2:3347,20,512,20000,16,362.871,320000,-34.03,33,-178,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,12,273.685,240000,-55.85,31,-167,300


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-06-25_07-54-46
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 33.0
  episode_reward_mean: -25.9
  episode_reward_min: -134.0
  episodes_this_iter: 67
  episodes_total: 1133
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.17305752635002136
          entropy_coeff: 0.0
          kl: 0.0030481978319585323
          model: {}
          policy_loss: -0.00737231457605958
          total_loss: 20.486425399780273
          vf_explained_var: 0.8275449872016907
          vf_loss: 20.49376106262207
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 340000
  iterations_since_restore: 5
  nod

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,RUNNING,172.28.0.2:3347,20,512,20000,17,385.64,340000,-25.9,33,-134,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,12,273.685,240000,-55.85,31,-167,300


Result for PPO_WasteNetEnv_8c324_00001:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-06-25_07-55-09
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 27.0
  episode_reward_mean: -21.23
  episode_reward_min: -105.0
  episodes_this_iter: 67
  episodes_total: 1200
  experiment_id: 05aca3ff1f2b4512964a978c066636af
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0062500000931322575
          cur_lr: 4.999999873689376e-05
          entropy: 0.1669168323278427
          entropy_coeff: 0.0
          kl: 0.0031135790050029755
          model: {}
          policy_loss: -0.00817768182605505
          total_loss: 21.83722496032715
          vf_explained_var: 0.8188683390617371
          vf_loss: 21.845382690429688
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 360000
  iterations_since_restore: 6
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,18,407.923,360000,-21.23,27,-105,300
PPO_WasteNetEnv_8c324_00002,PAUSED,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00000,PENDING,,20,512,20000,12,273.685,240000,-55.85,31,-167,300


[2m[36m(pid=3475)[0m Instructions for updating:
[2m[36m(pid=3475)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=3475)[0m 2021-06-25 07:55:14,260	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=3475)[0m 2021-06-25 07:55:14,260	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,18,407.923,360000,-21.23,27,-105,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300


[2m[36m(pid=3473)[0m Instructions for updating:
[2m[36m(pid=3473)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=3473)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=3473)[0m are not present in its tracked objects:
[2m[36m(pid=3473)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=3473)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=3473)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=3473)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=3475)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=3475)[0m are not present in its tracked objects:
[2m[36m(pid=3475)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=3475)[0m It is possible that this is intended behavior, but it is more likely
[2

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,,20,512,20000,12,273.685,240000,-55.85,31,-167,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,18,407.923,360000,-21.23,27,-105,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300


[2m[36m(pid=3475)[0m 2021-06-25 07:55:22,558	INFO trainable.py:378 -- Restored on 172.28.0.2 from checkpoint: /content/ray_results/wastenet_ppo_tune/PPO_WasteNetEnv_8c324_00000_0_num_sgd_iter=20,sgd_minibatch_size=512,train_batch_size=20000_2021-06-25_06-57-04/tmpzhu5l7txrestore_from_object/checkpoint-12
[2m[36m(pid=3475)[0m 2021-06-25 07:55:22,558	INFO trainable.py:385 -- Current state after restoring: {'_iteration': 12, '_timesteps_total': None, '_time_total': 273.68507981300354, '_episodes_total': 800}


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-06-25_07-55-45
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 19.0
  episode_reward_mean: -46.27272727272727
  episode_reward_min: -149.0
  episodes_this_iter: 66
  episodes_total: 866
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.21910513937473297
          entropy_coeff: 0.0
          kl: 0.0033637864980846643
          model: {}
          policy_loss: -0.010359002277255058
          total_loss: 27.367115020751953
          vf_explained_var: 0.7801476716995239
          vf_loss: 27.376802444458008
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
  iterations_since_res

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,172.28.0.2:3475,20,512,20000,13,296.288,260000,-46.2727,19,-149,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,18,407.923,360000,-21.23,27,-105,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-06-25_07-56-07
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 21.0
  episode_reward_mean: -41.84
  episode_reward_min: -149.0
  episodes_this_iter: 67
  episodes_total: 933
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 4.999999873689376e-05
          entropy: 0.20801791548728943
          entropy_coeff: 0.0
          kl: 0.0031978939659893513
          model: {}
          policy_loss: -0.00948968157172203
          total_loss: 25.699951171875
          vf_explained_var: 0.7911882400512695
          vf_loss: 25.709123611450195
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  iterations_since_restore: 2
  node_i

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,172.28.0.2:3475,20,512,20000,14,318.609,280000,-41.84,21,-149,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,18,407.923,360000,-21.23,27,-105,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-06-25_07-56-30
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 21.0
  episode_reward_mean: -36.46
  episode_reward_min: -153.0
  episodes_this_iter: 67
  episodes_total: 1000
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.194907084107399
          entropy_coeff: 0.0
          kl: 0.003489319235086441
          model: {}
          policy_loss: -0.010363240726292133
          total_loss: 24.595304489135742
          vf_explained_var: 0.7981825470924377
          vf_loss: 24.60549545288086
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000
  iterations_since_restore: 3
  node_

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,172.28.0.2:3475,20,512,20000,15,341.067,300000,-36.46,21,-153,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,18,407.923,360000,-21.23,27,-105,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-06-25_07-56-52
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 19.0
  episode_reward_mean: -35.18
  episode_reward_min: -153.0
  episodes_this_iter: 66
  episodes_total: 1066
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 4.999999873689376e-05
          entropy: 0.18253228068351746
          entropy_coeff: 0.0
          kl: 0.0023180863354355097
          model: {}
          policy_loss: -0.007743495516479015
          total_loss: 23.988557815551758
          vf_explained_var: 0.8014258146286011
          vf_loss: 23.99624252319336
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000
  iterations_since_restore: 4
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,172.28.0.2:3475,20,512,20000,16,363.509,320000,-35.18,19,-153,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,18,407.923,360000,-21.23,27,-105,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-06-25_07-57-15
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 25.0
  episode_reward_mean: -29.84
  episode_reward_min: -112.0
  episodes_this_iter: 67
  episodes_total: 1133
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.17349490523338318
          entropy_coeff: 0.0
          kl: 0.003071987070143223
          model: {}
          policy_loss: -0.008360577747225761
          total_loss: 23.019790649414062
          vf_explained_var: 0.8102847933769226
          vf_loss: 23.02811050415039
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 340000
  iterations_since_restore: 5
  no

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,RUNNING,172.28.0.2:3475,20,512,20000,17,386.093,340000,-29.84,25,-112,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,18,407.923,360000,-21.23,27,-105,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300


Result for PPO_WasteNetEnv_8c324_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-06-25_07-57-37
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: 29.0
  episode_reward_mean: -22.56
  episode_reward_min: -106.0
  episodes_this_iter: 67
  episodes_total: 1200
  experiment_id: 819930c5485f4fcdb4a26d5014668a4f
  hostname: c5de5901e1d0
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0062500000931322575
          cur_lr: 4.999999873689376e-05
          entropy: 0.16453750431537628
          entropy_coeff: 0.0
          kl: 0.0036028986796736717
          model: {}
          policy_loss: -0.008074185810983181
          total_loss: 20.57709503173828
          vf_explained_var: 0.8261702060699463
          vf_loss: 20.58514976501465
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 360000
  iterations_since_restore: 6
  n

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,18,408.662,360000,-22.56,29,-106,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,18,407.923,360000,-21.23,27,-105,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00005,PAUSED,,12,153,12000,26,372.808,292000,-70.83,1,-161,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00002,PENDING,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300


[2m[36m(pid=3593)[0m Instructions for updating:
[2m[36m(pid=3593)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=3593)[0m 2021-06-25 07:57:42,500	INFO trainer.py:671 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=3593)[0m 2021-06-25 07:57:42,500	INFO trainer.py:698 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,18,408.662,360000,-22.56,29,-106,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,18,407.923,360000,-21.23,27,-105,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,26,372.808,292000,-70.83,1,-161,300


[2m[36m(pid=3590)[0m Instructions for updating:
[2m[36m(pid=3590)[0m experimental_compile is deprecated, use jit_compile instead
[2m[36m(pid=3590)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=3590)[0m are not present in its tracked objects:
[2m[36m(pid=3590)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=3590)[0m It is possible that this is intended behavior, but it is more likely
[2m[36m(pid=3590)[0m an omission. This is a strong indication that this layer should be
[2m[36m(pid=3590)[0m formulated as a subclassed Layer rather than a Lambda layer.
[2m[36m(pid=3593)[0m The following Variables were used a Lambda layer's call (lambda), but
[2m[36m(pid=3593)[0m are not present in its tracked objects:
[2m[36m(pid=3593)[0m   <tf.Variable 'default_policy/log_std:0' shape=(1,) dtype=float32>
[2m[36m(pid=3593)[0m It is possible that this is intended behavior, but it is more likely
[2

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_WasteNetEnv_8c324_00002,RUNNING,,20,2048,10000,36,365.104,360000,-55.74,7,-137,300
PPO_WasteNetEnv_8c324_00000,PAUSED,,20,512,20000,18,408.662,360000,-22.56,29,-106,300
PPO_WasteNetEnv_8c324_00001,PAUSED,,20,512,20000,18,407.923,360000,-21.23,27,-105,300
PPO_WasteNetEnv_8c324_00003,PAUSED,,6,122,14400,22,397.358,400860,-89.58,-29,-175,300
PPO_WasteNetEnv_8c324_00004,PAUSED,,10,128,10000,30,384.725,300000,-22.91,34,-162,300
PPO_WasteNetEnv_8c324_00006,PAUSED,,8,153,12000,29,383.745,308000,-16.52,38,-85,300
PPO_WasteNetEnv_8c324_00007,PAUSED,,10,128,20000,15,381.21,300000,-28.27,27,-112,300
PPO_WasteNetEnv_8c324_00005,PENDING,,12,153,12000,26,372.808,292000,-70.83,1,-161,300


In [None]:
# Hyperparameters handed to the PPO agent for the full training run.
# NOTE(review): presumably the best configuration found by the tuning
# section above — confirm against the tuning results before reuse.
best_config = {
    # Same observation filter and model options as in `tune_config`.
    "observation_filter": "MeanStdFilter",
    "model": {
        "free_log_std": True,
    },
    # SGD passes per training batch and the minibatch size used for them.
    "num_sgd_iter": 10,
    "sgd_minibatch_size": 128,
    # These two differ from the fixed values in `tune_config` (0.95 / 0.3).
    "lambda": 0.731396,
    "clip_param": 0.317651,
    # Learning rate is unchanged from `tune_config`.
    "lr": 5e-5,
    "train_batch_size": 18_812,
}

### Training

In [None]:
# Rebind `ppo` to a new agent configured with `best_config`, then train it
# for 200 iterations.
# NOTE(review): the tuning section constructs PPOAgent with three positional
# arguments (name, env class, {}); here `best_config` is passed as an extra
# second argument — confirm this matches the PPOAgent signature in core.rl.
ppo = PPOAgent(
    "wastenet_ppo_train",
    best_config,
    WasteNetEnv,
    {},
)
ppo.train(num_iter=200)

In [None]:
# Inspect the network behind the agent's policy.
policy = ppo.agent.get_policy()
model = policy.model
# NOTE(review): assumes `base_model` is a tf.keras.Model (the training logs
# show TensorFlow is in use) — confirm. Keras `Model.summary()` prints the
# layer table itself and returns None, so it is called directly here;
# wrapping it in print() would add a stray "None" line after the table.
model.base_model.summary()

### Testing

In [None]:
# Optional: rather than reusing the agent bound to `ppo` by the training
# cell, uncomment the two lines below to build a new agent and load a saved
# checkpoint into it.
# NOTE(review): the checkpoint path is relative — confirm it exists first.
# ppo = PPOAgent("wastenet_ppo_test", best_config, WasteNetEnv, {})
# ppo.load("checkpoints/checkpoint-best")
# Run the agent currently bound to `ppo` for 1000 test episodes.
ppo.test(num_episodes=1000)

### Visualization

In [None]:
# Recursively pack /content/ray_results into /content/ray_results.zip
# (presumably so the results can be downloaded from Colab in one file).
!zip -r /content/ray_results.zip /content/ray_results

In [None]:
# Load the TensorBoard notebook extension, then start TensorBoard with the
# Ray results directory as its log directory.
%load_ext tensorboard 
%tensorboard --logdir="/content/ray_results/"