In [1]:
## Import All Needed Libraries

import os
import gym
import ray
from animalai.envs.arena_config import ArenaConfig
from animalai.envs.gym.environment import AnimalAIGym
from ray.rllib.agents import ppo

from ray.tune import register_env, tune

from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.models import ModelCatalog
from ray.tune.logger import pretty_print

from cache_model import *
from config import get_cfg
from custom_model import *

Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
## Reuse Wrapper for AnimalAI Environment

class UnityEnvWrapper(gym.Env):
    """Gym adapter that exposes an AnimalAI Unity environment to RLlib.

    RLlib builds one instance per environment slot through the creator
    registered with ``register_env`` and passes it an env-config object
    that is indexable like a dict and also carries ``worker_index`` and
    ``vector_index`` attributes.

    Expected ``env_config`` keys:
        unity_worker_id: base integer id; the rollout worker index is added
            to it to form the ``worker_id`` handed to ``AnimalAIGym``.
        arena_to_train: path of the arena YAML file passed to ``ArenaConfig``.
    """

    def __init__(self, env_config):
        self.vector_index = env_config.vector_index
        self.worker_index = env_config.worker_index
        # NOTE(review): the offset presumably keeps parallel rollout workers
        # from sharing one Unity communication channel. vector_index is not
        # part of the offset, so num_envs_per_worker > 1 would reuse the same
        # id -- confirm before raising that setting.
        self.worker_id = env_config["unity_worker_id"] + env_config.worker_index
        self.env = AnimalAIGym(
            environment_filename="../examples/env/AnimalAI",
            worker_id=self.worker_id,
            flatten_branched=True,
            uint8_visual=True,
            arenas_configurations=ArenaConfig(env_config['arena_to_train']),
        )
        # Mirror the wrapped env's spaces so RLlib can size the policy model.
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space

    def reset(self):
        """Reset the wrapped environment and return what it returns."""
        return self.env.reset()

    def step(self, action):
        """Forward ``action`` to the wrapped environment and return its result."""
        return self.env.step(action)

    def close(self):
        """Shut down the wrapped Unity environment.

        The inherited ``gym.Env.close`` does nothing, which would leave the
        simulator process behind whenever this wrapper is torn down. Safe to
        call more than once, and safe if ``__init__`` failed before the
        environment was created.
        """
        env = getattr(self, "env", None)
        if env is not None:
            self.env = None
            env.close()

In [3]:
# Curriculum: arenas are trained in this order, each stage starting from the
# checkpoint produced by the previous one.
arena_configurations = ['0.yml', '1.yml', '2.yml', '3.yml', '4.yml']

# Target mean episode reward for each arena above (same order).
result_per_arena = [0.8, 0.8, 0.8, 0.6, 0.2]
# NOTE: despite its name, this caps the number of additional trainer.train()
# calls (training iterations) per arena, not the number of episodes.
MIN_NUMBER_OF_EPISODES = 100
# Empty string means "start from scratch"; afterwards it holds the path of the
# most recently saved checkpoint.
checkpoint = ""

if len(arena_configurations) != len(result_per_arena):
    raise ValueError(
        "arena_configurations and result_per_arena must have the same length"
    )

for arena_file, reward_threshold in zip(arena_configurations, result_per_arena):

    ## Setup configuration to use
    # The original literal listed "num_workers" twice (0, then 1); Python keeps
    # only the last occurrence, so the effective value has always been 1.
    conf = {
        "env_config": {
            "unity_worker_id": 60,
            "arena_to_train": '../examples/configurations/curriculum/' + arena_file,
        },
        "model": {
            "custom_model": 'my_cnn_rnn_model',
            "custom_model_config": {},
        },
        "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "2")),
        "num_workers": 1,  # parallelism
        "framework": "torch",
        "train_batch_size": 500,
    }

    ## Setup and register environment
    ray.shutdown()
    ray.init()

    # Register custom models so that we can give the ID to the policy trainer.
    # NOTE(review): registration is deliberately repeated after every
    # ray.init(); presumably the registry entries belong to the Ray session
    # that was just shut down -- confirm before hoisting this out of the loop.
    # ModelCatalog.register_custom_model("my_fc_model", MyFCForwardModel)
    ModelCatalog.register_custom_model("my_rnn_model", MyRNNModel)
    ModelCatalog.register_custom_model("my_convgru_model", MyConvGRUModel)  # NOTE: Only works with image observations.
    ModelCatalog.register_custom_model("my_cnn_rnn_model", MyCNNRNNModel)

    register_env("unity_env", lambda config: UnityEnvWrapper(config))

    ## Setup trainer
    trainer = PPOTrainer(config=conf, env="unity_env")
    try:
        ## Loading and restoring a trained agent from a checkpoint
        if checkpoint:
            trainer.restore(checkpoint)

        result = trainer.train()
        min_episodes = MIN_NUMBER_OF_EPISODES

        ## Train the model until performance improves to a certain threshold
        # or the iteration budget for this arena is exhausted.
        while (
            result['episode_reward_mean'] < reward_threshold
            and min_episodes > 0
            and not result['done']
        ):
            result = trainer.train()
            min_episodes -= 1

        checkpoint = trainer.save()
    finally:
        # Release the trainer's workers and environments before the next
        # arena tears the Ray session down, even if training raised.
        trainer.stop()

    print("checkpoint saved at", checkpoint)
    print(pretty_print(result))

2021-04-05 15:24:51,950	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m
2021-04-05 15:24:53,610	INFO trainer.py:643 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=54642)[0m Instructions for updating:
[2m[36m(pid=54642)[0m non-resource variables are not supported in the long term


[2m[36m(pid=54642)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=54642)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=54642)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=54642)[0m Preloaded 'lib_burst_generated.so'
[2m[36m(pid=54642)[0m Preloaded 'libgrpc_csharp_ext.x64.so'
[2m[36m(pid=54642)[0m Preloaded 'ScreenSelector.so'
[2m[36m(pid=54642)[0m Display 0 '0': 1920x1080 (primary device).
[2m[36m(pid=54642)[0m Display 1 'C32F391 32"': 1920x1080 (secondary device).
[2m[36m(pid=54642)[0m Logging to /home/azibit/.config/unity3d/Unity Technologies/UnityEnvironment/Player.log


[2m[36m(pid=54642)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=54642)[0m INFO:mlagents_envs:Connected new brain:
[2m[36m(pid=54642)[0m AnimalAI?team=0
[2m[36m(pid=54642)[0m INFO:gym_unity:1 agents within environment.


checkpoint saved at /home/azibit/ray_results/PPO_unity_env_2021-04-05_15-24-53jft6kf3m/checkpoint_3/checkpoint-3
custom_metrics: {}
date: 2021-04-05_15-26-11
done: false
episode_len_mean: 35.46
episode_reward_max: 2.1800550371408463
episode_reward_mean: 0.823587782047689
episode_reward_min: -0.9999999310821295
episodes_this_iter: 31
episodes_total: 50
experiment_id: 8282e29d6bd049c5b9713d59cbf0a763
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.45000000000000007
      cur_lr: 5.0e-05
      entropy: 2.06179416179657
      entropy_coeff: 0.0
      kl: 0.023325731977820396
      policy_loss: -0.18025227123871446
      total_loss: -0.12271115649491549
      vf_explained_var: 0.7569695115089417
      vf_loss: 0.04704452259466052
  num_steps_sampled: 1800
  num_steps_trained: 1800
iterations_since_restore: 3
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 38.52
  ram_ut

2021-04-05 15:26:13,985	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m
[2m[36m(pid=55272)[0m Instructions for updating:
[2m[36m(pid=55272)[0m non-resource variables are not supported in the long term
[2m[36m(pid=55272)[0m Instructions for updating:
[2m[36m(pid=55272)[0m non-resource variables are not supported in the long term


[2m[36m(pid=55272)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=55272)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=55272)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=55272)[0m Preloaded 'lib_burst_generated.so'
[2m[36m(pid=55272)[0m Preloaded 'libgrpc_csharp_ext.x64.so'
[2m[36m(pid=55272)[0m Preloaded 'ScreenSelector.so'
[2m[36m(pid=55272)[0m Display 0 '0': 1920x1080 (primary device).
[2m[36m(pid=55272)[0m Display 1 'C32F391 32"': 1920x1080 (secondary device).
[2m[36m(pid=55272)[0m Logging to /home/azibit/.config/unity3d/Unity Technologies/UnityEnvironment/Player.log
[2m[36m(pid=55272)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=55272)[0m Mono path[0] = '/home/azibit/COMS_

[2m[36m(pid=55272)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=55272)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=55272)[0m INFO:mlagents_envs:Connected new brain:
[2m[36m(pid=55272)[0m AnimalAI?team=0
[2m[36m(pid=55272)[0m INFO:gym_unity:1 agents within environment.
[2m[36m(pid=55272)[0m INFO:mlagents_envs:Connected new brain:
[2m[36m(pid=55272)[0m AnimalAI?team=0
[2m[36m(pid=55272)[0m INFO:gym_unity:1 agents within environment.
2021-04-05 15:26:21,377	INFO trainable.py:372 -- Restored on 192.168.1.4 from checkpoint: /home/azibit/ray_results/PPO_unity_env_2021-04-05_15-24-53jft6kf3m/checkpoint_3/checkpoint-3
2021-04-05 15:26:21,378	INFO trainable.py:379 -- Current state after restoring: {'_iteration': 3, '_timesteps_total': None, '_time_total': 71.59993720054626, '_episodes_total': 50

checkpoint saved at /home/azibit/ray_results/PPO_unity_env_2021-04-05_15-26-153e9hko9h/checkpoint_104/checkpoint-104
custom_metrics: {}
date: 2021-04-05_16-14-50
done: false
episode_len_mean: 101.84
episode_reward_max: 0.8763333689421415
episode_reward_mean: 0.4229967012722045
episode_reward_min: -0.9999999310821295
episodes_this_iter: 5
episodes_total: 414
experiment_id: 8282e29d6bd049c5b9713d59cbf0a763
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 1.8202696740627289
      entropy_coeff: 0.0
      kl: 0.010851967614144087
      policy_loss: -0.2751939631998539
      total_loss: -0.22057853639125824
      vf_explained_var: 0.48086193203926086
      vf_loss: 0.03813400189392269
  num_steps_sampled: 62400
  num_steps_trained: 62400
iterations_since_restore: 101
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 36.

2021-04-05 16:14:53,396	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m
[2m[36m(pid=59662)[0m Instructions for updating:
[2m[36m(pid=59662)[0m non-resource variables are not supported in the long term
[2m[36m(pid=59662)[0m Instructions for updating:
[2m[36m(pid=59662)[0m non-resource variables are not supported in the long term
[2m[36m(pid=59662)[0m Instructions for updating:
[2m[36m(pid=59662)[0m non-resource variables are not supported in the long term


[2m[36m(pid=59662)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=59662)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=59662)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=59662)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=59662)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=59662)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=59662)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=59662)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/Animal

[2m[36m(pid=59662)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=59662)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=59662)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=59662)[0m INFO:mlagents_envs:Connected new brain:
[2m[36m(pid=59662)[0m AnimalAI?team=0
[2m[36m(pid=59662)[0m INFO:mlagents_envs:Connected new brain:
[2m[36m(pid=59662)[0m AnimalAI?team=0
[2m[36m(pid=59662)[0m INFO:mlagents_envs:Connected new brain:
[2m[36m(pid=59662)[0m AnimalAI?team=0
[2m[36m(pid=59662)[0m INFO:gym_unity:1 agents within environment.
[2m[36m(pid=59662)[0m INFO:gym_unity:1 agents within environment.
[2m[36m(pid=59662)[0m INFO:gym_unity:1 agents within environment.
2021-04-05 16:15:01,712	INFO trainable.py:3

checkpoint saved at /home/azibit/ray_results/PPO_unity_env_2021-04-05_16-14-55b2y29l2j/checkpoint_205/checkpoint-205
custom_metrics: {}
date: 2021-04-05_17-03-36
done: false
episode_len_mean: 135.71
episode_reward_max: 0.8870000122115016
episode_reward_mean: 0.05737337528727949
episode_reward_min: -0.9999999310821295
episodes_this_iter: 7
episodes_total: 803
experiment_id: 8282e29d6bd049c5b9713d59cbf0a763
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.6005148410797119
      entropy_coeff: 0.0
      kl: 0.010617264173924923
      policy_loss: -0.113603431917727
      total_loss: -0.07719312533736229
      vf_explained_var: 0.7046387791633606
      vf_loss: 0.012222838308662175
  num_steps_sampled: 123000
  num_steps_trained: 123000
iterations_since_restore: 101
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 17.50882352

2021-04-05 17:03:39,100	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m
[2m[36m(pid=64805)[0m Instructions for updating:
[2m[36m(pid=64805)[0m non-resource variables are not supported in the long term
[2m[36m(pid=64805)[0m Instructions for updating:
[2m[36m(pid=64805)[0m non-resource variables are not supported in the long term
[2m[36m(pid=64805)[0m Instructions for updating:
[2m[36m(pid=64805)[0m non-resource variables are not supported in the long term
[2m[36m(pid=64805)[0m Instructions for updating:
[2m[36m(pid=64805)[0m non-resource variables are not supported in the long term


[2m[36m(pid=64805)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=64805)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=64805)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=64805)[0m Preloaded 'lib_burst_generated.so'
[2m[36m(pid=64805)[0m Preloaded 'libgrpc_csharp_ext.x64.so'
[2m[36m(pid=64805)[0m Preloaded 'ScreenSelector.so'
[2m[36m(pid=64805)[0m Display 0 '0': 1920x1080 (primary device).
[2m[36m(pid=64805)[0m Display 1 'C32F391 32"': 1920x1080 (secondary device).
[2m[36m(pid=64805)[0m Logging to /home/azibit/.config/unity3d/Unity Technologies/UnityEnvironment/Player.log
[2m[36m(pid=64805)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=64805)[0m Mono path[0] = '/home/azibit/COMS_

[2m[36m(pid=64805)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=64805)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=64805)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=64805)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=64805)[0m INFO:mlagents_envs:Connected new brain:
[2m[36m(pid=64805)[0m AnimalAI?team=0
[2m[36m(pid=64805)[0m INFO:gym_unity:1 agents within environment.
[2m[36m(pid=64805)[0m INFO:mlagents_envs:Connected new brain:
[2m[36m(pid=64805)[0m AnimalAI?team=0
[2m[36m(pid=64805)[0m INFO:gym_unity:1 agents within environment.
[2m[36m(pid=64805)[0m INFO:mlagents_envs:Connected new brain:
[2m[36m

checkpoint saved at /home/azibit/ray_results/PPO_unity_env_2021-04-05_17-03-40pucr37ea/checkpoint_306/checkpoint-306
custom_metrics: {}
date: 2021-04-05_17-38-28
done: false
episode_len_mean: 245.87
episode_reward_max: 0.489666691981256
episode_reward_mean: -0.9034265980590135
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1047
experiment_id: 8282e29d6bd049c5b9713d59cbf0a763
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187500000000003
      cur_lr: 5.0e-05
      entropy: 1.667035174369812
      entropy_coeff: 0.0
      kl: 0.013021756522357463
      policy_loss: -0.09588294923305511
      total_loss: -0.07088536769151688
      vf_explained_var: 0.4998610019683838
      vf_loss: 0.0052207885310053825
  num_steps_sampled: 183600
  num_steps_trained: 183600
iterations_since_restore: 101
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent:

2021-04-05 17:38:31,053	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m
[2m[36m(pid=68242)[0m Instructions for updating:
[2m[36m(pid=68242)[0m non-resource variables are not supported in the long term
[2m[36m(pid=68242)[0m Instructions for updating:
[2m[36m(pid=68242)[0m non-resource variables are not supported in the long term
[2m[36m(pid=68242)[0m Instructions for updating:
[2m[36m(pid=68242)[0m non-resource variables are not supported in the long term
[2m[36m(pid=68242)[0m Instructions for updating:
[2m[36m(pid=68242)[0m non-resource variables are not supported in the long term
[2m[36m(pid=68242)[0m Instructions for updating:
[2m[36m(pid=68242)[0m non-resource variables are not supported in the long term


[2m[36m(pid=68242)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=68242)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=68242)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=68242)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=68242)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=68242)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=68242)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=68242)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/Animal

[2m[36m(pid=68242)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=68242)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=68242)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=68242)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=68242)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=68242)[0m INFO:mlagents_envs:Connected new brain:
[2m[36m(pid=68242)[0m AnimalAI?team=0
[2m[36m(pid=68242)[0m INFO:gym_unity:1 agents within environment.
[2m[36m(pid=68242)[0m INFO:mlagents_envs:Connected new brain:
[2m[36m(pid=68242)[0m AnimalAI?team=0

checkpoint saved at /home/azibit/ray_results/PPO_unity_env_2021-04-05_17-38-31ajn4r3p6/checkpoint_407/checkpoint-407
custom_metrics: {}
date: 2021-04-05_18-15-04
done: false
episode_len_mean: 248.8
episode_reward_max: 0.481666692532599
episode_reward_mean: -0.9851832648459822
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 1290
experiment_id: 8282e29d6bd049c5b9713d59cbf0a763
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.5851064920425415
      entropy_coeff: 0.0
      kl: 0.012575673684477807
      policy_loss: -0.10114725157618523
      total_loss: -0.06987666636705399
      vf_explained_var: 0.6344132423400879
      vf_loss: 0.0026216533384285866
  num_steps_sampled: 244200
  num_steps_trained: 244200
iterations_since_restore: 101
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 26.137837

In [5]:
## Training for Object permanence and spatial reasoning
# Arenas are trained in this order, each stage starting from the checkpoint
# produced by the previous one.
arena_configurations = ['1_behind.yml', '2_inside.yml', '3_where_is_object.yml',
                        '4_previously_seen_object.yml', '5_inside_or_behind.yml', '6_spatial_reasoning.yml']

# Target mean episode reward for each arena above (same order).
result_per_arena = [0.8, 0.8, 0.8, 0.7, 0.7, 0.6]
# NOTE: despite its name, this caps the number of additional trainer.train()
# calls (training iterations) per arena, not the number of episodes.
MIN_NUMBER_OF_EPISODES = 100
# Starting checkpoint. The default is a machine-specific absolute path, so it
# can be overridden with the CURRICULUM_CHECKPOINT environment variable when
# the notebook runs elsewhere.
checkpoint = os.environ.get(
    "CURRICULUM_CHECKPOINT",
    "/home/azibit/ray_results/PPO_unity_env_2021-04-05_17-38-31ajn4r3p6/checkpoint_407/checkpoint-407",
)

if len(arena_configurations) != len(result_per_arena):
    raise ValueError(
        "arena_configurations and result_per_arena must have the same length"
    )

for arena_file, reward_threshold in zip(arena_configurations, result_per_arena):

    ## Setup configuration to use
    # The original literal listed "num_workers" twice (0, then 1); Python keeps
    # only the last occurrence, so the effective value has always been 1.
    conf = {
        "env_config": {
            "unity_worker_id": 60,
            "arena_to_train": '../examples/configurations/curriculum/' + arena_file,
        },
        "model": {
            "custom_model": 'my_cnn_rnn_model',
            "custom_model_config": {},
        },
        "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "2")),
        "num_workers": 1,  # parallelism
        "framework": "torch",
        "train_batch_size": 500,
    }

    ## Setup and register environment
    ray.shutdown()
    ray.init()

    # Register custom models so that we can give the ID to the policy trainer.
    # NOTE(review): registration is deliberately repeated after every
    # ray.init(); presumably the registry entries belong to the Ray session
    # that was just shut down -- confirm before hoisting this out of the loop.
    # ModelCatalog.register_custom_model("my_fc_model", MyFCForwardModel)
    ModelCatalog.register_custom_model("my_rnn_model", MyRNNModel)
    ModelCatalog.register_custom_model("my_convgru_model", MyConvGRUModel)  # NOTE: Only works with image observations.
    ModelCatalog.register_custom_model("my_cnn_rnn_model", MyCNNRNNModel)

    register_env("unity_env", lambda config: UnityEnvWrapper(config))

    ## Setup trainer
    trainer = PPOTrainer(config=conf, env="unity_env")
    try:
        ## Loading and restoring a trained agent from a checkpoint
        if checkpoint:
            trainer.restore(checkpoint)

        result = trainer.train()
        min_episodes = MIN_NUMBER_OF_EPISODES

        ## Train the model until performance improves to a certain threshold
        # or the iteration budget for this arena is exhausted.
        while (
            result['episode_reward_mean'] < reward_threshold
            and min_episodes > 0
            and not result['done']
        ):
            result = trainer.train()
            min_episodes -= 1

        checkpoint = trainer.save()
    finally:
        # Release the trainer's workers and environments before the next
        # arena tears the Ray session down, even if training raised.
        trainer.stop()

    print("checkpoint saved at", checkpoint)
    print(pretty_print(result))

2021-04-05 22:36:21,124	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m
[2m[36m(pid=90739)[0m Instructions for updating:
[2m[36m(pid=90739)[0m non-resource variables are not supported in the long term
[2m[36m(pid=90739)[0m Instructions for updating:
[2m[36m(pid=90739)[0m non-resource variables are not supported in the long term
[2m[36m(pid=90739)[0m Instructions for updating:
[2m[36m(pid=90739)[0m non-resource variables are not supported in the long term
[2m[36m(pid=90739)[0m Instructions for updating:
[2m[36m(pid=90739)[0m non-resource variables are not supported in the long term
[2m[36m(pid=90739)[0m Instructions for updating:
[2m[36m(pid=90739)[0m non-resource variables are not supported in the long term
[2m[36m(pid=90739)[0m Instructions for updating:
[2m[36m(pid=90739)[0m non-resource variables are not supported in the long term
[2m[36m(pid=90739)[0m Instructions for updating:
[2m[36m(pid=90739)[

[2m[36m(pid=90739)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=90739)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=90739)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=90739)[0m Preloaded 'lib_burst_generated.so'
[2m[36m(pid=90739)[0m Preloaded 'libgrpc_csharp_ext.x64.so'
[2m[36m(pid=90739)[0m Preloaded 'ScreenSelector.so'
[2m[36m(pid=90739)[0m Display 0 '0': 1920x1080 (primary device).
[2m[36m(pid=90739)[0m Display 1 'C32F391 32"': 1920x1080 (secondary device).
[2m[36m(pid=90739)[0m Logging to /home/azibit/.config/unity3d/Unity Technologies/UnityEnvironment/Player.log
[2m[36m(pid=90739)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=90739)[0m Mono path[0] = '/home/azibit/COMS_

[2m[36m(pid=90739)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=90739)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=90739)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=90739)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=90739)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=90739)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=90739)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version

checkpoint saved at /home/azibit/ray_results/PPO_unity_env_2021-04-05_22-36-21xmetbcrb/checkpoint_508/checkpoint-508
custom_metrics: {}
date: 2021-04-05_23-13-43
done: false
episode_len_mean: 130.03
episode_reward_max: 0.7923333747312427
episode_reward_mean: 0.20009337390773
episode_reward_min: -0.9999999310821295
episodes_this_iter: 5
episodes_total: 1625
experiment_id: 8282e29d6bd049c5b9713d59cbf0a763
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.5507797300815582
      entropy_coeff: 0.0
      kl: 0.013586960267275572
      policy_loss: -0.19848722219467163
      total_loss: -0.09353464841842651
      vf_explained_var: 0.024836093187332153
      vf_loss: 0.0739997299388051
  num_steps_sampled: 304800
  num_steps_trained: 304800
iterations_since_restore: 101
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 19.29333333

2021-04-05 23:13:45,997	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m
[2m[36m(pid=95742)[0m Instructions for updating:
[2m[36m(pid=95742)[0m non-resource variables are not supported in the long term
[2m[36m(pid=95742)[0m Instructions for updating:
[2m[36m(pid=95742)[0m non-resource variables are not supported in the long term
[2m[36m(pid=95742)[0m Instructions for updating:
[2m[36m(pid=95742)[0m non-resource variables are not supported in the long term
[2m[36m(pid=95742)[0m Instructions for updating:
[2m[36m(pid=95742)[0m non-resource variables are not supported in the long term
[2m[36m(pid=95742)[0m Instructions for updating:
[2m[36m(pid=95742)[0m non-resource variables are not supported in the long term
[2m[36m(pid=95742)[0m Instructions for updating:
[2m[36m(pid=95742)[0m non-resource variables are not supported in the long term
[2m[36m(pid=95742)[0m Instructions for updating:
[2m[36m(pid=95742)[

[2m[36m(pid=95742)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=95742)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=95742)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=95742)[0m Preloaded 'lib_burst_generated.so'
[2m[36m(pid=95742)[0m Preloaded 'libgrpc_csharp_ext.x64.so'
[2m[36m(pid=95742)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=95742)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=95742)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=95742)[0m Preloaded 'lib_burst_generated.so'
[2m[36m(pid=95742)[0m Preloaded 'libgrpc

[2m[36m(pid=95742)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=95742)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=95742)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=95742)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=95742)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=95742)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=95742)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version

checkpoint saved at /home/azibit/ray_results/PPO_unity_env_2021-04-05_23-13-47b0l5ap7r/checkpoint_509/checkpoint-509
custom_metrics: {}
date: 2021-04-05_23-14-13
done: false
episode_len_mean: 66.0
episode_reward_max: 0.985800038673915
episode_reward_mean: 0.985800038673915
episode_reward_min: 0.985800038673915
episodes_this_iter: 1
episodes_total: 1626
experiment_id: 8282e29d6bd049c5b9713d59cbf0a763
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.2
      cur_lr: 5.0e-05
      entropy: 1.470314770936966
      entropy_coeff: 0.0
      kl: 0.024587628431618214
      policy_loss: -0.23923810571432114
      total_loss: -0.2006707526743412
      vf_explained_var: -0.6498035788536072
      vf_loss: 0.033649828867055476
  num_steps_sampled: 305400
  num_steps_trained: 305400
iterations_since_restore: 1
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 22.644827586206894
  ra

2021-04-05 23:14:16,304	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m
[2m[36m(pid=96050)[0m Instructions for updating:
[2m[36m(pid=96050)[0m non-resource variables are not supported in the long term
[2m[36m(pid=96050)[0m Instructions for updating:
[2m[36m(pid=96050)[0m non-resource variables are not supported in the long term
[2m[36m(pid=96050)[0m Instructions for updating:
[2m[36m(pid=96050)[0m non-resource variables are not supported in the long term
[2m[36m(pid=96050)[0m Instructions for updating:
[2m[36m(pid=96050)[0m non-resource variables are not supported in the long term
[2m[36m(pid=96050)[0m Instructions for updating:
[2m[36m(pid=96050)[0m non-resource variables are not supported in the long term
[2m[36m(pid=96050)[0m Instructions for updating:
[2m[36m(pid=96050)[0m non-resource variables are not supported in the long term
[2m[36m(pid=96050)[0m Instructions for updating:
[2m[36m(pid=96050)[

[2m[36m(pid=96050)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=96050)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=96050)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=96050)[0m Preloaded 'lib_burst_generated.so'
[2m[36m(pid=96050)[0m Preloaded 'libgrpc_csharp_ext.x64.so'
[2m[36m(pid=96050)[0m Preloaded 'ScreenSelector.so'
[2m[36m(pid=96050)[0m Display 0 '0': 1920x1080 (primary device).
[2m[36m(pid=96050)[0m Display 1 'C32F391 32"': 1920x1080 (secondary device).
[2m[36m(pid=96050)[0m Logging to /home/azibit/.config/unity3d/Unity Technologies/UnityEnvironment/Player.log
[2m[36m(pid=96050)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=96050)[0m Mono path[0] = '/home/azibit/COMS_

[2m[36m(pid=96050)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=96050)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=96050)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=96050)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=96050)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=96050)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=96050)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version

checkpoint saved at /home/azibit/ray_results/PPO_unity_env_2021-04-05_23-14-16s5qm07gh/checkpoint_610/checkpoint-610
custom_metrics: {}
date: 2021-04-05_23-47-19
done: false
episode_len_mean: 40.0
episode_reward_max: -1.0000000894069672
episode_reward_mean: -1.0000000894069672
episode_reward_min: -1.0000000894069672
episodes_this_iter: 15
episodes_total: 3141
experiment_id: 8282e29d6bd049c5b9713d59cbf0a763
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.4283073246479034
      entropy_coeff: 0.0
      kl: 0.014346184208989143
      policy_loss: 0.004215223714709282
      total_loss: 0.04179629310965538
      vf_explained_var: 0.922919511795044
      vf_loss: 0.004898671293631196
  num_steps_sampled: 366000
  num_steps_trained: 366000
iterations_since_restore: 101
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.2115384

2021-04-05 23:47:21,120	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m
[2m[36m(pid=100869)[0m Instructions for updating:
[2m[36m(pid=100869)[0m non-resource variables are not supported in the long term
[2m[36m(pid=100869)[0m Instructions for updating:
[2m[36m(pid=100869)[0m non-resource variables are not supported in the long term
[2m[36m(pid=100869)[0m Instructions for updating:
[2m[36m(pid=100869)[0m non-resource variables are not supported in the long term
[2m[36m(pid=100869)[0m Instructions for updating:
[2m[36m(pid=100869)[0m non-resource variables are not supported in the long term
[2m[36m(pid=100869)[0m Instructions for updating:
[2m[36m(pid=100869)[0m non-resource variables are not supported in the long term
[2m[36m(pid=100869)[0m Instructions for updating:
[2m[36m(pid=100869)[0m non-resource variables are not supported in the long term
[2m[36m(pid=100869)[0m Instructions for updating:
[2m[36m

[2m[36m(pid=100869)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=100869)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=100869)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=100869)[0m Preloaded 'lib_burst_generated.so'
[2m[36m(pid=100869)[0m Preloaded 'libgrpc_csharp_ext.x64.so'
[2m[36m(pid=100869)[0m Preloaded 'ScreenSelector.so'
[2m[36m(pid=100869)[0m Display 0 '0': 1920x1080 (primary device).
[2m[36m(pid=100869)[0m Display 1 'C32F391 32"': 1920x1080 (secondary device).
[2m[36m(pid=100869)[0m Logging to /home/azibit/.config/unity3d/Unity Technologies/UnityEnvironment/Player.log
[2m[36m(pid=100869)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=100869)[0m Mono path[0] = '/home/a

[2m[36m(pid=100869)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=100869)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=100869)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=100869)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=100869)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=100869)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=100869)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication 

2021-04-05 23:47:29,910	INFO trainable.py:379 -- Current state after restoring: {'_iteration': 610, '_timesteps_total': None, '_time_total': 14388.659699201584, '_episodes_total': 3141}


checkpoint saved at /home/azibit/ray_results/PPO_unity_env_2021-04-05_23-47-224n0bt5zf/checkpoint_711/checkpoint-711
custom_metrics: {}
date: 2021-04-06_00-20-25
done: false
episode_len_mean: 463.17
episode_reward_max: 0.8996666637249291
episode_reward_mean: -0.8363699352554977
episode_reward_min: -0.9999999310821295
episodes_this_iter: 2
episodes_total: 3271
experiment_id: 8282e29d6bd049c5b9713d59cbf0a763
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 2.278125
      cur_lr: 5.0e-05
      entropy: 1.2387224733829498
      entropy_coeff: 0.0
      kl: 0.010367344482801855
      policy_loss: 0.20452000945806503
      total_loss: 0.22980708628892899
      vf_explained_var: -1.0
      vf_loss: 0.0016690015472704545
  num_steps_sampled: 426600
  num_steps_trained: 426600
iterations_since_restore: 101
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.123076923076923
  ra

2021-04-06 00:20:28,006	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m
[2m[36m(pid=106316)[0m Instructions for updating:
[2m[36m(pid=106316)[0m non-resource variables are not supported in the long term
[2m[36m(pid=106316)[0m Instructions for updating:
[2m[36m(pid=106316)[0m non-resource variables are not supported in the long term
[2m[36m(pid=106316)[0m Instructions for updating:
[2m[36m(pid=106316)[0m non-resource variables are not supported in the long term
[2m[36m(pid=106316)[0m Instructions for updating:
[2m[36m(pid=106316)[0m non-resource variables are not supported in the long term
[2m[36m(pid=106316)[0m Instructions for updating:
[2m[36m(pid=106316)[0m non-resource variables are not supported in the long term
[2m[36m(pid=106316)[0m Instructions for updating:
[2m[36m(pid=106316)[0m non-resource variables are not supported in the long term
[2m[36m(pid=106316)[0m Instructions for updating:
[2m[36m

[2m[36m(pid=106316)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=106316)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=106316)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=106316)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=106316)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=106316)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=106316)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=106316)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/en

[2m[36m(pid=106316)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=106316)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=106316)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=106316)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=106316)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=106316)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=106316)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication 

2021-04-06 00:20:35,147	INFO trainable.py:372 -- Restored on 192.168.1.4 from checkpoint: /home/azibit/ray_results/PPO_unity_env_2021-04-05_23-47-224n0bt5zf/checkpoint_711/checkpoint-711
2021-04-06 00:20:35,147	INFO trainable.py:379 -- Current state after restoring: {'_iteration': 711, '_timesteps_total': None, '_time_total': 16362.801066875458, '_episodes_total': 3271}


checkpoint saved at /home/azibit/ray_results/PPO_unity_env_2021-04-06_00-20-28pxxm9n22/checkpoint_812/checkpoint-812
custom_metrics: {}
date: 2021-04-06_00-56-06
done: false
episode_len_mean: 16.1
episode_reward_max: 0.6790000386536121
episode_reward_mean: 0.6779333711415529
episode_reward_min: 0.6523333601653576
episodes_this_iter: 38
episodes_total: 6456
experiment_id: 8282e29d6bd049c5b9713d59cbf0a763
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.854296875
      cur_lr: 5.0e-05
      entropy: 0.8145612597465515
      entropy_coeff: 0.0
      kl: 0.013488577120006085
      policy_loss: -0.03631351934745908
      total_loss: -0.022879181802272795
      vf_explained_var: 0.7892518639564514
      vf_loss: 0.0019110968336462975
  num_steps_sampled: 487200
  num_steps_trained: 487200
iterations_since_restore: 101
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 18.256

2021-04-06 00:56:09,257	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m
[2m[36m(pid=110249)[0m Instructions for updating:
[2m[36m(pid=110249)[0m non-resource variables are not supported in the long term
[2m[36m(pid=110249)[0m Instructions for updating:
[2m[36m(pid=110249)[0m non-resource variables are not supported in the long term
[2m[36m(pid=110249)[0m Instructions for updating:
[2m[36m(pid=110249)[0m non-resource variables are not supported in the long term
[2m[36m(pid=110249)[0m Instructions for updating:
[2m[36m(pid=110249)[0m non-resource variables are not supported in the long term
[2m[36m(pid=110249)[0m Instructions for updating:
[2m[36m(pid=110249)[0m non-resource variables are not supported in the long term
[2m[36m(pid=110249)[0m Instructions for updating:
[2m[36m(pid=110249)[0m non-resource variables are not supported in the long term
[2m[36m(pid=110249)[0m Instructions for updating:
[2m[36m

[2m[36m(pid=110249)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=110249)[0m Mono path[0] = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/Managed'
[2m[36m(pid=110249)[0m Mono config path = '/home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI_Data/MonoBleedingEdge/etc'
[2m[36m(pid=110249)[0m Preloaded 'lib_burst_generated.so'
[2m[36m(pid=110249)[0m Preloaded 'libgrpc_csharp_ext.x64.so'
[2m[36m(pid=110249)[0m Preloaded 'ScreenSelector.so'
[2m[36m(pid=110249)[0m Display 0 '0': 1920x1080 (primary device).
[2m[36m(pid=110249)[0m Display 1 'C32F391 32"': 1920x1080 (secondary device).
[2m[36m(pid=110249)[0m Logging to /home/azibit/.config/unity3d/Unity Technologies/UnityEnvironment/Player.log
[2m[36m(pid=110249)[0m Found path: /home/azibit/COMS_673/AnimalAI-Olympics/cachey/../examples/env/AnimalAI.x86_64
[2m[36m(pid=110249)[0m Mono path[0] = '/home/a

[2m[36m(pid=110249)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=110249)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=110249)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=110249)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=110249)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=110249)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication version 0.15.0
[2m[36m(pid=110249)[0m INFO:mlagents_envs:Connected to Unity environment with package version 0.15.0-preview and communication 

2021-04-06 00:56:15,541	INFO trainable.py:372 -- Restored on 192.168.1.4 from checkpoint: /home/azibit/ray_results/PPO_unity_env_2021-04-06_00-20-28pxxm9n22/checkpoint_812/checkpoint-812
2021-04-06 00:56:15,541	INFO trainable.py:379 -- Current state after restoring: {'_iteration': 812, '_timesteps_total': None, '_time_total': 18493.03905749321, '_episodes_total': 6456}


checkpoint saved at /home/azibit/ray_results/PPO_unity_env_2021-04-06_00-56-093qzpcrxa/checkpoint_813/checkpoint-813
custom_metrics: {}
date: 2021-04-06_00-56-33
done: false
episode_len_mean: .nan
episode_reward_max: .nan
episode_reward_mean: .nan
episode_reward_min: .nan
episodes_this_iter: 0
episodes_total: 6456
experiment_id: 8282e29d6bd049c5b9713d59cbf0a763
hostname: azibit-Lenovo-Y520-15IKBM
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.2
      cur_lr: 5.0e-05
      entropy: 0.9787910431623459
      entropy_coeff: 0.0
      kl: 0.02692586090415716
      policy_loss: -0.05564086697995663
      total_loss: -0.03235322143882513
      vf_explained_var: -1.0
      vf_loss: 0.017902474151924253
  num_steps_sampled: 487800
  num_steps_trained: 487800
iterations_since_restore: 1
node_ip: 192.168.1.4
num_healthy_workers: 1
off_policy_estimator: {}
perf:
  cpu_util_percent: 20.1
  ram_util_percent: 90.4925925925926
pid: 54132
policy_reward_max: {}
p