In [1]:
import inspect
import time
from statistics import mean, stdev
from CybORG import CybORG
from CybORG.Agents import B_lineAgent, SleepAgent, GreenAgent
from CybORG.Agents.SimpleAgents.BaseAgent import BaseAgent
from CybORG.Agents.SimpleAgents.BlueReactAgent import BlueReactRemoveAgent
from CybORG.Agents.SimpleAgents.Meander import RedMeanderAgent
from CybORG.Agents.Wrappers.EnumActionWrapper import EnumActionWrapper
from CybORG.Agents.Wrappers.FixedFlatWrapper import FixedFlatWrapper
from CybORG.Agents.Wrappers.OpenAIGymWrapper import OpenAIGymWrapper
from CybORG.Agents.Wrappers.ReduceActionSpaceWrapper import ReduceActionSpaceWrapper
from CybORG.Agents.Wrappers import ChallengeWrapper
import os

from ray.tune.registry import register_env
from CybORG.Agents.Wrappers.rllib_wrapper import RLlibWrapper
import warnings
import numpy as np
from ray import air, tune
# Blanket-ignore every Python warning raised in this process from here on
# (no category or module filter is given, so deprecation warnings are hidden too).
warnings.filterwarnings('ignore')

In [2]:
def env_creator(env_config: dict):
    """Build a simulated CybORG Scenario2 environment wrapped for RLlib.

    Args:
        env_config: Configuration mapping handed to the creator. It is accepted
            so the function matches the creator signature, but it is not read.

    Returns:
        An ``RLlibWrapper`` for the agent named "Blue" with ``max_steps=100``,
        wrapping a ``CybORG`` simulation whose Red agent is ``B_lineAgent`` and
        whose Green agent is ``GreenAgent``.
    """
    # The scenario files sit under Shared/Scenarios next to the file that
    # defines the CybORG class. Taking the directory name (instead of slicing
    # a fixed 10 characters off the path) keeps this correct regardless of the
    # defining file's name length, and os.path.join picks the right separator.
    package_dir = os.path.dirname(inspect.getfile(CybORG))
    path = os.path.join(package_dir, 'Shared', 'Scenarios', 'Scenario2.yaml')

    agents = {"Red": B_lineAgent, "Green": GreenAgent}
    cyborg = CybORG(scenario_file=path, environment='sim', agents=agents)
    env = RLlibWrapper(env=cyborg, agent_name="Blue", max_steps=100)
    return env

def print_results(results_dict):
    """Print a one-line summary of a training result dictionary.

    Reads the "training_iteration", "episode_reward_mean",
    "episode_reward_max" and "episode_reward_min" entries (raising
    ``KeyError`` if one is missing) and prints them tab-separated, with the
    rewards rounded to one decimal place.
    """
    summary_keys = (
        "training_iteration",
        "episode_reward_mean",
        "episode_reward_max",
        "episode_reward_min",
    )
    iteration, reward_avg, reward_best, reward_worst = [
        results_dict[key] for key in summary_keys
    ]
    # The minimum uses the ' ' sign flag, so non-negative values get a leading space.
    line = f"{iteration:4d} \tr_mean: {reward_avg:.1f} \tr_max: {reward_best:.1f} \tr_min: {reward_worst: .1f}"
    print(line)

# Register the creator under the name "CybORG"; the tuning cell's "env" entry uses this name.
register_env("CybORG", env_creator)

In [None]:
from ray.rllib.agents.callbacks import RE3UpdateCallbacks

class RE3Callbacks(RE3UpdateCallbacks):
    """Callbacks class to supply as the ``callbacks`` setting when using RE3.

    The earlier version of this cell also listed ``config["callbacks"]`` as a
    second base class, but no ``config`` object is defined in the visible
    cells, so only ``RE3UpdateCallbacks`` is inherited here.
    NOTE(review): if another callbacks class is in use, add it as a base.
    """


# RE3 settings as a plain dict (the previous `"key": = value` lines were not
# valid Python). Merge into an RLlib config or Tune param_space to enable RE3,
# e.g. param_space={**RE3_PARAMS, "env": "CybORG", ...}.
# NOTE(review): only the exploration type is set; any further RE3 options are
# left at whatever defaults the installed RLlib version provides — confirm.
RE3_PARAMS = {
    "callbacks": RE3Callbacks,
    "exploration_config": {
        "type": "RE3",
    },
}

In [None]:
from ray import air, tune

# Grid-search PPO hyperparameters on the registered "CybORG" environment.
# The grid expands to 3 batch sizes x 2 learning rates x 2 network widths
# x 2 activations = 24 trials, each stopped at 1.5M timesteps.
tuner = tune.Tuner(
    "PPO",
    run_config=air.RunConfig(
        stop={"timesteps_total": 1.5e6},
        # NOTE(review): the directory says "APPO" but the trainable is "PPO" — confirm.
        local_dir='results/APPO', name="tune",
        checkpoint_config=air.CheckpointConfig(
            checkpoint_frequency=500,
            # 1.5M timesteps is only 250 iterations at train_batch_size=6000
            # (about 375 at 4000), so a frequency of 500 never fires for those
            # trials; also save a checkpoint when each trial finishes.
            checkpoint_at_end=True,
        ),
    ),
    param_space={
        # CC3 specific.
        "env": "CybORG",
        # General
        "num_gpus": 1,
        "num_workers": 30,
        "horizon": 100,
        "num_envs_per_worker": 2,
        # Algorithm parameters
        "train_batch_size": tune.grid_search([6000, 4000, 2000]),
        "lr": tune.grid_search([0.0005, 0.0001]),
        "gamma": tune.grid_search([0.97]),
        "framework": 'tf',
        "model": {
            "fcnet_hiddens": tune.grid_search([[512, 512], [256, 256]]),
            "fcnet_activation": tune.grid_search(["relu", "tanh"]),
        },
    },
)

# Keep the returned result grid in a named variable so later cells can inspect
# it, and leave it as the last expression so the cell still displays it.
results = tuner.fit()
results

2022-12-01 23:26:05,857	INFO worker.py:1528 -- Started a local Ray instance.


0,1
Current time:,2022-12-02 11:10:01
Running for:,11:43:53.81
Memory:,79.0/125.8 GiB

Trial name,status,loc,gamma,lr,model/fcnet_activation,model/fcnet_hiddens,train_batch_size,iter,total time (s),ts,reward,num_recreated_workers,episode_reward_max,episode_reward_min
PPO_CybORG_886ac_00013,RUNNING,172.28.0.2:25182,0.97,0.0001,relu,"[256, 256]",4000,286.0,2311.51,1149720.0,-46.454,0.0,-9.8,-156.8
PPO_CybORG_886ac_00014,PENDING,,0.97,0.0005,tanh,"[256, 256]",4000,,,,,,,
PPO_CybORG_886ac_00015,PENDING,,0.97,0.0001,tanh,"[256, 256]",4000,,,,,,,
PPO_CybORG_886ac_00016,PENDING,,0.97,0.0005,relu,"[512, 512]",2000,,,,,,,
PPO_CybORG_886ac_00017,PENDING,,0.97,0.0001,relu,"[512, 512]",2000,,,,,,,
PPO_CybORG_886ac_00018,PENDING,,0.97,0.0005,tanh,"[512, 512]",2000,,,,,,,
PPO_CybORG_886ac_00019,PENDING,,0.97,0.0001,tanh,"[512, 512]",2000,,,,,,,
PPO_CybORG_886ac_00020,PENDING,,0.97,0.0005,relu,"[256, 256]",2000,,,,,,,
PPO_CybORG_886ac_00021,PENDING,,0.97,0.0001,relu,"[256, 256]",2000,,,,,,,
PPO_CybORG_886ac_00022,PENDING,,0.97,0.0005,tanh,"[256, 256]",2000,,,,,,,


[2m[36m(PPO pid=40917)[0m 2022-12-01 23:26:15,314	INFO algorithm.py:2303 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPO pid=40917)[0m 2022-12-01 23:26:15,314	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPO pid=40917)[0m 2022-12-01 23:26:15,316	INFO algorithm.py:457 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(PPO pid=40917)[0m 2022-12-01 23:26:33,721	INFO trainable.py:164 -- Trainable.setup took 18.409 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Trial name,agent_timesteps_total,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,experiment_id,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_recreated_workers,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
PPO_CybORG_886ac_00000,1500000,"{'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",{},2022-12-02_00-16-05,True,100,{},-9.4,-83.894,-942.8,60,15000,b5e399f05eaf4fa3bf5cab998ee1c0c1,01589170c3ff,"{'learner': {'default_policy': {'learner_stats': {'cur_kl_coeff': 0.20000000298023224, 'cur_lr': 0.0005000000237487257, 'total_loss': 6.145261, 'policy_loss': -0.072524495, 'vf_loss': 6.214124, 'vf_explained_var': -0.4529026, 'kl': 0.018305715, 'entropy': 1.9857411, 'entropy_coeff': 0.0, 'model': {}}, 'train': None}}, 'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",250,172.28.0.2,1500000,1500000,1500000,6000,1500000,6000,0,30,0,6000,"{'cpu_util_percent': 34.099999999999994, 'ram_util_percent': 63.2}",40917,{},{},{},"{'mean_raw_obs_processing_ms': 3.824441979991765, 'mean_inference_ms': 2.0151192474298023, 'mean_action_processing_ms': 0.20327077246115244, 'mean_env_wait_ms': 23.244759228610484, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -9.39999999999999, 'episode_reward_min': -942.8000000000012, 'episode_reward_mean': -83.89399999999996, 'episode_len_mean': 100.0, 'episode_media': {}, 'episodes_this_iter': 60, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-11.999999999999988, -93.2, -109.69999999999989, -112.79999999999986, -100.8, -81.59999999999981, -14.399999999999993, -13.399999999999988, -120.19999999999999, -102.69999999999969, -71.79999999999987, -108.9, -12.89999999999998, -11.399999999999993, -30.70000000000006, -15.099999999999984, -111.79999999999981, -12.89999999999998, -123.79999999999983, -12.099999999999987, -12.49999999999998, -11.699999999999983, -17.599999999999984, -99.7999999999999, -126.7999999999997, -80.69999999999999, -105.7, -165.29999999999995, -430.6, -29.800000000000107, 
-30.700000000000088, -105.4, -114.4, -30.40000000000005, -84.1, -98.6, -54.700000000000095, -40.80000000000005, -115.79999999999978, -165.9, -124.19999999999978, -104.79999999999984, -168.79999999999956, -123.79999999999978, -64.80000000000007, -20.700000000000017, -104.2, -121.69999999999975, -24.800000000000036, -144.2, -104.1, -14.799999999999986, -15.299999999999978, -45.79999999999999, -117.79999999999978, -104.40000000000005, -111.2, -135.9, -16.499999999999996, -13.79999999999999, -9.39999999999999, -13.799999999999994, -942.8000000000012, -111.2, -96.79999999999988, -155.29999999999998, -24.800000000000022, -84.10000000000001, -68.10000000000004, -105.79999999999986, -102.2, -93.7999999999999, -21.800000000000022, -101.2, -14.199999999999985, -99.4, -109.79999999999981, -12.799999999999983, -115.59999999999975, -13.499999999999975, -76.79999999999998, -19.8, -101.8, -55.80000000000006, -122.79999999999981, -22.20000000000005, -108.4, -15.999999999999984, -14.599999999999985, -76.69999999999996, -86.79999999999995, -14.299999999999992, -15.699999999999982, -96.9, -16.799999999999986, -100.69999999999986, -20.800000000000043, -101.79999999999987, -16.39999999999999, -132.79999999999978], 'episode_lengths': [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 3.824441979991765, 'mean_inference_ms': 2.0151192474298023, 'mean_action_processing_ms': 0.20327077246115244, 'mean_env_wait_ms': 23.244759228610484, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 
0}",2959.57,11.799,2959.57,"{'training_iteration_time_ms': 11794.659, 'load_time_ms': 2.856, 'load_throughput': 2100530.353, 'learn_time_ms': 8558.128, 'learn_throughput': 701.088, 'synch_weights_time_ms': 12.22}",1669940165,0,1500000,250,886ac_00000,18.4293
PPO_CybORG_886ac_00001,1500000,"{'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",{},2022-12-02_01-06-51,True,100,{},-11.4,-66.647,-550.5,60,15000,440e92abfd344946842008ea29b0e9af,01589170c3ff,"{'learner': {'default_policy': {'learner_stats': {'cur_kl_coeff': 0.20000000298023224, 'cur_lr': 9.999999747378752e-05, 'total_loss': 6.248784, 'policy_loss': -0.05048617, 'vf_loss': 6.2960596, 'vf_explained_var': -0.2688931, 'kl': 0.016049987, 'entropy': 1.1900938, 'entropy_coeff': 0.0, 'model': {}}, 'train': None}}, 'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",250,172.28.0.2,1500000,1500000,1500000,6000,1500000,6000,0,30,0,6000,"{'cpu_util_percent': 33.699999999999996, 'ram_util_percent': 63.2}",2114,{},{},{},"{'mean_raw_obs_processing_ms': 3.9504583420140955, 'mean_inference_ms': 2.408198222572037, 'mean_action_processing_ms': 0.2074477807769207, 'mean_env_wait_ms': 25.51555596637362, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -11.399999999999983, 'episode_reward_min': -550.5, 'episode_reward_mean': -66.647, 'episode_len_mean': 100.0, 'episode_media': {}, 'episodes_this_iter': 60, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-77.8, -23.599999999999994, -178.6999999999997, -17.300000000000004, -70.30000000000003, -50.8000000000001, -51.80000000000007, -69.5, -56.8000000000001, -84.4, -11.399999999999983, -17.7, -86.4, -58.5, -72.0, -58.4, -41.10000000000004, -30.200000000000053, -33.60000000000002, -22.900000000000013, -68.80000000000003, -42.800000000000075, -51.800000000000104, -69.5, -54.70000000000008, -65.7, -26.700000000000077, -46.70000000000009, -77.19999999999996, -13.999999999999975, -26.100000000000065, -86.79999999999991, -75.79999999999998, -11.899999999999984, -264.79999999999984, 
-85.79999999999994, -48.80000000000008, -59.30000000000006, -51.80000000000008, -542.2, -35.80000000000005, -148.39999999999995, -17.800000000000008, -26.100000000000072, -63.9, -63.4, -46.20000000000003, -52.40000000000001, -15.299999999999985, -43.40000000000003, -47.80000000000008, -44.80000000000009, -36.10000000000005, -95.79999999999994, -71.9, -13.199999999999983, -85.79999999999995, -70.80000000000001, -52.8000000000001, -48.800000000000075, -56.80000000000008, -69.20000000000006, -11.799999999999992, -109.60000000000002, -15.699999999999976, -15.399999999999977, -227.00000000000003, -24.100000000000044, -26.200000000000063, -38.400000000000084, -85.69999999999995, -45.800000000000075, -93.69999999999987, -53.500000000000085, -13.799999999999981, -71.8, -11.499999999999982, -54.80000000000008, -65.5, -133.79999999999976, -53.80000000000009, -51.70000000000009, -168.39999999999998, -550.5, -51.800000000000075, -66.5, -17.499999999999986, -66.4, -83.69999999999997, -27.900000000000098, -34.600000000000065, -66.4, -19.80000000000002, -24.400000000000027, -18.499999999999993, -56.70000000000009, -15.899999999999977, -21.700000000000006, -30.700000000000045, -48.800000000000104], 'episode_lengths': [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 3.9504583420140955, 'mean_inference_ms': 2.408198222572037, 'mean_action_processing_ms': 0.2074477807769207, 'mean_env_wait_ms': 25.51555596637362, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 
0}",3003.97,11.9684,3003.97,"{'training_iteration_time_ms': 11988.926, 'load_time_ms': 2.598, 'load_throughput': 2309448.008, 'learn_time_ms': 8211.345, 'learn_throughput': 730.696, 'synch_weights_time_ms': 11.969}",1669943211,0,1500000,250,886ac_00001,20.2123
PPO_CybORG_886ac_00002,1500000,"{'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",{},2022-12-02_01-57-30,True,100,{},-10.7,-80.869,-611.8,60,15000,a5c2b14e81364c64aeafb5216f836c0f,01589170c3ff,"{'learner': {'default_policy': {'learner_stats': {'cur_kl_coeff': 0.20000000298023224, 'cur_lr': 0.0005000000237487257, 'total_loss': 6.806364, 'policy_loss': -0.06868108, 'vf_loss': 6.8607855, 'vf_explained_var': -0.24313569, 'kl': 0.07129704, 'entropy': 1.7946182, 'entropy_coeff': 0.0, 'model': {}}, 'train': None}}, 'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",250,172.28.0.2,1500000,1500000,1500000,6000,1500000,6000,0,30,0,6000,"{'cpu_util_percent': 33.73529411764706, 'ram_util_percent': 63.2}",4024,{},{},{},"{'mean_raw_obs_processing_ms': 4.135767056278162, 'mean_inference_ms': 2.6593114237437656, 'mean_action_processing_ms': 0.21091466330576647, 'mean_env_wait_ms': 26.55626631762311, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -10.699999999999983, 'episode_reward_min': -611.8000000000012, 'episode_reward_mean': -80.86899999999993, 'episode_len_mean': 100.0, 'episode_media': {}, 'episodes_this_iter': 60, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-84.69999999999996, -135.7999999999997, -140.79999999999964, -111.79999999999987, -29.600000000000072, -98.7999999999999, -147.79999999999976, -85.69999999999999, -54.800000000000075, -110.69999999999987, -22.800000000000022, -611.8000000000012, -14.599999999999977, -101.7999999999999, -98.79999999999991, -20.7, -15.19999999999998, -16.099999999999973, -92.7999999999999, -19.40000000000002, -120.79999999999984, -15.799999999999972, -47.70000000000003, -107.79999999999991, -18.199999999999996, -49.7000000000001, -154.79999999999973, 
-73.80000000000004, -43.700000000000045, -21.40000000000001, -27.600000000000076, -19.099999999999998, -45.70000000000008, -159.79999999999973, -35.70000000000008, -22.700000000000003, -15.799999999999978, -28.99999999999999, -26.800000000000054, -29.40000000000009, -151.69999999999973, -23.700000000000056, -96.79999999999984, -151.69999999999976, -92.7999999999999, -104.79999999999983, -13.799999999999978, -62.80000000000008, -139.6999999999998, -151.79999999999976, -12.699999999999978, -130.7999999999998, -150.69999999999976, -135.79999999999978, -16.499999999999982, -27.70000000000006, -20.800000000000026, -128.69999999999982, -144.79999999999976, -19.39999999999999, -158.69999999999973, -151.79999999999976, -66.80000000000007, -199.79999999999964, -143.79999999999978, -12.499999999999982, -113.69999999999986, -30.80000000000006, -14.899999999999977, -121.69999999999985, -131.7999999999998, -26.800000000000043, -106.69999999999985, -156.69999999999973, -33.3, -16.599999999999984, -17.800000000000004, -15.799999999999974, -41.700000000000074, -119.79999999999977, -189.69999999999965, -148.69999999999976, -15.29999999999998, -150.79999999999976, -156.69999999999965, -151.79999999999973, -15.699999999999982, -17.19999999999999, -47.10000000000003, -15.299999999999974, -136.79999999999978, -72.80000000000004, -38.800000000000054, -113.69999999999989, -26.30000000000006, -62.80000000000006, -13.799999999999976, -15.299999999999976, -148.79999999999976, -10.699999999999983], 'episode_lengths': [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 
100, 100, 100, 100]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 4.135767056278162, 'mean_inference_ms': 2.6593114237437656, 'mean_action_processing_ms': 0.21091466330576647, 'mean_env_wait_ms': 26.55626631762311, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",2995.58,11.9138,2995.58,"{'training_iteration_time_ms': 11934.252, 'load_time_ms': 2.513, 'load_throughput': 2387309.586, 'learn_time_ms': 8098.66, 'learn_throughput': 740.863, 'synch_weights_time_ms': 11.788}",1669946250,0,1500000,250,886ac_00002,20.5774
PPO_CybORG_886ac_00003,1500000,"{'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",{},2022-12-02_02-48-18,True,100,{},-11.3,-61.795,-261.8,60,15000,a9b5f2e36e1f4eefaa726c33c4231316,01589170c3ff,"{'learner': {'default_policy': {'learner_stats': {'cur_kl_coeff': 0.20000000298023224, 'cur_lr': 9.999999747378752e-05, 'total_loss': 7.1972218, 'policy_loss': -0.07955466, 'vf_loss': 7.2709723, 'vf_explained_var': -0.29602626, 'kl': 0.029018858, 'entropy': 1.4417502, 'entropy_coeff': 0.0, 'model': {}}, 'train': None}}, 'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",250,172.28.0.2,1500000,1500000,1500000,6000,1500000,6000,0,30,0,6000,"{'cpu_util_percent': 34.7, 'ram_util_percent': 63.2}",6051,{},{},{},"{'mean_raw_obs_processing_ms': 3.995877067955918, 'mean_inference_ms': 2.4634520482085933, 'mean_action_processing_ms': 0.2071228713915775, 'mean_env_wait_ms': 25.688145526324792, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -11.29999999999998, 'episode_reward_min': -261.79999999999995, 'episode_reward_mean': -61.795, 'episode_len_mean': 100.0, 'episode_media': {}, 'episodes_this_iter': 60, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-72.69999999999989, -12.699999999999978, -69.6, -87.79999999999991, -16.59999999999998, -27.80000000000002, -80.19999999999999, -101.79999999999984, -44.40000000000008, -21.300000000000054, -79.59999999999995, -48.80000000000006, -165.4, -45.8, -100.79999999999987, -86.39999999999993, -69.2, -19.69999999999998, -32.80000000000003, -83.79999999999995, -22.700000000000045, -53.00000000000004, -43.8000000000001, -57.900000000000034, -38.800000000000104, -22.200000000000035, -261.79999999999995, -43.20000000000002, -43.300000000000054, -58.80000000000001, -26.800000000000036, 
-65.8, -24.800000000000008, -72.80000000000001, -63.699999999999996, -55.800000000000104, -48.60000000000007, -27.30000000000006, -38.80000000000009, -40.700000000000095, -52.30000000000006, -54.00000000000008, -51.20000000000004, -73.8, -34.8000000000001, -90.09999999999997, -79.79999999999997, -42.8000000000001, -47.60000000000007, -89.79999999999993, -31.20000000000003, -13.699999999999987, -85.69999999999985, -85.59999999999992, -174.8, -89.79999999999993, -66.1, -102.79999999999986, -40.10000000000002, -94.69999999999989, -83.69999999999987, -62.80000000000007, -73.79999999999998, -27.80000000000006, -63.800000000000104, -20.50000000000002, -52.50000000000009, -22.700000000000024, -47.800000000000026, -103.79999999999986, -98.79999999999987, -29.400000000000045, -63.70000000000004, -95.49999999999989, -106.79999999999988, -40.90000000000007, -79.79999999999997, -90.79999999999984, -16.69999999999999, -91.7, -29.700000000000053, -26.80000000000009, -75.79999999999997, -11.29999999999998, -30.800000000000054, -92.5, -24.70000000000002, -191.6999999999997, -13.499999999999979, -35.20000000000008, -20.499999999999986, -47.80000000000008, -89.59999999999987, -36.700000000000045, -42.400000000000055, -64.80000000000004, -45.800000000000054, -80.79999999999997, -80.69999999999999, -86.59999999999997], 'episode_lengths': [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 3.995877067955918, 'mean_inference_ms': 2.4634520482085933, 'mean_action_processing_ms': 0.2071228713915775, 
'mean_env_wait_ms': 25.688145526324792, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",3001.87,12.2069,3001.87,"{'training_iteration_time_ms': 11925.422, 'load_time_ms': 2.469, 'load_throughput': 2430189.175, 'learn_time_ms': 8187.825, 'learn_throughput': 732.795, 'synch_weights_time_ms': 11.458}",1669949298,0,1500000,250,886ac_00003,20.7607
PPO_CybORG_886ac_00004,1500000,"{'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",{},2022-12-02_03-38-43,True,100,{},-10.9,-65.735,-453.8,60,15000,a9d13f17000b4d70b14ee27efbcda4c6,01589170c3ff,"{'learner': {'default_policy': {'learner_stats': {'cur_kl_coeff': 0.20000000298023224, 'cur_lr': 0.0005000000237487257, 'total_loss': 6.241515, 'policy_loss': -0.07851871, 'vf_loss': 6.3156433, 'vf_explained_var': -0.2444029, 'kl': 0.021952588, 'entropy': 1.7150981, 'entropy_coeff': 0.0, 'model': {}}, 'train': None}}, 'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",250,172.28.0.2,1500000,1500000,1500000,6000,1500000,6000,0,30,0,6000,"{'cpu_util_percent': 33.588235294117645, 'ram_util_percent': 62.7}",7948,{},{},{},"{'mean_raw_obs_processing_ms': 4.030609045823706, 'mean_inference_ms': 2.3634809240038535, 'mean_action_processing_ms': 0.20744841959197555, 'mean_env_wait_ms': 25.890381856778816, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -10.89999999999998, 'episode_reward_min': -453.8000000000014, 'episode_reward_mean': -65.73499999999999, 'episode_len_mean': 100.0, 'episode_media': {}, 'episodes_this_iter': 60, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-13.299999999999997, -18.89999999999998, -14.299999999999976, -17.6, -32.200000000000024, -78.50000000000001, -24.300000000000065, -182.7, -17.299999999999997, -32.700000000000024, -70.0, -22.000000000000018, -86.5, -138.09999999999994, -36.40000000000005, -16.499999999999993, -64.7000000000001, -20.099999999999998, -14.99999999999999, -91.3, -18.9, -23.000000000000032, -175.8, -87.6, -14.899999999999991, -16.599999999999987, -11.29999999999998, -66.80000000000004, -103.29999999999981, -91.89999999999995, -15.499999999999986, -92.79999999999991, 
-16.599999999999987, -14.899999999999988, -76.89999999999999, -79.79999999999997, -85.10000000000005, -34.00000000000002, -141.7999999999996, -53.10000000000003, -71.80000000000001, -79.5, -33.699999999999996, -14.499999999999986, -15.099999999999978, -16.49999999999999, -16.799999999999997, -36.800000000000075, -24.200000000000035, -54.400000000000055, -15.499999999999979, -74.9, -40.100000000000044, -49.80000000000004, -13.499999999999984, -15.399999999999991, -78.5, -16.29999999999998, -57.80000000000006, -170.79999999999956, -17.99999999999999, -112.7999999999998, -64.3, -69.39999999999993, -118.6, -66.80000000000001, -212.79999999999953, -108.09999999999991, -144.69999999999956, -87.6, -15.799999999999981, -71.3, -85.70000000000002, -14.099999999999985, -11.099999999999989, -182.1, -49.2, -56.80000000000009, -165.79999999999973, -453.8000000000014, -93.50000000000006, -10.89999999999998, -59.800000000000054, -41.60000000000003, -84.2, -58.10000000000008, -16.099999999999998, -15.099999999999993, -113.7999999999998, -257.2, -11.399999999999999, -17.299999999999997, -55.30000000000004, -64.00000000000004, -102.80000000000004, -21.700000000000017, -17.999999999999993, -177.39999999999992, -50.200000000000074, -87.40000000000003], 'episode_lengths': [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 4.030609045823706, 'mean_inference_ms': 2.3634809240038535, 'mean_action_processing_ms': 0.20744841959197555, 'mean_env_wait_ms': 25.890381856778816, 'mean_env_render_ms': 0.0}, 
'num_faulty_episodes': 0}",2978.82,11.7715,2978.82,"{'training_iteration_time_ms': 11791.172, 'load_time_ms': 2.531, 'load_throughput': 2370846.468, 'learn_time_ms': 8137.312, 'learn_throughput': 737.344, 'synch_weights_time_ms': 10.627}",1669952323,0,1500000,250,886ac_00004,20.3596
PPO_CybORG_886ac_00005,1500000,"{'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",{},2022-12-02_04-28-22,True,100,{},-10.2,-55.252,-1057.7,60,15000,01c77bf9068a42d4b793710beb2af29b,01589170c3ff,"{'learner': {'default_policy': {'learner_stats': {'cur_kl_coeff': 0.20000000298023224, 'cur_lr': 9.999999747378752e-05, 'total_loss': 5.3005557, 'policy_loss': -0.02554541, 'vf_loss': 5.323387, 'vf_explained_var': -0.054122113, 'kl': 0.013567673, 'entropy': 0.983912, 'entropy_coeff': 0.0, 'model': {}}, 'train': None}}, 'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",250,172.28.0.2,1500000,1500000,1500000,6000,1500000,6000,0,30,0,6000,"{'cpu_util_percent': 34.4375, 'ram_util_percent': 62.5}",9835,{},{},{},"{'mean_raw_obs_processing_ms': 3.8870410405586036, 'mean_inference_ms': 2.1733209641363285, 'mean_action_processing_ms': 0.20771393521309578, 'mean_env_wait_ms': 24.73891529200294, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -10.199999999999998, 'episode_reward_min': -1057.7000000000007, 'episode_reward_mean': -55.25200000000001, 'episode_len_mean': 100.0, 'episode_media': {}, 'episodes_this_iter': 60, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-49.800000000000004, -83.80000000000001, -52.2, -149.89999999999984, -18.899999999999984, -79.4, -41.50000000000001, -51.400000000000006, -89.6, -50.7, -55.500000000000014, -10.399999999999979, -60.5, -56.10000000000001, -50.8, -47.7, -48.3, -13.599999999999978, -50.00000000000001, -20.90000000000003, -12.799999999999988, -10.199999999999998, -15.199999999999987, -1057.7000000000007, -68.89999999999999, -45.30000000000001, -15.499999999999984, -49.40000000000004, -43.5, -87.0, -46.80000000000001, -51.00000000000001, -13.599999999999982, 
-13.099999999999982, -54.3, -13.799999999999981, -49.90000000000003, -26.900000000000034, -13.69999999999998, -50.40000000000002, -20.80000000000002, -51.00000000000001, -84.2, -51.7, -58.6, -13.699999999999985, -141.3, -90.3, -16.399999999999984, -35.70000000000009, -10.499999999999982, -52.70000000000002, -49.90000000000004, -38.800000000000004, -49.900000000000006, -55.7, -97.7, -18.70000000000001, -57.900000000000006, -56.800000000000004, -12.399999999999988, -12.699999999999985, -52.00000000000002, -16.09999999999998, -176.4, -55.10000000000001, -13.699999999999985, -14.099999999999989, -17.299999999999994, -61.80000000000001, -20.80000000000005, -55.6, -15.399999999999974, -13.699999999999989, -60.2, -91.8, -50.800000000000075, -10.999999999999986, -43.8, -53.4, -50.300000000000026, -14.799999999999983, -67.3, -92.2, -11.099999999999977, -12.499999999999991, -57.7, -15.299999999999986, -56.40000000000006, -55.5, -17.20000000000001, -15.99999999999998, -14.39999999999999, -13.299999999999974, -50.900000000000006, -14.599999999999984, -53.50000000000001, -166.2, -12.999999999999977, -12.599999999999985], 'episode_lengths': [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 3.8870410405586036, 'mean_inference_ms': 2.1733209641363285, 'mean_action_processing_ms': 0.20771393521309578, 'mean_env_wait_ms': 24.73891529200294, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",2931.14,11.3208,2931.14,"{'training_iteration_time_ms': 11682.761, 'load_time_ms': 2.612, 
'load_throughput': 2297220.787, 'learn_time_ms': 8152.034, 'learn_throughput': 736.013, 'synch_weights_time_ms': 10.749}",1669955302,0,1500000,250,886ac_00005,20.2023
PPO_CybORG_886ac_00006,1500000,"{'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",{},2022-12-02_05-19-05,True,100,{},-11.5,-49.97,-179.2,60,15000,a76fc66a878f47999882824515549a17,01589170c3ff,"{'learner': {'default_policy': {'learner_stats': {'cur_kl_coeff': 0.20000000298023224, 'cur_lr': 0.0005000000237487257, 'total_loss': 6.181749, 'policy_loss': -0.08044623, 'vf_loss': 6.251637, 'vf_explained_var': -0.2760126, 'kl': 0.052795332, 'entropy': 1.1086559, 'entropy_coeff': 0.0, 'model': {}}, 'train': None}}, 'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",250,172.28.0.2,1500000,1500000,1500000,6000,1500000,6000,0,30,0,6000,"{'cpu_util_percent': 32.68333333333333, 'ram_util_percent': 62.5}",11727,{},{},{},"{'mean_raw_obs_processing_ms': 3.934890117597671, 'mean_inference_ms': 2.273445959967289, 'mean_action_processing_ms': 0.206069127032127, 'mean_env_wait_ms': 25.372305573262896, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -11.499999999999998, 'episode_reward_min': -179.2, 'episode_reward_mean': -49.970000000000006, 'episode_len_mean': 100.0, 'episode_media': {}, 'episodes_this_iter': 60, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-31.20000000000003, -41.000000000000036, -12.299999999999994, -33.00000000000003, -45.60000000000008, -32.40000000000002, -12.899999999999993, -19.500000000000014, -14.69999999999998, -36.90000000000003, -28.100000000000044, -18.50000000000001, -20.70000000000002, -11.799999999999988, -26.200000000000024, -15.499999999999991, -106.30000000000004, -53.70000000000008, -101.7999999999999, -17.999999999999993, -166.8, -21.30000000000002, -73.69999999999985, -52.70000000000007, -15.099999999999984, -12.899999999999991, -16.59999999999998, -19.099999999999987, 
-58.40000000000006, -13.599999999999982, -56.10000000000007, -20.700000000000003, -15.799999999999994, -24.700000000000014, -18.0, -103.7999999999998, -15.099999999999987, -85.8, -12.599999999999996, -77.69999999999996, -175.29999999999984, -21.700000000000045, -18.099999999999994, -15.89999999999998, -70.20000000000002, -43.300000000000075, -20.200000000000003, -15.29999999999999, -55.5, -16.39999999999999, -20.199999999999992, -146.1, -72.40000000000005, -69.79999999999991, -92.00000000000003, -71.50000000000007, -19.099999999999994, -47.70000000000005, -76.79999999999998, -78.39999999999998, -17.8, -66.9, -40.20000000000007, -53.10000000000003, -30.000000000000018, -63.5, -17.89999999999999, -46.80000000000007, -34.80000000000001, -22.5, -18.399999999999995, -32.10000000000008, -58.50000000000008, -78.5, -67.50000000000004, -40.90000000000007, -132.29999999999973, -47.80000000000005, -17.399999999999988, -24.100000000000033, -179.2, -11.499999999999998, -129.3, -21.900000000000002, -21.80000000000002, -79.79999999999993, -14.599999999999984, -119.90000000000002, -28.90000000000005, -74.3, -159.3, -70.60000000000002, -29.700000000000056, -22.20000000000003, -55.80000000000003, -57.10000000000005, -141.29999999999993, -24.00000000000003, -54.6, -83.69999999999999], 'episode_lengths': [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 3.934890117597671, 'mean_inference_ms': 2.273445959967289, 'mean_action_processing_ms': 0.206069127032127, 'mean_env_wait_ms': 
25.372305573262896, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",2994.3,12.0946,2994.3,"{'training_iteration_time_ms': 11928.556, 'load_time_ms': 2.457, 'load_throughput': 2442382.81, 'learn_time_ms': 8236.202, 'learn_throughput': 728.491, 'synch_weights_time_ms': 10.801}",1669958345,0,1500000,250,886ac_00006,20.2301
PPO_CybORG_886ac_00007,1500000,"{'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",{},2022-12-02_06-09-38,True,100,{},-11.8,-36.572,-136.2,60,15000,312e2e77ae164fd5b3fbb3bbd3c3fc92,01589170c3ff,"{'learner': {'default_policy': {'learner_stats': {'cur_kl_coeff': 0.20000000298023224, 'cur_lr': 9.999999747378752e-05, 'total_loss': 5.6769614, 'policy_loss': -0.061928403, 'vf_loss': 5.734837, 'vf_explained_var': -0.112468086, 'kl': 0.020263676, 'entropy': 1.2967395, 'entropy_coeff': 0.0, 'model': {}}, 'train': None}}, 'num_env_steps_sampled': 1500000, 'num_env_steps_trained': 1500000, 'num_agent_steps_sampled': 1500000, 'num_agent_steps_trained': 1500000}",250,172.28.0.2,1500000,1500000,1500000,6000,1500000,6000,0,30,0,6000,"{'cpu_util_percent': 34.40555555555556, 'ram_util_percent': 62.599999999999994}",13636,{},{},{},"{'mean_raw_obs_processing_ms': 3.9401970875383334, 'mean_inference_ms': 2.252839083287414, 'mean_action_processing_ms': 0.20564141877335765, 'mean_env_wait_ms': 25.371932567556676, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -11.799999999999974, 'episode_reward_min': -136.2, 'episode_reward_mean': -36.572000000000024, 'episode_len_mean': 100.0, 'episode_media': {}, 'episodes_this_iter': 60, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-13.799999999999978, -29.600000000000072, -30.700000000000063, -20.699999999999996, -38.70000000000009, -16.69999999999998, -38.19999999999999, -15.799999999999974, -12.699999999999978, -80.39999999999996, -40.60000000000005, -49.80000000000009, -23.800000000000008, -27.700000000000045, -26.7, -13.799999999999981, -57.70000000000008, -78.79999999999997, -55.50000000000006, -50.80000000000009, -27.700000000000063, -24.80000000000003, -20.8, -68.80000000000005, -21.6, -96.7999999999997, -37.800000000000026, -52.800000000000075, 
-25.800000000000075, -69.80000000000004, -22.800000000000022, -20.80000000000001, -17.699999999999992, -42.80000000000005, -14.799999999999972, -15.599999999999975, -99.79999999999978, -24.800000000000058, -23.000000000000007, -30.7000000000001, -32.500000000000085, -23.700000000000053, -49.500000000000085, -53.80000000000008, -33.70000000000009, -63.4, -23.50000000000004, -19.10000000000003, -26.800000000000043, -11.799999999999974, -27.100000000000044, -75.6, -19.499999999999996, -25.800000000000022, -81.69999999999997, -20.70000000000002, -33.70000000000002, -22.20000000000005, -22.800000000000026, -15.799999999999974, -33.70000000000005, -24.40000000000003, -22.900000000000013, -79.8, -22.300000000000033, -23.80000000000004, -52.80000000000008, -21.300000000000026, -63.8000000000001, -22.70000000000002, -15.699999999999978, -15.799999999999972, -136.2, -25.80000000000005, -20.500000000000025, -90.79999999999993, -83.70000000000002, -19.800000000000008, -22.100000000000026, -18.80000000000001, -66.80000000000005, -25.80000000000003, -39.50000000000006, -19.800000000000022, -54.60000000000009, -20.80000000000001, -43.500000000000064, -23.50000000000005, -13.799999999999981, -115.69999999999978, -15.399999999999975, -26.800000000000065, -14.799999999999978, -29.700000000000028, -34.700000000000024, -18.800000000000004, -22.500000000000014, -24.800000000000047, -29.700000000000102, -34.800000000000075], 'episode_lengths': [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 
3.9401970875383334, 'mean_inference_ms': 2.252839083287414, 'mean_action_processing_ms': 0.20564141877335765, 'mean_env_wait_ms': 25.371932567556676, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",2982.9,12.4133,2982.9,"{'training_iteration_time_ms': 11989.603, 'load_time_ms': 3.351, 'load_throughput': 1790422.744, 'learn_time_ms': 8197.768, 'learn_throughput': 731.907, 'synch_weights_time_ms': 11.32}",1669961378,0,1500000,250,886ac_00007,20.121
PPO_CybORG_886ac_00008,1503480,"{'num_env_steps_sampled': 1503480, 'num_env_steps_trained': 1503480, 'num_agent_steps_sampled': 1503480, 'num_agent_steps_trained': 1503480}",{},2022-12-02_07-02-16,True,100,{},-12.2,-80.106,-666.7,60,15000,6911fff9f01e4c13a3f492fe6c49f862,01589170c3ff,"{'learner': {'default_policy': {'learner_stats': {'cur_kl_coeff': 0.20000000298023224, 'cur_lr': 0.0005000000237487257, 'total_loss': 6.5486336, 'policy_loss': -0.10670714, 'vf_loss': 6.6501365, 'vf_explained_var': -0.3637098, 'kl': 0.026020415, 'entropy': 2.1058552, 'entropy_coeff': 0.0, 'model': {}}, 'train': None}}, 'num_env_steps_sampled': 1503480, 'num_env_steps_trained': 1503480, 'num_agent_steps_sampled': 1503480, 'num_agent_steps_trained': 1503480}",374,172.28.0.2,1503480,1503480,1503480,4020,1503480,4020,0,30,0,4020,"{'cpu_util_percent': 32.923076923076934, 'ram_util_percent': 63.10000000000001}",15530,{},{},{},"{'mean_raw_obs_processing_ms': 4.468257341586227, 'mean_inference_ms': 2.3637653316167064, 'mean_action_processing_ms': 0.20539110393858742, 'mean_env_wait_ms': 25.48187003944327, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -12.19999999999998, 'episode_reward_min': -666.7000000000006, 'episode_reward_mean': -80.106, 'episode_len_mean': 100.0, 'episode_media': {}, 'episodes_this_iter': 60, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-22.700000000000014, -61.80000000000009, -78.79999999999998, -62.8000000000001, -40.800000000000054, -78.79999999999995, -47.500000000000085, -150.79999999999973, -105.7999999999999, -45.700000000000074, -108.69999999999975, -55.700000000000045, -479.70000000000124, -64.80000000000007, -142.79999999999978, -78.80000000000004, -14.799999999999978, -77.39999999999986, -56.800000000000075, -104.89999999999992, -87.69999999999995, -89.69999999999997, -58.80000000000009, -114.79999999999981, -123.79999999999984, -79.70000000000003, 
-118.79999999999974, -85.69999999999972, -26.60000000000004, -26.800000000000054, -48.70000000000003, -106.79999999999976, -22.300000000000004, -22.800000000000004, -26.800000000000043, -666.7000000000006, -121.79999999999961, -56.700000000000095, -124.10000000000002, -83.79999999999994, -21.70000000000004, -55.4000000000001, -277.7000000000002, -56.80000000000009, -64.80000000000008, -22.800000000000033, -62.30000000000001, -51.50000000000003, -72.80000000000004, -81.7, -75.7, -81.79999999999997, -25.100000000000026, -110.79999999999988, -88.7999999999999, -112.79999999999974, -70.80000000000003, -105.69999999999978, -63.2, -59.80000000000008, -18.700000000000014, -39.800000000000104, -77.69999999999996, -30.700000000000063, -138.7999999999997, -82.7, -22.800000000000058, -28.60000000000004, -94.5, -75.8, -12.19999999999998, -16.799999999999986, -32.80000000000007, -70.70000000000003, -102.79999999999986, -21.800000000000036, -83.69999999999996, -96.79999999999984, -97.6999999999997, -51.80000000000008, -82.79999999999995, -22.800000000000054, -53.60000000000006, -73.80000000000001, -48.80000000000007, -103.79999999999984, -111.79999999999984, -34.70000000000009, -56.80000000000009, -18.4, -59.30000000000009, -21.30000000000001, -30.800000000000058, -57.70000000000012, -126.79999999999983, -19.500000000000036, -28.600000000000076, -112.69999999999985, -19.799999999999994, -194.79999999999967], 'episode_lengths': [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 
4.468257341586227, 'mean_inference_ms': 2.3637653316167064, 'mean_action_processing_ms': 0.20539110393858742, 'mean_env_wait_ms': 25.48187003944327, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",3095.54,8.6838,3095.54,"{'training_iteration_time_ms': 8316.995, 'load_time_ms': 2.404, 'load_throughput': 1671948.802, 'learn_time_ms': 5630.176, 'learn_throughput': 714.01, 'synch_weights_time_ms': 11.325}",1669964536,0,1503480,374,886ac_00008,20.083
PPO_CybORG_886ac_00009,1503480,"{'num_env_steps_sampled': 1503480, 'num_env_steps_trained': 1503480, 'num_agent_steps_sampled': 1503480, 'num_agent_steps_trained': 1503480}",{},2022-12-02_07-54-45,True,100,{},-10.7,-36.164,-152.7,60,15000,2ce0ffb7fbed4660b5020ea2db613eaa,01589170c3ff,"{'learner': {'default_policy': {'learner_stats': {'cur_kl_coeff': 0.20000000298023224, 'cur_lr': 9.999999747378752e-05, 'total_loss': 4.0672407, 'policy_loss': -0.08751962, 'vf_loss': 4.151053, 'vf_explained_var': -0.046604812, 'kl': 0.018536897, 'entropy': 1.2092108, 'entropy_coeff': 0.0, 'model': {}}, 'train': None}}, 'num_env_steps_sampled': 1503480, 'num_env_steps_trained': 1503480, 'num_agent_steps_sampled': 1503480, 'num_agent_steps_trained': 1503480}",374,172.28.0.2,1503480,1503480,1503480,4020,1503480,4020,0,30,0,4020,"{'cpu_util_percent': 33.57692307692307, 'ram_util_percent': 63.10000000000001}",17507,{},{},{},"{'mean_raw_obs_processing_ms': 4.543865397542109, 'mean_inference_ms': 2.431681849947735, 'mean_action_processing_ms': 0.2068349972305063, 'mean_env_wait_ms': 26.652918579319138, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -10.699999999999978, 'episode_reward_min': -152.69999999999973, 'episode_reward_mean': -36.16399999999999, 'episode_len_mean': 100.0, 'episode_media': {}, 'episodes_this_iter': 60, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-20.800000000000008, -12.799999999999974, -132.7999999999998, -16.79999999999999, -85.7999999999999, -11.799999999999978, -18.80000000000001, -25.800000000000058, -13.79999999999997, -111.7999999999998, -14.799999999999976, -10.799999999999978, -15.799999999999972, -14.799999999999972, -24.800000000000104, -12.699999999999973, -57.80000000000004, -12.799999999999969, -42.80000000000002, -47.7000000000001, -18.8, -32.800000000000075, -29.80000000000006, -12.799999999999974, -14.799999999999972, -13.799999999999969, -14.799999999999978, 
-17.799999999999976, -33.400000000000055, -123.79999999999984, -14.799999999999974, -14.79999999999997, -20.8, -112.20000000000002, -35.70000000000008, -129.4, -22.80000000000003, -17.799999999999983, -15.799999999999978, -19.800000000000004, -24.700000000000053, -31.800000000000047, -16.799999999999986, -68.80000000000004, -107.99999999999987, -87.6999999999999, -18.700000000000003, -13.699999999999982, -17.799999999999986, -10.79999999999998, -14.799999999999974, -152.69999999999973, -10.799999999999981, -13.69999999999998, -35.3000000000001, -12.699999999999978, -140.79999999999978, -19.800000000000015, -14.799999999999974, -32.80000000000003, -11.799999999999974, -15.799999999999976, -11.399999999999979, -81.79999999999998, -67.80000000000004, -16.099999999999973, -30.70000000000007, -15.69999999999998, -95.6999999999999, -10.799999999999978, -22.800000000000043, -12.699999999999978, -12.799999999999974, -15.599999999999975, -41.70000000000008, -12.699999999999978, -12.699999999999978, -33.500000000000064, -14.799999999999978, -73.79999999999995, -45.800000000000026, -34.80000000000008, -26.800000000000036, -10.799999999999978, -10.699999999999978, -20.800000000000022, -19.800000000000015, -16.69999999999999, -17.599999999999994, -28.7, -46.80000000000003, -13.79999999999998, -14.799999999999978, -10.799999999999978, -79.79999999999994, -120.79999999999984, -127.79999999999981, -11.799999999999978, -40.800000000000054, -36.70000000000011], 'episode_lengths': [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]}, 
'sampler_perf': {'mean_raw_obs_processing_ms': 4.543865397542109, 'mean_inference_ms': 2.431681849947735, 'mean_action_processing_ms': 0.2068349972305063, 'mean_env_wait_ms': 26.652918579319138, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",3083.63,8.45096,3083.63,"{'training_iteration_time_ms': 8298.491, 'load_time_ms': 2.353, 'load_throughput': 1708162.587, 'learn_time_ms': 5525.202, 'learn_throughput': 727.575, 'synch_weights_time_ms': 11.856}",1669967685,0,1503480,374,886ac_00009,20.322


[2m[36m(PPO pid=2114)[0m 2022-12-02 00:16:13,675	INFO algorithm.py:2303 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPO pid=2114)[0m 2022-12-02 00:16:13,676	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPO pid=2114)[0m 2022-12-02 00:16:13,677	INFO algorithm.py:457 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(PPO pid=2114)[0m 2022-12-02 00:16:33,864	INFO trainable.py:164 -- Trainable.setup took 20.191 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(RolloutWorker pid=2

[2m[36m(PPO pid=4024)[0m 2022-12-02 01:07:20,210	INFO trainable.py:164 -- Trainable.setup took 20.553 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(PPO pid=6051)[0m 2022-12-02 01:57:39,378	INFO algorithm.py:2303 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPO pid=6051)[0m 2022-12-02 01:57:39,379	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPO pid=6051)[0m 2022-12-02 01:57:39,380	INFO algorithm.py:457 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(PPO pid=6051)[0m 2

[2m[36m(PPO pid=7948)[0m 2022-12-02 02:48:47,241	INFO trainable.py:164 -- Trainable.setup took 20.330 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(PPO pid=9835)[0m 2022-12-02 03:38:52,124	INFO algorithm.py:2303 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPO pid=9835)[0m 2022-12-02 03:38:52,125	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPO pid=9835)[0m 2022-12-02 03:38:52,126	INFO algorithm.py:457 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(PPO pid=9835)[0m 2

[2m[36m(PPO pid=13636)[0m 2022-12-02 05:19:13,711	INFO algorithm.py:2303 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPO pid=13636)[0m 2022-12-02 05:19:13,711	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPO pid=13636)[0m 2022-12-02 05:19:13,713	INFO algorithm.py:457 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(PPO pid=13636)[0m 2022-12-02 05:19:33,796	INFO trainable.py:164 -- Trainable.setup took 20.087 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(RolloutWorker p

[2m[36m(PPO pid=15530)[0m 2022-12-02 06:10:06,292	INFO trainable.py:164 -- Trainable.setup took 20.056 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(RolloutWorker pid=15597)[0m 2022-12-02 07:02:17,215	ERROR worker.py:763 -- Worker exits with an exit code 1.
[2m[36m(RolloutWorker pid=15597)[0m Traceback (most recent call last):
[2m[36m(RolloutWorker pid=15597)[0m   File "python/ray/_raylet.pyx", line 1032, in ray._raylet.task_execution_handler
[2m[36m(RolloutWorker pid=15597)[0m   File "python/ray/_raylet.pyx", line 812, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=15597)[0m   File "python/ray/_raylet.pyx", line 852, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=15597)[0m   File "python/ray/_raylet.pyx", line 859, in ray._raylet.execute_task
[2m[36m(RolloutWorker pid=15597)[0m   File "python/ray/_raylet.pyx", line 863, in ray._raylet.execute_task
[2m[36m(RolloutW

[2m[36m(PPO pid=19413)[0m 2022-12-02 07:54:53,865	INFO algorithm.py:2303 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPO pid=19413)[0m 2022-12-02 07:54:53,866	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPO pid=19413)[0m 2022-12-02 07:54:53,867	INFO algorithm.py:457 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(PPO pid=19413)[0m 2022-12-02 07:55:14,113	INFO trainable.py:164 -- Trainable.setup took 20.250 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(RolloutWorker p

[2m[36m(PPO pid=23250)[0m 2022-12-02 09:40:01,520	INFO algorithm.py:2303 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPO pid=23250)[0m 2022-12-02 09:40:01,521	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPO pid=23250)[0m 2022-12-02 09:40:01,522	INFO algorithm.py:457 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(PPO pid=23250)[0m 2022-12-02 09:40:21,111	INFO trainable.py:164 -- Trainable.setup took 19.593 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(PPO pid=25182)

In [None]:
# Report the installed Ray version. The version string is exposed on the
# top-level `ray` package rather than on the `ray.tune` submodule, so query it
# there. `ray` itself is not bound by the import cell (only `air` and `tune`
# are), hence the local import.
import ray

ray.__version__

In [None]:
!pip show ray

In [None]:
# Build a standalone Blue-agent environment for interactive inspection.
# Same construction as env_creator() in the setup cell, but the intermediate
# objects (path, agents, cyborg) stay available in the notebook namespace.

# Resolve the scenario file relative to the directory containing the module
# that defines CybORG. Using dirname() avoids slicing a fixed number of
# characters off the module path, which silently yields a wrong directory if
# the module file name ever changes.
path = os.path.dirname(str(inspect.getfile(CybORG)))
path = path + '/Shared/Scenarios/Scenario2.yaml'

# Red and Green are driven by scripted agents; Blue is the agent we control.
agents = {"Red": B_lineAgent, "Green": GreenAgent}
cyborg = CybORG(scenario_file=path, environment='sim', agents=agents)

# Wrap the simulator for RLlib from Blue's point of view, 100 steps per episode.
env = RLlibWrapper(env=cyborg, agent_name="Blue", max_steps=100)

In [None]:
# Display the observation space exposed by the RLlib-wrapped Blue environment
# built in the previous cell.
env.observation_space