In [None]:
import inspect
import time
from statistics import mean, stdev
from CybORG import CybORG
from CybORG.Agents import B_lineAgent, SleepAgent, GreenAgent
from CybORG.Agents.SimpleAgents.BaseAgent import BaseAgent
from CybORG.Agents.SimpleAgents.BlueReactAgent import BlueReactRemoveAgent
from CybORG.Agents.SimpleAgents.Meander import RedMeanderAgent
from CybORG.Agents.Wrappers.EnumActionWrapper import EnumActionWrapper
from CybORG.Agents.Wrappers.FixedFlatWrapper import FixedFlatWrapper
from CybORG.Agents.Wrappers.OpenAIGymWrapper import OpenAIGymWrapper
from CybORG.Agents.Wrappers.ReduceActionSpaceWrapper import ReduceActionSpaceWrapper
from CybORG.Agents.Wrappers import ChallengeWrapper
import os

from ray.tune.registry import register_env
from CybORG.Agents.Wrappers.rllib_wrapper import RLlibWrapper
import warnings
import numpy as np
from ray import air, tune

# Suppress every Python warning for the rest of this kernel session.
# NOTE(review): this is a blanket filter, so deprecation warnings from any
# imported library are hidden as well.
warnings.filterwarnings('ignore')

In [None]:
def env_creator(env_config: dict):
    """Create a CybORG simulation wrapped for RLlib, controlling the Blue agent.

    Args:
        env_config: Environment configuration dict supplied by the caller.
            It is accepted for interface compatibility and is not read here.

    Returns:
        An ``RLlibWrapper`` around a simulated ``CybORG`` instance that loads
        ``Shared/Scenarios/Scenario2_No_Decoy.yaml`` from the CybORG package
        directory, with ``B_lineAgent`` as Red and ``GreenAgent`` as Green.
        The wrapper is created with ``agent_name="Blue"`` and ``max_steps=100``.
    """
    # Resolve the package directory from the file that defines CybORG instead of
    # chopping a fixed number of characters off the path: the previous
    # ``path[:-10]`` slice was only correct while that file was named "CybORG.py".
    package_dir = os.path.dirname(str(inspect.getfile(CybORG)))
    scenario_path = package_dir + '/Shared/Scenarios/Scenario2_No_Decoy.yaml'

    # Blue is the learning agent and is not listed here.
    agents = {"Red": B_lineAgent, "Green": GreenAgent}
    cyborg = CybORG(scenario_file=scenario_path, environment='sim', agents=agents)
    return RLlibWrapper(env=cyborg, agent_name="Blue", max_steps=100)

def print_results(results_dict):
    """Print a one-line reward summary for a single training iteration.

    Args:
        results_dict: Mapping that provides ``"training_iteration"`` and the
            ``"episode_reward_mean"``, ``"episode_reward_max"`` and
            ``"episode_reward_min"`` entries.

    The line is tab-separated: the iteration number right-aligned in four
    columns, followed by the mean, max and min rewards with one decimal place.
    """
    wanted_keys = (
        "training_iteration",
        "episode_reward_mean",
        "episode_reward_max",
        "episode_reward_min",
    )
    # Look the values up in the same order the summary line lists them.
    train_iter, r_mean, r_max, r_min = [results_dict[key] for key in wanted_keys]
    print(f"{train_iter:4d} \tr_mean: {r_mean:.1f} \tr_max: {r_max:.1f} \tr_min: {r_min: .1f}")

# Make the environment factory available to RLlib under the name "CybORG",
# which is the name the algorithm configs pass to `.environment(env=...)`.
register_env("CybORG", env_creator)

In [3]:
!rm -r logs/APPO/RE3

rm: cannot remove 'logs/APPO/RE3': No such file or directory


In [4]:
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.policy.policy import PolicySpec

# PPO on the registered 'CybORG' environment, exploring with RE3 on top of
# StochasticSampling, and logging sampled batches as JSON.
# NOTE(review): this run uses RE3 exploration but writes to
# "logs/APPO/StochasticSampling", while the cleanup cell above clears
# "logs/APPO/RE3" — confirm the intended output directory.
config = (
    PPOConfig()
    # Each rollout worker uses a single cpu
    .rollouts(
        num_rollout_workers=30,
        num_envs_per_worker=1,
        horizon=100,
    )
    .training(
        train_batch_size=3000,
        gamma=0.85,
        lr=0.00005,
        model={
            "fcnet_hiddens": [256, 256],
            "fcnet_activation": "tanh",
        },
    )
    .environment(disable_env_checking=True, env='CybORG')
    .resources(num_gpus=1)
    .framework('tf')
    .exploration(
        explore=True,
        exploration_config={
            "type": "RE3",
            "embeds_dim": 128,
            "beta_schedule": "constant",
            "sub_exploration": {"type": "StochasticSampling"},
        },
    )
    .offline_data(
        output="logs/APPO/StochasticSampling",
        output_compress_columns=[
            'prev_actions',
            'prev_rewards',
            'dones',
            't',
            'eps_id',
            'unroll_id',
            'agent_index',
            'action_prob',
            'action_logp',
            'action_dist_inputs',
            'advantages',
            'value_targets',
        ],
        output_config={"format": "json"},
    )
)

# Instantiate the algorithm (and its rollout workers) from the config.
trainer = config.build()


2022-12-17 14:15:12,148	INFO worker.py:1528 -- Started a local Ray instance.
2022-12-17 14:15:29,427	INFO trainable.py:164 -- Trainable.setup took 19.549 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [5]:
def print_results(results_dict):
    """Print the iteration number and mean/max/min episode reward on one line.

    This redefinition is identical in behavior to the ``print_results`` defined
    in the earlier helper cell and shadows it once this cell runs.

    Args:
        results_dict: Mapping containing ``"training_iteration"``,
            ``"episode_reward_mean"``, ``"episode_reward_max"`` and
            ``"episode_reward_min"``.
    """
    # Tab-separated template; the min column uses the space sign flag so
    # non-negative values get a leading blank where a minus sign would go.
    line_template = "{:4d} \tr_mean: {:.1f} \tr_max: {:.1f} \tr_min: {: .1f}"
    print(
        line_template.format(
            results_dict["training_iteration"],
            results_dict["episode_reward_mean"],
            results_dict["episode_reward_max"],
            results_dict["episode_reward_min"],
        )
    )

In [6]:
# Run 2000 training iterations, printing one reward-summary line after each.
for i in range(2000):
    iteration_result = trainer.train()
    print_results(iteration_result)

   1 	r_mean: -590.6 	r_max: -24.2 	r_min: -1079.5
   2 	r_mean: -607.3 	r_max: -24.2 	r_min: -1146.8
   3 	r_mean: -590.0 	r_max: -24.2 	r_min: -1146.8
   4 	r_mean: -551.4 	r_max: -43.5 	r_min: -1146.8
   5 	r_mean: -442.9 	r_max: -43.5 	r_min: -1146.8
   6 	r_mean: -339.9 	r_max: -43.5 	r_min: -1126.1
   7 	r_mean: -284.6 	r_max: -129.4 	r_min: -1109.8
   8 	r_mean: -247.4 	r_max: -54.8 	r_min: -683.7
   9 	r_mean: -218.0 	r_max: -54.8 	r_min: -482.7
  10 	r_mean: -211.2 	r_max: -40.2 	r_min: -861.5
  11 	r_mean: -199.6 	r_max: -33.6 	r_min: -861.5
  12 	r_mean: -185.6 	r_max: -33.6 	r_min: -861.5
  13 	r_mean: -167.9 	r_max: -33.6 	r_min: -275.8
  14 	r_mean: -163.5 	r_max: -40.8 	r_min: -275.8
  15 	r_mean: -157.7 	r_max: -40.8 	r_min: -262.7
  16 	r_mean: -149.9 	r_max: -58.7 	r_min: -262.7
  17 	r_mean: -139.8 	r_max: -41.4 	r_min: -262.7
  18 	r_mean: -130.6 	r_max: -41.4 	r_min: -315.6
  19 	r_mean: -125.0 	r_max: -41.4 	r_min: -315.6
  20 	r_mean: -115.9 	r_max: -21.8 	r_min:

 169 	r_mean: -27.0 	r_max: -10.8 	r_min: -191.8
 170 	r_mean: -25.6 	r_max: -10.8 	r_min: -191.8
 171 	r_mean: -24.8 	r_max: -10.7 	r_min: -89.8
 172 	r_mean: -25.8 	r_max: -10.7 	r_min: -89.8
 173 	r_mean: -27.2 	r_max: -10.7 	r_min: -139.1
 174 	r_mean: -28.7 	r_max: -9.8 	r_min: -149.7
 175 	r_mean: -29.6 	r_max: -9.8 	r_min: -149.7
 176 	r_mean: -28.1 	r_max: -9.8 	r_min: -149.7
 177 	r_mean: -27.2 	r_max: -10.7 	r_min: -116.4
 178 	r_mean: -26.2 	r_max: -10.7 	r_min: -139.8
 179 	r_mean: -26.0 	r_max: -10.6 	r_min: -139.8
 180 	r_mean: -29.1 	r_max: -9.8 	r_min: -151.8
 181 	r_mean: -28.2 	r_max: -9.8 	r_min: -170.8
 182 	r_mean: -27.6 	r_max: -9.8 	r_min: -170.8
 183 	r_mean: -26.2 	r_max: -9.8 	r_min: -175.8
 184 	r_mean: -27.9 	r_max: -10.8 	r_min: -175.8
 185 	r_mean: -27.4 	r_max: -9.8 	r_min: -175.8
 186 	r_mean: -24.7 	r_max: -9.8 	r_min: -152.6
 187 	r_mean: -25.7 	r_max: -9.8 	r_min: -119.8
 188 	r_mean: -24.4 	r_max: -9.8 	r_min: -119.8
 189 	r_mean: -23.7 	r_max: -9.8 

 340 	r_mean: -23.7 	r_max: -10.7 	r_min: -107.8
 341 	r_mean: -26.7 	r_max: -9.8 	r_min: -132.8
 342 	r_mean: -25.6 	r_max: -9.8 	r_min: -132.8
 343 	r_mean: -25.0 	r_max: -9.8 	r_min: -132.8
 344 	r_mean: -24.7 	r_max: -9.8 	r_min: -132.8
 345 	r_mean: -23.0 	r_max: -9.7 	r_min: -62.8
 346 	r_mean: -25.2 	r_max: -9.7 	r_min: -152.8
 347 	r_mean: -24.8 	r_max: -9.7 	r_min: -152.8
 348 	r_mean: -27.5 	r_max: -9.8 	r_min: -152.8
 349 	r_mean: -27.7 	r_max: -9.8 	r_min: -151.7
 350 	r_mean: -26.1 	r_max: -9.8 	r_min: -151.7
 351 	r_mean: -25.2 	r_max: -9.8 	r_min: -145.7
 352 	r_mean: -23.8 	r_max: -10.8 	r_min: -145.7
 353 	r_mean: -24.6 	r_max: -10.8 	r_min: -145.7
 354 	r_mean: -25.1 	r_max: -10.8 	r_min: -113.3
 355 	r_mean: -27.3 	r_max: -10.8 	r_min: -150.8
 356 	r_mean: -26.1 	r_max: -10.8 	r_min: -150.8
 357 	r_mean: -28.3 	r_max: -10.8 	r_min: -152.8
 358 	r_mean: -26.6 	r_max: -10.8 	r_min: -152.8
 359 	r_mean: -26.9 	r_max: -10.8 	r_min: -152.8
 360 	r_mean: -25.5 	r_max: -10.

 510 	r_mean: -27.4 	r_max: -10.1 	r_min: -163.8
 511 	r_mean: -27.8 	r_max: -10.7 	r_min: -163.8
 512 	r_mean: -28.0 	r_max: -9.8 	r_min: -163.8
 513 	r_mean: -23.7 	r_max: -9.8 	r_min: -139.8
 514 	r_mean: -21.8 	r_max: -9.8 	r_min: -139.8
 515 	r_mean: -22.0 	r_max: -9.8 	r_min: -146.7
 516 	r_mean: -22.3 	r_max: -9.7 	r_min: -146.7
 517 	r_mean: -23.0 	r_max: -9.7 	r_min: -146.7
 518 	r_mean: -21.5 	r_max: -9.7 	r_min: -115.2
 519 	r_mean: -23.1 	r_max: -9.7 	r_min: -160.8
 520 	r_mean: -22.7 	r_max: -10.7 	r_min: -160.8
 521 	r_mean: -21.9 	r_max: -10.7 	r_min: -160.8
 522 	r_mean: -24.5 	r_max: -10.8 	r_min: -160.8
 523 	r_mean: -22.4 	r_max: -10.7 	r_min: -134.8
 524 	r_mean: -23.7 	r_max: -10.7 	r_min: -172.8
 525 	r_mean: -25.1 	r_max: -9.8 	r_min: -172.8
 526 	r_mean: -23.4 	r_max: -9.8 	r_min: -172.8
 527 	r_mean: -21.3 	r_max: -9.8 	r_min: -151.8
 528 	r_mean: -24.3 	r_max: -9.8 	r_min: -142.8
 529 	r_mean: -26.0 	r_max: -9.8 	r_min: -142.8
 530 	r_mean: -29.8 	r_max: -9.7 

 681 	r_mean: -20.7 	r_max: -9.5 	r_min: -130.8
 682 	r_mean: -18.8 	r_max: -9.4 	r_min: -130.8
 683 	r_mean: -21.0 	r_max: -9.0 	r_min: -141.8
 684 	r_mean: -20.2 	r_max: -9.0 	r_min: -141.8
 685 	r_mean: -21.6 	r_max: -9.0 	r_min: -141.8
 686 	r_mean: -18.2 	r_max: -8.9 	r_min: -121.8
 687 	r_mean: -17.9 	r_max: -8.9 	r_min: -121.8
 688 	r_mean: -18.0 	r_max: -8.9 	r_min: -121.8
 689 	r_mean: -19.4 	r_max: -9.5 	r_min: -152.8
 690 	r_mean: -20.9 	r_max: -9.5 	r_min: -152.8
 691 	r_mean: -21.5 	r_max: -9.5 	r_min: -156.8
 692 	r_mean: -19.4 	r_max: -9.7 	r_min: -156.8
 693 	r_mean: -20.9 	r_max: -9.7 	r_min: -156.8
 694 	r_mean: -21.3 	r_max: -9.8 	r_min: -110.8
 695 	r_mean: -22.0 	r_max: -9.8 	r_min: -110.8
 696 	r_mean: -21.5 	r_max: -9.8 	r_min: -99.8
 697 	r_mean: -22.5 	r_max: -9.8 	r_min: -121.8
 698 	r_mean: -22.6 	r_max: -9.8 	r_min: -121.8
 699 	r_mean: -23.8 	r_max: -9.8 	r_min: -177.7
 700 	r_mean: -23.4 	r_max: -10.7 	r_min: -177.7
 701 	r_mean: -27.3 	r_max: -10.7 	r_min

 852 	r_mean: -20.6 	r_max: -9.8 	r_min: -127.8
 853 	r_mean: -21.6 	r_max: -9.8 	r_min: -127.8
 854 	r_mean: -19.7 	r_max: -9.8 	r_min: -127.8
 855 	r_mean: -18.1 	r_max: -9.8 	r_min: -95.8
 856 	r_mean: -19.0 	r_max: -9.8 	r_min: -95.8
 857 	r_mean: -20.1 	r_max: -9.8 	r_min: -93.5
 858 	r_mean: -21.1 	r_max: -9.8 	r_min: -93.5
 859 	r_mean: -19.0 	r_max: -9.8 	r_min: -87.7
 860 	r_mean: -20.0 	r_max: -9.0 	r_min: -144.9
 861 	r_mean: -23.1 	r_max: -9.0 	r_min: -151.8
 862 	r_mean: -25.4 	r_max: -9.0 	r_min: -151.8
 863 	r_mean: -26.1 	r_max: -9.0 	r_min: -151.8
 864 	r_mean: -22.8 	r_max: -9.5 	r_min: -90.8
 865 	r_mean: -21.5 	r_max: -9.5 	r_min: -106.7
 866 	r_mean: -20.8 	r_max: -9.5 	r_min: -106.8
 867 	r_mean: -22.5 	r_max: -9.6 	r_min: -106.8
 868 	r_mean: -21.6 	r_max: -9.6 	r_min: -106.8
 869 	r_mean: -20.9 	r_max: -9.7 	r_min: -92.8
 870 	r_mean: -18.8 	r_max: -9.7 	r_min: -89.8
 871 	r_mean: -22.3 	r_max: -9.7 	r_min: -169.7
 872 	r_mean: -20.9 	r_max: -9.6 	r_min: -169.7


1024 	r_mean: -19.4 	r_max: -9.6 	r_min: -132.7
1025 	r_mean: -20.0 	r_max: -9.6 	r_min: -151.8
1026 	r_mean: -19.1 	r_max: -9.6 	r_min: -151.8
1027 	r_mean: -21.6 	r_max: -9.8 	r_min: -152.0
1028 	r_mean: -23.7 	r_max: -9.8 	r_min: -152.0
1029 	r_mean: -26.4 	r_max: -9.8 	r_min: -152.0
1030 	r_mean: -25.0 	r_max: -10.0 	r_min: -152.0
1031 	r_mean: -21.8 	r_max: -8.9 	r_min: -139.7
1032 	r_mean: -23.3 	r_max: -8.9 	r_min: -178.7
1033 	r_mean: -24.7 	r_max: -8.9 	r_min: -196.8
1034 	r_mean: -27.5 	r_max: -9.7 	r_min: -196.8
1035 	r_mean: -24.7 	r_max: -9.7 	r_min: -196.8
1036 	r_mean: -21.7 	r_max: -9.7 	r_min: -196.8
1037 	r_mean: -21.8 	r_max: -9.7 	r_min: -151.7
1038 	r_mean: -21.5 	r_max: -9.7 	r_min: -91.8
1039 	r_mean: -23.2 	r_max: -9.7 	r_min: -157.8
1040 	r_mean: -22.7 	r_max: -9.5 	r_min: -157.8
1041 	r_mean: -22.8 	r_max: -9.5 	r_min: -157.8
1042 	r_mean: -19.7 	r_max: -9.5 	r_min: -147.8
1043 	r_mean: -19.5 	r_max: -6.9 	r_min: -147.8
1044 	r_mean: -21.2 	r_max: -6.9 	r_min:

1196 	r_mean: -21.7 	r_max: -9.8 	r_min: -147.6
1197 	r_mean: -20.4 	r_max: -9.7 	r_min: -147.6
1198 	r_mean: -22.1 	r_max: -9.7 	r_min: -147.6
1199 	r_mean: -20.9 	r_max: -9.7 	r_min: -147.6
1200 	r_mean: -21.9 	r_max: -9.7 	r_min: -146.8
1201 	r_mean: -21.1 	r_max: -9.7 	r_min: -146.8
1202 	r_mean: -18.7 	r_max: -9.8 	r_min: -111.7
1203 	r_mean: -19.6 	r_max: -9.1 	r_min: -143.7
1204 	r_mean: -21.0 	r_max: -9.1 	r_min: -143.7
1205 	r_mean: -23.8 	r_max: -8.7 	r_min: -143.8
1206 	r_mean: -22.2 	r_max: -8.7 	r_min: -143.8
1207 	r_mean: -22.1 	r_max: -8.7 	r_min: -143.8
1208 	r_mean: -20.5 	r_max: -9.2 	r_min: -168.0
1209 	r_mean: -21.2 	r_max: -9.2 	r_min: -168.0
1210 	r_mean: -20.0 	r_max: -9.2 	r_min: -168.0
1211 	r_mean: -21.4 	r_max: -9.5 	r_min: -149.8
1212 	r_mean: -22.2 	r_max: -9.5 	r_min: -149.8
1213 	r_mean: -23.9 	r_max: -9.5 	r_min: -149.8
1214 	r_mean: -20.7 	r_max: -9.5 	r_min: -86.8
1215 	r_mean: -20.6 	r_max: -9.7 	r_min: -162.8
1216 	r_mean: -19.6 	r_max: -9.7 	r_min: 

1368 	r_mean: -20.5 	r_max: -9.8 	r_min: -156.8
1369 	r_mean: -21.4 	r_max: -8.5 	r_min: -156.8
1370 	r_mean: -25.4 	r_max: -8.5 	r_min: -156.8
1371 	r_mean: -23.3 	r_max: -8.5 	r_min: -149.8
1372 	r_mean: -26.3 	r_max: -9.7 	r_min: -137.8
1373 	r_mean: -27.1 	r_max: -9.7 	r_min: -159.6
1374 	r_mean: -26.6 	r_max: -9.7 	r_min: -159.6
1375 	r_mean: -26.7 	r_max: -9.7 	r_min: -159.6
1376 	r_mean: -24.8 	r_max: -9.7 	r_min: -148.8
1377 	r_mean: -21.1 	r_max: -9.2 	r_min: -148.7
1378 	r_mean: -22.5 	r_max: -9.2 	r_min: -153.8
1379 	r_mean: -23.2 	r_max: -9.2 	r_min: -153.8
1380 	r_mean: -22.3 	r_max: -9.3 	r_min: -153.8
1381 	r_mean: -20.5 	r_max: -9.7 	r_min: -143.4
1382 	r_mean: -20.7 	r_max: -9.7 	r_min: -143.4
1383 	r_mean: -21.9 	r_max: -9.7 	r_min: -143.4
1384 	r_mean: -23.0 	r_max: -9.7 	r_min: -147.8
1385 	r_mean: -22.9 	r_max: -9.6 	r_min: -147.8
1386 	r_mean: -22.4 	r_max: -9.6 	r_min: -147.8
1387 	r_mean: -19.5 	r_max: -9.6 	r_min: -147.8
1388 	r_mean: -22.2 	r_max: -9.7 	r_min:

1539 	r_mean: -18.6 	r_max: -9.2 	r_min: -146.8
1540 	r_mean: -20.5 	r_max: -9.2 	r_min: -208.7
1541 	r_mean: -19.9 	r_max: -9.2 	r_min: -208.7
1542 	r_mean: -19.0 	r_max: -9.2 	r_min: -208.7
1543 	r_mean: -17.0 	r_max: -9.6 	r_min: -124.8
1544 	r_mean: -17.6 	r_max: -9.6 	r_min: -124.8
1545 	r_mean: -16.9 	r_max: -9.6 	r_min: -124.8
1546 	r_mean: -17.6 	r_max: -9.4 	r_min: -99.8
1547 	r_mean: -20.4 	r_max: -9.4 	r_min: -150.8
1548 	r_mean: -22.9 	r_max: -9.4 	r_min: -150.8
1549 	r_mean: -25.2 	r_max: -9.7 	r_min: -150.8
1550 	r_mean: -25.9 	r_max: -9.0 	r_min: -149.8
1551 	r_mean: -25.1 	r_max: -9.0 	r_min: -146.8
1552 	r_mean: -25.0 	r_max: -9.0 	r_min: -146.8
1553 	r_mean: -22.7 	r_max: -9.3 	r_min: -137.8
1554 	r_mean: -23.8 	r_max: -9.3 	r_min: -131.8
1555 	r_mean: -24.5 	r_max: -9.4 	r_min: -131.8
1556 	r_mean: -23.1 	r_max: -9.4 	r_min: -131.8
1557 	r_mean: -24.7 	r_max: -9.7 	r_min: -172.8
1558 	r_mean: -22.9 	r_max: -9.5 	r_min: -172.8
1559 	r_mean: -24.4 	r_max: -9.5 	r_min: 

1711 	r_mean: -21.3 	r_max: -9.7 	r_min: -149.8
1712 	r_mean: -21.9 	r_max: -8.8 	r_min: -149.8
1713 	r_mean: -19.7 	r_max: -8.8 	r_min: -129.8
1714 	r_mean: -21.2 	r_max: -8.8 	r_min: -129.8
1715 	r_mean: -21.0 	r_max: -9.0 	r_min: -116.4
1716 	r_mean: -20.9 	r_max: -9.0 	r_min: -116.4
1717 	r_mean: -20.3 	r_max: -9.0 	r_min: -116.4
1718 	r_mean: -17.3 	r_max: -9.6 	r_min: -63.8
1719 	r_mean: -18.7 	r_max: -9.6 	r_min: -102.8
1720 	r_mean: -19.3 	r_max: -9.6 	r_min: -102.8
1721 	r_mean: -21.3 	r_max: -9.8 	r_min: -227.8
1722 	r_mean: -21.7 	r_max: -9.8 	r_min: -227.8
1723 	r_mean: -20.9 	r_max: -9.6 	r_min: -227.8
1724 	r_mean: -18.8 	r_max: -9.6 	r_min: -101.8
1725 	r_mean: -19.1 	r_max: -9.6 	r_min: -145.8
1726 	r_mean: -22.8 	r_max: -9.7 	r_min: -169.4
1727 	r_mean: -24.5 	r_max: -9.7 	r_min: -169.4
1728 	r_mean: -27.0 	r_max: -9.1 	r_min: -169.4
1729 	r_mean: -22.3 	r_max: -9.1 	r_min: -149.8
1730 	r_mean: -24.1 	r_max: -9.1 	r_min: -164.4
1731 	r_mean: -23.7 	r_max: -9.1 	r_min: 

1882 	r_mean: -18.8 	r_max: -9.8 	r_min: -88.8
1883 	r_mean: -19.0 	r_max: -9.8 	r_min: -88.8
1884 	r_mean: -19.2 	r_max: -9.8 	r_min: -82.7
1885 	r_mean: -20.8 	r_max: -9.8 	r_min: -161.7
1886 	r_mean: -21.6 	r_max: -9.8 	r_min: -161.7
1887 	r_mean: -21.6 	r_max: -9.8 	r_min: -161.7
1888 	r_mean: -21.4 	r_max: -9.8 	r_min: -152.8
1889 	r_mean: -22.5 	r_max: -9.8 	r_min: -152.8
1890 	r_mean: -20.2 	r_max: -9.7 	r_min: -100.0
1891 	r_mean: -20.3 	r_max: -9.5 	r_min: -105.8
1892 	r_mean: -22.3 	r_max: -9.5 	r_min: -131.8
1893 	r_mean: -22.6 	r_max: -9.5 	r_min: -131.8
1894 	r_mean: -22.6 	r_max: -9.7 	r_min: -131.8
1895 	r_mean: -22.7 	r_max: -9.8 	r_min: -149.8
1896 	r_mean: -23.6 	r_max: -9.8 	r_min: -149.8
1897 	r_mean: -27.1 	r_max: -9.8 	r_min: -149.8
1898 	r_mean: -26.5 	r_max: -9.8 	r_min: -144.8
1899 	r_mean: -25.9 	r_max: -9.8 	r_min: -144.8
1900 	r_mean: -22.9 	r_max: -9.8 	r_min: -138.7
1901 	r_mean: -20.4 	r_max: -9.7 	r_min: -98.8
1902 	r_mean: -21.7 	r_max: -9.6 	r_min: -12

In [None]:
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.policy.policy import PolicySpec

# A trainer built by an earlier cell still owns its rollout-worker actors.
# Building another 30-worker trainer while those are alive asks Ray for a
# second set of workers, and the build can stall waiting for resources on a
# machine that cannot host both. Release the old trainer first; the guard
# keeps this cell runnable when no trainer exists yet.
if "trainer" in globals():
    trainer.stop()

# Baseline run: the same PPO settings as the RE3 experiment, but with no
# `.exploration(...)` override, so RLlib's default exploration settings apply.
# Sampled batches are logged as JSON to a separate directory.
config = (
    PPOConfig()
    # Each rollout worker uses a single cpu
    .rollouts(num_rollout_workers=30, num_envs_per_worker=1, horizon=100)
    .training(
        train_batch_size=3000,
        gamma=0.85,
        lr=0.00005,
        model={"fcnet_hiddens": [256, 256], "fcnet_activation": "tanh"},
    )
    .environment(disable_env_checking=True, env='CybORG')
    .resources(num_gpus=1)
    .framework('tf')
    .offline_data(
        output="logs/APPO/actuallystoch_otherisre3",
        output_compress_columns=[
            'prev_actions', 'prev_rewards', 'dones', 't', 'eps_id', 'unroll_id',
            'agent_index', 'action_prob', 'action_logp', 'action_dist_inputs',
            'advantages', 'value_targets',
        ],
        output_config={"format": "json"},
    )
)
trainer = config.build()




[2m[1m[36m(scheduler +2h54m49s)[0m Tip: use `ray status` to view detailed cluster status. To disable these messages, set RAY_SCHEDULER_EVENTS=0.




























In [None]:
# Train the current `trainer` for 2000 iterations, logging reward statistics
# after every iteration.
for i in range(2000):
    latest_result = trainer.train()
    print_results(latest_result)