In [2]:
import inspect
import time
from statistics import mean, stdev
from CybORG import CybORG
from CybORG.Agents import B_lineAgent, SleepAgent, GreenAgent
from CybORG.Agents.SimpleAgents.BaseAgent import BaseAgent
from CybORG.Agents.SimpleAgents.BlueReactAgent import BlueReactRemoveAgent
from CybORG.Agents.SimpleAgents.Meander import RedMeanderAgent
from CybORG.Agents.Wrappers.EnumActionWrapper import EnumActionWrapper
from CybORG.Agents.Wrappers.FixedFlatWrapper import FixedFlatWrapper
from CybORG.Agents.Wrappers.OpenAIGymWrapper import OpenAIGymWrapper
from CybORG.Agents.Wrappers.ReduceActionSpaceWrapper import ReduceActionSpaceWrapper
from CybORG.Agents.Wrappers import ChallengeWrapper
import os

from ray.tune.registry import register_env
from CybORG.Agents.Wrappers.rllib_wrapper import RLlibWrapper
import warnings
import numpy as np
from ray import air, tune
# Silence every Python warning for the rest of the session: the 'ignore' action
# is installed without a category, message, or module restriction.
warnings.filterwarnings('ignore')

In [3]:
def env_creator(env_config: dict):
    """Build the CybORG Scenario2 simulation wrapped for RLlib.

    The scenario file is resolved relative to the directory of the module
    that defines the ``CybORG`` class. Red is driven by ``B_lineAgent`` and
    Green by ``GreenAgent``; the wrapper is created for the ``Blue`` agent
    with ``max_steps=100``.

    Args:
        env_config: Environment configuration supplied by the caller. It is
            accepted for interface compatibility but not read here.

    Returns:
        An ``RLlibWrapper`` around a freshly constructed ``CybORG`` simulation.
    """
    # Derive the package directory from the defining module's location rather
    # than slicing a fixed 10 characters off the path; the slice silently
    # produces a wrong path whenever the file name plus separator is not
    # exactly 10 characters long.
    package_dir = os.path.dirname(inspect.getfile(CybORG))
    path = os.path.join(package_dir, 'Shared', 'Scenarios', 'Scenario2.yaml')
    agents = {"Red": B_lineAgent, "Green": GreenAgent}
    cyborg = CybORG(scenario_file=path, environment='sim', agents=agents)
    env = RLlibWrapper(env=cyborg, agent_name="Blue", max_steps=100)
    return env

def print_results(results_dict):
    """Print a one-line reward summary for a single training result.

    Args:
        results_dict: Mapping that must contain the keys
            ``"training_iteration"``, ``"episode_reward_mean"``,
            ``"episode_reward_max"`` and ``"episode_reward_min"``.

    Raises:
        KeyError: If any of the four keys is missing.
    """
    # Keys are looked up in the same order the summary line displays them.
    wanted_keys = (
        "training_iteration",
        "episode_reward_mean",
        "episode_reward_max",
        "episode_reward_min",
    )
    iteration, reward_mean, reward_max, reward_min = (
        results_dict[key] for key in wanted_keys
    )
    # The space flag on the minimum reserves a sign column for positive values.
    print(f"{iteration:4d} \tr_mean: {reward_mean:.1f} \tr_max: {reward_max:.1f} \tr_min: {reward_min: .1f}")

# Register the factory under the id "CybORG" so that a config entry
# "env": "CybORG" resolves to env_creator.
register_env("CybORG", env_creator)

In [None]:
from ray import air, tune

# Grid-search APPO on the registered "CybORG" environment. Each trial stops
# once it has consumed 2e6 timesteps; results are written under
# results/APPO/tune.
tune.Tuner(
    "APPO",
    run_config=air.RunConfig(
        stop={"timesteps_total": 2e6},
        local_dir='results/APPO', name="tune",
        checkpoint_config=air.CheckpointConfig(
            checkpoint_frequency=500,
            # The run logged by this notebook reached iteration 20 at about
            # 303k timesteps (~15k per iteration), so a 2e6-timestep trial
            # ends after roughly 130 iterations and never reaches the
            # 500-iteration checkpoint. Saving at the end guarantees the
            # final policy of every trial is kept.
            checkpoint_at_end=True,
        ),
    ),
    param_space={
        # CC3 specific.
        "env": "CybORG",
        # General
        "num_gpus": 1,
        "num_workers": 20,
        "num_envs_per_worker": 1,
        # Algorithm parameters; only the learning rate is actually swept,
        # the other grid_search entries hold a single value.
        "train_batch_size": tune.grid_search([6000]),
        "lr": tune.grid_search([0.001, 0.0005, 0.0001]),
        "gamma": tune.grid_search([0.99]),
        "framework": 'tf',
        "use_kl_loss": True,
        "kl_target": tune.grid_search([0.1]),
        # Method specific.
    },
).fit()

2022-12-01 15:17:46,057	INFO worker.py:1528 -- Started a local Ray instance.


0,1
Current time:,2022-12-01 15:22:01
Running for:,00:04:13.24
Memory:,100.2/125.8 GiB

Trial name,status,loc,gamma,kl_target,lr,train_batch_size,iter,total time (s),ts,reward,num_recreated_worker s,episode_reward_max,episode_reward_min
APPO_CybORG_506fc_00000,RUNNING,172.28.0.2:1387,0.99,0.1,0.001,6000,19.0,216.025,287950.0,-226.863,0.0,-148.8,-294.8
APPO_CybORG_506fc_00001,PENDING,,0.98,0.1,0.001,6000,,,,,,,
APPO_CybORG_506fc_00002,PENDING,,0.97,0.1,0.001,6000,,,,,,,
APPO_CybORG_506fc_00003,PENDING,,0.99,0.2,0.001,6000,,,,,,,
APPO_CybORG_506fc_00004,PENDING,,0.98,0.2,0.001,6000,,,,,,,
APPO_CybORG_506fc_00005,PENDING,,0.97,0.2,0.001,6000,,,,,,,
APPO_CybORG_506fc_00006,PENDING,,0.99,0.1,0.002,6000,,,,,,,
APPO_CybORG_506fc_00007,PENDING,,0.98,0.1,0.002,6000,,,,,,,
APPO_CybORG_506fc_00008,PENDING,,0.97,0.1,0.002,6000,,,,,,,
APPO_CybORG_506fc_00009,PENDING,,0.99,0.2,0.002,6000,,,,,,,


[2m[36m(APPO pid=1387)[0m 2022-12-01 15:17:56,110	INFO algorithm.py:2303 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(APPO pid=1387)[0m 2022-12-01 15:17:56,112	INFO algorithm.py:457 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(APPO pid=1387)[0m 2022-12-01 15:18:14,174	INFO trainable.py:164 -- Trainable.setup took 18.066 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.




Trial name,agent_timesteps_total,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,experiment_id,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_recreated_workers,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
APPO_CybORG_506fc_00000,303050,"{'num_env_steps_sampled': 303050, 'num_env_steps_trained': 300000, 'num_agent_steps_sampled': 303050, 'num_agent_steps_trained': 300000, 'num_training_step_calls_since_last_synch_worker_weights': 42, 'last_target_update_ts': 303050, 'num_weight_broadcasts': 4899, 'num_target_updates': 4899, 'num_samples_added_to_queue': 300000}",{},2022-12-01_15-22-02,False,100,{},-174.2,-220.072,-301.8,149,3044,3acba7f4372443058afd4735475b4e00,01589170c3ff,"{'learner': {'default_policy': {'learner_stats': {'cur_lr': 0.0010000000474974513, 'total_loss': 614.0576, 'policy_loss': 41.047886, 'entropy': 2.10191, 'var_gnorm': 25.291784, 'vf_loss': 1146.0615, 'vf_explained_var': -0.009481907, 'entropy_coeff': 0.009999999776482582, 'mean_IS': 1.0030365, 'var_IS': 0.0018346476, 'kl': -1.4429752e-10, 'KL_Coeff': 1.0, 'model': {}, 'grad_gnorm': 39.999996}, 'train': None}}, 'num_env_steps_sampled': 303050, 'num_env_steps_trained': 300000, 'num_agent_steps_sampled': 303050, 'num_agent_steps_trained': 300000, 'num_training_step_calls_since_last_synch_worker_weights': 42, 'last_target_update_ts': 303050, 'num_weight_broadcasts': 4899, 'num_target_updates': 4899, 'num_samples_added_to_queue': 300000, 'learner_queue': {'size_count': 50, 'size_mean': 0.0, 'size_std': 0.0, 'size_quantiles': [0.0, 0.0, 0.0, 0.0, 0.0]}, 'timing_breakdown': {'learner_grad_time_ms': 28.052, 'learner_load_time_ms': 24.054, 'learner_load_wait_time_ms': 4530.019, 'learner_dequeue_time_ms': 55160.37}}",20,172.28.0.2,303050,300000,303050,15100,300000,18000,0,20,0,18000,"{'cpu_util_percent': 68.92500000000001, 'ram_util_percent': 79.6}",1387,{},{},{},"{'mean_raw_obs_processing_ms': 1.7272050369171816, 'mean_inference_ms': 1.563778907200527, 'mean_action_processing_ms': 0.15242600630432343, 'mean_env_wait_ms': 10.79610312292778, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -174.19999999999985, 'episode_reward_min': -301.8, 'episode_reward_mean': -220.07181208053666, 'episode_len_mean': 
100.0, 'episode_media': {}, 'episodes_this_iter': 149, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-174.19999999999985, -253.7999999999996, -190.79999999999967, -289.79999999999995, -272.79999999999984, -256.2, -292.7, -301.8, -286.6999999999999, -249.79999999999964, -225.7999999999996, -207.79999999999967, -186.79999999999967, -212.79999999999964, -195.79999999999967, -208.79999999999964, -179.7999999999997, -189.7999999999997, -279.7999999999999, -190.79999999999967, -185.7999999999997, -290.7999999999999, -196.79999999999967, -209.69999999999965, -216.7999999999996, -241.69999999999962, -196.69999999999968, -175.7999999999997, -193.69999999999968, -196.79999999999967, -206.79999999999964, -194.69999999999968, -199.79999999999964, -194.79999999999967, -290.69999999999993, -196.69999999999968, -267.7999999999998, -176.7999999999997, -262.7999999999997, -192.79999999999967, -282.6999999999999, -199.79999999999964, -197.79999999999964, -213.79999999999964, -193.7999999999997, -288.79999999999995, -216.69999999999962, -188.7999999999997, -198.79999999999964, -193.79999999999964, -182.79999999999967, -202.79999999999964, -183.7999999999997, -193.79999999999967, -290.7999999999999, -224.7999999999996, -198.79999999999967, -207.7999999999996, -289.69999999999993, -176.7999999999997, -199.79999999999967, -217.69999999999962, -203.79999999999967, -278.7999999999999, -287.7999999999999, -192.79999999999967, -288.79999999999995, -203.79999999999964, -178.69999999999968, -258.79999999999967, -207.69999999999962, -184.7999999999997, -201.69999999999968, -185.7999999999997, -208.79999999999964, -201.69999999999968, -184.7999999999997, -281.79999999999984, -282.6999999999999, -260.79999999999967, -195.69999999999965, -194.69999999999968, -179.89999999999972, -278.6999999999999, -215.7999999999996, -282.7999999999999, -200.69999999999965, -233.3999999999996, -204.79999999999964, 
-285.7999999999999, -194.69999999999965, -186.7999999999997, -248.7999999999996, -287.7999999999999, -222.69999999999962, -196.79999999999967, -252.7999999999996, -179.7999999999997, -192.4999999999997, -190.7999999999997, -195.79999999999967, -199.7999999999997, -188.69999999999968, -187.69999999999968, -192.79999999999967, -225.7999999999996, -192.79999999999967, -226.7999999999996, -238.3, -221.69999999999962, -183.79999999999967, -196.79999999999967, -192.79999999999967, -205.79999999999964, -277.79999999999984, -187.79999999999967, -189.7999999999997, -190.7999999999997, -192.59999999999968, -284.6999999999999, -226.69999999999962, -176.7, -284.7999999999999, -196.79999999999967, -200.79999999999967, -210.79999999999967, -186.79999999999967, -189.79999999999967, -279.79999999999984, -197.79999999999967, -187.7999999999997, -196.7999999999997, -294.7, -213.79999999999964, -191.79999999999967, -238.69999999999962, -196.7999999999997, -253.7999999999996, -284.6999999999999, -184.29999999999973, -254.69999999999962, -195.79999999999967, -250.7999999999996, -193.79999999999967, -215.7999999999996, -185.69999999999968, -253.7999999999996, -283.79999999999984, -190.79999999999967], 'episode_lengths': [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]}, 'sampler_perf': 
{'mean_raw_obs_processing_ms': 1.7272050369171816, 'mean_inference_ms': 1.563778907200527, 'mean_action_processing_ms': 0.15242600630432343, 'mean_env_wait_ms': 10.79610312292778, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0}",227.419,11.3939,227.419,"{'training_iteration_time_ms': 0.456, 'synch_weights_time_ms': 0.032}",1669908122,0,303050,20,506fc_00000,18.0812




In [None]:
import ray

# Report the installed Ray version from the package-level attribute.
# NOTE(review): the original read `tune.__version__`; the `ray.tune`
# subpackage does not appear to expose that attribute, and the cell has no
# recorded output, so the top-level package is queried instead.
ray.__version__

In [17]:
!pip show ray

Name: ray
Version: 2.1.0
Summary: Ray provides a simple, universal API for building distributed applications.
Home-page: https://github.com/ray-project/ray
Author: Ray Team
Author-email: ray-dev@googlegroups.com
License: Apache 2.0
Location: /usr/local/lib/python3.8/dist-packages
Requires: msgpack, virtualenv, jsonschema, attrs, protobuf, pyyaml, requests, numpy, click, frozenlist, filelock, grpcio, aiosignal
Required-by: 
