In [1]:
import inspect
import os
import json
import numpy as np
import subprocess
from shutil import make_archive
from statistics import mean, stdev
from CybORG import CybORG
from CybORG.Agents import B_lineAgent, SleepAgent, GreenAgent
from CybORG.Agents.SimpleAgents.Meander import RedMeanderAgent
from CybORG.Agents.Wrappers import ChallengeWrapper
import ray
from ray import tune
import ray.rllib.algorithms.ppo as ppo
from ray.tune.registry import register_env

In [5]:
def wrap(env):
    """Return ``env`` wrapped in a ``ChallengeWrapper`` for the 'Blue' agent."""
    blue_env = ChallengeWrapper(agent_name='Blue', env=env)
    return blue_env

def env_creator(env_config):
    """Build the simulated CybORG Scenario2 environment for the Blue agent.

    Args:
        env_config: Accepted so the function matches the environment-creator
            signature; it is not read.

    Returns:
        A ``ChallengeWrapper`` (agent 'Blue') around a CybORG simulation in
        which Red is a ``B_lineAgent`` and Green is a ``GreenAgent``.
    """
    # Scenario files are located relative to the module that defines CybORG.
    # Taking the directory name (instead of slicing off a fixed number of
    # characters) keeps this correct regardless of that module's file name.
    package_dir = os.path.dirname(str(inspect.getfile(CybORG)))
    path = package_dir + '/Shared/Scenarios/Scenario2.yaml'
    agents = {"Red": B_lineAgent, "Green": GreenAgent}
    cyborg = CybORG(scenario_file=path, environment='sim', agents=agents)
    env = ChallengeWrapper(env=cyborg, agent_name='Blue')
    return env

# Register the creator under the name "cyborg"; the PPO config and constructor
# refer to the environment by this name.
register_env("cyborg", env_creator)

In [3]:
# PPO configuration copied from the training run.
# NOTE(review): presumably at least the model settings must match the ones used
# in training for the checkpoint to restore cleanly — confirm which keys are
# strictly required for evaluation.
config = ppo.DEFAULT_CONFIG.copy()
config['framework'] = "tf"
config['env'] = 'cyborg'
config['num_gpus'] = 1
config["num_workers"] = 3
config['horizon'] = 1024
config['train_batch_size'] = 1024
config['sgd_minibatch_size'] = 128
config['rollout_fragment_length'] = 100
config['model'] = {
    "fcnet_hiddens": [512, 512],
    "fcnet_activation": "relu"
}
config['batch_mode'] = "truncate_episodes"
config['lambda'] = 0.95
config['kl_coeff'] = 0.5
config['clip_rewards'] = True
config['clip_param'] = 0.1
config['vf_clip_param'] = 10.0
config['entropy_coeff'] = 0.01
config['vf_share_layers'] = True

# Checkpoint to evaluate. Set the CYBORG_CHECKPOINT_PATH environment variable
# to point at a different checkpoint; the default is the original local path.
trained_model_path = os.environ.get(
    "CYBORG_CHECKPOINT_PATH",
    "C:/Users/Rafael/ray_results/experiment_2022-07-14_14-38-05/experiment_cyborg_19644_00000_0_2022-07-14_14-38-05/checkpoint_010000/checkpoint-10000",
)

# Build the PPO algorithm on the registered "cyborg" env and load the weights.
agent = ppo.PPO(config=config, env="cyborg")
agent.restore(trained_model_path)

2022-07-15 08:22:06,286	INFO trainable.py:157 -- Trainable.setup took 16.713 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2022-07-15 08:22:06,412	INFO trainable.py:589 -- Restored on 127.0.0.1 from checkpoint: C:/Users/Rafael/ray_results/experiment_2022-07-14_14-38-05/experiment_cyborg_19644_00000_0_2022-07-14_14-38-05/checkpoint_010000/checkpoint-10000
2022-07-15 08:22:06,412	INFO trainable.py:598 -- Current state after restoring: {'_iteration': 10000, '_timesteps_total': None, '_time_total': 49130.859721660614, '_episodes_total': 10017}


In [4]:
# Evaluation settings: number of episodes per (red agent, episode length) pair,
# the agent being evaluated, and the scenarios / episode lengths / opponents.
MAX_EPS = 100
agent_name = 'Blue'
scenarios = ["Scenario2"]
steps = [30, 50, 100]
red_agents = [B_lineAgent, RedMeanderAgent, SleepAgent]

# Scenario files are located relative to the module that defines CybORG.
# Computed once, and via dirname rather than a fixed-length slice so it does
# not depend on that module's file name.
scenario_dir = os.path.dirname(str(inspect.getfile(CybORG))) + '/Shared/Scenarios'

for scenario in scenarios:
    # Only the scenario name is formatted in, so braces in the install path
    # cannot break the format call.
    path = '{}/{}.yaml'.format(scenario_dir, scenario)
    print("EVALUATION FOR", scenario)
    for num_steps in steps:
        for red_agent in red_agents:
            cyborg = CybORG(path, 'sim', agents={'Red': red_agent})
            # wrap() always wraps for the 'Blue' agent.
            wrapped_cyborg = wrap(cyborg)
            observation = wrapped_cyborg.reset()
            action_space = wrapped_cyborg.get_action_space(agent_name)
            total_reward = []  # summed reward of each episode
            actions = []       # per-episode list of (blue action, red action) strings
            for i in range(MAX_EPS):
                r = []
                a = []
                for j in range(num_steps):
                    # NOTE(review): no `explore` argument is passed, so the
                    # policy presumably acts with the config's exploration
                    # setting — confirm this is intended for evaluation.
                    action = agent.compute_single_action(observation)
                    # `done` is ignored: every episode runs exactly num_steps steps.
                    observation, rew, done, info = wrapped_cyborg.step(action)
                    r.append(rew)
                    a.append((str(cyborg.get_last_action(agent_name)), str(cyborg.get_last_action('Red'))))
                total_reward.append(sum(r))
                actions.append(a)
                # Start the next episode from a fresh environment state.
                observation = wrapped_cyborg.reset()
            # statistics.stdev needs at least two samples; report 0.0 otherwise.
            reward_stdev = stdev(total_reward) if len(total_reward) > 1 else 0.0
            print(f'Average reward for red agent {red_agent.__name__} and steps {num_steps} is: {mean(total_reward)} with a standard deviation of {reward_stdev}')

EVALUATION FOR Scenario2
Average reward for red agent B_lineAgent and steps 30 is: -14.554999999999998 with a standard deviation of 15.04015752523351
Average reward for red agent RedMeanderAgent and steps 30 is: -16.079 with a standard deviation of 5.062763548446121
Average reward for red agent SleepAgent and steps 30 is: 0.0 with a standard deviation of 0.0
Average reward for red agent B_lineAgent and steps 50 is: -23.675000000000004 with a standard deviation of 9.210362795692841
Average reward for red agent RedMeanderAgent and steps 50 is: -62.583000000000006 with a standard deviation of 39.83942023988934
Average reward for red agent SleepAgent and steps 50 is: 0.0 with a standard deviation of 0.0
Average reward for red agent B_lineAgent and steps 100 is: -46.58300000000001 with a standard deviation of 17.292843472186274
Average reward for red agent RedMeanderAgent and steps 100 is: -150.039 with a standard deviation of 63.682225443005265
Average reward for red agent SleepAgent and s