In [1]:
import inspect
import os
import json
import numpy as np
import subprocess
from shutil import make_archive
from statistics import mean, stdev
from CybORG import CybORG
from CybORG.Agents import B_lineAgent, SleepAgent, GreenAgent
from CybORG.Agents.SimpleAgents.Meander import RedMeanderAgent
from CybORG.Agents.Wrappers import ChallengeWrapper
import ray
from ray import tune
import ray.rllib.algorithms.impala as impala
from ray.tune.registry import register_env



In [2]:
def wrap(env):
    return ChallengeWrapper(env=env, agent_name='Blue')

def env_creator(env_config):
    path = str(inspect.getfile(CybORG))
    path = path[:-10] + '/Shared/Scenarios/Scenario2.yaml'
    agents = {"Red": B_lineAgent, "Green": GreenAgent}
    cyborg = CybORG(scenario_file=path, environment='sim', agents=agents)
    env = ChallengeWrapper(env=cyborg, agent_name='Blue')
    return env

register_env("cyborg", env_creator)

In [3]:
# Configuration used to train the model (not sure if needed)
config = impala.DEFAULT_CONFIG.copy()
config['framework'] = "tf"
config['env'] = 'cyborg'
config['num_gpus'] = 1
config["num_workers"] = 3
config['horizon'] = 1024

# Load the model from path
trained_model_path = "C:/Users/Rafael/ray_results/experiment_2022-07-24_21-42-03/experiment_cyborg_fbc03_00000_0_2022-07-24_21-42-03/checkpoint_010000/checkpoint-10000"
agent = impala.Impala(config=config, env="cyborg")
agent.restore(trained_model_path)

2022-07-26 12:25:01,729	INFO trainable.py:157 -- Trainable.setup took 15.408 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2022-07-26 12:25:01,815	INFO trainable.py:589 -- Restored on 127.0.0.1 from checkpoint: C:/Users/Rafael/ray_results/experiment_2022-07-24_21-42-03/experiment_cyborg_fbc03_00000_0_2022-07-24_21-42-03/checkpoint_010000/checkpoint-10000
2022-07-26 12:25:01,816	INFO trainable.py:598 -- Current state after restoring: {'_iteration': 10000, '_timesteps_total': None, '_time_total': 103045.50446414948, '_episodes_total': 29382}


In [4]:
MAX_EPS = 100
agent_name = 'Blue'
scenarios = ["Scenario2"]
steps = [30, 50, 100]
red_agents = [B_lineAgent, RedMeanderAgent, SleepAgent]

for scenario in scenarios:
    path = (str(inspect.getfile(CybORG))[:-10] + '/Shared/Scenarios/{}.yaml').format(scenario)
    print("EVALUATION FOR", scenario)
    for num_steps in steps:
        for red_agent in red_agents:
            cyborg = CybORG(path, 'sim', agents={'Red': red_agent})
            wrapped_cyborg = wrap(cyborg)
            observation = wrapped_cyborg.reset()
            # observation = cyborg.reset().observation
            action_space = wrapped_cyborg.get_action_space(agent_name)
            # action_space = cyborg.get_action_space(agent_name)
            total_reward = []
            actions = []
            for i in range(MAX_EPS):
                r = []
                a = []
                # cyborg.env.env.tracker.render()
                for j in range(num_steps):
                    # action = agent.get_action(observation, action_space)
                    # action, _states = agent.predict(observation)
                    action = agent.compute_single_action(observation) # RLlib 
                    # action, state_out, _ = agent.compute_single_action(observation, state) # RLlib with Attention 
                    observation, rew, done, info = wrapped_cyborg.step(action)
                    # result = cyborg.step(agent_name, action)
                    r.append(rew)
                    # r.append(result.reward)
                    a.append((str(cyborg.get_last_action('Blue')), str(cyborg.get_last_action('Red'))))
                total_reward.append(sum(r))
                actions.append(a)
                # observation = cyborg.reset().observation
                observation = wrapped_cyborg.reset()
            print(f'Average reward for red agent {red_agent.__name__} and steps {num_steps} is: {mean(total_reward)} with a standard deviation of {stdev(total_reward)}')

EVALUATION FOR Scenario2
Average reward for red agent B_lineAgent and steps 30 is: -9.377999999999997 with a standard deviation of 4.049007360583655
Average reward for red agent RedMeanderAgent and steps 30 is: -11.536000000000001 with a standard deviation of 2.5274860748549854
Average reward for red agent SleepAgent and steps 30 is: 0.0 with a standard deviation of 0.0
Average reward for red agent B_lineAgent and steps 50 is: -17.871 with a standard deviation of 3.956743253196653
Average reward for red agent RedMeanderAgent and steps 50 is: -32.03199999999998 with a standard deviation of 7.790422506403796
Average reward for red agent SleepAgent and steps 50 is: 0.0 with a standard deviation of 0.0
Average reward for red agent B_lineAgent and steps 100 is: -36.89400000000006 with a standard deviation of 7.3765780027261405
Average reward for red agent RedMeanderAgent and steps 100 is: -98.85799999999999 with a standard deviation of 31.14338031386023


Exception in thread Thread-14:
Traceback (most recent call last):
  File "c:\Users\Rafael\Anaconda3\envs\cyborg2\lib\threading.py", line 932, in _bootstrap_inner
    self.run()
  File "c:\Users\Rafael\Anaconda3\envs\cyborg2\lib\site-packages\ray\rllib\execution\learner_thread.py", line 74, in run
    self.step()
  File "c:\Users\Rafael\Anaconda3\envs\cyborg2\lib\site-packages\ray\rllib\execution\multi_gpu_learner_thread.py", line 143, in step
    buffer_idx, released = self.ready_tower_stacks_buffer.get()
  File "c:\Users\Rafael\Anaconda3\envs\cyborg2\lib\site-packages\ray\rllib\execution\minibatch_buffer.py", line 48, in get
    self.buffers[self.idx] = self.inqueue.get(timeout=self.timeout)
  File "c:\Users\Rafael\Anaconda3\envs\cyborg2\lib\queue.py", line 178, in get
    raise Empty
_queue.Empty


Average reward for red agent SleepAgent and steps 100 is: 0.0 with a standard deviation of 0.0
