In [1]:
import inspect
import os
import json
import numpy as np
import subprocess
from shutil import make_archive
from statistics import mean, stdev
from CybORG import CybORG
from CybORG.Agents import B_lineAgent, SleepAgent, GreenAgent
from CybORG.Agents.SimpleAgents.Meander import RedMeanderAgent
from CybORG.Agents.Wrappers import ChallengeWrapper
import ray
from ray import tune
import ray.rllib.algorithms.ppo as ppo
from ray.tune.registry import register_env

In [10]:
def wrap(env):
    """Return `env` wrapped in a `ChallengeWrapper` acting as the 'Blue' agent."""
    blue_env = ChallengeWrapper(agent_name='Blue', env=env)
    return blue_env

def env_creator(env_config):
    """Build the Blue-agent training/evaluation environment on Scenario1b.

    Creates a simulated CybORG instance whose Red agent is `B_lineAgent` and
    wraps it for the 'Blue' agent.

    Args:
        env_config: Accepted so the function matches the creator signature
            passed to `register_env`; it is not read here.

    Returns:
        The wrapped environment produced by `wrap`.
    """
    # Locate the scenario file relative to the installed CybORG package.
    # `os.path.dirname` replaces a fixed-length slice that silently depended
    # on the module file being named exactly "CybORG.py".
    cyborg_dir = os.path.dirname(inspect.getfile(CybORG))
    path = cyborg_dir + '/Shared/Scenarios/Scenario1b.yaml'
    agents = {"Red": B_lineAgent}
    cyborg = CybORG(scenario_file=path, environment='sim', agents=agents)
    # Reuse the shared helper so the wrapper setup lives in a single place.
    return wrap(cyborg)

# Register the creator under the name "cyborg" so the environment can be
# selected by that string (e.g. env="cyborg" when constructing the trainer).
register_env("cyborg", env_creator)

In [11]:
# Start from RLlib's default PPO configuration.
config = ppo.DEFAULT_CONFIG.copy()

# Hyperparameters recorded from the training run, left disabled for reference.
# NOTE(review): the restore below is performed with the default config only;
# confirm whether any of these (the model-related keys in particular) must be
# re-enabled for the restored policy to match the trained one.
# config['framework'] = "tf"
# config['env'] = 'cyborg'
# config['num_gpus'] = 1
# config["num_workers"] = 3
# config['horizon'] = 1000
# config['train_batch_size'] = 512
# config['sgd_minibatch_size'] = 32
# config['rollout_fragment_length'] = 100
# # config['model'] = {
# #     "fcnet_hiddens": [512, 512],
# #     "fcnet_activation": "relu"
# # }
# config['batch_mode'] = "truncate_episodes"
# config['lambda'] = 0.95
# config['kl_coeff'] = 0.5
# config['clip_rewards'] = True
# config['clip_param'] = 0.1
# config['vf_clip_param'] = 10.0
# config['entropy_coeff'] = 0.01
# config['vf_share_layers'] = True

# Checkpoint to evaluate. Set the CYBORG_CHECKPOINT_PATH environment variable
# to run this notebook on another machine; the fallback is the path used for
# the original run, so existing behaviour is unchanged when it is unset.
trained_model_path = os.environ.get(
    "CYBORG_CHECKPOINT_PATH",
    "C:/Users/Rafael/ray_results/experiment_2022-07-05_11-56-56/experiment_cyborg_18c39_00000_0_2022-07-05_11-56-56/checkpoint_005000/checkpoint-5000",
)

# Build a PPO trainer on the registered "cyborg" environment and load the
# trained state from the checkpoint.
agent = ppo.PPO(config=config, env="cyborg")
agent.restore(trained_model_path)

2022-07-14 10:27:00,972	INFO trainable.py:589 -- Restored on 127.0.0.1 from checkpoint: C:/Users/Rafael/ray_results/experiment_2022-07-05_11-56-56/experiment_cyborg_18c39_00000_0_2022-07-05_11-56-56/checkpoint_005000/checkpoint-5000
2022-07-14 10:27:00,973	INFO trainable.py:598 -- Current state after restoring: {'_iteration': 5000, '_timesteps_total': None, '_time_total': 115647.05923509598, '_episodes_total': 25000}


In [23]:
# Evaluation settings.
MAX_EPS = 100                                   # episodes per configuration
agent_name = 'Blue'
scenarios = ["Scenario1b", "Scenario2"]
steps = [30, 50]                                # episode lengths to evaluate
red_agents = [B_lineAgent, RedMeanderAgent, SleepAgent]

# Directory of the installed CybORG package; scenario files live beneath it.
# Computed once with `os.path.dirname` instead of a fixed-length slice of the
# module path.
cyborg_dir = os.path.dirname(inspect.getfile(CybORG))

# Results for every configuration, keyed by (scenario, num_steps, red agent
# name), so they remain available after the loop instead of being overwritten.
evaluation_results = {}

for scenario in scenarios:
    # Format only the file-name part, so braces in the install path cannot
    # be misread as format placeholders.
    path = cyborg_dir + '/Shared/Scenarios/{}.yaml'.format(scenario)
    print("EVALUATION FOR", scenario)
    for num_steps in steps:
        for red_agent in red_agents:
            cyborg = CybORG(path, 'sim', agents={'Red': red_agent})
            wrapped_cyborg = wrap(cyborg)
            # Not consumed by the RLlib policy below; kept so the name stays
            # defined for any code that inspects it afterwards.
            action_space = wrapped_cyborg.get_action_space(agent_name)
            total_reward = []   # summed reward of each episode
            actions = []        # per-episode list of (Blue action, Red action)
            for i in range(MAX_EPS):
                # Reset at the start of each episode; this avoids the extra
                # reset that used to run after the final episode.
                observation = wrapped_cyborg.reset()
                r = []
                a = []
                for j in range(num_steps):
                    # NOTE(review): called with its defaults; confirm whether
                    # exploration should be disabled (explore=False) when
                    # evaluating a trained policy.
                    action = agent.compute_single_action(observation)
                    # Episodes run for a fixed number of steps; `done` and
                    # `info` are not used.
                    observation, rew, done, info = wrapped_cyborg.step(action)
                    r.append(rew)
                    a.append((str(cyborg.get_last_action(agent_name)), str(cyborg.get_last_action('Red'))))
                total_reward.append(sum(r))
                actions.append(a)
            evaluation_results[(scenario, num_steps, red_agent.__name__)] = {
                "rewards": total_reward,
                "actions": actions,
            }
            # `stdev` needs at least two samples; report 0.0 for a single episode.
            reward_stdev = stdev(total_reward) if len(total_reward) > 1 else 0.0
            print(f'Average reward for red agent {red_agent.__name__} and steps {num_steps} is: {mean(total_reward)} with a standard deviation of {reward_stdev}')

EVALUATION FOR Scenario1b
Average reward for red agent B_lineAgent and steps 30 is: -9.336 with a standard deviation of 3.8067595382185857
Average reward for red agent RedMeanderAgent and steps 30 is: -11.942 with a standard deviation of 2.712454626534811
Average reward for red agent SleepAgent and steps 30 is: -0.01 with a standard deviation of 0.1
Average reward for red agent B_lineAgent and steps 50 is: -14.744 with a standard deviation of 5.654377405018508
Average reward for red agent RedMeanderAgent and steps 50 is: -24.82 with a standard deviation of 6.035166304521554
Average reward for red agent SleepAgent and steps 50 is: 0.0 with a standard deviation of 0.0
EVALUATION FOR Scenario2
Average reward for red agent B_lineAgent and steps 30 is: -159.87899999999996 with a standard deviation of 82.86070859757913
Average reward for red agent RedMeanderAgent and steps 30 is: -30.994999999999997 with a standard deviation of 16.444552535134793
Average reward for red agent SleepAgent and s