In [19]:
import inspect
import time
from statistics import mean, stdev
from CybORG import CybORG
from CybORG.Agents import B_lineAgent, SleepAgent, GreenAgent
from CybORG.Agents.SimpleAgents.BaseAgent import BaseAgent
from CybORG.Agents.SimpleAgents.BlueReactAgent import BlueReactRemoveAgent
from CybORG.Agents.SimpleAgents.Meander import RedMeanderAgent
from CybORG.Agents.Wrappers.EnumActionWrapper import EnumActionWrapper
from CybORG.Agents.Wrappers.rllib_wrapper import RLlibWrapper
from CybORG.Agents.Wrappers.FixedFlatWrapper import FixedFlatWrapper
from CybORG.Agents.Wrappers.OpenAIGymWrapper import OpenAIGymWrapper
from CybORG.Agents.Wrappers.ReduceActionSpaceWrapper import ReduceActionSpaceWrapper
from CybORG.Agents.Wrappers import ChallengeWrapper
import os
from ray.tune.registry import register_env
from ray.rllib.algorithms.ppo import PPOConfig
import warnings
import ray
import numpy as np
from tqdm import trange
warnings.filterwarnings('ignore')


In [21]:
import gym
from gym import error, spaces, utils
from gym.utils import seeding
import numpy as np

import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
from keras.layers import Bidirectional
from keras.models import Sequential
from keras.layers.core import Activation, Dropout, Dense
from keras.layers import Flatten, LSTM
from keras.layers import Bidirectional
from ray import tune
from ray.tune.registry import register_env

sequence_length=3
state_len = 52
num_action = 1
encoding_len = state_len + num_action

class WorldMovelEnv(gym.Env):

    def __init__(self):
        
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(52,))
        self.action_space = gym.spaces.Discrete(145)
        
        self.step_count = 0
        self.st_model = Sequential()
        self.st_model.add(Bidirectional(LSTM(256, activation='relu', return_sequences=True), input_shape=(sequence_length, encoding_len)))
        self.st_model.add(Flatten())
        self.st_model.add(Dense(512, activation='relu'))
        self.st_model.add(Dense(512, activation='relu'))
        self.st_model.add(Dense(512, activation='relu'))
        self.st_model.add(Dense(9674, activation='softmax'))
        self.st_model.compile(optimizer='adam', loss=tf.keras.losses.CategoricalCrossentropy(), metrics=[tf.keras.metrics.Accuracy()])
        self.st_model.load_weights('ClassStateModel')
        
        self.r_model = Sequential()
        self.r_model.add(Bidirectional(LSTM(256, activation='relu', return_sequences=True), input_shape=(sequence_length, state_len)))
        self.r_model.add(Flatten())
        self.r_model.add(Dense(256, activation='relu'))
        self.r_model.add(Dense(128, activation='relu'))
        self.r_model.add(Dense(1, activation='linear'))
        self.r_model.compile(optimizer='adam', loss=tf.keras.losses.MeanSquaredError())
        self.r_model.load_weights('RewardModel')
        
        self.index_to_state = np.load('index_to_state.npy', allow_pickle=True).item()
        self.state_to_index = np.load('state_to_index.npy', allow_pickle=True).item()
        
        self.state_action = np.zeros((sequence_length,encoding_len))
        self.state = np.zeros((sequence_length,state_len))

    def step(self, action):
        self.step_count += 1
        
        self.state_action[0,-1] = action
        obs_index_probs = self.st_model.predict(np.array([self.state_action]))
        #obs_index = np.argmax(obs_index_probs)
        obs_index = np.random.choice(np.arange(9674), p=obs_index_probs[0])
        new_state = self.index_to_state[obs_index]
        new_state = np.frombuffer(new_state)
        
        reward = self.r_model.predict(np.array([self.state]))
        
        for i in range(1,3):
            self.state[i,:] = self.state[i-1,:]
            
        self.state[0,:] = new_state
        
        for i in range(1,3):
            self.state_action[i,:] = self.state_action[i-1,:]
        
        self.state_action[0,:] = np.concatenate([new_state, [0]])
        
    
        
        done = self.step_count == 100
        return new_state, reward[0][0], done, {}

    def reset(self):
        step_count = 0
        
        self.state_action = np.zeros((sequence_length,encoding_len))
        self.state = np.zeros((sequence_length,state_len))
        
        obs_index_probs = self.st_model.predict(np.array([self.state_action]))
        #obs_index = np.argmax(obs_index_probs)
        obs_index = np.random.choice(np.arange(9674), p=obs_index_probs[0])
        new_state = self.index_to_state[obs_index]
        new_state = np.frombuffer(new_state)
      
        return new_state
    

    def render(self, mode='human', close=False):
        pass

    def close(self):
        pass
    
def env_creator(config):
    return WorldMovelEnv() 

register_env(name="DreamCybORG", env_creator=env_creator)

In [26]:
MAX_EPS = 100
agent_name = 'Blue'

def wrap(env):
    return RLlibWrapper(agent_name="Blue", env=env)

def env_creator(env_config: dict):
    path = str(inspect.getfile(CybORG))
    path = path[:-10] + '/Shared/Scenarios/Scenario2.yaml'
    agents = {"Red": B_lineAgent, "Green": GreenAgent}
    cyborg = CybORG(scenario_file=path, environment='sim', agents=agents)
    env = RLlibWrapper(env=cyborg, agent_name="Blue", max_steps=100)
    return env

register_env(name="CybORG", env_creator=env_creator)

config = PPOConfig().resources(num_gpus=1).environment(env = 'smac').rollouts(num_rollout_workers=1)

trainer = config.build("CybORG")
path = 'results/APPO/explore/PPO_CybORG_5000a_00000_0_exploration_config=type_RE3_embeds_dim_128_beta_schedule_constant_sub_exploration_type_StochasticSampling_2022-12-05_22-01-04/checkpoint_004902'
path = 'results/APPO/explore/PPO_CybORG_5000a_00001_1_exploration_config=type_StochasticSampling,fcnet_activation=tanh,fcnet_hiddens=256_256_2022-12-06_02-59-07/checkpoint_004902'
path = 'results/checkpoint_000234'
trainer.from_checkpoint(path)

path = str(inspect.getfile(CybORG))
path = path[:-10] + '/Shared/Scenarios/Scenario2.yaml'
obs = []
#print(f'using CybORG v{cyborg_version}, {scenario}\n')
for red_agent in [B_lineAgent]:#, RedMeanderAgent]:

    cyborg = CybORG(path, 'sim', agents={'Red': red_agent})
    wrapped_cyborg = wrap(cyborg)

    observation = wrapped_cyborg.reset()
    obs.append(observation)
    # observation = cyborg.reset().observation

    action_space = wrapped_cyborg.get_action_space(agent_name)
    # action_space = cyborg.get_action_space(agent_name)
    total_reward = []
    actions = []
    for i in trange(MAX_EPS):
        r = []
        #a = []
        # cyborg.env.env.tracker.render()
        for j in range(100):
            action = trainer.compute_single_action(observation, explore=False)
            #action_vec = np.zeros(145)
            #action_vec[int(action)] = 1
            #action = agent.get_action(observation, action_space)
            observation, rew, done, info = wrapped_cyborg.step(action)
            obs.append(observation)
            #actions.append(action_vec)
            # result = cyborg.step(agent_name, action)
            r.append(rew)
            # r.append(result.reward)
           # a.append((str(cyborg.get_last_action('Blue')), str(cyborg.get_last_action('Red'))))
        total_reward.append(sum(r))
        # observation = cyborg.reset().observation
        observation = wrapped_cyborg.reset()
    print(f'Average reward for red agent {red_agent.__name__} and steps {100} is: {mean(total_reward):.1f} with a standard deviation of {stdev(total_reward):.1f}')
    #return mean(total_reward), np.mean(np.array(obs), axis=0),  np.mean(np.array(actions), axis=0)

2023-01-03 13:01:18,343	INFO trainable.py:164 -- Trainable.setup took 11.563 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(RolloutWorker pid=18330)[0m Instructions for updating:
[2m[36m(RolloutWorker pid=18330)[0m Call initializer instance with the dtype argument instead of passing it to the constructor
[2m[36m(RolloutWorker pid=18330)[0m Instructions for updating:
[2m[36m(RolloutWorker pid=18330)[0m Call initializer instance with the dtype argument instead of passing it to the constructor
[2m[36m(RolloutWorker pid=18330)[0m Instructions for updating:
[2m[36m(RolloutWorker pid=18330)[0m Call initializer instance with the dtype argument instead of passing it to the constructor
[2m[36m(RolloutWorker pid=18331)[0m Instructions for updating:
[2m[36m(RolloutWorker pid=18331)[0m Call initializer instance with the dtype argument instead of passing it to the constructor
[2m[36m(RolloutW





2023-01-03 13:01:33,062	INFO trainable.py:164 -- Trainable.setup took 14.698 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
100%|██████████| 100/100 [01:36<00:00,  1.04it/s]

Average reward for red agent B_lineAgent and steps 100 is: -1134.5 with a standard deviation of 18.3



