## First create network


In [1]:
from yawning_titan.networks.node import Node
from yawning_titan.networks.network import Network

#Import packages - SB3
import time
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3 import A2C, DQN, PPO
from stable_baselines3.ppo import MlpPolicy as PPOMlp

from yawning_titan.envs.generic.core.blue_interface import BlueInterface
from yawning_titan.envs.generic.core.red_interface import RedInterface
from yawning_titan.envs.generic.generic_env import GenericNetworkEnv
from yawning_titan.envs.generic.core.action_loops import ActionLoop
from yawning_titan.envs.generic.core.network_interface import NetworkInterface
from yawning_titan.networks.network_db import default_18_node_network
import yawning_titan.game_modes
from yawning_titan.envs.generic.core.action_loops import ActionLoop

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Start from an empty Network; the nodes and edges below are attached to it.
network = Network()

# Five PCs placed left-to-right on a single row. Each entry is
# (node name, x position, extra flag attributes to set on that node).
pc_specs = [
    ("PC 1", -1.00, {"entry_node": True}),
    ("PC 2", -0.50, {}),
    ("PC 3", 0.00, {}),
    ("PC 4", 0.50, {}),
    ("PC 5", 1.00, {"high_value_node": True}),
]

pcs = []
for pc_name, pc_x, pc_flags in pc_specs:
    pc = Node(pc_name)
    # The node is registered with the network first and configured afterwards.
    network.add_node(pc)
    pc.x_pos = pc_x
    pc.y_pos = 0.01
    for flag_name, flag_value in pc_flags.items():
        setattr(pc, flag_name, flag_value)
    pc.vulnerability = 0.5
    pcs.append(pc)

# Keep the individual node names available to any later cell that wants them.
pc_1, pc_2, pc_3, pc_4, pc_5 = pcs

# Link each node to its right-hand neighbour: PC 1 - PC 2 - PC 3 - PC 4 - PC 5.
for left_pc, right_pc in zip(pcs, pcs[1:]):
    network.add_edge(left_pc, right_pc)

# Optional reset of entry nodes, high value nodes and vulnerability scores (left disabled).
# NOTE(review): the original remark said ".setup()" while the disabled call is ".reset()" - confirm which is intended.
# network.reset()

# Print the per-node details table for the finished network.
network.show(verbose=True)

UUID                                  Name    High Value Node    Entry Node      Vulnerability  Position (x,y)
------------------------------------  ------  -----------------  ------------  ---------------  ----------------
20d8ed52-7ff6-4979-be62-1244821fd527  PC 1    False              True                      0.5  -1.00, 0.01
d626d0a9-b31e-4e90-97cf-0c8208e35245  PC 2    False              False                     0.5  -0.50, 0.01
6fed26e9-788d-48c8-b56f-251a6a57e6f1  PC 3    False              False                     0.5  0.00, 0.01
32c70577-46ef-4002-8a79-de03e13821c9  PC 4    False              False                     0.5  0.50, 0.01
0227c961-870c-43d5-8c73-c564af3b9a5e  PC 5    True               False                     0.5  1.00, 0.01


In [3]:
## Using DB
# Open the game-mode database; `db` is used by the following cells to list
# the stored game modes and to fetch one of them by uuid.
# NOTE(review): GameModeSchema and DocMetadataSchema are imported but not used
# in the cells visible here - confirm whether they are still needed.
from yawning_titan.game_modes.game_mode_db import GameModeDB, GameModeSchema
from yawning_titan.db.doc_metadata import DocMetadataSchema
db = GameModeDB()

In [4]:
# Print the table of stored game modes (name, author, locked, uuid).
# NOTE(review): the positional True is presumably a "verbose" switch - confirm against GameModeDB.show.
db.show(True)

name               author              locked    uuid
-----------------  ------------------  --------  ------------------------------------
DCBO Agent Config  dstl/YAWNING-TITAN  True      bac2cb9d-b24b-426c-88a5-5edd0c2de413
Default Game Mode  dstl/YAWNING-TITAN  True      900a704f-6271-4994-ade7-40b74d3199b1
Low skill red      dstl/YAWNING-TITAN  True      3ccd9988-8781-4c3e-9c75-44cc987ae6af
simple_mode        Hannah Harrison     False     919da33c-7bc9-4d29-99eb-097a7e9bb016
no_zero_day        Hannah Harrison     False     fe76bb6c-4806-41af-aaf3-ac78d2942021


In [5]:
# Fetch the game mode listed as "simple_mode" in the table printed by db.show() above.
# The uuid is hard-coded, so this cell only works against a database that contains that entry.
simple_mode = db.get("919da33c-7bc9-4d29-99eb-097a7e9bb016")

In [6]:
## Build network interface
# Combine the "simple_mode" game mode with the custom five-node network built earlier.
# The "s_" prefix is used for every object derived from this setup (s_env, s_ppo_expert, ...).
s_network_interface = NetworkInterface(game_mode=simple_mode, network=network)

In [7]:
## Create the agent interfaces
# Both the red and the blue interface are built on the same network interface object.
red = RedInterface(s_network_interface)
blue = BlueInterface(s_network_interface)

In [8]:
## Create environment
# Wrap the two agent interfaces and the network interface into the environment
# object that is later checked, evaluated and stepped through.
s_env = GenericNetworkEnv(red, blue, s_network_interface)

In [9]:
## Check compliant with OpenAI gym
# Run the Stable-Baselines3 environment checker on s_env; warn=True asks it to
# also report non-fatal issues as warnings.
check_env(s_env, warn=True)
# Reset the environment after the check; the return value is deliberately discarded.
_ = s_env.reset()

## Creating a dataset of states and actions
Reference: the Stable-Baselines3 `pretraining.ipynb` Colab notebook — https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/pretraining.ipynb#scrollTo=Tgx4AMZo8anP

In [10]:
import gymnasium as gym
from tqdm import tqdm
import numpy as np

# Record the installed version of the package imported as `gym`.
print(gym.__version__)

0.26.3


In [11]:
import torch as th
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

In [12]:
from stable_baselines3 import PPO, A2C, SAC, TD3
from stable_baselines3.common.evaluation import evaluate_policy

In [13]:
# Load the previously trained PPO agent from disk and bind it to s_env;
# this model acts as the "expert" whose behaviour is recorded further down.
s_ppo_expert = PPO.load(
    './ppo-s-linear.zip',
    env=s_env,
    device='auto',
    custom_objects=None,
    print_system_info=False,
    force_reset=True,
)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [14]:
# Score the loaded expert on s_env over ten evaluation episodes and report
# the mean and standard deviation of the episode reward.
mean_reward, std_reward = evaluate_policy(
    s_ppo_expert,
    s_env,
    n_eval_episodes=10,
)

print(f"Mean reward = {mean_reward} +/- {std_reward}")




Mean reward = -88.45666600763798 +/- 0.3818813911574492


In [15]:
# Number of environment steps to record from the expert (sizes the buffers below).
num_interactions = 40_000

In [19]:
# Pre-allocate one row per recorded step. The observation buffer has the same
# shape whichever kind of action space the environment exposes.
expert_observations = np.empty((num_interactions, *s_env.observation_space.shape))

# For a Box action space only the first dimension of its shape is used;
# any other action space contributes its full shape.
if isinstance(s_env.action_space, gym.spaces.Box):
    expert_actions = np.empty((num_interactions, s_env.action_space.shape[0]))
else:
    expert_actions = np.empty((num_interactions, *s_env.action_space.shape))

obs = s_env.reset()

for i in tqdm(range(num_interactions)):
    # Ask the expert for its deterministic action, then store the
    # (observation, action) pair before the environment moves on.
    action, _ = s_ppo_expert.predict(obs, deterministic=True)
    expert_observations[i], expert_actions[i] = obs, action

    # NOTE(review): step() is unpacked into four values here, and printing
    # `truncated` in the next cell shows a dict - so the third and fourth values
    # look like (done, info) rather than (terminated, truncated). Names are kept
    # unchanged because the next cell reads `truncated`.
    obs, reward, terminated, truncated = s_env.step(action)
    done = terminated
    # Only the third step() value is used to decide when to start a new episode.
    if done:
        obs = s_env.reset()

# Persist both buffers in a single compressed archive.
np.savez_compressed(
    "s_expert_data", expert_actions=expert_actions, expert_observations=expert_observations
)

100%|██████████████████████████████████████████████████████████████████████████| 40000/40000 [00:31<00:00, 1254.02it/s]


In [20]:
# Debug check: show the fourth value unpacked from the last s_env.step() call.
# Despite its name, the printed value is a dict of initial-state details,
# not a boolean truncation flag.
print(truncated)

{'initial_state': {'20d8ed52-7ff6-4979-be62-1244821fd527': 1, 'd626d0a9-b31e-4e90-97cf-0c8208e35245': 1, '6fed26e9-788d-48c8-b56f-251a6a57e6f1': 0, '32c70577-46ef-4002-8a79-de03e13821c9': 0, '0227c961-870c-43d5-8c73-c564af3b9a5e': 0}, 'initial_blue_view': {'20d8ed52-7ff6-4979-be62-1244821fd527': 1, 'd626d0a9-b31e-4e90-97cf-0c8208e35245': 1, '6fed26e9-788d-48c8-b56f-251a6a57e6f1': 0, '32c70577-46ef-4002-8a79-de03e13821c9': 0, '0227c961-870c-43d5-8c73-c564af3b9a5e': 0}, 'initial_vulnerabilities': {'20d8ed52-7ff6-4979-be62-1244821fd527': 0.5, 'd626d0a9-b31e-4e90-97cf-0c8208e35245': 0.5, '6fed26e9-788d-48c8-b56f-251a6a57e6f1': 0.5, '32c70577-46ef-4002-8a79-de03e13821c9': 0.5, '0227c961-870c-43d5-8c73-c564af3b9a5e': 0.01}, 'initial_red_location': Node(uuid='d626d0a9-b31e-4e90-97cf-0c8208e35245', name='PC 2', high_value_node=False, entry_node=False, vulnerability=0.5, x_pos=-0.5, y_pos=0.01), 'initial_graph': {Node(uuid='20d8ed52-7ff6-4979-be62-1244821fd527', name='PC 1', high_value_node=Fal

In [21]:
# Reload the archive written by np.savez_compressed above (NumPy appends the
# ".npz" suffix to the file name) and list the arrays stored inside it.
b = np.load('s_expert_data.npz')
print(b.files)

['expert_actions', 'expert_observations']


In [22]:
# Spot-check a single stored action (index 34 appears to be an arbitrary sample).
# The buffer was created with np.empty's default dtype, hence the float output.
b['expert_actions'][34] 

1.0

In [23]:
# Inspect the final recorded observation vector.
# NOTE(review): the "#54!" remark below is unexplained - the array displayed
# for this cell has 61 entries; confirm what 54 refers to.
b['expert_observations'][-1] #54!

array([0.        , 1.        , 0.        , 0.        , 0.        ,
       1.        , 0.        , 1.        , 0.        , 0.        ,
       0.        , 1.        , 0.        , 1.        , 0.        ,
       0.        , 0.        , 1.        , 0.        , 1.        ,
       0.        , 0.        , 0.        , 1.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       1.        , 1.        , 0.        , 0.        , 0.        ,
       0.5       , 0.5       , 0.5       , 0.5       , 0.01      ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       1.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 1.        ,
       0.69999999])