## First, create the network


In [1]:
from yawning_titan.networks.node import Node
from yawning_titan.networks.network import Network

In [2]:
# Start from an empty Network and populate it below
network = Network()

# Settings for each node, keyed by node name. Each inner dict maps a Node
# attribute name to the value assigned to it; the attributes are applied in
# the order listed, and only after the node has been added to the network.
node_settings = {
    "PC 1": {"x_pos": 0.01, "y_pos": 1.00, "entry_node": True, "vulnerability": 0.3},
    "PC 2": {"x_pos": -0.50, "y_pos": 0.30, "vulnerability": 0.3},
    "PC 3": {"x_pos": -0.50, "y_pos": -0.30, "vulnerability": 0.3},
    "PC 4": {"x_pos": 0.50, "y_pos": 0.01, "vulnerability": 0.3},
    "PC 5": {"x_pos": 0.01, "y_pos": -1.00, "high_value_node": True, "vulnerability": 0.5},
}

# Create each node, add it to the network, then configure it
nodes = []
for node_name, settings in node_settings.items():
    node = Node(node_name)
    network.add_node(node)
    for attribute, value in settings.items():
        setattr(node, attribute, value)
    nodes.append(node)

# Keep the individual node names available for the edge list (and later cells)
pc_1, pc_2, pc_3, pc_4, pc_5 = nodes

# Connect the nodes: two paths lead from PC 1 to PC 5 (via PC 2 and PC 3, or via PC 4)
edges = [
    (pc_1, pc_2),
    (pc_1, pc_4),
    (pc_2, pc_3),
    (pc_4, pc_5),
    (pc_3, pc_5),
]
for source, target in edges:
    network.add_edge(source, target)

# Left disabled: per the original note, resetting the network re-assigns the
# entry nodes, high value nodes and vulnerability scores that were set above.
# network.reset()

# Print a table of every node's details
network.show(verbose=True)



UUID                                  Name    High Value Node    Entry Node      Vulnerability  Position (x,y)
------------------------------------  ------  -----------------  ------------  ---------------  ----------------
ec8f26df-9d81-4f4f-906b-3c91731dc429  PC 1    False              True                      0.3  0.01, 1.00
60394c53-491b-4784-aded-690e3ecf5d39  PC 2    False              False                     0.3  -0.50, 0.30
79cac5ff-3638-423d-b3d0-3cef1f3f6753  PC 3    False              False                     0.3  -0.50, -0.30
96450e63-2c41-4bf4-b040-413109a94122  PC 4    False              False                     0.3  0.50, 0.01
213d4a78-4fff-4bad-824d-4f68f5ef779b  PC 5    True               False                     0.5  0.01, -1.00


## Creating environment and training


In [3]:
# Load the TensorBoard notebook extension so the %tensorboard magic used after training is available
%load_ext tensorboard

In [4]:
#Import packages - SB3
import time
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3 import A2C, DQN, PPO
from stable_baselines3.ppo import MlpPolicy as PPOMlp

from yawning_titan.envs.generic.core.blue_interface import BlueInterface
from yawning_titan.envs.generic.core.red_interface import RedInterface
from yawning_titan.envs.generic.generic_env import GenericNetworkEnv
from yawning_titan.envs.generic.core.action_loops import ActionLoop
from yawning_titan.envs.generic.core.network_interface import NetworkInterface
from yawning_titan.networks.network_db import default_18_node_network
import yawning_titan.game_modes
from yawning_titan.envs.generic.core.action_loops import ActionLoop

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
## Open the game mode database.
## GameModeSchema and DocMetadataSchema are imported for building search queries
## against it (see the commented-out examples in the following cell).
from yawning_titan.game_modes.game_mode_db import GameModeDB, GameModeSchema
from yawning_titan.db.doc_metadata import DocMetadataSchema
db = GameModeDB()

In [6]:
# Example
#results = db.search(GameModeSchema.CONFIGURATION.RED.AGENT_ATTACK.IGNORES_DEFENCES.all([True]))
#print(results)

#Finding my game mode
#author = db.search(DocMetadataSchema.AUTHOR == "Hannah Harrison")
#print(author)

In [7]:
# Show the entries held in the game mode DB.
# NOTE(review): True is passed positionally - presumably the verbose flag; confirm against GameModeDB.show.
# NOTE(review): the saved output of this cell is a JSONDecodeError ("Extra data"), so the
# notebook does not currently run top to bottom; cause not visible here - TODO investigate.
db.show(True)

JSONDecodeError: Extra data: line 1 column 15878 (char 15877)

In [None]:
# Fetch the game mode to train against by its UUID.
# NOTE(review): the UUID is hardcoded and presumably only exists in the author's local DB -
# confirm, or look the game mode up with db.search / db.show before running.
game_mode = db.get("919da33c-7bc9-4d29-99eb-097a7e9bb016")

In [None]:
## Build the network interface from the selected game mode and the custom network defined earlier
network_interface = NetworkInterface(game_mode=game_mode, network=network)

In [None]:
## Create the red and blue agent interfaces; both are bound to the same network interface
red = RedInterface(network_interface)
blue = BlueInterface(network_interface)

In [None]:
## Create the environment from the red and blue interfaces and the shared network interface
env = GenericNetworkEnv(red, blue, network_interface)

In [None]:
## Check the environment is compliant with the OpenAI Gym interface
## (warn=True asks the checker to emit additional warnings)
check_env(env, warn=True)
# Reset the environment after the check; the returned value is deliberately discarded
_ = env.reset()

In [None]:
## Initialise the evaluation callback: env is wrapped in a Monitor, eval_freq is 1000,
## actions are sampled stochastically (deterministic=False) and rendering is off.
## NOTE(review): the wrapped env is the same object the agent is trained on - confirm this is intended.
eval_callback = EvalCallback(Monitor(env), eval_freq=1000, deterministic=False, render=False)

In [None]:
## Create the PPO agent with the MLP policy on env; verbose=1 enables training output and
## TensorBoard logs are written under ./logs/ppo_YT_simple_tensorboard/
agent = PPO(PPOMlp, env, verbose=1, tensorboard_log="./logs/ppo_YT_simple_tensorboard/")

In [None]:
## Train agent for 100,000 timesteps (the previous comment said 1M, which did not match the code)
## NOTE(review): n_eval_episodes as a learn() argument appears to exist only in older
## Stable-Baselines3 releases - confirm against the pinned version.
agent.learn(total_timesteps=100000, n_eval_episodes=1, callback=eval_callback)
# Open TensorBoard inline on the log directory the agent writes to
%tensorboard --logdir ./logs/ppo_YT_simple_tensorboard/

In [None]:
## Save the trained agent under agent_name
## NOTE(review): presumably written as "<agent_name>.zip" relative to the working directory - confirm
agent_name = "ppo-simple-5PC"
agent.save(agent_name)

## Agent evaluation

In [None]:

# Build an environment object for evaluation.
# NOTE(review): it is constructed from the same red, blue and network_interface
# objects as the training environment - confirm that sharing them is intended.
eval_env = GenericNetworkEnv(red, blue, network_interface)

# Run 10 evaluation episodes with deterministic action selection and unpack
# the two returned values as the mean and standard deviation of the reward
mean_reward, std_reward = evaluate_policy(
    agent,
    eval_env,
    n_eval_episodes=10,
    deterministic=True,
)

# Report the evaluation result
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

In [None]:
# Run a single episode with the trained agent on the evaluation environment and
# save it as an animation with the network rendered.
#
# The output directories are raw strings: '\g' is not a valid escape sequence, so
# the non-raw literal '.\gifs' triggers an invalid-escape warning on modern Python.
# The resulting string value is unchanged.
# NOTE(review): the backslash is a Windows path separator; on other platforms
# "./gifs" is presumably what is wanted - confirm before running elsewhere.
loop = ActionLoop(eval_env, agent, episode_count=1)
loop.gif_action_loop(
    save_gif=True,
    render_network=True,
    gif_output_directory=r'.\gifs',
    webm_output_directory=r'.\gifs',
)