## Initial Code

This is all some intro code just to visualize some of the settings and get a baseline. You can run through it if you want to get familiar with what the scenario arguments, agents, and training looks like.

In [1]:
testfile = 'data.yaml'


def writeToYAML():
    with open(testfile, 'w') as f:
        data = yaml.dump(curr_data, f, sort_keys=False, default_flow_style=False)

In [2]:
def print_nested_dict(dict_obj, indent = 0):
    ''' Pretty Print nested dictionary with given indent level  
    '''
    # Iterate over all key-value pairs of dictionary
    for key, value in dict_obj.items():
        # If value is dict type, then print nested dict 
        if isinstance(value, dict):
            print(' ' * indent, key, ':', '{')
            print_nested_dict(value, indent + 4)
            print(' ' * indent, '}')
        else:
            print(' ' * indent, key, ':', value)

In [4]:
scenario_args={
    "num_hosts": 5,         # Number of hosts in the network 
    "num_services": 3,      # Number of services on the network (ssh, ftp, http)
    "num_os": 2,            # Number of operatings systems on the network (windows, linux, etc)
    "num_processes": 2,     # Number of processes on the network (tomcat, daclsvc, etc)
    "num_exploits": None,   # 
    "num_privescs": None,
    "r_sensitive": 10,
    "r_user": 10,
    "exploit_cost": 1,
    "exploit_probs": 1.0,
    "privesc_cost": 1,
    "privesc_probs": 1.0,
    "service_scan_cost": 1,
    "os_scan_cost": 1,
    "subnet_scan_cost": 1,
    "process_scan_cost": 1,
    "uniform": False,
    "alpha_H": 2.0,
    "alpha_V": 2.0,
    "lambda_V": 1.0,
    "restrictiveness": 5,
    "random_goal": False,
    "base_host_value": 1,
    "host_discovery_value": 1,
    "seed": None,
    "name": None,
    "step_limit": None}

#Scenario Generator Parameter List: https://networkattacksimulator.readthedocs.io/en/latest/reference/scenarios/generator.html#scenario-generator

In [5]:
import nasim
import json
env = nasim.generate(**scenario_args)
env = nasim.make_benchmark("huge-gen")
env = nasim.load("unreachable.yaml")

scenario_desc = env.scenario.get_description()
scenario_dict = env.scenario.scenario_dict
print_nested_dict(scenario_desc,4)
print_nested_dict(scenario_dict,6)


     Name : unreachable
     Type : static
     Subnets : 4
     Hosts : 3
     OS : 1
     Services : 1
     Processes : 1
     Exploits : 1
     PrivEscs : 1
     Actions : 18
     Observation Dims : (4, 14)
     States : 576
     Step Limit : 1000
       subnets : [1, 1, 1, 1]
       topology : [[1, 1, 0, 0], [1, 1, 1, 1], [0, 1, 1, 1], [0, 1, 1, 1]]
       os : ['linux']
       services : ['ssh']
       processes : ['tomcat']
       sensitive_hosts : {
           (2, 0) : 100
           (3, 0) : 100
       }
       exploits : {
           e_ssh : {
               service : ssh
               os : linux
               prob : 0.8
               cost : 1
               access : 1
           }
       }
       privilege_escalation : {
           pe_tomcat : {
               process : tomcat
               os : linux
               prob : 1.0
               cost : 1
               access : 2
           }
       }
       os_scan_cost : 1
       service_scan_cost : 1
       subnet_scan_cos

In [33]:
#env.get_minimum_actions()

In [6]:
from nasim.agents.ql_agent import TabularQLearningAgent

ql_agent = TabularQLearningAgent(env, verbose=1, training_steps=50000)
ql_agent.train()

  from .autonotebook import tqdm as notebook_tqdm



Running Tabular Q-Learning with config:
{'env': <nasim.envs.environment.NASimEnv object at 0x11ebe0b20>,
 'exploration_steps': 10000,
 'final_epsilon': 0.05,
 'gamma': 0.99,
 'kwargs': {},
 'lr': 0.001,
 'seed': None,
 'self': <nasim.agents.ql_agent.TabularQLearningAgent object at 0x11eb01210>,
 'training_steps': 50000,
 'verbose': 1}

Starting training


AttributeError: module 'numpy' has no attribute 'int'.
`np.int` was a deprecated alias for the builtin `int`. To avoid this error in existing code, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

## Current Code 
Here is the main code to test/run.

In [6]:
# Initial scenario arguments... we will be editing the number of hosts by marking actions involving them as invalid
scenario_args={
    "num_hosts": 5,
    "num_services": 3,
    "num_os": 2,
    "num_processes": 2,
    "num_exploits": None,
    "num_privescs": None,
    "r_sensitive": 10,
    "r_user": 10,
    "exploit_cost": 1,
    "exploit_probs": 1.0,
    "privesc_cost": 1,
    "privesc_probs": 1.0,
    "service_scan_cost": 1,
    "os_scan_cost": 1,
    "subnet_scan_cost": 1,
    "process_scan_cost": 1,
    "uniform": False,
    "alpha_H": 2.0,
    "alpha_V": 2.0,
    "lambda_V": 1.0,
    "restrictiveness": 5,
    "random_goal": False,
    "base_host_value": 1,
    "host_discovery_value": 1,
    "seed": None,
    "name": None,
    "step_limit": None}

In [7]:
# Defining Python user-defined exceptions
class SensitiveHostRemovalException(Exception):
    "Raised when selected network host cannot be removed (sensitive host needs to remain in network)"
    pass

class PublicHostRemovalException(Exception):
    "Raised when selected network host cannot be removed (public host to enter the network... specific to this configuration)"
    pass

In [8]:
# Import necessary libraries, including which methods will be redefined
import nasim
import random
from nasim.envs.action import Action
from nasim.agents.dqn_agent import DQNAgent
from nasim.envs.environment import NASimEnv

# User-defined Python method to check whether the selected blocked_host is valid to select
def check_host_valid(self, blocked_host):
    if blocked_host == -1:
        return
    elif self.env.network.address_space[blocked_host] in self.env.network.get_sensitive_hosts():
        raise SensitiveHostRemovalException
    elif blocked_host == 0:
        raise PublicHostRemovalException
    else:
        return
# Setting the method
DQNAgent.check_host_valid = check_host_valid
    
# Redefining the DQNAgent run_train_episode method
def run_train_episode(self, step_limit):
        done = False
        env_step_limit_reached = False
        steps = 0
        episode_return = 0
        max_host_index = len(self.env.network.host_num_map) - 1
        
        # Choosing random host index to be invalid... try/catch loop until valid host selected to block. Note: If -1, no host will be marked invalid
        blocked_host = -1
        if self.steps_done > 0:
            while True:
                try:
                    blocked_host = random.randint(-1,max_host_index)
                    self.check_host_valid(blocked_host)
                    break
                except SensitiveHostRemovalException:
                    pass
                except PublicHostRemovalException:
                    pass
                
        o = self.env.reset()
        
        # If you wanted to see which host was blocked... used for the logging
        print("Blocked host index:  " + str(blocked_host))
        
        while not done and not env_step_limit_reached: #and steps < step_limit:
            # Keep generating an action in the action space until it does not involve a blocked host
            while True:
                a = self.get_egreedy_action(o, self.get_epsilon())
                
                if blocked_host == -1:
                    break
                else:
                    action = self.env.action_space.get_action(a)
                    target_host_index = self.env.network.host_num_map[action.target]
                    if target_host_index != blocked_host:
                        break
                
            next_o, r, done, env_step_limit_reached, _ = self.env.step(a)
            self.replay.store(o, a, next_o, r, done)
            self.steps_done += 1
            loss, mean_v = self.optimize()
            
            o = next_o
            episode_return += r
            steps += 1

        return episode_return, steps, self.env.goal_reached()

# Setting the method
DQNAgent.run_train_episode = run_train_episode

# Training function... redefined because it wasn't converging originally
def train(self):
    if self.verbose:
        print("\nStarting training")

    num_episodes = 0
    training_steps_remaining = self.training_steps
    og_env = self.env
    
    while self.steps_done < self.training_steps:
        self.env = og_env
        ep_results = self.run_train_episode(training_steps_remaining)
        ep_return, ep_steps, goal = ep_results
        num_episodes += 1
        training_steps_remaining -= ep_steps

        self.logger.add_scalar("episode", num_episodes, self.steps_done)
        self.logger.add_scalar(
            "epsilon", self.get_epsilon(), self.steps_done
        )
        self.logger.add_scalar(
            "episode_return", ep_return, self.steps_done
        )
        self.logger.add_scalar(
            "episode_steps", ep_steps, self.steps_done
        )
        self.logger.add_scalar(
            "episode_goal_reached", int(goal), self.steps_done
        )

        if num_episodes % 10 == 0 and self.verbose:
            print(f"\nEpisode {num_episodes}:")
            print(f"\tsteps done = {self.steps_done} / "
                    f"{self.training_steps}")
            print(f"\treturn = {ep_return}")
            print(f"\tgoal = {goal}")

    self.logger.close()
    if self.verbose:
        print("Training complete")
        print(f"\nEpisode {num_episodes}:")
        print(f"\tsteps done = {self.steps_done} / {self.training_steps}")
        print(f"\treturn = {ep_return}")
        print(f"\tgoal = {goal}")
# Set the method        
DQNAgent.train = train

# You can switch to a different benchmark if you want... like the scenario args posted or your own
env = nasim.make_benchmark("small")
# Initializing and training agent
dqn_agent = DQNAgent(env, verbose=1, training_steps=50000000)
dqn_agent.train()


Running DQN with config:
{'batch_size': 32,
 'env': <nasim.envs.environment.NASimEnv object at 0x112635ae0>,
 'exploration_steps': 10000,
 'final_epsilon': 0.05,
 'gamma': 0.99,
 'hidden_sizes': [64, 64],
 'kwargs': {},
 'lr': 0.001,
 'replay_size': 10000,
 'seed': None,
 'self': <nasim.agents.dqn_agent.DQNAgent object at 0x12849c5e0>,
 'target_update_freq': 1000,
 'training_steps': 50000000,
 'verbose': 1}

Using Neural Network running on device=cpu:
DQN(
  (layers): ModuleList(
    (0): Linear(in_features=207, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=64, bias=True)
  )
  (out): Linear(in_features=64, out_features=72, bias=True)
)

Starting training
Blocked host index:  -1


ValueError: setting an array element with a sequence. The requested array would exceed the maximum number of dimension of 1.

In [24]:
dqn_agent.run_eval_episode(render=False)

ValueError: could not broadcast input array from shape (14,) into shape (23,)

## Past Attempts

This was some code that didn't end up working if you wanted to see a previous attempt

In [None]:
import numpy as np

capacity = 10
s_dims = (5,)
s_buf = np.zeros((capacity, *s_dims), dtype=np.float32)
#test_tuple.resize(test_tuple, [3,2])

print(s_buf)

In [None]:
import nasim
import random
from nasim.agents.dqn_agent import DQNAgent

def run_train_episode(self, step_limit):
        done = False
        env_step_limit_reached = False
        steps = 0
        episode_return = 0
        
        o = self.env.reset()
        
        while not done and not env_step_limit_reached: #and steps < step_limit:
            a = self.get_egreedy_action(o, self.get_epsilon())
        
            next_o, r, done, env_step_limit_reached, _ = self.env.step(a)
            self.replay.store(o, a, next_o, r, done)
            self.steps_done += 1
            loss, mean_v = self.optimize()
            
            o = next_o
            episode_return += r
            steps += 1

        return episode_return, steps, self.env.goal_reached()
    
DQNAgent.run_train_episode = run_train_episode

def train(self):
    if self.verbose:
        print("\nStarting training")

    num_episodes = 0
    training_steps_remaining = self.training_steps
    max_hosts = (self.env.scenario.get_description())['Hosts']
    max_obs_dim = self.env.observation_space.shape
    
    while self.steps_done < self.training_steps:
        if self.steps_done > 0:
            print(self.env.network.address_space)
            print(self.env.network.host_num_map)
            print(self.env.network.subnets)
            print(self.env.network.topology)
            print(self.env.network.firewall)
            print(self.env.network.address_space)
            print(self.env.network.address_space_bounds)
            print(self.env.network.sensitive_addresses)
            print(self.env.network.sensitive_hosts)

            self.env.observation_space = prev_observation_space
            self.num_actions = prev_num_actions
            self.obs_dim = prev_obs_dim
            self.replay = ReplayMemory(prev_replay_size,
                                   #self.obs_dim,
                                   #self.device)
            
            prev_observation_space = self.env.observation_space
            prev_num_actions = self.num_actions
            prev_obs_dim = self.obs_dim
            prev_replay = self.replay
            
            scenario_args.update(num_hosts=random.randint(3,max_hosts))
            
            self.env =  nasim.generate(**scenario_args)
            self.env.observation_space = prev_observation_space
            self.num_actions = prev_num_actions
            self.obs_dim = prev_obs_dim
            self.replay = prev_replay
            
        ep_results = self.run_train_episode(training_steps_remaining)
        ep_return, ep_steps, goal = ep_results
        num_episodes += 1
        training_steps_remaining -= ep_steps

        self.logger.add_scalar("episode", num_episodes, self.steps_done)
        self.logger.add_scalar(
            "epsilon", self.get_epsilon(), self.steps_done
        )
        self.logger.add_scalar(
            "episode_return", ep_return, self.steps_done
        )
        self.logger.add_scalar(
            "episode_steps", ep_steps, self.steps_done
        )
        self.logger.add_scalar(
            "episode_goal_reached", int(goal), self.steps_done
        )

        if num_episodes % 10 == 0 and self.verbose:
            print(f"\nEpisode {num_episodes}:")
            print(f"\tsteps done = {self.steps_done} / "
                    f"{self.training_steps}")
            print(f"\treturn = {ep_return}")
            print(f"\tgoal = {goal}")

    self.logger.close()
    if self.verbose:
        print("Training complete")
        print(f"\nEpisode {num_episodes}:")
        print(f"\tsteps done = {self.steps_done} / {self.training_steps}")
        print(f"\treturn = {ep_return}")
        print(f"\tgoal = {goal}")
            
DQNAgent.train = train

print(scenario_args)
env = nasim.generate(**scenario_args)
dqn_agent = DQNAgent(env, verbose=1, training_steps=100000)
dqn_agent.train()
dqn_agent.run_eval_episode(render=args.render_eval)