In [1]:
#imports for running agent
from itertools import product
import nasim
import pandas as pd
import csv
import numpy as np
import collections
import matplotlib.pyplot as plt
import networkx as nx

#Import yaml and various ways to load in the data from yaml files used in experiments
import yaml
import copy
from yaml.loader import SafeLoader, BaseLoader, FullLoader, UnsafeLoader

# Imports for documentation purposes
from typing import List, Tuple, Dict, Union

# Weird fix for matplotlib does not work without it
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

ModuleNotFoundError: No module named 'pandas'

# DQN Agent With Modifications to Obtain Hosts and Actions

In [None]:
"""An example DQN Agent.
It uses pytorch 1.5+ and tensorboard libraries (HINT: these dependencies can
be installed by running pip install nasim[dqn])
To run 'tiny' benchmark scenario with default settings, run the following from
the nasim/agents dir:
$ python dqn_agent.py tiny
To see detailed results using tensorboard:
$ tensorboard --logdir runs/
To see available hyperparameters:
$ python dqn_agent.py --help
Notes
-----
This is by no means a state of the art implementation of DQN, but is designed
to be an example implementation that can be used as a reference for building
your own agents.
"""
import random
import numpy as np
from gym import error
from pprint import pprint

import nasim

try:
    import torch
    import torch.nn as nn
    import torch.optim as optim
    import torch.nn.functional as F
    from torch.utils.tensorboard import SummaryWriter
except ImportError as e:
    raise error.DependencyNotInstalled(
        f"{e}. (HINT: you can install dqn_agent dependencies by running "
        "'pip install nasim[dqn]'.)"
    )


class ReplayMemoryDQN:

    def __init__(self, capacity, s_dims, device):
        self.capacity = capacity
        self.device = device
        self.s_buf = np.zeros((capacity, *s_dims), dtype=np.float32)
        self.a_buf = np.zeros((capacity, 1), dtype=np.int64)
        self.next_s_buf = np.zeros((capacity, *s_dims), dtype=np.float32)
        self.r_buf = np.zeros(capacity, dtype=np.float32)
        self.done_buf = np.zeros(capacity, dtype=np.float32)
        self.ptr, self.size = 0, 0

    def store(self, s, a, next_s, r, done):
        self.s_buf[self.ptr] = s
        self.a_buf[self.ptr] = a
        self.next_s_buf[self.ptr] = next_s
        self.r_buf[self.ptr] = r
        self.done_buf[self.ptr] = done
        self.ptr = (self.ptr + 1) % self.capacity
        self.size = min(self.size+1, self.capacity)

    def sample_batch(self, batch_size):
        sample_idxs = np.random.choice(self.size, batch_size)
        batch = [self.s_buf[sample_idxs],
                 self.a_buf[sample_idxs],
                 self.next_s_buf[sample_idxs],
                 self.r_buf[sample_idxs],
                 self.done_buf[sample_idxs]]
        return [torch.from_numpy(buf).to(self.device) for buf in batch]


class DQN(nn.Module):
    """A simple Deep Q-Network """

    def __init__(self, input_dim, layers, num_actions):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(input_dim[0], layers[0])])
        for l in range(1, len(layers)):
            self.layers.append(nn.Linear(layers[l-1], layers[l]))
        self.out = nn.Linear(layers[-1], num_actions)

    def forward(self, x):
        for layer in self.layers:
            x = F.relu(layer(x))
        x = self.out(x)
        return x

    def save_DQN(self, file_path):
        torch.save(self.state_dict(), file_path)

    def load_DQN(self, file_path):
        self.load_state_dict(torch.load(file_path))

    def get_action(self, x):
        with torch.no_grad():
            if len(x.shape) == 1:
                x = x.view(1, -1)
            return self.forward(x).max(1)[1]


class DQNAgent:
    """A simple Deep Q-Network Agent """

    def __init__(self,
                 env,
                 seed=None,
                 lr=0.001,
                 training_steps=20000,
                 batch_size=32,
                 replay_size=10000,
                 final_epsilon=0.05,
                 exploration_steps=10000,
                 gamma=0.99,
                 hidden_sizes=[64, 64],
                 target_update_freq=1000,
                 verbose=True,
                 **kwargs):
        
        # This DQN implementation only works for flat actions
        assert env.flat_actions
        self.verbose = verbose
        if self.verbose:
            print(f"\nRunning DQN with config:")
            pprint(locals())

        # set seeds
        self.seed = seed
        if self.seed is not None:
            np.random.seed(self.seed)

        # envirnment setup
        self.env = env

        self.num_actions = self.env.action_space.n
        self.obs_dim = self.env.observation_space.shape

        # logger setup
        self.logger = SummaryWriter()

        # Training related attributes
        self.lr = lr
        self.exploration_steps = exploration_steps
        self.final_epsilon = final_epsilon
        self.epsilon_schedule = np.linspace(1.0,
                                            self.final_epsilon,
                                            self.exploration_steps)
        self.batch_size = batch_size
        self.discount = gamma
        self.steps_done = 0

        # Neural Network related attributes
        self.device = torch.device("cuda"
                                   if torch.cuda.is_available()
                                   else "cpu")
        self.dqn = DQN(self.obs_dim,
                       hidden_sizes,
                       self.num_actions).to(self.device)
        if self.verbose:
            print(f"\nUsing Neural Network running on device={self.device}:")
            print(self.dqn)

        self.target_dqn = DQN(self.obs_dim,
                              hidden_sizes,
                              self.num_actions).to(self.device)
        self.target_update_freq = target_update_freq

        self.optimizer = optim.Adam(self.dqn.parameters(), lr=self.lr)
        self.loss_fn = nn.SmoothL1Loss()

        # replay setup
        self.replay = ReplayMemoryDQN(replay_size, self.obs_dim, self.device)

    def save(self, save_path):
        self.dqn.save_DQN(save_path)

    def load(self, load_path):
        self.dqn.load_DQN(load_path)

    def get_epsilon(self):
        if self.steps_done < self.exploration_steps:
            return self.epsilon_schedule[self.steps_done]
        return self.final_epsilon

    def get_egreedy_action(self, o, epsilon):
        if random.random() > epsilon:
            o = torch.from_numpy(o).float().to(self.device)
            return self.dqn.get_action(o).cpu().item()
        return random.randint(0, self.num_actions-1)

    def optimize(self):
        batch = self.replay.sample_batch(self.batch_size)
        s_batch, a_batch, next_s_batch, r_batch, d_batch = batch

        # get q_vals for each state and the action performed in that state
        q_vals_raw = self.dqn(s_batch)
        q_vals = q_vals_raw.gather(1, a_batch).squeeze()

        # get target q val = max val of next state
        with torch.no_grad():
            target_q_val_raw = self.target_dqn(next_s_batch)
            target_q_val = target_q_val_raw.max(1)[0]
            target = r_batch + self.discount*(1-d_batch)*target_q_val

        # calculate loss
        loss = self.loss_fn(q_vals, target)

        # optimize the model
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        if self.steps_done % self.target_update_freq == 0:
            self.target_dqn.load_state_dict(self.dqn.state_dict())

        q_vals_max = q_vals_raw.max(1)[0]
        mean_v = q_vals_max.mean().item()
        return loss.item(), mean_v

    def train(self, max_episodes, training_steps):
        if self.verbose:
            print("\nStarting training")

        num_episodes = 0
        training_steps_remaining = training_steps
        temp = copy.copy(training_steps_remaining)

        while num_episodes < max_episodes:
            ep_results = self.run_train_episode(training_steps_remaining)
            ep_return, ep_steps, goal = ep_results
            num_episodes += 1
            training_steps_remaining -= ep_steps

            self.logger.add_scalar("episode", num_episodes, self.steps_done)
            self.logger.add_scalar(
                "epsilon", self.get_epsilon(), self.steps_done
            )
            self.logger.add_scalar(
                "episode_return", ep_return, self.steps_done
            )
            self.logger.add_scalar(
                "episode_steps", ep_steps, self.steps_done
            )
            self.logger.add_scalar(
                "episode_goal_reached", int(goal), self.steps_done
            )

            if num_episodes % 10 == 0 and self.verbose:
                print(f"\nEpisode {num_episodes}:")
                print(f"\tsteps done = {self.steps_done} / "
                      f"{temp}")
                print(f"\treturn = {ep_return}")
                print(f"\tgoal = {goal}")

        self.logger.close()
        if self.verbose:
            print("Training complete")
            print(f"\nEpisode {num_episodes}:")
            print(f"\tsteps done = {self.steps_done} / {temp}")
            print(f"\treturn = {ep_return}")
            print(f"\tgoal = {goal}")
            return num_episodes

    def run_train_episode(self, step_limit):
        o = self.env.reset()
        done = False

        steps = 0
        episode_return = 0

        while not done and steps < step_limit:
            a = self.get_egreedy_action(o, self.get_epsilon())

            next_o, r, done, _ = self.env.step(a)
            self.replay.store(o, a, next_o, r, done)
            self.steps_done += 1
            loss, mean_v = self.optimize()
            self.logger.add_scalar("loss", loss, self.steps_done)
            self.logger.add_scalar("mean_v", mean_v, self.steps_done)

            o = next_o
            episode_return += r
            steps += 1

        return episode_return, steps, self.env.goal_reached()

    def run_eval_episode(self,
                         env=None,
                         render=False,
                         eval_epsilon=0.05,
                         render_mode="readable",
                         **kwargs):
        
        # ESD Team Implementation to allow for more data to be gathered
        get_action_list = kwargs.get('al', None)
        get_visited_hosts = kwargs.get('hv', None)
        
        if get_action_list:
            action_list = []
           
        if get_visited_hosts:
            visited_hosts = []
            
        if env is None:
            env = self.env
        else:
            self.env = env
        
        o = self.env.reset()
        done = False

        steps = 0
        episode_return = 0

        line_break = "="*60
        if render:
            print("\n" + line_break)
            print(f"Running EVALUATION using epsilon = {eval_epsilon:.4f}")
            print(line_break)
            env.render(render_mode)
            input("Initial state. Press enter to continue..")

        while not done:
            a = self.get_egreedy_action(o, eval_epsilon)
            next_o, r, done, _ = env.step(a)
            
            o = next_o
            episode_return += r
            steps += 1
            if get_action_list:
                action_list.append([env.action_space.get_action(a).name, r, episode_return])
            
            if get_visited_hosts:
                visited_hosts.append(env.action_space.get_action(a).target)
            if render:
                print("\n" + line_break)
                print(f"Step {steps}")
                print(line_break)
                print(f"Action Performed = {env.action_space.get_action(a)}")
                env.render(render_mode)
                print(f"Reward = {r}")
                print(f"Done = {done}")
                input("Press enter to continue..")

                if done:
                    print("\n" + line_break)
                    print("EPISODE FINISHED")
                    print(line_break)
                    print(f"Goal reached = {env.goal_reached()}")
                    print(f"Total steps = {steps}")
                    print(f"Total reward = {episode_return}")

        # This is the handle the case when we taken both the visited hosts and the actions of form [action_list, visited_hosts]
        res = []
        
        if get_action_list:
            res.append(action_list)
        
        if get_visited_hosts:
            res.append(visited_hosts)

        return res if res else None

# Functions for Analyzing Optimal Policy

### Functions to Isolate Various Metrics

In [None]:
"""
Isolates the action name from the list of actions discarding the targeted host and
the actions reward and probability
:param action_list: list containing the action name, probability and reward
:returns: list of each aaction taken
"""
def isolate_action_names(action_list: List[str]) -> List[str]:
    # The action name is the first item in the action list
    actions_taken = [action_list[i][0] for i in range(len(action_list))]
    return actions_taken

### Functions for Analysis of Most Visited Host

In [None]:
"""
Finds the most visited host in any given path taken by the agent
:param hosts_visited: list of all hosts visited on an agents path
:returns: the most visited host in form (subnet, host)
"""
def find_most_visited_host(hosts_visited: List[Tuple[int, int]]) -> Tuple[int, int]:
    host_visits = collections.defaultdict(int)
    
    for host in hosts_visited:
         host_visits[host] += 1
            
    most_visited_host = max(host_visits, key=host_visits.get)
    
    return most_visited_host

In [30]:
"""
Creates a bar graph plot of the number of times each host was visited
:param host_frequencies: A dictionary containing each host and the associated number of visits
"""
def create_host_frequencies_plot(host_frequencies: Dict[Tuple[int, int], int]) -> None:
   # Convert the tuples to strings to make plotting the data easier 
    host_frequencies = {str(k) : v for k, v in host_frequencies.items()}
    plt.bar(*zip(*host_frequencies.items()))
    plt.show()

In [31]:
"""
Creates a directed graph modelleing the path the agent took through the network
:param hosts_visited: list of all hosts visited on an agents path in the order they were visited
TODOS:
HIGHLIGHT THE MOST VISITED HOST A SEPERATE COLOR
LABEL THE THE ORDER OF THE EDGES (ie 1, 2, 3)
FIX TO DIPLAY ALL THE EDGES NOT A SELECT FEW
"""
def model_agent_path(hosts_visited: List[Tuple[int, int]]) -> None:
    
    # Get the edges from the order of the visited nodes
    edges = [(hosts_visited[i], hosts_visited[i + 1]) for i in range(len(hosts_visited) - 1)]
    
    # Create directed graph to represent the path the agent took
    G = nx.MultiDiGraph()
    G.add_edges_from(edges)
    nx.draw_networkx(G, with_labels=True, connectionstyle="arc3, rad = 0.1")
    plt.show()

### Functions for Analysis of Most Frequent Actions

In [32]:
"""
Finds the action most frequently taken by the agent while traversing the network
:param actions_taken: list of names of each action taken by the agent
:returns: the most frequently taken action
"""
def find_most_frequent_action(actions_taken: List[str]) -> str:
    action_cntr = collections.defaultdict(int)
    
    for action in actions_taken:
        action_cntr[action] += 1
    
    most_frequent_action = max(action_cntr, key=action_cntr.get)

    return most_frequent_action 

In [33]:
"""
Creates a bar graph plot of the frequency of each action
:param action_frequencies: dictionary containing each exploit and the associated number of times the action was executed
"""
def create_action_plot(action_frequencies: Dict[str, int]) -> None:
    plt.bar(*zip(*action_frequencies.items()))
    plt.xticks(rotation=30)
    plt.show()

### Function to Analyze the Most Frequent Action Enacted on a Specific Host

In [34]:
"""
Gets the frequencies of the type of action taken on a specific host
:param hosts_and_actions: list of all the hosts visitied and the associated action taken during each host visit
:param specified_host: the specified host for which we are getting the action frequencies
:returns: a list ordered in non-descending order of each action and the number of times it was taken on the specified host
"""
def host_action_frequencies(hosts_and_actions: List[Union[Tuple[int, int], str]], specified_host: List[Union[int, int]]) -> List[List[Union[str, int]]]:
    action_frequencies = collections.defaultdict(int)
        
    for host, action in hosts_and_actions:
        if host == specified_host:
             action_frequencies.append(action)
                
    action_frequency_list = action_frequency.items()
    action_frequency_list.sort(key = lambda x: x[1])
    
    return action_frequency_list

### Comparison Functions

In [35]:
"""
Compares the number of time each host was visited for two different agent paths and returns the difference as well as the number of times each host was visited
:param visited_hosts1: a list of hosts visited by an agent
:param visited_hosts2: a list of hosts visited by an agent that differs from visited_hosts1
:returns: a dictionary containing each host and the associated difference in visited as well as the number of visits for both host in form [diff, host1, host2]
"""
def common_visited_hosts(visited_hosts1: List[Tuple[int, int]], visited_hosts2: List[Tuple[int, int]]) -> Dict[Tuple[int, int], Tuple[int, int, int]]:
    common_hosts = collections.defaultdict(list)
    host1_cntr, host2_cntr = collections.Counter(visited_hosts1), collections.Counter(visited_hosts2)
    
    for key in host1_cntr:
        if key in host2_cntr:
            diff = abs(host1_cntr[key] - host2_cntr[key])
            common_hosts[key].append((diff, host1_cntr[key], host2_cntr[key]))
            
        else:
            common_hosts[key].append(host1_cntr, 0, host1_cntr)
            
    common_hosts.sort()
    
    return common_hosts

In [36]:
"""
Compares two paths to see the similarity of their moves
:param path: the path that we are comparing to a baseline path
:param path_baseline: the baseline path that we are comparing to
:returns: a counter with each action and the number of times it was taken in path with respect to path_baseline, a positive value means that the specific action was performed more in the path and
a negative value means that the action was performed more by in the baseline path 
"""
def compare_action_path(path: List[Union[Tuple[int, int], str]], path_baseline: List[Union[Tuple[int, int], str]]) -> Dict[List[Union[Tuple[int, int], str]], int]: 
    
    # Create a directed graph for each path, where the hosts are nodes that have edge weights of the action taken
    path_edges = [[(path[i][0], path[i + 1][0]), path[i][1]] for i in range(len(path))]
    baseline_edges = [[(path_baseline[i][0], path_baseline[i + 1][0]), path_baseline[i][1]] for i in range(len(path_baseline))]
    
    # Get a count of the number of times an edge was travered for both paths
    path_cntr = collections.Counter(path_edges)
    baseline_cntr = collections.Counter(baseline_edges)
    
    # Compare the number of times an action was performed for each path 
    for key in path_cntr:
        path_cntr[key] -= path_baseline[key]
    
    path_cntr.sort()
    
    return same_steps

In [37]:
"""
EXPERIMENTAL

Given two paths taken by agents through a given network, we will compare the similarity of the two paths, by first creating a weighted, directed graph 
"""
def experimental_action_path_comparison(path: List[Union[Tuple[int, int], str]], path_baseline: List[Union[Tuple[int, int], str]]) -> Dict[List[Union[Tuple[int, int], str]], int]:
    
   # Create a directed graph for each path, where the hosts are nodes that have edge weights of the action taken
    path_edges = [[(path[i][0], path[i + 1][0]), path[i][1]] for i in range(len(path))]
    baseline_edges = [[(path_baseline[i][0], path_baseline[i + 1][0]), path_baseline[i][1]] for i in range(len(path_baseline))] 
    return path_edges
    

### Functions for Formatting Various Metric Outputs

In [38]:
"""
Simple function to be easily able to convert the abbreviation of an action to its full name
:param action: the abbreviation of an action in the enviornment
:returns: the full name of the action
"""
def abbreviation_to_name(action: str) -> str:
    abbrev_to_name = {
        "e_ssh" : "SSH Exploit",
        "e_ftp" : "FTP Exploit",
        "e_http" : "HTTP Exploit",
        "pe_tomcat" : "Tomcat Privilege Escalation",
        "pe_daclsvc" : "Daclsvc Privilege Escalation",
        "service_scan" : "Service Scan",
        "os_scan" : "Operating System Scan",
        "process_scan" : "Process Scan",
        "subnet_scan" : "Subnet Scan"
    }
    
    return abbrev_to_name[action] if action in abbrev_to_name else "Error: Invalid action name!"

In [39]:
"""
Formats and prints output for most frequently taken action
:param most_frequent_action: action most frequently taken by the agent during the episode
"""
def print_most_frequent_action(most_frequent_action: str) -> None: 
    print(f"The most frequent action taken by the agent during the episode was {abbreviation_to_name(most_frequent_action)} ({most_frequent_action})")

In [40]:
"""
Formats and prints the output for the most frequently visited host
:param most_visited_host: most visited host in the network
"""
def print_most_visited_host(most_visited_host: Tuple[int, int]) -> None:
    print(f"The host most visited by the agent during the episode was host [{most_visited_host[1]}] in subnet [{most_visited_host[0]}]")

### Gromov-Hausdorff Convergence Functions For Graph Comparison

In [41]:
def action_weights(action):
    
    action_to_weight = {
        "e_ssh" : 1,
        "e_ftp" : 2,
        "e_http" : 3,
        "pe_tomcat" : 4,
        "pe_daclsvc" : 5,
        "service_scan" : 6,
        "os_scan" : 7,
        "process_scan" : 8,
        "subnet_scan" : 9
    }
    
    return action_to_weight[action]

In [42]:
def create_graph(path):
    """Create graph with the adjusted weights for use in gromov-hausdorff calculation"""
    
    graph = [[(path[i][0], path[i + 1][0]), action_weights[path[i][1]]] for i in range(len(path))]    
    
    return graph

In [43]:
#def gromov_hausdorff_distance(graph1, graph2):
    

# Functions that split training and testing

In [44]:
"""
Creates agent based on selected yaml environment
:param agent_name: name of desired agent type
:param yaml: name of the desired yaml environment
:returns: the untrained agent with the desired specifications
"""
def create_agent(agent_name, yaml):
    nasimenv = nasim.load(yaml)
    
    if (agent_name == 'ql'):
        agent = TabularQLearningAgent(nasimenv)
    elif (agent_name == 'dqn'):
        agent = DQNAgent(nasimenv)
    elif (agent_name == 'ql_replay'):
        agent = TabularQLearningReplayAgent(nasimenv)
    else:
        return "Enter valid QL agent to run"
    
    return agent

In [45]:
"""
Trains the agent based on user specifications
:param agent: agent to be trained
:param max_episodes: maximum number of training episodes for the agent
:param training_steps: number of steps the agent will train for
:returns: the number of episodes the agent trained for
"""
def train_agent(agent, max_episodes, training_steps):
    n_episodes = agent.train(max_episodes, training_steps)
    return n_episodes 

In [46]:
"""
TODO: Function to make testing based on various paramters easy
"""
def test_agent(agent, **kwargs):
    alt_env = kwargs.get('env', None)
    al = kwargs.get('al', None)
    if al and alt_env:
        reward = agent.run_eval_episode(env=alt_env, al=True)
    elif alt_env:
        reward = agent.run_eval_episode(env=alt_env)
    elif al:
        reward = agent.run_eval_episode(al=True)
    else:
        reward = agent.run_eval_episode()
    return reward


In [47]:
"""
Function to add a new sensitive host to a given network by editing the yaml file of the network
:param subnet: the subnet the host will be added to
:param position: the position of the host in the given subnet
:param value: value to be rewarded for accessing the sensative host
""" 
def add_new_sensitive_host(subnet: int, position: int, value: int) -> None:
        address = '(' + str(x) + ', ' + str(y) + ')'
        curr_data['sensitive_hosts'][address] = value

In [48]:
"""
Resets the environment for an agent
:param filepath: filepath for the network's yaml file
:param evaluation_mode:
"""
def reset_env(filepath: str) -> None:
    with open(filepath, "r") as stream:
        try:
            curr_data = (yaml.safe_load(stream))
        except yaml.YAMLError as exc:
            print(exc)
        
    with open(filepath, 'w') as f:
        data = yaml.dump(curr_data, f, sort_keys=False, default_flow_style=False)
        
    nasimenv = nasim.load(filepath)
    return nasimenv

In [49]:
test = create_agent("dqn", "tiny-small.yaml")
train_agent(test, 10000, 10000)

test_compare = create_agent("dqn", "tiny-small.yaml")
train_agent(test_compare, 10000, 10000)


Running DQN with config:
{'batch_size': 32,
 'env': <nasim.envs.environment.NASimEnv object at 0x0000018E54335F40>,
 'exploration_steps': 10000,
 'final_epsilon': 0.05,
 'gamma': 0.99,
 'hidden_sizes': [64, 64],
 'kwargs': {},
 'lr': 0.001,
 'replay_size': 10000,
 'seed': None,
 'self': <__main__.DQNAgent object at 0x0000018E54335070>,
 'target_update_freq': 1000,
 'training_steps': 20000,
 'verbose': True}

Using Neural Network running on device=cpu:
DQN(
  (layers): ModuleList(
    (0): Linear(in_features=120, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=64, bias=True)
  )
  (out): Linear(in_features=64, out_features=45, bias=True)
)

Starting training

Episode 10:
	steps done = 2444 / 10000
	return = 29.0
	goal = True

Episode 20:
	steps done = 5567 / 10000
	return = -652.0
	goal = True

Episode 30:
	steps done = 10000 / 10000
	return = 0
	goal = False

Episode 40:
	steps done = 10000 / 10000
	return = 0
	goal = False

Episode 50:
	steps done = 10000 / 1

10000

# Testing for the Optimal Policy Metrics Functions 

### Test for Most Frequent Action and Host Functions

In [50]:
# Get list of actions and hosts form the agent's evaluation episode the list is of form [actions, host_visited]
actions_and_hosts = test.run_eval_episode(al=True, hv=True)

# Isolate actions and visited hosts
actions_taken = isolate_action_names(actions_and_hosts[0])
visited_hosts = actions_and_hosts[1]
hosts_and_associated_actions = [[visited_hosts[i], actions_taken[i]] for i in range(len(actions_taken))]

# Find the most frequent actions and most visited host
most_frequent_action = find_most_frequent_action(actions_taken)
most_visited_host = find_most_visited_host(visited_hosts)

# Print out the most taken action and most visited host
print_most_frequent_action(most_frequent_action)
print_most_visited_host(most_visited_host)

The most frequent action taken by the agent during the episode was Subnet Scan (subnet_scan)
The host most visited by the agent during the episode was host [0] in subnet [4]


### Test for Frequency Graphs

#### Plot of Action Frequencies

In [51]:
create_action_plot(collections.Counter(actions_taken))

#### Plot of Host Visit Frequencies 

In [52]:
create_host_frequencies_plot(collections.Counter(visited_hosts))

### Model of Path Taken By the Agent

In [53]:
model_agent_path(visited_hosts)

### Testing Comparison Functions

In [54]:
# Get list of actions and hosts for comparison function tests
actions_and_hosts_comparison = test_compare.run_eval_episode(al=True, hv=True) 

#### Test for Visited Host Comparison Function

In [55]:
# Set up for visited hosts comparison
visited_hosts_for_comparison = actions_and_hosts_comparison[1]

In [56]:
print(common_visited_hosts(visited_hosts, visited_hosts_for_comparison))

AttributeError: 'collections.defaultdict' object has no attribute 'sort'