In [1]:
#Import yaml and various ways to load in the data from the yaml file

import yaml
from yaml.loader import SafeLoader, BaseLoader, FullLoader, UnsafeLoader

In [2]:
#Load in data from a yaml file using a safe load. Make sure to update curr_data whenever we switch to a new network.

def reset(filepath):
    """Load a YAML file with ``yaml.safe_load`` and return the parsed data.

    Parameters
    ----------
    filepath : str
        path of the YAML file to read

    Returns
    -------
    object
        whatever ``yaml.safe_load`` produced for the file contents

    Raises
    ------
    yaml.YAMLError
        if the file cannot be parsed; the error is printed and then re-raised
    """
    with open(filepath, "r") as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # Re-raise after reporting: falling through to a `return data`
            # with `data` never assigned would hide the real parse error
            # behind an UnboundLocalError.
            print(exc)
            raise

In [3]:
#imports for running agent
import csv
import tempfile
from itertools import product

import nasim
import pandas as pd

  from numpy.dual import register_func
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from numpy import (exp, inf, pi, sqrt, floor, sin, cos, around, int,


In [4]:
#run bruteforce agent
LINE_BREAK = "-"*60


def run_bruteforce_agent(env, cols, step_limit=1e6, verbose=True):
    """Run bruteforce agent on nasim environment.

    The agent walks through the action space in a fixed order, repeating the
    cycle until the episode ends or ``step_limit`` is reached. The info dict
    returned by every ``env.step`` call is appended to ``states.csv`` and one
    summary row per episode is appended to ``bruteforce_agent.csv``.

    Parameters
    ----------
    env : nasim.NASimEnv
        the nasim environment to run agent on
    cols : sequence
        only ``cols[2]`` and ``cols[3]`` are read; they are written to the
        'Network' and 'Change' columns of the summary row
    step_limit : int, optional
        the maximum number of steps to run agent for (default=1e6)
    verbose : bool, optional
        whether to print out progress messages or not (default=True)

    Returns
    -------
    int
        timesteps agent ran for
    float
        the total reward recieved by agent
    bool
        whether the goal was reached or not
    """
    if verbose:
        print(LINE_BREAK)
        print("STARTING EPISODE")
        print(LINE_BREAK)
        print("t: Reward")

    env.reset()
    total_reward = 0
    done = False
    steps = 0
    cycle_complete = False
    info = None

    if env.flat_actions:
        act = 0
    else:
        act_iter = product(*[range(n) for n in env.action_space.nvec])

    while not done and steps < step_limit:
        if env.flat_actions:
            # The counter is advanced before it is used, so the first action
            # taken is index 1 and index 0 is only tried when the cycle wraps.
            act = (act + 1) % env.action_space.n
            cycle_complete = (steps > 0 and act == 0)
        else:
            try:
                act = next(act_iter)
                cycle_complete = False
            except StopIteration:
                act_iter = product(*[range(n) for n in env.action_space.nvec])
                act = next(act_iter)
                cycle_complete = True

        # Keep the 4th element (the info dict) under its own name; binding
        # both the 1st and 4th element to `_` made the logged "state" the
        # info dict by accident of unpacking order.
        _, rew, done, info = env.step(act)
        step_row = pd.DataFrame({'States': [info],
                                 'Steps': [steps]})
        step_row.to_csv('states.csv', mode = 'a', index = False, header = False)

        total_reward += rew

        if verbose:
            if cycle_complete:
                print(f"{steps}: {total_reward}")
            print("Total reward at step " + str(steps) + ": " + str(total_reward))
        steps += 1

    if done and verbose:
        print(LINE_BREAK)
        print("EPISODE FINISHED")
        print(LINE_BREAK)
        print(f"Goal reached = {env.goal_reached()}")
        print(f"Total steps = {steps}")
        print(f"Total reward = {total_reward}")
        print(f"Terminating state = {info}")
        # Rows are appended above without a header, so selecting columns by
        # name here relies on states.csv already starting with a
        # "States,Steps" header line.
        stateData = pd.read_csv("states.csv", usecols=['States', 'Steps'])
        print(str(stateData["States"][0]))
    elif verbose:
        print(LINE_BREAK)
        print("STEP LIMIT REACHED")
        print(LINE_BREAK)

    if done:
        done = env.goal_reached()

    summary = pd.DataFrame({'Reward': [total_reward],
                            'Steps': [steps],
                            'Network': [cols[2]],
                            'Change': [cols[3]]})
    summary.to_csv('bruteforce_agent.csv', mode = 'a', index = False, header = False)

    return steps, total_reward, done

  and should_run_async(code)


In [5]:
#run random agent
LINE_BREAK = "-"*60


def run_random_agent(env, cols, step_limit=1e6, verbose=True):
    """Run an agent that samples actions at random on a nasim environment.

    One summary row per episode is appended to ``random_agent.csv``.

    Parameters
    ----------
    env : nasim.NASimEnv
        the nasim environment to run agent on
    cols : sequence
        only ``cols[2]`` and ``cols[3]`` are read; they are written to the
        'Network' and 'Change' columns of the summary row
    step_limit : int, optional
        the maximum number of steps to run agent for (default=1e6)
    verbose : bool, optional
        whether to print out progress messages or not (default=True)

    Returns
    -------
    int
        timesteps agent ran for
    float
        the total reward received by agent
    bool
        whether the goal was reached or not
    """
    if verbose:
        print(LINE_BREAK)
        print("STARTING EPISODE")
        print(LINE_BREAK)
        print("t: Reward")

    env.reset()
    total_reward = 0
    done = False
    t = 0

    while not done and t < step_limit:
        a = env.action_space.sample()
        _, r, done, _ = env.step(a)
        total_reward += r
        if (t+1) % 100 == 0 and verbose:
            print(f"{t}: {total_reward}")
        t += 1

    if done and verbose:
        print(LINE_BREAK)
        print("EPISODE FINISHED")
        print(LINE_BREAK)
        print(f"Total steps = {t}")
        print(f"Total reward = {total_reward}")
    elif verbose:
        print(LINE_BREAK)
        print("STEP LIMIT REACHED")
        print(LINE_BREAK)

    if done:
        done = env.goal_reached()

    # The step counter in this function is `t`; there is no local `steps`.
    summary = pd.DataFrame({'Reward': [total_reward],
                            'Steps': [t],
                            'Network': [cols[2]],
                            'Change': [cols[3]]})
    summary.to_csv('random_agent.csv', mode = 'a', index = False, header = False)

    return t, total_reward, done

In [6]:
def run_env(filepath, change, agent, episodes=5):
    """Load a scenario and run the chosen agent on it for several episodes.

    Parameters
    ----------
    filepath : str
        scenario path handed to ``nasim.load``; the part after the last "/"
        (or the whole string when there is no "/") is recorded as the
        network name
    change : str
        label describing the modification under test; passed to the agent
        runner as ``cols[3]``
    agent : str
        which runner to use: 'bruteforce' or 'random'
    episodes : int, optional
        how many episodes to run (default=5)

    Raises
    ------
    ValueError
        if ``agent`` is not one of the supported names
    """
    if agent not in ('bruteforce', 'random'):
        # A misspelled agent name used to run nothing at all without any hint.
        raise ValueError(
            f"unknown agent {agent!r}; expected 'bruteforce' or 'random'")

    env = nasim.load(filepath)
    # Named `network` rather than `yaml` so the imported module is not shadowed.
    network = filepath.rsplit("/", 1)[-1]

    if __name__ != "__main__":
        return

    for _ in range(episodes):
        cols = ['Reward', 'Steps', network, change]
        if agent == 'bruteforce':
            run_bruteforce_agent(env, cols)
        else:
            run_random_agent(env, cols)

In [7]:
#Edited the add subnet function to also update topology
def add_new_subnet(numOfHosts):
    """Append a subnet of ``numOfHosts`` hosts and grow the topology matrix.

    Every existing topology row gains a trailing 0, then a new all-zero row
    of the same width is appended.
    """
    curr_data['subnets'].append(numOfHosts)

    topology = curr_data['topology']
    for row in topology:
        row.append(0)

    width = len(topology[0])
    topology.append([0] * width)
    
def add_new_os(os):
    """Add ``os`` to curr_data['os'], starting a fresh list if it is empty."""
    known = curr_data['os']
    if len(known) < 1:
        curr_data['os'] = [os]
        return
    known.append(os)

def add_new_process(process):
    """Add ``process`` to curr_data['processes'], starting a fresh list if it is empty."""
    if (len(curr_data['processes'])) >= 1:
        curr_data['processes'].append(process)
    else:
        # The key must be 'processes'; a misspelled key here would leave the
        # real list empty and add a stray entry to the scenario data.
        curr_data['processes'] = [process]
    
def add_new_service(service):
    """Add ``service`` to curr_data['services'], starting a fresh list if it is empty."""
    known = curr_data['services']
    if len(known) < 1:
        curr_data['services'] = [service]
        return
    known.append(service)
    
def add_new_exploit(service, os, prob, cost, access):
    """Store an exploit definition under the key ``'e_' + service``."""
    definition = dict(service=service, os=os, prob=prob, cost=cost, access=access)
    exploits = curr_data['exploits']
    exploits['e_' + service] = definition
    
def add_new_privilege(process, os, prob, cost, access):
    """Store a privilege-escalation definition under the key ``'pe_' + process``."""
    definition = dict(process=process, os=os, prob=prob, cost=cost, access=access)
    escalations = curr_data['privilege_escalation']
    escalations['pe_' + process] = definition
    
def add_new_firewall(x, y, services):
    """Set curr_data['firewall']['(x, y)'] to ``services``."""
    firewall = curr_data['firewall']
    firewall[f"({x!s}, {y!s})"] = services
    
def add_new_sensitive_host(x, y, value):
    """Set curr_data['sensitive_hosts']['(x, y)'] to ``value``."""
    sensitive = curr_data['sensitive_hosts']
    sensitive[f"({x!s}, {y!s})"] = value
    
#Edited the add host function to also update both subnets and topology
def add_new_host_config(x, y, os, services, processes):
    """Store a host configuration for address ``(x, y)`` and grow its subnet.

    Parameters
    ----------
    x : int
        subnet number of the host; the size of that subnet is kept at
        ``curr_data['subnets'][x - 1]``, the same indexing
        ``remove_host_config`` uses
    y : int
        host number within the subnet
    os, services, processes
        stored unchanged under the 'os', 'services' and 'processes' keys

    Notes
    -----
    If ``x`` is past the last known subnet a new one-host subnet is added via
    ``add_new_subnet``. Otherwise the subnet grows by one host when ``y`` is
    not already covered by its current size. Assumes host numbers start at 0
    within a subnet - TODO confirm against the scenario files.
    """
    address = '(' + str(x) + ', ' + str(y) + ')'
    curr_data['host_configurations'][address] = {'os': os, 'services': services, 'processes': processes}

    subnet_sizes = curr_data['subnets']
    if x > len(subnet_sizes):
        add_new_subnet(1)
    elif y >= subnet_sizes[x - 1]:
        subnet_sizes[x - 1] += 1
        
    
def add_new_scan_cost(keyword, cost):
    """Set the top-level ``<keyword>_scan_cost`` entry of curr_data to ``cost``."""
    entry_name = keyword + '_scan_cost'
    curr_data[entry_name] = cost

In [8]:
#Edited the remove subnet function to also update topology
def remove_subnet(subnetAddress):
    """Remove one subnet together with its topology row and column.

    Parameters
    ----------
    subnetAddress : int
        index into ``curr_data['subnets']`` of the subnet to remove

    Raises
    ------
    IndexError
        if ``subnetAddress`` is out of range

    Notes
    -----
    The topology may have more rows than there are subnets (presumably one
    leading row/column for the internet - TODO confirm), so the matching
    topology index is the subnet index shifted by that difference.
    Rows and columns are deleted by position; deleting by value would remove
    the first equal element and could reorder the remaining entries.
    """
    subnets = curr_data['subnets']
    topology = curr_data['topology']

    # Resolve negative indices and reject out-of-range ones before mutating.
    index = range(len(subnets))[subnetAddress]
    topology_index = index + (len(topology) - len(subnets))

    del subnets[index]
    del topology[topology_index]
    for row in topology:
        del row[topology_index]
#def remove_topology(topologyIndex):
    #del curr_data['topology'][topologyIndex]
    
def remove_os(os):
    """Remove the first occurrence of ``os`` from curr_data['os'], if present."""
    if os not in curr_data['os']:
        return
    curr_data['os'].remove(os)
        
def remove_process(process):
    """Remove the first occurrence of ``process`` from curr_data['processes'], if present."""
    if process not in curr_data['processes']:
        return
    curr_data['processes'].remove(process)
        
def remove_service(service):
    """Remove the first occurrence of ``service`` from curr_data['services'], if present."""
    if service not in curr_data['services']:
        return
    curr_data['services'].remove(service)
        
def remove_exploit(exploitName):
    """Delete the exploit stored under ``exploitName``; KeyError if it is absent."""
    exploits = curr_data['exploits']
    del exploits[exploitName]
    
def remove_privilege(privilegeName):
    """Delete the privilege escalation stored under ``privilegeName``; KeyError if it is absent."""
    escalations = curr_data['privilege_escalation']
    del escalations[privilegeName]
    
def remove_firewall(x, y):
    """Delete the firewall entry keyed '(x, y)'; KeyError if it is absent."""
    firewall = curr_data['firewall']
    del firewall[f"({x!s}, {y!s})"]
    
def remove_sensitive_host(x, y):
    """Delete the sensitive-host entry keyed '(x, y)'; KeyError if it is absent."""
    sensitive = curr_data['sensitive_hosts']
    del sensitive[f"({x!s}, {y!s})"]
    
def remove_host_config(x, y):
    """Delete the host configuration at ``(x, y)`` and shrink its subnet.

    The subnet size is read from ``curr_data['subnets'][x - 1]``. When the
    host was the only one in its subnet the subnet itself is removed through
    ``remove_subnet``; otherwise the size is decremented.

    Raises
    ------
    KeyError
        if no configuration is stored for the address
    IndexError
        if ``x - 1`` is not a valid index into ``curr_data['subnets']``
    """
    address = '(' + str(x) + ', ' + str(y) + ')'
    host_configs = curr_data['host_configurations']
    subnet_index = x - 1

    # Validate both lookups before changing anything, so a wrong address
    # cannot leave the subnet sizes out of step with the stored hosts.
    if address not in host_configs:
        raise KeyError(address)
    subnet_size = curr_data['subnets'][subnet_index]

    del host_configs[address]
    if subnet_size == 1:
        remove_subnet(subnet_index)
    else:
        curr_data['subnets'][subnet_index] -= 1
    
def remove_scan_cost(keyword):
    """Delete the top-level ``<keyword>_scan_cost`` entry; KeyError if it is absent."""
    entry_name = keyword + '_scan_cost'
    del curr_data[entry_name]

In [9]:
def set_step_limit(stepLimit):
    """Store ``stepLimit`` as the 'step_limit' entry of curr_data."""
    curr_data.update({'step_limit': stepLimit})
    
#Update a YAML file with modifications that have been added or removed.
#In this case a new file (for testing) is created to contain the updated YAML data.

def writeToYAML(filepath):
    """Dump curr_data to ``filepath`` as block-style YAML, keeping key order.

    The file is opened in 'w' mode, so existing contents are replaced.
    """
    dump_options = {'sort_keys': False, 'default_flow_style': False}
    with open(filepath, 'w') as out_file:
        yaml.dump(curr_data, out_file, **dump_options)
        
def edit_topology(x, y, value):
    """Set the topology matrix cell at row ``x``, column ``y`` to ``value``."""
    row = curr_data['topology'][x]
    row[y] = value

In [10]:
#File paths of the benchmark environments that are modified later on
filepaths = [
    '../scenarios/benchmark/tiny.yaml',
    '../scenarios/benchmark/medium-single-site.yaml',
    '../scenarios/benchmark/small-linear.yaml',
]

In [11]:
#A sample run of the bruteforce agent on different sized networks with modifications made to them
#
#Each entry is (benchmark scenario, sensitive host to add, sensitive hosts to remove afterwards).
#The modified scenarios are written to a scratch directory instead of over the
#benchmark files, so this cell starts from the same inputs every time it is run.
SENSITIVE_HOST_EXPERIMENTS = [
    (filepaths[0], (1, 0), [(1, 0), (2, 0)]),
    (filepaths[1], (1, 14), [(1, 14), (1, 3)]),
    (filepaths[2], (5, 0), [(3, 0)]),
]

with tempfile.TemporaryDirectory() as scratch_dir:
    for benchmark_path, added_host, removed_hosts in SENSITIVE_HOST_EXPERIMENTS:
        curr_data = reset(benchmark_path)
        run_env(benchmark_path, 'original', 'bruteforce')

        #Keep the benchmark's file name so the recorded network name is unchanged
        variant_path = scratch_dir + "/" + benchmark_path[benchmark_path.rindex("/") + 1:]

        add_new_sensitive_host(added_host[0], added_host[1], 100)
        writeToYAML(variant_path)
        run_env(variant_path, 'new sensitive host', 'bruteforce')

        for subnet, host in removed_hosts:
            remove_sensitive_host(subnet, host)
        writeToYAML(variant_path)
        run_env(variant_path, 'removed sensitive host', 'bruteforce')



------------------------------------------------------------
STARTING EPISODE
------------------------------------------------------------
t: Reward
Total reward at step 0: -1
Total reward at step 1: -2.0
Total reward at step 2: -3.0
Total reward at step 3: -4.0
Total reward at step 4: -5.0
Total reward at step 5: -6.0
Total reward at step 6: -7.0
Total reward at step 7: -8.0
Total reward at step 8: -9.0
Total reward at step 9: -10.0
Total reward at step 10: -11.0
Total reward at step 11: -12.0
Total reward at step 12: -13.0
Total reward at step 13: -14.0
Total reward at step 14: -15.0
Total reward at step 15: -16.0
Total reward at step 16: -17.0
17: -18.0
Total reward at step 17: -18.0
Total reward at step 18: -19.0
Total reward at step 19: -20.0
Total reward at step 20: -21.0
Total reward at step 21: -22.0
Total reward at step 22: -23.0
Total reward at step 23: -24.0
Total reward at step 24: -25.0
Total reward at step 25: -26.0
Total reward at step 26: -27.0
Total reward at step 27: 

Total reward at step 14: -15.0
Total reward at step 15: -16.0
Total reward at step 16: -17.0
17: -18.0
Total reward at step 17: -18.0
Total reward at step 18: -19.0
Total reward at step 19: -20.0
Total reward at step 20: -21.0
Total reward at step 21: -22.0
Total reward at step 22: -23.0
Total reward at step 23: -24.0
Total reward at step 24: -25.0
Total reward at step 25: -26.0
Total reward at step 26: -27.0
Total reward at step 27: -28.0
Total reward at step 28: -29.0
Total reward at step 29: -30.0
Total reward at step 30: -31.0
Total reward at step 31: -32.0
Total reward at step 32: -33.0
Total reward at step 33: -34.0
Total reward at step 34: 65.0
35: 64.0
Total reward at step 35: 64.0
Total reward at step 36: 63.0
Total reward at step 37: 62.0
Total reward at step 38: 61.0
Total reward at step 39: 60.0
Total reward at step 40: 59.0
Total reward at step 41: 58.0
Total reward at step 42: 57.0
Total reward at step 43: 56.0
Total reward at step 44: 55.0
Total reward at step 45: 54.0
T

Total reward at step 36: 163.0
Total reward at step 37: 162.0
Total reward at step 38: 161.0
Total reward at step 39: 160.0
Total reward at step 40: 159.0
Total reward at step 41: 158.0
Total reward at step 42: 157.0
Total reward at step 43: 156.0
Total reward at step 44: 155.0
Total reward at step 45: 154.0
Total reward at step 46: 253.0
------------------------------------------------------------
EPISODE FINISHED
------------------------------------------------------------
Goal reached = True
Total steps = 47
Total reward = 253.0
Terminating state = {'success': True, 'value': 100.0, 'services': {}, 'os': {'linux': 1.0}, 'processes': {'tomcat': 1.0}, 'access': 2, 'discovered': {}, 'connection_error': False, 'permission_error': False, 'newly_discovered': {}}
{'success': True, 'value': 0, 'services': {}, 'os': {'linux': 1.0}, 'processes': {}, 'access': {}, 'discovered': {}, 'connection_error': False, 'permission_error': False, 'newly_discovered': {}}
------------------------------------

Total reward at step 6: -7.0
Total reward at step 7: -8.0
Total reward at step 8: -9.0
Total reward at step 9: -10.0
Total reward at step 10: -11.0
Total reward at step 11: -12.0
Total reward at step 12: -13.0
Total reward at step 13: -14.0
Total reward at step 14: -15.0
Total reward at step 15: -16.0
Total reward at step 16: -17.0
17: -18.0
Total reward at step 17: -18.0
Total reward at step 18: -19.0
Total reward at step 19: -20.0
Total reward at step 20: -21.0
Total reward at step 21: -22.0
Total reward at step 22: -23.0
Total reward at step 23: -24.0
Total reward at step 24: -25.0
Total reward at step 25: -26.0
Total reward at step 26: -27.0
Total reward at step 27: -28.0
Total reward at step 28: -29.0
Total reward at step 29: -30.0
Total reward at step 30: -31.0
Total reward at step 31: -32.0
Total reward at step 32: -33.0
Total reward at step 33: -34.0
Total reward at step 34: -35.0
35: -36.0
Total reward at step 35: -36.0
Total reward at step 36: -37.0
Total reward at step 37: -

Total reward at step 4: -5.0
Total reward at step 5: -6.0
Total reward at step 6: -7.0
Total reward at step 7: -8.0
Total reward at step 8: -9.0
Total reward at step 9: -10.0
Total reward at step 10: -11.0
Total reward at step 11: -12.0
Total reward at step 12: -13.0
Total reward at step 13: -14.0
Total reward at step 14: -15.0
Total reward at step 15: -16.0
Total reward at step 16: -17.0
17: -18.0
Total reward at step 17: -18.0
Total reward at step 18: -19.0
Total reward at step 19: -20.0
Total reward at step 20: -21.0
Total reward at step 21: -22.0
Total reward at step 22: -23.0
Total reward at step 23: -24.0
Total reward at step 24: -25.0
Total reward at step 25: -26.0
Total reward at step 26: -27.0
Total reward at step 27: -28.0
Total reward at step 28: -29.0
Total reward at step 29: -30.0
Total reward at step 30: -31.0
Total reward at step 31: -32.0
Total reward at step 32: -33.0
Total reward at step 33: -34.0
Total reward at step 34: -35.0
35: -36.0
Total reward at step 35: -36.0

Total reward at step 100: -51.0
Total reward at step 101: -53.0
Total reward at step 102: -55.0
Total reward at step 103: -58.0
Total reward at step 104: 41.0
------------------------------------------------------------
EPISODE FINISHED
------------------------------------------------------------
Goal reached = True
Total steps = 105
Total reward = 41.0
Terminating state = {'success': True, 'value': 100.0, 'services': {}, 'os': {'linux': 1.0, 'windows': 0.0}, 'processes': {'tomcat': 1.0, 'daclsvc': 0.0, 'schtask': 0.0}, 'access': 2, 'discovered': {}, 'connection_error': False, 'permission_error': False, 'newly_discovered': {}}
{'success': True, 'value': 0, 'services': {}, 'os': {'linux': 1.0}, 'processes': {}, 'access': {}, 'discovered': {}, 'connection_error': False, 'permission_error': False, 'newly_discovered': {}}
------------------------------------------------------------
STARTING EPISODE
------------------------------------------------------------
t: Reward
Total reward at step 

Total reward at step 103: -158.0
Total reward at step 104: -59.0
Total reward at step 105: -60.0
Total reward at step 106: -61.0
Total reward at step 107: -62.0
Total reward at step 108: -63.0
Total reward at step 109: -64.0
Total reward at step 110: -65.0
Total reward at step 111: -68.0
Total reward at step 112: -69.0
Total reward at step 113: -71.0
Total reward at step 114: -73.0
Total reward at step 115: -76.0
Total reward at step 116: -77.0
Total reward at step 117: -78.0
Total reward at step 118: -79.0
Total reward at step 119: -80.0
Total reward at step 120: -81.0
Total reward at step 121: -82.0
Total reward at step 122: -83.0
Total reward at step 123: -86.0
Total reward at step 124: -87.0
Total reward at step 125: -89.0
Total reward at step 126: -91.0
Total reward at step 127: -94.0
Total reward at step 128: -95.0
Total reward at step 129: -96.0
Total reward at step 130: -97.0
Total reward at step 131: -98.0
Total reward at step 132: -99.0
Total reward at step 133: -100.0
Total 

Total reward at step 94: -143.0
Total reward at step 95: -144.0
Total reward at step 96: -145.0
Total reward at step 97: -146.0
Total reward at step 98: -147.0
Total reward at step 99: -150.0
Total reward at step 100: -151.0
Total reward at step 101: -153.0
Total reward at step 102: -155.0
Total reward at step 103: -158.0
Total reward at step 104: -59.0
Total reward at step 105: -60.0
Total reward at step 106: -61.0
Total reward at step 107: -62.0
Total reward at step 108: -63.0
Total reward at step 109: -64.0
Total reward at step 110: -65.0
Total reward at step 111: -68.0
Total reward at step 112: -69.0
Total reward at step 113: -71.0
Total reward at step 114: -73.0
Total reward at step 115: -76.0
Total reward at step 116: -77.0
Total reward at step 117: -78.0
Total reward at step 118: -79.0
Total reward at step 119: -80.0
Total reward at step 120: -81.0
Total reward at step 121: -82.0
Total reward at step 122: -83.0
Total reward at step 123: -86.0
Total reward at step 124: -87.0
Tota

Total reward at step 344: -419.0
Total reward at step 345: -420.0
Total reward at step 346: -421.0
Total reward at step 347: -422.0
Total reward at step 348: -423.0
Total reward at step 349: -424.0
Total reward at step 350: -425.0
Total reward at step 351: -428.0
Total reward at step 352: -429.0
Total reward at step 353: -431.0
Total reward at step 354: -433.0
Total reward at step 355: -436.0
Total reward at step 356: -437.0
Total reward at step 357: -438.0
Total reward at step 358: -439.0
Total reward at step 359: -440.0
Total reward at step 360: -441.0
Total reward at step 361: -442.0
Total reward at step 362: -443.0
Total reward at step 363: -446.0
Total reward at step 364: -447.0
Total reward at step 365: -449.0
Total reward at step 366: -451.0
Total reward at step 367: -454.0
Total reward at step 368: -455.0
Total reward at step 369: -456.0
Total reward at step 370: -457.0
Total reward at step 371: -458.0
Total reward at step 372: -459.0
Total reward at step 373: -460.0
Total rewa

Total reward at step 146: -119.0
Total reward at step 147: -122.0
Total reward at step 148: -123.0
Total reward at step 149: -125.0
Total reward at step 150: -127.0
Total reward at step 151: -130.0
Total reward at step 152: -131.0
Total reward at step 153: -132.0
Total reward at step 154: -133.0
Total reward at step 155: -134.0
Total reward at step 156: -135.0
Total reward at step 157: -136.0
Total reward at step 158: -137.0
Total reward at step 159: -140.0
Total reward at step 160: -141.0
Total reward at step 161: -143.0
Total reward at step 162: -145.0
Total reward at step 163: -148.0
Total reward at step 164: -149.0
Total reward at step 165: -150.0
Total reward at step 166: -151.0
Total reward at step 167: -152.0
Total reward at step 168: -153.0
Total reward at step 169: -154.0
Total reward at step 170: -155.0
Total reward at step 171: -158.0
Total reward at step 172: -159.0
Total reward at step 173: -161.0
Total reward at step 174: -163.0
Total reward at step 175: -166.0
Total rewa

Total reward at step 394: -493.0
Total reward at step 395: -494.0
Total reward at step 396: -495.0
Total reward at step 397: -496.0
Total reward at step 398: -497.0
Total reward at step 399: -500.0
Total reward at step 400: -501.0
Total reward at step 401: -503.0
Total reward at step 402: -505.0
Total reward at step 403: -508.0
Total reward at step 404: -509.0
Total reward at step 405: -510.0
Total reward at step 406: -511.0
Total reward at step 407: -512.0
Total reward at step 408: -513.0
Total reward at step 409: -514.0
Total reward at step 410: -515.0
Total reward at step 411: -518.0
Total reward at step 412: -519.0
Total reward at step 413: -521.0
Total reward at step 414: -523.0
Total reward at step 415: -526.0
Total reward at step 416: -527.0
Total reward at step 417: -528.0
Total reward at step 418: -529.0
Total reward at step 419: -530.0
Total reward at step 420: -531.0
Total reward at step 421: -532.0
Total reward at step 422: -533.0
Total reward at step 423: -536.0
Total rewa

Total reward at step 4: -7.0
Total reward at step 5: -9.0
Total reward at step 6: -11.0
Total reward at step 7: -14.0
Total reward at step 8: -15.0
Total reward at step 9: -16.0
Total reward at step 10: -17.0
Total reward at step 11: -18.0
Total reward at step 12: -19.0
Total reward at step 13: -20.0
Total reward at step 14: -21.0
Total reward at step 15: -24.0
Total reward at step 16: -25.0
Total reward at step 17: -27.0
Total reward at step 18: -29.0
Total reward at step 19: -32.0
Total reward at step 20: -33.0
Total reward at step 21: -34.0
Total reward at step 22: -35.0
Total reward at step 23: -36.0
Total reward at step 24: -37.0
Total reward at step 25: -38.0
Total reward at step 26: -39.0
Total reward at step 27: -42.0
Total reward at step 28: -43.0
Total reward at step 29: -45.0
Total reward at step 30: -47.0
Total reward at step 31: -50.0
Total reward at step 32: -51.0
Total reward at step 33: -52.0
Total reward at step 34: -53.0
Total reward at step 35: -54.0
Total reward at 

Total reward at step 71: -108.0
Total reward at step 72: -109.0
Total reward at step 73: -110.0
Total reward at step 74: -111.0
Total reward at step 75: -114.0
Total reward at step 76: -115.0
Total reward at step 77: -117.0
Total reward at step 78: -119.0
Total reward at step 79: -122.0
Total reward at step 80: -123.0
Total reward at step 81: -124.0
Total reward at step 82: -125.0
Total reward at step 83: -126.0
Total reward at step 84: -127.0
Total reward at step 85: -128.0
Total reward at step 86: -129.0
Total reward at step 87: -132.0
Total reward at step 88: -133.0
Total reward at step 89: -135.0
Total reward at step 90: -137.0
Total reward at step 91: -140.0
Total reward at step 92: -141.0
Total reward at step 93: -142.0
Total reward at step 94: -143.0
Total reward at step 95: -144.0
Total reward at step 96: -145.0
Total reward at step 97: -146.0
Total reward at step 98: -147.0
Total reward at step 99: -150.0
Total reward at step 100: -151.0
Total reward at step 101: -153.0
Total 

Total reward at step 323: -286.0
Total reward at step 324: -287.0
Total reward at step 325: -288.0
Total reward at step 326: -289.0
Total reward at step 327: -292.0
Total reward at step 328: -293.0
Total reward at step 329: -295.0
Total reward at step 330: -297.0
Total reward at step 331: -300.0
Total reward at step 332: -301.0
Total reward at step 333: -302.0
Total reward at step 334: -303.0
Total reward at step 335: -304.0
Total reward at step 336: -305.0
Total reward at step 337: -306.0
Total reward at step 338: -307.0
Total reward at step 339: -310.0
Total reward at step 340: -311.0
Total reward at step 341: -313.0
Total reward at step 342: -315.0
Total reward at step 343: -318.0
Total reward at step 344: -319.0
Total reward at step 345: -320.0
Total reward at step 346: -321.0
Total reward at step 347: -322.0
Total reward at step 348: -323.0
Total reward at step 349: -324.0
Total reward at step 350: -325.0
Total reward at step 351: -328.0
Total reward at step 352: -329.0
Total rewa

{'success': True, 'value': 0, 'services': {}, 'os': {'linux': 1.0}, 'processes': {}, 'access': {}, 'discovered': {}, 'connection_error': False, 'permission_error': False, 'newly_discovered': {}}
------------------------------------------------------------
STARTING EPISODE
------------------------------------------------------------
t: Reward
Total reward at step 0: -1
Total reward at step 1: -2.0
Total reward at step 2: -3.0
Total reward at step 3: -6.0
Total reward at step 4: -7.0
Total reward at step 5: -9.0
Total reward at step 6: -11.0
Total reward at step 7: -14.0
Total reward at step 8: -15.0
Total reward at step 9: -16.0
Total reward at step 10: -17.0
Total reward at step 11: -18.0
Total reward at step 12: -19.0
Total reward at step 13: -20.0
Total reward at step 14: -21.0
Total reward at step 15: -24.0
Total reward at step 16: -25.0
Total reward at step 17: -27.0
Total reward at step 18: -29.0
Total reward at step 19: -32.0
Total reward at step 20: -33.0
Total reward at step 21

Total reward at step 245: -169.0
Total reward at step 246: -171.0
Total reward at step 247: -174.0
Total reward at step 248: -175.0
Total reward at step 249: -176.0
Total reward at step 250: -177.0
Total reward at step 251: -178.0
Total reward at step 252: -179.0
Total reward at step 253: -180.0
Total reward at step 254: -181.0
Total reward at step 255: -184.0
Total reward at step 256: -185.0
Total reward at step 257: -187.0
Total reward at step 258: -189.0
Total reward at step 259: -192.0
Total reward at step 260: -193.0
Total reward at step 261: -194.0
Total reward at step 262: -195.0
Total reward at step 263: -196.0
Total reward at step 264: -197.0
Total reward at step 265: -198.0
Total reward at step 266: -199.0
Total reward at step 267: -202.0
Total reward at step 268: -203.0
Total reward at step 269: -205.0
Total reward at step 270: -207.0
Total reward at step 271: -210.0
Total reward at step 272: -211.0
Total reward at step 273: -212.0
Total reward at step 274: -213.0
Total rewa

Total reward at step 41: -63.0
Total reward at step 42: -65.0
Total reward at step 43: -68.0
Total reward at step 44: -69.0
Total reward at step 45: -70.0
Total reward at step 46: -71.0
Total reward at step 47: -72.0
Total reward at step 48: -73.0
Total reward at step 49: -74.0
Total reward at step 50: -75.0
Total reward at step 51: -78.0
Total reward at step 52: -79.0
Total reward at step 53: -81.0
Total reward at step 54: -83.0
Total reward at step 55: -86.0
Total reward at step 56: -87.0
Total reward at step 57: -88.0
Total reward at step 58: -89.0
Total reward at step 59: -90.0
Total reward at step 60: -91.0
Total reward at step 61: -92.0
Total reward at step 62: -93.0
Total reward at step 63: -96.0
Total reward at step 64: -97.0
Total reward at step 65: -99.0
Total reward at step 66: -101.0
Total reward at step 67: -104.0
Total reward at step 68: -105.0
Total reward at step 69: -106.0
Total reward at step 70: -107.0
Total reward at step 71: -108.0
Total reward at step 72: -109.0
T

Total reward at step 35: -54.0
Total reward at step 36: -55.0
Total reward at step 37: -56.0
Total reward at step 38: -57.0
Total reward at step 39: -60.0
Total reward at step 40: -61.0
Total reward at step 41: -63.0
Total reward at step 42: -65.0
Total reward at step 43: -68.0
Total reward at step 44: -69.0
Total reward at step 45: -70.0
Total reward at step 46: 29.0
Total reward at step 47: 28.0
Total reward at step 48: 27.0
Total reward at step 49: 26.0
Total reward at step 50: 25.0
Total reward at step 51: 22.0
Total reward at step 52: 21.0
Total reward at step 53: 19.0
Total reward at step 54: 17.0
Total reward at step 55: 14.0
Total reward at step 56: 13.0
Total reward at step 57: 12.0
Total reward at step 58: 11.0
Total reward at step 59: 10.0
Total reward at step 60: 9.0
Total reward at step 61: 8.0
Total reward at step 62: 7.0
Total reward at step 63: 4.0
Total reward at step 64: 3.0
Total reward at step 65: 1.0
Total reward at step 66: -1.0
Total reward at step 67: -4.0
Total

Total reward at step 293: -341.0
Total reward at step 294: -343.0
Total reward at step 295: -346.0
Total reward at step 296: -247.0
Total reward at step 297: -248.0
Total reward at step 298: -249.0
Total reward at step 299: -250.0
Total reward at step 300: -251.0
Total reward at step 301: -252.0
Total reward at step 302: -253.0
Total reward at step 303: -256.0
Total reward at step 304: -257.0
Total reward at step 305: -259.0
Total reward at step 306: -261.0
Total reward at step 307: -264.0
Total reward at step 308: -265.0
Total reward at step 309: -266.0
Total reward at step 310: -267.0
Total reward at step 311: -268.0
Total reward at step 312: -269.0
Total reward at step 313: -270.0
Total reward at step 314: -271.0
Total reward at step 315: -274.0
Total reward at step 316: -275.0
Total reward at step 317: -277.0
Total reward at step 318: -279.0
Total reward at step 319: -282.0
Total reward at step 320: -283.0
Total reward at step 321: -284.0
Total reward at step 322: -285.0
Total rewa

Total reward at step 545: -619.0
Total reward at step 546: -621.0
Total reward at step 547: -624.0
Total reward at step 548: -625.0
Total reward at step 549: -626.0
Total reward at step 550: -627.0
Total reward at step 551: -628.0
Total reward at step 552: -629.0
Total reward at step 553: -630.0
Total reward at step 554: -631.0
Total reward at step 555: -634.0
Total reward at step 556: -535.0
------------------------------------------------------------
EPISODE FINISHED
------------------------------------------------------------
Goal reached = True
Total steps = 557
Total reward = -535.0
Terminating state = {'success': True, 'value': 100.0, 'services': {'ssh': 0.0, 'ftp': 1.0, 'http': 0.0, 'samba': 0.0, 'smtp': 0.0}, 'os': {'linux': 0.0, 'windows': 1.0}, 'processes': {}, 'access': 2, 'discovered': {}, 'connection_error': False, 'permission_error': False, 'newly_discovered': {}}
{'success': True, 'value': 0, 'services': {}, 'os': {'linux': 1.0}, 'processes': {}, 'access': {}, 'discovere

Total reward at step 92: -141.0
Total reward at step 93: -142.0
Total reward at step 94: -143.0
Total reward at step 95: -144.0
Total reward at step 96: -145.0
Total reward at step 97: -146.0
Total reward at step 98: -147.0
Total reward at step 99: -150.0
Total reward at step 100: -151.0
Total reward at step 101: -153.0
Total reward at step 102: -155.0
Total reward at step 103: -158.0
Total reward at step 104: -59.0
------------------------------------------------------------
EPISODE FINISHED
------------------------------------------------------------
Goal reached = True
Total steps = 105
Total reward = -59.0
Terminating state = {'success': True, 'value': 100.0, 'services': {}, 'os': {'linux': 1.0, 'windows': 0.0}, 'processes': {'tomcat': 1.0, 'daclsvc': 0.0, 'schtask': 0.0}, 'access': 2, 'discovered': {}, 'connection_error': False, 'permission_error': False, 'newly_discovered': {}}
{'success': True, 'value': 0, 'services': {}, 'os': {'linux': 1.0}, 'processes': {}, 'access': {}, 'dis

Total reward at step 93: -142.0
Total reward at step 94: -143.0
Total reward at step 95: -144.0
Total reward at step 96: -145.0
Total reward at step 97: -146.0
Total reward at step 98: -147.0
Total reward at step 99: -150.0
Total reward at step 100: -151.0
Total reward at step 101: -153.0
Total reward at step 102: -155.0
Total reward at step 103: -158.0
Total reward at step 104: -59.0
------------------------------------------------------------
EPISODE FINISHED
------------------------------------------------------------
Goal reached = True
Total steps = 105
Total reward = -59.0
Terminating state = {'success': True, 'value': 100.0, 'services': {}, 'os': {'linux': 1.0, 'windows': 0.0}, 'processes': {'tomcat': 1.0, 'daclsvc': 0.0, 'schtask': 0.0}, 'access': 2, 'discovered': {}, 'connection_error': False, 'permission_error': False, 'newly_discovered': {}}
{'success': True, 'value': 0, 'services': {}, 'os': {'linux': 1.0}, 'processes': {}, 'access': {}, 'discovered': {}, 'connection_error'

Total reward at step 96: -130.0
Total reward at step 97: -131.0
Total reward at step 98: -132.0
Total reward at step 99: -133.0
Total reward at step 100: -134.0
Total reward at step 101: -135.0
Total reward at step 102: -138.0
Total reward at step 103: -139.0
Total reward at step 104: -141.0
Total reward at step 105: -142.0
Total reward at step 106: -143.0
Total reward at step 107: -144.0
Total reward at step 108: -145.0
Total reward at step 109: -146.0
Total reward at step 110: -147.0
Total reward at step 111: -150.0
Total reward at step 112: -151.0
Total reward at step 113: -153.0
Total reward at step 114: -154.0
Total reward at step 115: -155.0
Total reward at step 116: -156.0
Total reward at step 117: -157.0
Total reward at step 118: -158.0
Total reward at step 119: -159.0
Total reward at step 120: -162.0
Total reward at step 121: -163.0
Total reward at step 122: -165.0
Total reward at step 123: -166.0
Total reward at step 124: -167.0
Total reward at step 125: -168.0
Total reward a

{'success': True, 'value': 0, 'services': {}, 'os': {'linux': 1.0}, 'processes': {}, 'access': {}, 'discovered': {}, 'connection_error': False, 'permission_error': False, 'newly_discovered': {}}
------------------------------------------------------------
STARTING EPISODE
------------------------------------------------------------
t: Reward
Total reward at step 0: -1
Total reward at step 1: -2.0
Total reward at step 2: -3.0
Total reward at step 3: -6.0
Total reward at step 4: -7.0
Total reward at step 5: -9.0
Total reward at step 6: -10.0
Total reward at step 7: -11.0
Total reward at step 8: -12.0
Total reward at step 9: -13.0
Total reward at step 10: -14.0
Total reward at step 11: -15.0
Total reward at step 12: -18.0
Total reward at step 13: -19.0
Total reward at step 14: -21.0
Total reward at step 15: -22.0
Total reward at step 16: -23.0
Total reward at step 17: -24.0
Total reward at step 18: -25.0
Total reward at step 19: -26.0
Total reward at step 20: -27.0
Total reward at step 21

Total reward at step 247: -331.0
Total reward at step 248: -333.0
Total reward at step 249: -334.0
Total reward at step 250: -335.0
Total reward at step 251: -336.0
Total reward at step 252: -337.0
Total reward at step 253: -338.0
Total reward at step 254: -339.0
Total reward at step 255: -342.0
Total reward at step 256: -343.0
Total reward at step 257: -345.0
Total reward at step 258: -346.0
Total reward at step 259: -347.0
Total reward at step 260: -348.0
Total reward at step 261: -349.0
Total reward at step 262: -350.0
Total reward at step 263: -351.0
Total reward at step 264: -354.0
Total reward at step 265: -355.0
Total reward at step 266: -357.0
Total reward at step 267: -358.0
Total reward at step 268: -359.0
Total reward at step 269: -360.0
Total reward at step 270: -361.0
Total reward at step 271: -362.0
Total reward at step 272: -363.0
Total reward at step 273: -366.0
Total reward at step 274: -367.0
Total reward at step 275: -369.0
Total reward at step 276: -370.0
Total rewa

Total reward at step 148: -199.0
Total reward at step 149: -201.0
Total reward at step 150: -202.0
Total reward at step 151: -203.0
Total reward at step 152: -204.0
Total reward at step 153: -205.0
Total reward at step 154: -206.0
Total reward at step 155: -207.0
Total reward at step 156: -210.0
Total reward at step 157: -211.0
Total reward at step 158: -213.0
Total reward at step 159: -214.0
Total reward at step 160: -215.0
Total reward at step 161: -216.0
Total reward at step 162: -217.0
Total reward at step 163: -218.0
Total reward at step 164: -219.0
Total reward at step 165: -222.0
Total reward at step 166: -223.0
Total reward at step 167: -225.0
Total reward at step 168: -226.0
Total reward at step 169: -227.0
Total reward at step 170: -228.0
Total reward at step 171: -229.0
Total reward at step 172: -230.0
Total reward at step 173: -231.0
Total reward at step 174: -234.0
Total reward at step 175: -235.0
Total reward at step 176: -237.0
Total reward at step 177: -238.0
Total rewa

Total reward at step 398: -431.0
Total reward at step 399: -434.0
Total reward at step 400: -435.0
Total reward at step 401: -437.0
Total reward at step 402: -438.0
Total reward at step 403: -339.0
------------------------------------------------------------
EPISODE FINISHED
------------------------------------------------------------
Goal reached = True
Total steps = 404
Total reward = -339.0
Terminating state = {'success': True, 'value': 100.0, 'services': {}, 'os': {'linux': 0.0, 'windows': 1.0}, 'processes': {'tomcat': 0.0, 'daclsvc': 1.0}, 'access': 2, 'discovered': {}, 'connection_error': False, 'permission_error': False, 'newly_discovered': {}}
{'success': True, 'value': 0, 'services': {}, 'os': {'linux': 1.0}, 'processes': {}, 'access': {}, 'discovered': {}, 'connection_error': False, 'permission_error': False, 'newly_discovered': {}}
------------------------------------------------------------
STARTING EPISODE
------------------------------------------------------------
t: Rew

Total reward at step 223: -299.0
Total reward at step 224: -300.0
Total reward at step 225: -301.0
Total reward at step 226: -302.0
Total reward at step 227: -303.0
Total reward at step 228: -306.0
Total reward at step 229: -307.0
Total reward at step 230: -309.0
Total reward at step 231: -310.0
Total reward at step 232: -311.0
Total reward at step 233: -312.0
Total reward at step 234: -313.0
Total reward at step 235: -314.0
Total reward at step 236: -315.0
Total reward at step 237: -318.0
Total reward at step 238: -319.0
Total reward at step 239: -321.0
Total reward at step 240: -322.0
Total reward at step 241: -323.0
Total reward at step 242: -324.0
Total reward at step 243: -325.0
Total reward at step 244: -326.0
Total reward at step 245: -327.0
Total reward at step 246: -330.0
Total reward at step 247: -331.0
Total reward at step 248: -333.0
Total reward at step 249: -334.0
Total reward at step 250: -335.0
Total reward at step 251: -336.0
Total reward at step 252: -337.0
Total rewa

Total reward at step 69: -94.0
Total reward at step 70: -95.0
71: -96.0
Total reward at step 71: -96.0
Total reward at step 72: -97.0
Total reward at step 73: -98.0
Total reward at step 74: -99.0
Total reward at step 75: -102.0
Total reward at step 76: -103.0
Total reward at step 77: -105.0
Total reward at step 78: -106.0
Total reward at step 79: -107.0
Total reward at step 80: -108.0
Total reward at step 81: -109.0
Total reward at step 82: -110.0
Total reward at step 83: -111.0
Total reward at step 84: -114.0
Total reward at step 85: -115.0
Total reward at step 86: -117.0
Total reward at step 87: -118.0
Total reward at step 88: -119.0
Total reward at step 89: -120.0
Total reward at step 90: -121.0
Total reward at step 91: -122.0
Total reward at step 92: -123.0
Total reward at step 93: -126.0
Total reward at step 94: -127.0
Total reward at step 95: -129.0
Total reward at step 96: -130.0
Total reward at step 97: -131.0
Total reward at step 98: -132.0
Total reward at step 99: -133.0
Tota

Total reward at step 319: -327.0
Total reward at step 320: -329.0
Total reward at step 321: -330.0
Total reward at step 322: -331.0
Total reward at step 323: -332.0
Total reward at step 324: -333.0
Total reward at step 325: -334.0
Total reward at step 326: -335.0
Total reward at step 327: -338.0
Total reward at step 328: -339.0
Total reward at step 329: -341.0
Total reward at step 330: -342.0
Total reward at step 331: -243.0
------------------------------------------------------------
EPISODE FINISHED
------------------------------------------------------------
Goal reached = True
Total steps = 332
Total reward = -243.0
Terminating state = {'success': True, 'value': 100.0, 'services': {}, 'os': {'linux': 0.0, 'windows': 1.0}, 'processes': {'tomcat': 0.0, 'daclsvc': 1.0}, 'access': 2, 'discovered': {}, 'connection_error': False, 'permission_error': False, 'newly_discovered': {}}
{'success': True, 'value': 0, 'services': {}, 'os': {'linux': 1.0}, 'processes': {}, 'access': {}, 'discovere

Total reward at step 216: -289.0
Total reward at step 217: -290.0
Total reward at step 218: -291.0
Total reward at step 219: -294.0
Total reward at step 220: -295.0
Total reward at step 221: -297.0
Total reward at step 222: -298.0
Total reward at step 223: -299.0
Total reward at step 224: -300.0
Total reward at step 225: -301.0
Total reward at step 226: -302.0
Total reward at step 227: -303.0
Total reward at step 228: -306.0
Total reward at step 229: -307.0
Total reward at step 230: -309.0
Total reward at step 231: -310.0
Total reward at step 232: -311.0
Total reward at step 233: -312.0
Total reward at step 234: -313.0
Total reward at step 235: -314.0
Total reward at step 236: -315.0
Total reward at step 237: -318.0
Total reward at step 238: -319.0
Total reward at step 239: -321.0
Total reward at step 240: -322.0
Total reward at step 241: -323.0
Total reward at step 242: -324.0
Total reward at step 243: -325.0
Total reward at step 244: -326.0
Total reward at step 245: -327.0
Total rewa

Total reward at step 466: -523.0
Total reward at step 467: -524.0
Total reward at step 468: -525.0
Total reward at step 469: -526.0
Total reward at step 470: -527.0
Total reward at step 471: -530.0
Total reward at step 472: -531.0
Total reward at step 473: -533.0
Total reward at step 474: -534.0
Total reward at step 475: -535.0
Total reward at step 476: -536.0
Total reward at step 477: -537.0
Total reward at step 478: -538.0
Total reward at step 479: -539.0
Total reward at step 480: -542.0
Total reward at step 481: -543.0
Total reward at step 482: -545.0
Total reward at step 483: -546.0
Total reward at step 484: -547.0
Total reward at step 485: -548.0
Total reward at step 486: -549.0
Total reward at step 487: -550.0
Total reward at step 488: -551.0
Total reward at step 489: -554.0
Total reward at step 490: -555.0
Total reward at step 491: -557.0
Total reward at step 492: -558.0
Total reward at step 493: -559.0
Total reward at step 494: -560.0
Total reward at step 495: -561.0
Total rewa

Total reward at step 716: -757.0
Total reward at step 717: -758.0
Total reward at step 718: -759.0
719: -760.0
Total reward at step 719: -760.0
Total reward at step 720: -761.0
Total reward at step 721: -762.0
Total reward at step 722: -763.0
Total reward at step 723: -766.0
Total reward at step 724: -767.0
Total reward at step 725: -769.0
Total reward at step 726: -770.0
Total reward at step 727: -771.0
Total reward at step 728: -772.0
Total reward at step 729: -773.0
Total reward at step 730: -774.0
Total reward at step 731: -775.0
Total reward at step 732: -778.0
Total reward at step 733: -779.0
Total reward at step 734: -781.0
Total reward at step 735: -782.0
Total reward at step 736: -783.0
Total reward at step 737: -784.0
Total reward at step 738: -785.0
Total reward at step 739: -786.0
Total reward at step 740: -787.0
Total reward at step 741: -790.0
Total reward at step 742: -791.0
Total reward at step 743: -793.0
Total reward at step 744: -794.0
Total reward at step 745: -795.

Total reward at step 961: -1083.0
Total reward at step 962: -1084.0
Total reward at step 963: -1085.0
Total reward at step 964: -1086.0
Total reward at step 965: -1087.0
Total reward at step 966: -1090.0
Total reward at step 967: -1091.0
Total reward at step 968: -1093.0
Total reward at step 969: -1094.0
Total reward at step 970: -1095.0
Total reward at step 971: -1096.0
Total reward at step 972: -1097.0
Total reward at step 973: -1098.0
Total reward at step 974: -1099.0
Total reward at step 975: -1102.0
Total reward at step 976: -1103.0
Total reward at step 977: -1105.0
Total reward at step 978: -1106.0
Total reward at step 979: -1107.0
Total reward at step 980: -1108.0
Total reward at step 981: -1109.0
Total reward at step 982: -1110.0
Total reward at step 983: -1111.0
Total reward at step 984: -1114.0
Total reward at step 985: -1115.0
Total reward at step 986: -1117.0
Total reward at step 987: -1118.0
Total reward at step 988: -1119.0
Total reward at step 989: -1120.0
Total reward a

Total reward at step 190: -254.0
Total reward at step 191: -255.0
Total reward at step 192: -258.0
Total reward at step 193: -259.0
Total reward at step 194: -261.0
Total reward at step 195: -262.0
Total reward at step 196: -263.0
Total reward at step 197: -264.0
Total reward at step 198: -265.0
Total reward at step 199: -266.0
Total reward at step 200: -267.0
Total reward at step 201: -270.0
Total reward at step 202: -271.0
Total reward at step 203: -273.0
Total reward at step 204: -274.0
Total reward at step 205: -275.0
Total reward at step 206: -276.0
Total reward at step 207: -277.0
Total reward at step 208: -278.0
Total reward at step 209: -279.0
Total reward at step 210: -282.0
Total reward at step 211: -283.0
Total reward at step 212: -285.0
Total reward at step 213: -286.0
Total reward at step 214: -287.0
215: -288.0
Total reward at step 215: -288.0
Total reward at step 216: -289.0
Total reward at step 217: -290.0
Total reward at step 218: -291.0
Total reward at step 219: -294.

Total reward at step 440: -388.0
Total reward at step 441: -389.0
Total reward at step 442: -390.0
Total reward at step 443: -391.0
Total reward at step 444: -394.0
Total reward at step 445: -395.0
Total reward at step 446: -397.0
Total reward at step 447: -398.0
Total reward at step 448: -399.0
Total reward at step 449: -400.0
Total reward at step 450: -401.0
Total reward at step 451: -402.0
Total reward at step 452: -403.0
Total reward at step 453: -406.0
Total reward at step 454: -407.0
Total reward at step 455: -409.0
Total reward at step 456: -410.0
Total reward at step 457: -411.0
Total reward at step 458: -412.0
Total reward at step 459: -413.0
Total reward at step 460: -414.0
Total reward at step 461: -415.0
Total reward at step 462: -418.0
Total reward at step 463: -419.0
Total reward at step 464: -421.0
Total reward at step 465: -422.0
Total reward at step 466: -423.0
Total reward at step 467: -424.0
Total reward at step 468: -425.0
Total reward at step 469: -426.0
Total rewa

Total reward at step 690: -722.0
Total reward at step 691: -723.0
Total reward at step 692: -724.0
Total reward at step 693: -725.0
Total reward at step 694: -726.0
Total reward at step 695: -727.0
Total reward at step 696: -730.0
Total reward at step 697: -731.0
Total reward at step 698: -733.0
Total reward at step 699: -734.0
Total reward at step 700: -735.0
Total reward at step 701: -736.0
Total reward at step 702: -737.0
Total reward at step 703: -738.0
Total reward at step 704: -739.0
Total reward at step 705: -742.0
Total reward at step 706: -743.0
Total reward at step 707: -745.0
Total reward at step 708: -746.0
Total reward at step 709: -747.0
Total reward at step 710: -748.0
Total reward at step 711: -749.0
Total reward at step 712: -750.0
Total reward at step 713: -751.0
Total reward at step 714: -754.0
Total reward at step 715: -755.0
Total reward at step 716: -757.0
Total reward at step 717: -758.0
Total reward at step 718: -759.0
719: -760.0
Total reward at step 719: -760.

Total reward at step 940: -1055.0
Total reward at step 941: -1057.0
Total reward at step 942: -1058.0
Total reward at step 943: -1059.0
Total reward at step 944: -1060.0
Total reward at step 945: -1061.0
Total reward at step 946: -1062.0
Total reward at step 947: -1063.0
Total reward at step 948: -1066.0
Total reward at step 949: -1067.0
Total reward at step 950: -1069.0
Total reward at step 951: -1070.0
Total reward at step 952: -1071.0
Total reward at step 953: -1072.0
Total reward at step 954: -1073.0
Total reward at step 955: -1074.0
Total reward at step 956: -1075.0
Total reward at step 957: -1078.0
Total reward at step 958: -1079.0
Total reward at step 959: -1081.0
Total reward at step 960: -1082.0
Total reward at step 961: -1083.0
Total reward at step 962: -1084.0
Total reward at step 963: -1085.0
Total reward at step 964: -1086.0
Total reward at step 965: -1087.0
Total reward at step 966: -1090.0
Total reward at step 967: -1091.0
Total reward at step 968: -1093.0
Total reward a

Total reward at step 169: -227.0
Total reward at step 170: -228.0
Total reward at step 171: -229.0
Total reward at step 172: -230.0
Total reward at step 173: -231.0
Total reward at step 174: -234.0
Total reward at step 175: -235.0
Total reward at step 176: -237.0
Total reward at step 177: -238.0
Total reward at step 178: -239.0
Total reward at step 179: -240.0
Total reward at step 180: -241.0
Total reward at step 181: -242.0
Total reward at step 182: -243.0
Total reward at step 183: -246.0
Total reward at step 184: -247.0
Total reward at step 185: -249.0
Total reward at step 186: -250.0
Total reward at step 187: -251.0
Total reward at step 188: -252.0
Total reward at step 189: -253.0
Total reward at step 190: -254.0
Total reward at step 191: -255.0
Total reward at step 192: -258.0
Total reward at step 193: -259.0
Total reward at step 194: -261.0
Total reward at step 195: -262.0
Total reward at step 196: -263.0
Total reward at step 197: -264.0
Total reward at step 198: -265.0
Total rewa

Total reward at step 419: -361.0
Total reward at step 420: -362.0
Total reward at step 421: -363.0
Total reward at step 422: -364.0
Total reward at step 423: -365.0
Total reward at step 424: -366.0
Total reward at step 425: -367.0
Total reward at step 426: -370.0
Total reward at step 427: -371.0
Total reward at step 428: -373.0
Total reward at step 429: -374.0
Total reward at step 430: -375.0
431: -376.0
Total reward at step 431: -376.0
Total reward at step 432: -377.0
Total reward at step 433: -378.0
Total reward at step 434: -379.0
Total reward at step 435: -382.0
Total reward at step 436: -383.0
Total reward at step 437: -385.0
Total reward at step 438: -386.0
Total reward at step 439: -387.0
Total reward at step 440: -388.0
Total reward at step 441: -389.0
Total reward at step 442: -390.0
Total reward at step 443: -391.0
Total reward at step 444: -394.0
Total reward at step 445: -395.0
Total reward at step 446: -397.0
Total reward at step 447: -398.0
Total reward at step 448: -399.

Total reward at step 669: -694.0
Total reward at step 670: -695.0
Total reward at step 671: -697.0
Total reward at step 672: -698.0
Total reward at step 673: -699.0
Total reward at step 674: -700.0
Total reward at step 675: -701.0
Total reward at step 676: -702.0
Total reward at step 677: -703.0
Total reward at step 678: -706.0
Total reward at step 679: -707.0
Total reward at step 680: -709.0
Total reward at step 681: -710.0
Total reward at step 682: -711.0
Total reward at step 683: -712.0
Total reward at step 684: -713.0
Total reward at step 685: -714.0
Total reward at step 686: -715.0
Total reward at step 687: -718.0
Total reward at step 688: -719.0
Total reward at step 689: -721.0
Total reward at step 690: -722.0
Total reward at step 691: -723.0
Total reward at step 692: -724.0
Total reward at step 693: -725.0
Total reward at step 694: -726.0
Total reward at step 695: -727.0
Total reward at step 696: -730.0
Total reward at step 697: -731.0
Total reward at step 698: -733.0
Total rewa

Total reward at step 919: -1026.0
Total reward at step 920: -1027.0
Total reward at step 921: -1030.0
Total reward at step 922: -1031.0
Total reward at step 923: -1033.0
Total reward at step 924: -1034.0
Total reward at step 925: -1035.0
Total reward at step 926: -1036.0
Total reward at step 927: -1037.0
Total reward at step 928: -1038.0
Total reward at step 929: -1039.0
Total reward at step 930: -1042.0
Total reward at step 931: -1043.0
Total reward at step 932: -1045.0
Total reward at step 933: -1046.0
Total reward at step 934: -1047.0
935: -1048.0
Total reward at step 935: -1048.0
Total reward at step 936: -1049.0
Total reward at step 937: -1050.0
Total reward at step 938: -1051.0
Total reward at step 939: -1054.0
Total reward at step 940: -1055.0
Total reward at step 941: -1057.0
Total reward at step 942: -1058.0
Total reward at step 943: -1059.0
Total reward at step 944: -1060.0
Total reward at step 945: -1061.0
Total reward at step 946: -1062.0
Total reward at step 947: -1063.0
T

Total reward at step 148: -199.0
Total reward at step 149: -201.0
Total reward at step 150: -202.0
Total reward at step 151: -203.0
Total reward at step 152: -204.0
Total reward at step 153: -205.0
Total reward at step 154: -206.0
Total reward at step 155: -207.0
Total reward at step 156: -210.0
Total reward at step 157: -211.0
Total reward at step 158: -213.0
Total reward at step 159: -214.0
Total reward at step 160: -215.0
Total reward at step 161: -216.0
Total reward at step 162: -217.0
Total reward at step 163: -218.0
Total reward at step 164: -219.0
Total reward at step 165: -222.0
Total reward at step 166: -223.0
Total reward at step 167: -225.0
Total reward at step 168: -226.0
Total reward at step 169: -227.0
Total reward at step 170: -228.0
Total reward at step 171: -229.0
Total reward at step 172: -230.0
Total reward at step 173: -231.0
Total reward at step 174: -234.0
Total reward at step 175: -235.0
Total reward at step 176: -237.0
Total reward at step 177: -238.0
Total rewa

Total reward at step 398: -331.0
Total reward at step 399: -334.0
Total reward at step 400: -335.0
Total reward at step 401: -337.0
Total reward at step 402: -338.0
Total reward at step 403: -339.0
Total reward at step 404: -340.0
Total reward at step 405: -341.0
Total reward at step 406: -342.0
Total reward at step 407: -343.0
Total reward at step 408: -346.0
Total reward at step 409: -347.0
Total reward at step 410: -349.0
Total reward at step 411: -350.0
Total reward at step 412: -351.0
Total reward at step 413: -352.0
Total reward at step 414: -353.0
Total reward at step 415: -354.0
Total reward at step 416: -355.0
Total reward at step 417: -358.0
Total reward at step 418: -359.0
Total reward at step 419: -361.0
Total reward at step 420: -362.0
Total reward at step 421: -363.0
Total reward at step 422: -364.0
Total reward at step 423: -365.0
Total reward at step 424: -366.0
Total reward at step 425: -367.0
Total reward at step 426: -370.0
Total reward at step 427: -371.0
Total rewa

Total reward at step 648: -665.0
Total reward at step 649: -666.0
Total reward at step 650: -667.0
Total reward at step 651: -670.0
Total reward at step 652: -671.0
Total reward at step 653: -673.0
Total reward at step 654: -674.0
Total reward at step 655: -675.0
Total reward at step 656: -676.0
Total reward at step 657: -677.0
Total reward at step 658: -678.0
Total reward at step 659: -679.0
Total reward at step 660: -682.0
Total reward at step 661: -683.0
Total reward at step 662: -685.0
Total reward at step 663: -686.0
Total reward at step 664: -687.0
Total reward at step 665: -688.0
Total reward at step 666: -689.0
Total reward at step 667: -690.0
Total reward at step 668: -691.0
Total reward at step 669: -694.0
Total reward at step 670: -695.0
Total reward at step 671: -697.0
Total reward at step 672: -698.0
Total reward at step 673: -699.0
Total reward at step 674: -700.0
Total reward at step 675: -701.0
Total reward at step 676: -702.0
Total reward at step 677: -703.0
Total rewa

Total reward at step 898: -999.0
Total reward at step 899: -1000.0
Total reward at step 900: -1001.0
Total reward at step 901: -1002.0
Total reward at step 902: -1003.0
Total reward at step 903: -1006.0
Total reward at step 904: -1007.0
Total reward at step 905: -1009.0
Total reward at step 906: -1010.0
Total reward at step 907: -1011.0
Total reward at step 908: -1012.0
Total reward at step 909: -1013.0
Total reward at step 910: -1014.0
Total reward at step 911: -1015.0
Total reward at step 912: -1018.0
Total reward at step 913: -1019.0
Total reward at step 914: -1021.0
Total reward at step 915: -1022.0
Total reward at step 916: -1023.0
Total reward at step 917: -1024.0
Total reward at step 918: -1025.0
Total reward at step 919: -1026.0
Total reward at step 920: -1027.0
Total reward at step 921: -1030.0
Total reward at step 922: -1031.0
Total reward at step 923: -1033.0
Total reward at step 924: -1034.0
Total reward at step 925: -1035.0
Total reward at step 926: -1036.0
Total reward at

Total reward at step 127: -170.0
Total reward at step 128: -171.0
Total reward at step 129: -174.0
Total reward at step 130: -175.0
Total reward at step 131: -177.0
Total reward at step 132: -178.0
Total reward at step 133: -179.0
Total reward at step 134: -180.0
Total reward at step 135: -181.0
Total reward at step 136: -182.0
Total reward at step 137: -183.0
Total reward at step 138: -186.0
Total reward at step 139: -187.0
Total reward at step 140: -189.0
Total reward at step 141: -190.0
Total reward at step 142: -191.0
143: -192.0
Total reward at step 143: -192.0
Total reward at step 144: -193.0
Total reward at step 145: -194.0
Total reward at step 146: -195.0
Total reward at step 147: -198.0
Total reward at step 148: -199.0
Total reward at step 149: -201.0
Total reward at step 150: -202.0
Total reward at step 151: -203.0
Total reward at step 152: -204.0
Total reward at step 153: -205.0
Total reward at step 154: -206.0
Total reward at step 155: -207.0
Total reward at step 156: -210.

Total reward at step 377: -304.0
Total reward at step 378: -305.0
Total reward at step 379: -306.0
Total reward at step 380: -307.0
Total reward at step 381: -310.0
Total reward at step 382: -311.0
Total reward at step 383: -313.0
Total reward at step 384: -314.0
Total reward at step 385: -315.0
Total reward at step 386: -316.0
Total reward at step 387: -317.0
Total reward at step 388: -318.0
Total reward at step 389: -319.0
Total reward at step 390: -322.0
Total reward at step 391: -323.0
Total reward at step 392: -325.0
Total reward at step 393: -326.0
Total reward at step 394: -327.0
Total reward at step 395: -328.0
Total reward at step 396: -329.0
Total reward at step 397: -330.0
Total reward at step 398: -331.0
Total reward at step 399: -334.0
Total reward at step 400: -335.0
Total reward at step 401: -337.0
Total reward at step 402: -338.0
Total reward at step 403: -339.0
Total reward at step 404: -340.0
Total reward at step 405: -341.0
Total reward at step 406: -342.0
Total rewa

Total reward at step 627: -638.0
Total reward at step 628: -639.0
Total reward at step 629: -640.0
Total reward at step 630: -641.0
Total reward at step 631: -642.0
Total reward at step 632: -643.0
Total reward at step 633: -646.0
Total reward at step 634: -647.0
Total reward at step 635: -649.0
Total reward at step 636: -650.0
Total reward at step 637: -651.0
Total reward at step 638: -652.0
Total reward at step 639: -653.0
Total reward at step 640: -654.0
Total reward at step 641: -655.0
Total reward at step 642: -658.0
Total reward at step 643: -659.0
Total reward at step 644: -661.0
Total reward at step 645: -662.0
Total reward at step 646: -663.0
647: -664.0
Total reward at step 647: -664.0
Total reward at step 648: -665.0
Total reward at step 649: -666.0
Total reward at step 650: -667.0
Total reward at step 651: -670.0
Total reward at step 652: -671.0
Total reward at step 653: -673.0
Total reward at step 654: -674.0
Total reward at step 655: -675.0
Total reward at step 656: -676.

Total reward at step 877: -971.0
Total reward at step 878: -973.0
Total reward at step 879: -974.0
Total reward at step 880: -975.0
Total reward at step 881: -976.0
Total reward at step 882: -977.0
Total reward at step 883: -978.0
Total reward at step 884: -979.0
Total reward at step 885: -982.0
Total reward at step 886: -983.0
Total reward at step 887: -985.0
Total reward at step 888: -986.0
Total reward at step 889: -987.0
Total reward at step 890: -988.0
Total reward at step 891: -989.0
Total reward at step 892: -990.0
Total reward at step 893: -991.0
Total reward at step 894: -994.0
Total reward at step 895: -995.0
Total reward at step 896: -997.0
Total reward at step 897: -998.0
Total reward at step 898: -999.0
Total reward at step 899: -1000.0
Total reward at step 900: -1001.0
Total reward at step 901: -1002.0
Total reward at step 902: -1003.0
Total reward at step 903: -1006.0
Total reward at step 904: -1007.0
Total reward at step 905: -1009.0
Total reward at step 906: -1010.0
To

Total reward at step 104: -141.0
Total reward at step 105: -142.0
Total reward at step 106: -143.0
Total reward at step 107: -144.0
Total reward at step 108: -145.0
Total reward at step 109: -146.0
Total reward at step 110: -147.0
Total reward at step 111: -150.0
Total reward at step 112: -151.0
Total reward at step 113: -153.0
Total reward at step 114: -154.0
Total reward at step 115: -155.0
Total reward at step 116: -156.0
Total reward at step 117: -157.0
Total reward at step 118: -158.0
Total reward at step 119: -159.0
Total reward at step 120: -162.0
Total reward at step 121: -163.0
Total reward at step 122: -165.0
Total reward at step 123: -166.0
Total reward at step 124: -167.0
Total reward at step 125: -168.0
Total reward at step 126: -169.0
Total reward at step 127: -170.0
Total reward at step 128: -171.0
Total reward at step 129: -174.0
Total reward at step 130: -175.0
Total reward at step 131: -177.0
Total reward at step 132: -178.0
Total reward at step 133: -179.0
Total rewa

Total reward at step 353: -371.0
Total reward at step 354: -374.0
Total reward at step 355: -375.0
Total reward at step 356: -377.0
Total reward at step 357: -378.0
Total reward at step 358: -379.0
359: -380.0
Total reward at step 359: -380.0
Total reward at step 360: -381.0
Total reward at step 361: -382.0
Total reward at step 362: -383.0
Total reward at step 363: -386.0
Total reward at step 364: -387.0
Total reward at step 365: -389.0
Total reward at step 366: -390.0
Total reward at step 367: -391.0
Total reward at step 368: -392.0
Total reward at step 369: -393.0
Total reward at step 370: -394.0
Total reward at step 371: -395.0
Total reward at step 372: -398.0
Total reward at step 373: -399.0
Total reward at step 374: -401.0
Total reward at step 375: -402.0
Total reward at step 376: -403.0
Total reward at step 377: -404.0
Total reward at step 378: -405.0
Total reward at step 379: -406.0
Total reward at step 380: -407.0
Total reward at step 381: -410.0
Total reward at step 382: -411.

Total reward at step 602: -704.0
Total reward at step 603: -705.0
Total reward at step 604: -706.0
Total reward at step 605: -707.0
Total reward at step 606: -710.0
Total reward at step 607: -711.0
Total reward at step 608: -713.0
Total reward at step 609: -714.0
Total reward at step 610: -715.0
Total reward at step 611: -716.0
Total reward at step 612: -717.0
Total reward at step 613: -718.0
Total reward at step 614: -719.0
Total reward at step 615: -722.0
Total reward at step 616: -723.0
Total reward at step 617: -725.0
Total reward at step 618: -726.0
Total reward at step 619: -727.0
Total reward at step 620: -728.0
Total reward at step 621: -729.0
Total reward at step 622: -730.0
Total reward at step 623: -731.0
Total reward at step 624: -734.0
Total reward at step 625: -735.0
Total reward at step 626: -737.0
Total reward at step 627: -738.0
Total reward at step 628: -739.0
Total reward at step 629: -740.0
Total reward at step 630: -741.0
Total reward at step 631: -742.0
Total rewa

Total reward at step 849: -1034.0
Total reward at step 850: -1035.0
Total reward at step 851: -1037.0
Total reward at step 852: -1038.0
Total reward at step 853: -1039.0
Total reward at step 854: -1040.0
Total reward at step 855: -1041.0
Total reward at step 856: -1042.0
Total reward at step 857: -1043.0
Total reward at step 858: -1046.0
Total reward at step 859: -1047.0
Total reward at step 860: -1049.0
Total reward at step 861: -1050.0
Total reward at step 862: -1051.0
863: -1052.0
Total reward at step 863: -1052.0
Total reward at step 864: -1053.0
Total reward at step 865: -1054.0
Total reward at step 866: -1055.0
Total reward at step 867: -1058.0
Total reward at step 868: -1059.0
Total reward at step 869: -1061.0
Total reward at step 870: -1062.0
Total reward at step 871: -1063.0
Total reward at step 872: -1064.0
Total reward at step 873: -1065.0
Total reward at step 874: -1066.0
Total reward at step 875: -1067.0
Total reward at step 876: -1070.0
Total reward at step 877: -1071.0
T

Total reward at step 78: -106.0
Total reward at step 79: -107.0
Total reward at step 80: -108.0
Total reward at step 81: -109.0
Total reward at step 82: -110.0
Total reward at step 83: -111.0
Total reward at step 84: -114.0
Total reward at step 85: -115.0
Total reward at step 86: -117.0
Total reward at step 87: -118.0
Total reward at step 88: -119.0
Total reward at step 89: -120.0
Total reward at step 90: -121.0
Total reward at step 91: -122.0
Total reward at step 92: -123.0
Total reward at step 93: -126.0
Total reward at step 94: -127.0
Total reward at step 95: -129.0
Total reward at step 96: -130.0
Total reward at step 97: -131.0
Total reward at step 98: -132.0
Total reward at step 99: -133.0
Total reward at step 100: -134.0
Total reward at step 101: -135.0
Total reward at step 102: -138.0
Total reward at step 103: -139.0
Total reward at step 104: -141.0
Total reward at step 105: -142.0
Total reward at step 106: -143.0
Total reward at step 107: -144.0
Total reward at step 108: -145.0

Total reward at step 326: -335.0
Total reward at step 327: -338.0
Total reward at step 328: -339.0
Total reward at step 329: -341.0
Total reward at step 330: -342.0
Total reward at step 331: -343.0
Total reward at step 332: -344.0
Total reward at step 333: -345.0
Total reward at step 334: -346.0
Total reward at step 335: -347.0
Total reward at step 336: -350.0
Total reward at step 337: -351.0
Total reward at step 338: -353.0
Total reward at step 339: -354.0
Total reward at step 340: -355.0
Total reward at step 341: -356.0
Total reward at step 342: -357.0
Total reward at step 343: -358.0
Total reward at step 344: -359.0
Total reward at step 345: -362.0
Total reward at step 346: -363.0
Total reward at step 347: -365.0
Total reward at step 348: -366.0
Total reward at step 349: -367.0
Total reward at step 350: -368.0
Total reward at step 351: -369.0
Total reward at step 352: -370.0
Total reward at step 353: -371.0
Total reward at step 354: -374.0
Total reward at step 355: -375.0
Total rewa

Total reward at step 574: -667.0
575: -668.0
Total reward at step 575: -668.0
Total reward at step 576: -669.0
Total reward at step 577: -670.0
Total reward at step 578: -671.0
Total reward at step 579: -674.0
Total reward at step 580: -675.0
Total reward at step 581: -677.0
Total reward at step 582: -678.0
Total reward at step 583: -679.0
Total reward at step 584: -680.0
Total reward at step 585: -681.0
Total reward at step 586: -682.0
Total reward at step 587: -683.0
Total reward at step 588: -686.0
Total reward at step 589: -687.0
Total reward at step 590: -689.0
Total reward at step 591: -690.0
Total reward at step 592: -691.0
Total reward at step 593: -692.0
Total reward at step 594: -693.0
Total reward at step 595: -694.0
Total reward at step 596: -695.0
Total reward at step 597: -698.0
Total reward at step 598: -699.0
Total reward at step 599: -701.0
Total reward at step 600: -702.0
Total reward at step 601: -703.0
Total reward at step 602: -704.0
Total reward at step 603: -705.

Total reward at step 822: -998.0
Total reward at step 823: -999.0
Total reward at step 824: -1001.0
Total reward at step 825: -1002.0
Total reward at step 826: -1003.0
Total reward at step 827: -1004.0
Total reward at step 828: -1005.0
Total reward at step 829: -1006.0
Total reward at step 830: -1007.0
Total reward at step 831: -1010.0
Total reward at step 832: -1011.0
Total reward at step 833: -1013.0
Total reward at step 834: -1014.0
Total reward at step 835: -1015.0
Total reward at step 836: -1016.0
Total reward at step 837: -1017.0
Total reward at step 838: -1018.0
Total reward at step 839: -1019.0
Total reward at step 840: -1022.0
Total reward at step 841: -1023.0
Total reward at step 842: -1025.0
Total reward at step 843: -1026.0
Total reward at step 844: -1027.0
Total reward at step 845: -1028.0
Total reward at step 846: -1029.0
Total reward at step 847: -1030.0
Total reward at step 848: -1031.0
Total reward at step 849: -1034.0
Total reward at step 850: -1035.0
Total reward at 

Total reward at step 45: -61.0
Total reward at step 46: -62.0
Total reward at step 47: -63.0
Total reward at step 48: -66.0
Total reward at step 49: -67.0
Total reward at step 50: -69.0
Total reward at step 51: -70.0
Total reward at step 52: -71.0
Total reward at step 53: -72.0
Total reward at step 54: -73.0
Total reward at step 55: -74.0
Total reward at step 56: -75.0
Total reward at step 57: -78.0
Total reward at step 58: -79.0
Total reward at step 59: -81.0
Total reward at step 60: -82.0
Total reward at step 61: -83.0
Total reward at step 62: -84.0
Total reward at step 63: -85.0
Total reward at step 64: -86.0
Total reward at step 65: -87.0
Total reward at step 66: -90.0
Total reward at step 67: -91.0
Total reward at step 68: -93.0
Total reward at step 69: -94.0
Total reward at step 70: -95.0
71: -96.0
Total reward at step 71: -96.0
Total reward at step 72: -97.0
Total reward at step 73: -98.0
Total reward at step 74: -99.0
Total reward at step 75: -102.0
Total reward at step 76: -10

Total reward at step 296: -296.0
Total reward at step 297: -297.0
Total reward at step 298: -298.0
Total reward at step 299: -299.0
Total reward at step 300: -302.0
Total reward at step 301: -303.0
Total reward at step 302: -305.0
Total reward at step 303: -306.0
Total reward at step 304: -307.0
Total reward at step 305: -308.0
Total reward at step 306: -309.0
Total reward at step 307: -310.0
Total reward at step 308: -311.0
Total reward at step 309: -314.0
Total reward at step 310: -315.0
Total reward at step 311: -317.0
Total reward at step 312: -318.0
Total reward at step 313: -319.0
Total reward at step 314: -320.0
Total reward at step 315: -321.0
Total reward at step 316: -322.0
Total reward at step 317: -323.0
Total reward at step 318: -326.0
Total reward at step 319: -327.0
Total reward at step 320: -329.0
Total reward at step 321: -330.0
Total reward at step 322: -331.0
Total reward at step 323: -332.0
Total reward at step 324: -333.0
Total reward at step 325: -334.0
Total rewa

Total reward at step 548: -632.0
Total reward at step 549: -633.0
Total reward at step 550: -634.0
Total reward at step 551: -635.0
Total reward at step 552: -638.0
Total reward at step 553: -639.0
Total reward at step 554: -641.0
Total reward at step 555: -642.0
Total reward at step 556: -643.0
Total reward at step 557: -644.0
Total reward at step 558: -645.0
Total reward at step 559: -646.0
Total reward at step 560: -647.0
Total reward at step 561: -650.0
Total reward at step 562: -651.0
Total reward at step 563: -653.0
Total reward at step 564: -654.0
Total reward at step 565: -655.0
Total reward at step 566: -656.0
Total reward at step 567: -657.0
Total reward at step 568: -658.0
Total reward at step 569: -659.0
Total reward at step 570: -662.0
Total reward at step 571: -663.0
Total reward at step 572: -665.0
Total reward at step 573: -666.0
Total reward at step 574: -667.0
575: -668.0
Total reward at step 575: -668.0
Total reward at step 576: -669.0
Total reward at step 577: -670.

Total reward at step 800: -968.0
Total reward at step 801: -969.0
Total reward at step 802: -970.0
Total reward at step 803: -971.0
Total reward at step 804: -974.0
Total reward at step 805: -975.0
Total reward at step 806: -977.0
Total reward at step 807: -978.0
Total reward at step 808: -979.0
Total reward at step 809: -980.0
Total reward at step 810: -981.0
Total reward at step 811: -982.0
Total reward at step 812: -983.0
Total reward at step 813: -986.0
Total reward at step 814: -987.0
Total reward at step 815: -989.0
Total reward at step 816: -990.0
Total reward at step 817: -991.0
Total reward at step 818: -992.0
Total reward at step 819: -993.0
Total reward at step 820: -994.0
Total reward at step 821: -995.0
Total reward at step 822: -998.0
Total reward at step 823: -999.0
Total reward at step 824: -1001.0
Total reward at step 825: -1002.0
Total reward at step 826: -1003.0
Total reward at step 827: -1004.0
Total reward at step 828: -1005.0
Total reward at step 829: -1006.0
Tota

Total reward at step 21: -30.0
Total reward at step 22: -31.0
Total reward at step 23: -33.0
Total reward at step 24: -34.0
Total reward at step 25: -35.0
Total reward at step 26: -36.0
Total reward at step 27: -37.0
Total reward at step 28: -38.0
Total reward at step 29: -39.0
Total reward at step 30: -42.0
Total reward at step 31: -43.0
Total reward at step 32: -45.0
Total reward at step 33: -46.0
Total reward at step 34: -47.0
Total reward at step 35: -48.0
Total reward at step 36: -49.0
Total reward at step 37: -50.0
Total reward at step 38: -51.0
Total reward at step 39: -54.0
Total reward at step 40: -55.0
Total reward at step 41: -57.0
Total reward at step 42: -58.0
Total reward at step 43: -59.0
Total reward at step 44: -60.0
Total reward at step 45: -61.0
Total reward at step 46: -62.0
Total reward at step 47: -63.0
Total reward at step 48: -66.0
Total reward at step 49: -67.0
Total reward at step 50: -69.0
Total reward at step 51: -70.0
Total reward at step 52: -71.0
Total re

Total reward at step 273: -266.0
Total reward at step 274: -267.0
Total reward at step 275: -269.0
Total reward at step 276: -270.0
Total reward at step 277: -271.0
Total reward at step 278: -272.0
Total reward at step 279: -273.0
Total reward at step 280: -274.0
Total reward at step 281: -275.0
Total reward at step 282: -278.0
Total reward at step 283: -279.0
Total reward at step 284: -281.0
Total reward at step 285: -282.0
Total reward at step 286: -283.0
287: -284.0
Total reward at step 287: -284.0
Total reward at step 288: -285.0
Total reward at step 289: -286.0
Total reward at step 290: -287.0
Total reward at step 291: -290.0
Total reward at step 292: -291.0
Total reward at step 293: -293.0
Total reward at step 294: -294.0
Total reward at step 295: -295.0
Total reward at step 296: -296.0
Total reward at step 297: -297.0
Total reward at step 298: -298.0
Total reward at step 299: -299.0
Total reward at step 300: -302.0
Total reward at step 301: -303.0
Total reward at step 302: -305.

Total reward at step 521: -596.0
Total reward at step 522: -597.0
Total reward at step 523: -598.0
Total reward at step 524: -599.0
Total reward at step 525: -602.0
Total reward at step 526: -603.0
Total reward at step 527: -605.0
Total reward at step 528: -606.0
Total reward at step 529: -607.0
Total reward at step 530: -608.0
Total reward at step 531: -609.0
Total reward at step 532: -610.0
Total reward at step 533: -611.0
Total reward at step 534: -614.0
Total reward at step 535: -615.0
Total reward at step 536: -617.0
Total reward at step 537: -618.0
Total reward at step 538: -619.0
Total reward at step 539: -620.0
Total reward at step 540: -621.0
Total reward at step 541: -622.0
Total reward at step 542: -623.0
Total reward at step 543: -626.0
Total reward at step 544: -627.0
Total reward at step 545: -629.0
Total reward at step 546: -630.0
Total reward at step 547: -631.0
Total reward at step 548: -632.0
Total reward at step 549: -633.0
Total reward at step 550: -634.0
Total rewa

Total reward at step 770: -929.0
Total reward at step 771: -930.0
Total reward at step 772: -931.0
Total reward at step 773: -932.0
Total reward at step 774: -933.0
Total reward at step 775: -934.0
Total reward at step 776: -935.0
Total reward at step 777: -938.0
Total reward at step 778: -939.0
Total reward at step 779: -941.0
Total reward at step 780: -942.0
Total reward at step 781: -943.0
Total reward at step 782: -944.0
Total reward at step 783: -945.0
Total reward at step 784: -946.0
Total reward at step 785: -947.0
Total reward at step 786: -950.0
Total reward at step 787: -951.0
Total reward at step 788: -953.0
Total reward at step 789: -954.0
Total reward at step 790: -955.0
791: -956.0
Total reward at step 791: -956.0
Total reward at step 792: -957.0
Total reward at step 793: -958.0
Total reward at step 794: -959.0
Total reward at step 795: -962.0
Total reward at step 796: -963.0
Total reward at step 797: -965.0
Total reward at step 798: -966.0
Total reward at step 799: -967.

------------------------------------------------------------
STARTING EPISODE
------------------------------------------------------------
t: Reward
Total reward at step 0: -1
Total reward at step 1: -2.0
Total reward at step 2: -3.0
Total reward at step 3: -6.0
Total reward at step 4: -7.0
Total reward at step 5: -9.0
Total reward at step 6: -10.0
Total reward at step 7: -11.0
Total reward at step 8: -12.0
Total reward at step 9: -13.0
Total reward at step 10: -14.0
Total reward at step 11: -15.0
Total reward at step 12: -18.0
Total reward at step 13: -19.0
Total reward at step 14: -21.0
Total reward at step 15: -22.0
Total reward at step 16: -23.0
Total reward at step 17: -24.0
Total reward at step 18: -25.0
Total reward at step 19: -26.0
Total reward at step 20: -27.0
Total reward at step 21: -30.0
Total reward at step 22: -31.0
Total reward at step 23: -33.0
Total reward at step 24: -34.0
Total reward at step 25: -35.0
Total reward at step 26: -36.0
Total reward at step 27: -37.0
T

Total reward at step 251: -336.0
Total reward at step 252: -337.0
Total reward at step 253: -338.0
Total reward at step 254: -339.0
Total reward at step 255: -342.0
Total reward at step 256: -343.0
Total reward at step 257: -345.0
Total reward at step 258: -346.0
Total reward at step 259: -247.0
Total reward at step 260: -248.0
Total reward at step 261: -249.0
Total reward at step 262: -250.0
Total reward at step 263: -251.0
Total reward at step 264: -254.0
Total reward at step 265: -255.0
Total reward at step 266: -257.0
Total reward at step 267: -258.0
Total reward at step 268: -259.0
Total reward at step 269: -260.0
Total reward at step 270: -261.0
Total reward at step 271: -262.0
Total reward at step 272: -263.0
Total reward at step 273: -266.0
Total reward at step 274: -267.0
Total reward at step 275: -269.0
Total reward at step 276: -270.0
Total reward at step 277: -271.0
Total reward at step 278: -272.0
Total reward at step 279: -273.0
Total reward at step 280: -274.0
Total rewa

Total reward at step 502: -571.0
503: -572.0
Total reward at step 503: -572.0
Total reward at step 504: -573.0
Total reward at step 505: -574.0
Total reward at step 506: -575.0
Total reward at step 507: -578.0
Total reward at step 508: -579.0
Total reward at step 509: -581.0
Total reward at step 510: -582.0
Total reward at step 511: -583.0
Total reward at step 512: -584.0
Total reward at step 513: -585.0
Total reward at step 514: -586.0
Total reward at step 515: -587.0
Total reward at step 516: -590.0
Total reward at step 517: -591.0
Total reward at step 518: -593.0
Total reward at step 519: -594.0
Total reward at step 520: -595.0
Total reward at step 521: -596.0
Total reward at step 522: -597.0
Total reward at step 523: -598.0
Total reward at step 524: -599.0
Total reward at step 525: -602.0
Total reward at step 526: -603.0
Total reward at step 527: -605.0
Total reward at step 528: -606.0
Total reward at step 529: -607.0
Total reward at step 530: -608.0
Total reward at step 531: -609.

Total reward at step 750: -902.0
Total reward at step 751: -903.0
Total reward at step 752: -905.0
Total reward at step 753: -906.0
Total reward at step 754: -907.0
Total reward at step 755: -908.0
Total reward at step 756: -909.0
Total reward at step 757: -910.0
Total reward at step 758: -911.0
Total reward at step 759: -914.0
Total reward at step 760: -915.0
Total reward at step 761: -917.0
Total reward at step 762: -918.0
Total reward at step 763: -919.0
Total reward at step 764: -920.0
Total reward at step 765: -921.0
Total reward at step 766: -922.0
Total reward at step 767: -923.0
Total reward at step 768: -926.0
Total reward at step 769: -927.0
Total reward at step 770: -929.0
Total reward at step 771: -930.0
Total reward at step 772: -931.0
Total reward at step 773: -932.0
Total reward at step 774: -933.0
Total reward at step 775: -934.0
Total reward at step 776: -935.0
Total reward at step 777: -938.0
Total reward at step 778: -939.0
Total reward at step 779: -941.0
Total rewa

Total reward at step 994: -1227.0
Total reward at step 995: -1229.0
Total reward at step 996: -1230.0
Total reward at step 997: -1231.0
Total reward at step 998: -1232.0
Total reward at step 999: -1233.0
------------------------------------------------------------
EPISODE FINISHED
------------------------------------------------------------
Goal reached = False
Total steps = 1000
Total reward = -1233.0
Terminating state = {'success': True, 'value': 0, 'services': {}, 'os': {'linux': 1.0, 'windows': 0.0}, 'processes': {}, 'access': {}, 'discovered': {}, 'connection_error': False, 'permission_error': False, 'newly_discovered': {}}
{'success': True, 'value': 0, 'services': {}, 'os': {'linux': 1.0}, 'processes': {}, 'access': {}, 'discovered': {}, 'connection_error': False, 'permission_error': False, 'newly_discovered': {}}


In [None]:
import csv
import pandas as pd
# Load 'bruteforce_agent_sensitive_host.csv' (path is relative to the
# notebook's working directory) into the DataFrame `df`.
# NOTE(review): presumably this file holds results logged by the brute-force
# agent runs, with the same Reward/Steps/Network/Change columns as
# 'bruteforce_agent.csv' — confirm against the cell that writes it.
df = pd.read_csv('bruteforce_agent_sensitive_host.csv')