# Implementing simple pathfinding in a random actor

In [None]:
import time
import gym
import nle

import numpy as np
import matplotlib.pyplot as plt

In [None]:
# NOTE(review): presumably this disables the implicit forwarding of unknown
#  attribute lookups from a `gym.Wrapper` to the env it wraps, so that typos
#  and missing attributes fail on the wrapper itself -- confirm against the
#  installed gym version
del gym.Wrapper.__getattr__

A wrapper that keeps track of the action history

In [None]:
from collections import deque


class RecentHistory(gym.Wrapper):
    """A pass-through wrapper that remembers the most recent actions.

    Every action given to `.step` is appended to the `.recent` deque before
    being forwarded to the wrapped env. Observations, rewards and the other
    transition data are returned untouched.

    Parameters
    ----------
    env : gym.Env
        The environment to wrap.

    n_recent : int or None, default=0
        The `maxlen` of the `.recent` deque: `None` keeps an unbounded
        history, while the default `0` keeps nothing at all.
    """
    def __init__(self, env, *, n_recent=0):
        super().__init__(env)
        self.recent = deque([], n_recent)

    def reset(self, seed=None):
        """Start a new episode with an empty action history.

        The `seed` argument is accepted, but NOT forwarded to the wrapped
        env: seeding is expected to be done through `.seed`.
        """
        # actions taken in the previous episode are not a part of the new one
        self.recent.clear()
        return self.env.reset()

    def step(self, action):
        """Record the action and then forward it to the wrapped env."""
        self.recent.append(action)
        return self.env.step(action)

We hide the NLE under several layers of wrappers. From the core to the shell:
1. `ReplayToFile` saves the seeds and the taken actions into a file for later inspection and replay.
2. `NLEAtoN` maps ascii actions to opaque actions accepted by the NLE.
3. `NLEPatches` patches tty-screens, botched by the cr-lf misconfiguration of the NLE's tty term emulator and NetHack's display (lf only).
4. `NLEFeatures` adds extra features generated on-the-fly from the current NLE's observation. 

In [None]:
from nle_toolbox.wrappers.replay import ReplayToFile, Replay
from nle_toolbox.wrappers.features import NLEPatches, NLEAtoN, NLEFeatures

def gui_factory(seed=None):
    """Build a seeded NetHackChallenge env, which is driven by ascii actions.

    The layers, from the core to the shell, are: the gym env, `ReplayToFile`
    (writing into `./replays`), `NLEPatches`, and, finally, `NLEAtoN`. The
    seed is set on the stack before the outermost `NLEAtoN` layer is added.
    """
    core = gym.make('NetHackChallenge-v0')

    # the replay recorder sits right on top of the core env
    recorder = ReplayToFile(
        core,
        folder='./replays',
        sticky=True,
        save_on='done',
    )

    patched = NLEPatches(recorder)
    patched.seed(seed)

    return NLEAtoN(patched)
    

We start with implementing a simple command evaluator.

In [None]:
def gui_run(env, *commands):
    """Feed the commands into a freshly reset env, one action at a time.

    Each command is an iterable of actions (e.g. a string of keys). After
    a command has been played out, or has been cut short by the end of the
    episode, the latest observation is yielded. Nothing is executed or
    yielded after the env has reported `done`.
    """
    obs = env.reset()
    finished = False
    for keys in commands:
        for key in keys:
            obs, _, finished, _ = env.step(key)
            if finished:
                # the rest of the command is dropped
                break

        yield obs
        if finished:
            return

A renderer

In [None]:
import pprint as pp

from time import sleep
from nle_toolbox.utils.env.render import render as tty_render

def ipynb_render(obs, clear=True, fps=None):
    """Print the observation's tty rendering into the notebook cell output.

    Parameters
    ----------
    obs : mapping
        The observation, unpacked as keyword arguments into `tty_render`.

    clear : bool, default=True
        Whether to clear the cell's output before printing.

    fps : float or None, default=None
        `None` turns the rendering off altogether. Otherwise, and despite
        its name, this is the pause in SECONDS after the frame has been
        printed; non-positive values mean no pause.

    Returns
    -------
    True, always, so that the call can be chained in a loop condition.
    """
    if fps is not None:
        from IPython.display import clear_output

        if clear:
            clear_output(wait=True)

        print(tty_render(**obs))
        if fps > 0:
            sleep(fps)

    return True

Below is a wrapper, which handles menus (unless an interaction is required) and
fetches all consecutive messages.

In [None]:
from nle_toolbox.bot.chassis import Chassis, ActionMasker

Let's test it in bulk.

In [None]:
# seed = None
# seed = 12513325507677477210, 18325590921330101247  # multi
# seed = 1251332550767747710, 18325590921330101247  # single
# seed = 125133255076774710, 18325590921330101247  # single
# seed = 13765371332493407478, 12246923801353953927
# seed = 12301533412141513004, 11519511065143048485
# seed = 1632082041122464284, 11609152793318129379
seed = 12604736832047991440, 12469632217503715839  # an aspirant
# seed = 5009195464289726085, 12625175316870653325

# the commands are played one after another in the same episode
commands = (
    ';j:',         # a paragraph about a cat
    # 'acy',         # break a wand "of slow" and blow up
    '\033Zbyyy,',  # cast a sleep ray at a newt and pick up its corpse
)

# `n_recent=None` keeps an unbounded action history
with Chassis(RecentHistory(gui_factory(seed), n_recent=None), split=False) as env:
    for obs in gui_run(env, *commands):
        # show the screen and the messages collected by the chassis
        ipynb_render(obs, clear=False)  # dump(env.env, obs[0])
        pp.pprint((env.messages,))

The factory for collecting random exploration rollouts

In [None]:
# from nle_toolbox.utils import seeding

def factory(seed=None, folder=None):
    """Assemble the env stack used for collecting random exploration rollouts.

    From the core to the shell: the NetHackChallenge gym env, `NLEPatches`,
    a replay wrapper (`Replay` if `folder` is None, otherwise `ReplayToFile`
    saving into `folder`), `RecentHistory` of the 32 latest actions,
    `Chassis`, and `ActionMasker`. The seed is set on the replay wrapper.
    """
    # the base env with the tty patches applied
    patched = NLEPatches(gym.make('NetHackChallenge-v0'))

    # a lookup from an ascii character to the index of its action in the env
    ctoa = {
        chr(code): index
        for index, code in enumerate(patched.unwrapped._actions)
    }

    # seeded run capabilities, optionally with saving to a folder
    if folder is not None:
        replay = ReplayToFile(patched, sticky=True,
                              folder=folder, save_on='done')

    else:
        replay = Replay(patched, sticky=True)

    replay.seed(seed)

    # if not isinstance(seed, tuple):
    #     seed = seeding.generate(seed)
    # seeding.pyroot(env).set_initial_seeds(*seed, False)

    # the chassis is told which action index stands for the space key
    history = RecentHistory(replay, n_recent=32)
    chassis = Chassis(history, space=ctoa[' '], split=False)
    return ActionMasker(chassis)

Level and dungeon mapper

In [None]:
from nle.nethack import (
    NLE_BL_X,
    NLE_BL_Y,
    NLE_BL_DNUM,
    NLE_BL_DLEVEL,
    # NLE_BL_DEPTH,  # derived from DNUM and DLEVEL
    # XXX does not uniquely identify floors,
    #  c.f. [`depth`](./nle/src/dungeon.c#L1086-1084)
    DUNGEON_SHAPE,
    MAX_GLYPH,
)

from nle_toolbox.utils.env.defs import \
    glyph_is, dt_glyph_ext, ext_glyphlut
from nle_toolbox.utils.env.obs import npy_fold2d

from nle_toolbox.bot.level import Level, DungeonMapper

A non-illegal random action exploration.

In [None]:
from copy import deepcopy
from nle_toolbox.bot.chassis import get_wrapper


def random_explore(seed=None, n_steps=1000, *, auto=False, fps=None, copy=False):
    """A non-illegal random action explorer.

    A generator, which plays random actions, that are not forbidden by the
    mask coming from the env built by `factory`, and escapes from every menu
    and prompt reported by the chassis.

    Parameters
    ----------
    seed : any valid entropy for `np.random.SeedSequence`, default=None
        Used to spawn two child seed sequences: one for the action sampling
        rng, and the other is passed to `factory` as the env's seed.

    n_steps : int, default=1000
        The limit on the total number of `env.step` calls. The counter is
        not restarted by the automatic resets.

    auto : bool, default=False
        Whether to render the final observation of a finished episode and
        then reset the env to continue, instead of stopping.

    fps : float or None, default=None
        Passed as is to `ipynb_render`. `None` disables rendering.

    copy : bool, default=False
        Whether to yield deep copies of the observations.

    Yields
    ------
    obs
        The observation BEFORE each step. The observation produced by the
        very last step is never yielded, because the loop condition is
        checked first.

    Notes
    -----
    The notebook fetches the `dng` local of this generator from outside with
    `getgeneratorlocals`, hence this name should not be changed.
    """
    # independent randomness streams for the policy and the env
    ss_pol, ss_env = np.random.SeedSequence(seed).spawn(2)

    # `j` counts the steps taken; `n_linger` and `pf` are not used below
    rng, j, n_linger, pf = np.random.default_rng(ss_pol), 0, 0, None
    with factory(seed=ss_env) as env:
        # we need access to the Chassis for additional meta state variables
        cha = get_wrapper(env, Chassis)

        # ActionMasker caches the escape action id
        ESC = get_wrapper(env, ActionMasker).escape

        # setup the dungeon mapper
        dng = DungeonMapper()

        # launch the episode
        (obs, mask), fin = env.reset(), False
        # `ipynb_render` always returns True, so it is called here only for
        #  its side effect of drawing the current observation
        while (
            ipynb_render(obs, clear=True, fps=fps)
            and not (fin or j >= n_steps)
        ):
            # though nle reuses buffers, we do not deep copy them
            #  delegating this to the downstream user instead
            yield deepcopy(obs) if copy else obs

            # default to immediately escaping from any menu or prompt
            act = ESC
            if not (cha.in_menu or cha.prompt):
                # the map is updated only from the regular game screens
                dng.update(obs)

                # if we're in LINGER state, pick a random non-forbidden action
                # XXX whelp... tilde on int8 is `two's complement`, not the `logical not`
                # the indices of the zero entries of the mask are the allowed
                #  actions (the star-unpacking assumes that the mask is 1-d)
                act = rng.choice(*np.logical_not(mask).nonzero())

            (obs, mask), rew, fin, info = env.step(act)
            j += 1

            if fin and auto:
                # show the terminal observation before it is replaced by
                #  the initial observation of the next episode
                ipynb_render(obs, clear=True, fps=fps)
                (obs, mask), fin = env.reset(), False

Get a random episode

In [None]:
from inspect import getgeneratorlocals
# a lazily played random episode with deep-copied observations
episode = random_explore(
    seed=None,
    n_steps=256,
    auto=False,
    copy=True,
    fps=0.01,
)


# advance the generator to its first `yield`, so that its frame gets the `dng`
#  local for us to grab. Only the glyphs of the observation are kept, in order
#  for all the elements of `glyphs` to be of the same kind.
glyphs = [next(episode)['glyphs']]
dng = getgeneratorlocals(episode).get('dng')

# play the rest of the episode, while the mapper keeps being updated inside
glyphs.extend(obs['glyphs'] for obs in episode)

In [None]:
from math import isfinite
from collections import namedtuple

from heapq import heappop, heappush
from collections import defaultdict

# The (row, col) offsets of the moves allowed to the pathfinder, mapped to
#  the vi-style movement keys of NetHack: `k` is north (one row up), `j` is
#  south, `h` is west and `l` is east. The diagonal moves (`y`, `u`, `b`
#  and `n`) are listed for reference, but are disabled.
dir_to_ascii = {
    # (-1, -1): 'y',
        (-1,  0): 'k',
            # (-1, +1): 'u',
    ( 0, -1): 'h',
            ( 0, +1): 'l',
    # (+1, -1): 'b',
        (+1,  0): 'j',
            # (+1, +1): 'n',
}

# A position and the value of the path to it. Kept for backward compatibility:
#  since the position is the FIRST field, these records must not be put into
#  a heap directly, for they would be ordered by position, not by value.
DijNode = namedtuple('DijNode', 'p,v')


# find shortest paths to all accessible tiles (determined by `cost`)
def dij(cost, source):
    """Run Dijkstra's algorithm over a grid from the given source tile.

    The moves are the offsets listed in `dir_to_ascii`, and the price of
    a move is the cost of the tile being stepped ONTO.

    Parameters
    ----------
    cost : 2d array
        The non-negative cost of entering each tile. Tiles with infinite
        cost can never be entered.

    source : tuple of int
        The (row, col) position of the starting tile. Its own cost is not
        counted.

    Returns
    -------
    value : float array of shape (rows + 1, cols + 1)
        The total cost of the cheapest path to each tile, `inf` for the
        tiles that cannot be reached. The extra last row and column are
        padding and are always `inf`.

    path : dict
        Maps the position of every reached tile to the position of the tile
        before it on the cheapest path. The source is mapped to `None`.
    """
    rows, cols = cost.shape

    # we leverage -ve indexing to avoid adding two borders: stepping off the
    #  top or the left edge wraps around to the padding row or column, which
    #  has infinite weight, just like stepping off the bottom or the right
    value = np.full((rows + 1, cols + 1), np.inf)
    weight = value.copy()
    weight[:-1, :-1] = cost
    path = {}

    # init start
    value[source], path[source] = 0., None

    # run dijkstra: the heap holds (distance, position) pairs, so that the
    #  tile with the smallest tentative distance is always expanded first
    frontier = [(0., source)]
    while frontier:
        dist, (r, c) = heappop(frontier)

        # no need to re-inspect stale heap records
        if value[r, c] < dist:
            continue

        for dr, dc in dir_to_ascii:
            # the infinite weight of the padding keeps us within bounds
            pos = r + dr, c + dc

            # only a finite total can be strictly less than the current
            #  value, hence impassable tiles are never relaxed
            total = dist + weight[pos]
            if total < value[pos]:
                value[pos] = total
                path[pos] = r, c
                heappush(frontier, (total, pos))

    return value, path

In [None]:
from nle_toolbox.utils.env.defs import symbol, GLYPH_CMAP_OFF, glyph_group, get_group
from nle_toolbox.utils.env.defs import glyphlut, ext_glyphlut

# the cmap symbols, which the pathfinder treats as doors: the doorless
#  doorway, the closed doors, and the raised drawbridges
closed_doors = get_group(
    symbol,
    GLYPH_CMAP_OFF,
    'S_ndoor',
    'S_vcdoor',
    'S_hcdoor',
    'S_vcdbridge',
    'S_hcdbridge',
)

# boolean lookup tables aligned with the entries of `ext_glyphlut`
is_door = np.isin(ext_glyphlut.id.value, np.array([*closed_doors]))
is_object = np.isin(ext_glyphlut.id.group, np.asarray([*glyph_group.OBJECTS]))

# doors and objects are walkable on top of whatever is accessible already
is_walkable = ext_glyphlut.is_accessible | is_door | is_object

In [None]:
from scipy.special import softmax

def dstination_prob(lvl, pos):
    """Get the probability of picking each tile of a level as a destination.

    A tile may be picked if either its `bg_tiles` or its `stg_tiles` glyph is
    walkable according to `is_walkable`, and it is not the tile at `pos`.
    The logit of a tile is its Chebyshev distance from `pos`, capped at 5,
    and the softmax is taken jointly over all tiles of the level. Thus the
    tiles that cannot be picked get zero probability.

    Parameters
    ----------
    lvl : Level
        The level, presumably with 2d `bg_tiles` and `stg_tiles` -- the
        indexing below relies on this.

    pos : tuple of int
        The (row, col) of the current position.
    """
    row, col = pos

    # chebyshev (king's move) distance from the current position
    coords = lvl.bg_tiles.rc
    cheb = np.maximum(abs(coords.r - row), abs(coords.c - col))

    allowed = is_walkable[lvl.bg_tiles.glyph] | is_walkable[lvl.stg_tiles.glyph]
    allowed[row, col] = False  # never pick the tile we are standing on

    logits = np.where(allowed, cheb, -np.inf)
    return softmax(np.minimum(logits, 5))

# destination probabilities around the latest position in the level's trace
prob = dstination_prob(dng.level, dng.level.trace[-1])

# unit walk cost for every tile that may be a destination, infinite otherwise
cost = np.where(prob > 0, 1., np.inf)

# a generator for sampling the destination later on
rng = np.random.default_rng()

plt.imshow(prob)

In [None]:
# the walk costs: 1 for the tiles with non-zero destination probability,
#  infinity for the rest
plt.imshow(cost)

In [None]:
def backup(path, dest):
    """Walk the predecessor map from `dest` back to the start of the path.

    Yields the positions in reverse order, beginning with `dest` itself and
    ending with the position whose predecessor in `path` is `None`. Raises
    `KeyError` if a position on the way is missing from `path`.
    """
    node, parent = dest, path[dest]
    yield node

    while parent is not None:
        node, parent = parent, path[parent]
        yield node

#         (r0, c0), (r1, c1) = p0, p1
#         yield directions[r1-r0, c1-c0]
        

In [None]:
# shortest paths from the latest position in the level's trace to every tile
value, path = dij(cost, dng.level.trace[-1])

In [None]:
# draw on a copy in order to keep the computed values intact
val = value.copy()

# sample the destination tile according to the destination probabilities
# NOTE(review): presumably `rc` holds the (row, col) pair of each tile, so
#  that a sampled element unpacks into `r, c` -- confirm against `Level`
r, c = rng.choice(dng.level.bg_tiles.rc.flat, p=prob.flat)

fig, ax = plt.subplots(1, 1, dpi=300)

# zero out the tiles on the path from the destination back to the source
# NOTE(review): `backup` raises KeyError if the sampled tile was not reached
#  by `dij`, i.e. it is walkable, yet disconnected from the source
for i, j in backup(path, (r, c)):
    val[i, j] = 0.

# make the destination itself stand out
val[r, c] = np.inf

# only the columns 10 through 39 are shown (hard-coded window)
ax.imshow(val[:, 10:40])

In [None]:
# NOTE(review): a leftover scratch cell -- presumably a stub for turning the
#  backed up path into a string of movement keys; `list()` is just `[]`
# ''.join(reversed())
list()

<br>

<br>