# A tiny bot for NLE

In [None]:
import torch
import numpy

import matplotlib.pyplot as plt

import gym
import nle

<br>

## Understanding the actions

Collect the action info by using the `&` command.

In [None]:
# Probe every action through the in-game `&` command and record what the game
# itself reports about it. `info` maps the gym action index to a tuple of
# (integer value, enum class name, enum member name, decoded game message).
with gym.make("NetHackChallenge-v0") as env:
    obs = env.reset()
    env.step(92)  # &
    env.step(19)  # \015 ENTER

    info = {}
    for j, a in enumerate(env.unwrapped.actions):
        obs, *_ = env.step(j)
        info[j] = (
            int(a),
            a.__class__.__name__,
            a._name_,
            obs["message"].view("S256")[0].decode("utf8"),
        )
        # Dismiss any pending `--More--` prompts. The observation MUST be
        # refreshed after each ESC: testing a stale `obs` would keep the
        # condition true forever and hang the cell.
        while b"--More--" in bytes(obs["tty_chars"]):
            obs, *_ = env.step(36)  # \033 ESC
        # re-issue `&` so that the next action is again interpreted as a query
        env.step(92)

Read the command help from NLE's docs.

In [None]:
# Parse the tab-separated `cmdhelp` file into a `key -> description` mapping.
# Lines that contain no tab separator are ignored.
cmdhelp = {}
with open("/Users/ivannazarov/Github/repos_with_rl/nle/dat/cmdhelp", "tr") as fin:
    for line in fin:
        key, tab, val = line.strip().partition("\t")
        if tab:
            cmdhelp[key] = val

The following actions will either be emulated or totally unused by the agent.

In [None]:
# Gym action indices (positions in `env.unwrapped.actions`) that the agent does
# not issue directly. Each trailing comment names the enum member, followed by
# a number noted by the author (presumably the action's key code -- confirm).
unused_actions = {
    # long actions skip too many game ticks!
    8,  # CompassDirectionLonger.N      75
    9,  # CompassDirectionLonger.E      76
    10,  # CompassDirectionLonger.S      74
    11,  # CompassDirectionLonger.W      72
    12,  # CompassDirectionLonger.NE     85
    13,  # CompassDirectionLonger.SE     78
    14,  # CompassDirectionLonger.SW     66
    15,  # CompassDirectionLonger.NW     89
    # list of extended commands
    21,  # Command.EXTLIST               191
    # adjust inventory letters
    22,  # Command.ADJUST                225
    # annotate a location on the map
    23,  # Command.ANNOTATE              193
    27,  # Command.CALL                  67
    # inspect char's attributes
    25,  # Command.ATTRIBUTES            24
    76,  # Command.SEEALL                42
    59,  # Command.OVERVIEW              15
    26,  # Command.AUTOPICKUP            64
    31,  # Command.CONDUCT               195
    34,  # Command.DROPTYPE              68
    37,  # Command.ENGRAVE               69
    42,  # Command.GLANCE                59
    81,  # Command.TAKEOFFALL            65
    # inventory is given to us
    44,  # Command.INVENTORY             105
    45,  # Command.INVENTTYPE            73
    # we don't care about the logs
    49,  # Command.KNOWN                 92
    50,  # Command.KNOWNCLASS            96
    # look is emulated through inventory probing
    51,  # Command.LOOK                  58
    # some unclear navigation actions
    54,  # Command.MOVE                  109
    55,  # Command.MOVEFAR               77
    72,  # Command.RUSH                  103
    73,  # Command.RUSH2                 71
    85,  # Command.TRAVEL                95
    # misc, developer
    89,  # Command.VERSION               246
    90,  # Command.VERSIONSHORT          118
    43,  # Command.HISTORY               86
    74,  # Command.SAVE                  83
    65,  # Command.QUIT                  241
    68,  # Command.REDRAW                18
    58,  # Command.OPTIONS               79
    # command help
    92,  # Command.WHATDOES              38
    93,  # Command.WHATIS                47
    # unknown
    100,  # TextCharacters.APOS           39
    99,  # TextCharacters.SPACE          20
    98,  # TextCharacters.MINUS          45
    102,  # TextCharacters.NUM_0          48
    103,  # TextCharacters.NUM_1          49
    104,  # TextCharacters.NUM_2          50
    105,  # TextCharacters.NUM_3          51
    106,  # TextCharacters.NUM_4          52
    107,  # TextCharacters.NUM_5          53
    108,  # TextCharacters.NUM_6          54
    109,  # TextCharacters.NUM_7          55
    110,  # TextCharacters.NUM_8          56
    111,  # TextCharacters.NUM_9          57
    # can read the volume of the purse from `blstats`.gold
    112,  # TextCharacters.DOLLAR         36
    # these might possibly be useful
    47,  # Command.JUMP                  234
    70,  # Command.RIDE                  210  // mount or dismount a saddled steed.
    71,  # Command.RUB                   242  // Rub a lamp or a touchstone
    53,  # Command.MONSTER               237  // When polymorphed, use a monster's special ability
    60,  # Command.PAY                   112  // Pay your shopping bill
}

A neat table with oct, hex and descriptions

In [None]:
# One row per action that is still in use: gym index, `Class.MEMBER`, octal
# code, hex code, the printable character (codes up to 32 are shown as `.`)
# and a description.
for j, (a, c, f, m) in info.items():
    if j not in unused_actions:
        # the game did not recognise the key: fall back to the cmdhelp text
        description = cmdhelp.get(chr(a)) if m.startswith("No such command") else m
        glyph = a if a > 32 else 46
        qualified = c + "." + f
        print(
            f"{j:>4d}  {qualified:28s}  {a:>03o}   {a:>02X}  {glyph:>2c}  {description}"
        )

<br>

Import level patching and options from `minihack`

In [None]:
import minihack
import minihack.envs.fightcorridor

from minihack.base import MiniHack, MH_NETHACKOPTIONS

from minihack.level_generator import LevelGenerator

# ASCII layout handed to the level generator (kept verbatim, including the
# leading and trailing newlines).
corridor_map = """
-----       ----------------------
|....#######.....................|
-----       ----------------------
"""

lvl_gen = LevelGenerator(map=corridor_map, lit=True)

# start rectangle spans (1, 1)..(3, 1); the goal is placed at (32, 1)
lvl_gen.set_start_rect((1, 1), (3, 1))
# lvl_gen.add_monster(name="giant rat", place=(30, 1))
# lvl_gen.add_monster(name="giant rat", place=(29, 1))
lvl_gen.add_goal_pos((32, 1))

<br>

Some observation preprocessing

In [None]:
from nle_toolbox.utils.env.defs import BLStats


def uint8_to_str(
    as_bytes=False, /, *, tty_chars, chars, message, inv_letters, inv_strs, **remaining
):
    """Reinterpret the textual `uint8` arrays as fixed-width strings.

    The trailing character axis of each text field is collapsed into a single
    string item, while the leading dims are preserved. Any other keyword
    arguments are passed through untouched, after the converted fields.

    Parameters
    ----------
    as_bytes : bool, positional-only
        If true, keep the fields as numpy byte strings; otherwise cast them
        to `str` (UCS4 encoding gives x4 mem blowup!).
    tty_chars, chars, message, inv_letters, inv_strs : array
        The `uint8` text fields of an observation.

    Returns
    -------
    dict
        The five converted fields followed by the `remaining` entries.
    """
    converted = {
        # `... x 24 x 80` fixed width string
        "tty_chars": tty_chars.view("S80").squeeze(-1),
        # `... x 256` zero-terminated string
        "message": message.view("S256").squeeze(-1),
        # `... x 21 x 79` fixed width string
        "chars": chars.view("S79").squeeze(-1),
        # `... x 55` list of single chars (at most 55 items)
        "inv_letters": inv_letters.view("c"),
        # `... x 55 x 80` list of zero-terminated strings (at most 80 chars
        #  per item and at most 55 items)
        "inv_strs": inv_strs.view("S80").squeeze(-1),
    }

    if not as_bytes:
        converted = {name: arr.astype(str) for name, arr in converted.items()}

    return {**converted, **remaining}

A renderer

In [None]:
from io import StringIO
from IPython.display import clear_output
from contextlib import redirect_stdout


def render(env, obs):
    """Redraw the notebook output with the current state of the game.

    The printed text consists of whatever `env.render("human")` writes to
    stdout, the inventory letters, the game time, the raw message and the
    glyphs in a window of up to 5x5 cells around the `(y, x)` position taken
    from `blstats`.

    Parameters
    ----------
    env : the environment to render.
    obs : the latest observation; the keys `blstats`, `inv_letters`,
        `message` and `glyphs` are read.

    Returns
    -------
    bool
        Always `True`, so that the call can be used inside a loop condition.
    """
    blstats = BLStats(*obs["blstats"])

    # capture the text that the env prints instead of letting it through
    with redirect_stdout(StringIO()) as f:
        env.render("human")

    screen = f.getvalue()
    screen += "\n" + (b"".join(obs["inv_letters"].view("c"))).decode()

    screen += f"\ntime: {blstats.time:04d}"
    screen += "\n" + str(obs["message"].view("S256")[0])

    # Clamp the window start at zero: a negative start index would wrap around
    # to the far end of the array and produce an EMPTY slice whenever the
    # position is within two cells of the top or the left edge.
    top, left = max(blstats.y - 2, 0), max(blstats.x - 2, 0)
    screen += "\n" + str(obs["glyphs"][top : blstats.y + 3, left : blstats.x + 3])

    clear_output(wait=False)
    print(screen)
    return True

Actors

In [None]:
class HumanActor:
    """A brain that delegates every decision to a human at the keyboard."""

    def override(self, obs):
        """Never request that the already scheduled actions be dropped."""
        return False

    # human control
    def reset(self, obs):
        """Nothing to reset: the actor keeps no state."""
        pass

    def step(self, obs):
        """Prompt for a line of input and yield the code point of each char.

        Backslash escape sequences in the typed text are interpreted first
        (via the `unicode-escape` codec), so control characters can be entered
        as escapes. The prompt is shown lazily, when the returned generator is
        first advanced.
        """
        # The local name must not shadow the `input` builtin. Otherwise the
        # prompt has to go through `__builtins__`, which is a CPython detail
        # and is a plain dict (no `.input`) in any module but `__main__`.
        typed = bytes(input("> "), "utf8")
        yield from map(ord, typed.decode("unicode-escape"))

Gather actions

In [None]:
import re

from collections import deque
from nle.nethack.actions import Command, MiscAction


class Skeleton:
    """Action scheduler that sits between the environment and a `brain`.

    The brain must provide `reset(obs)`, `override(obs)` and `step(obs)`;
    the latter returns an iterable of actions, which are queued and then
    played back one per call to `.step`.

    Parameters
    ----------
    brain : the decision maker.
    auto_more : bool
        Automatically dismiss `--More--` prompts seen in the tty.
    auto_yesman : bool
        Automatically answer `y` to messages that look like yes/no questions.
    """

    re_ynq = re.compile(rb".*\?\s+\[[ynaq\d]{2,}\]", re.I)  # detect YN questions

    def __init__(self, brain, *, auto_more=True, auto_yesman=False):
        self.brain = brain
        self.queue = deque()
        self.auto_more = auto_more
        self.auto_yesman = auto_yesman

    def reset(self, obs):
        """Reset the brain and drop every scheduled action."""
        self.brain.reset(obs)
        self.queue.clear()

    def step(self, obs):
        """Pick the next action to send to the environment."""
        # 1. automatic gui-related actions: skip partial info messages in
        #  the tty (`--More--`)
        # XXX `ESC` skips all messages, `ENTER` goes through them one-by-one
        if self.auto_more:
            if b"--More--" in bytes(obs["tty_chars"]):
                return Command.ESC  # MiscAction.MORE

        # 1.5 eternal internal `yes-man`: agree to every action we take
        if self.auto_yesman:
            if self.re_ynq.match(bytes(obs["message"])):
                return ord("y")

        # 2. the brain may cancel whatever has been scheduled so far
        if self.brain.override(obs):
            self.queue.clear()

        # closed loop control: out of scheduled actions, so the brain tells
        #  us what to do next
        if len(self.queue) == 0:
            self.queue.extend(self.brain.step(obs))

        # open loop policy: execute pre-scheduled actions, or just wait if
        #  the brain has given us nothing
        return self.queue.popleft() if self.queue else ord(".")

Seed

In [None]:
from nle_toolbox.utils.replay import Replay

# Wrap the challenge env in `Replay` and fix the seed.
# XXX alternatively, pass `options=MH_NETHACKOPTIONS` to `gym.make`
base_env = gym.make("NetHackChallenge-v0")
env = Replay(base_env)
# MiniHack._patch_nhdat(env.unwrapped, lvl_gen.get_des())
env.seed(123)

play
* `#pray\x0dy` -- pray to a deity and then die
* `#quit\015y` -- quit the game
* `DA\x0djlkh,,\x0d` -- drop all, walk a circle, pick up all

In [None]:
# Interactive episode: the human types keys, the skeleton schedules them.
bot = Skeleton(brain=HumanActor())

obs = env.reset()
rew, fin, info = 0.0, False, None
bot.reset(obs)

# base.py#L382 maps gym action numbers to chars; we need the inverse mapping
ctoa = {action: index for index, action in enumerate(env.unwrapped.actions)}
while True:
    # always redraw first, so that the final observation is shown as well
    if not (render(env, obs) and not fin):
        break

    try:
        obs, rew, fin, info = env.step(ctoa[bot.step(obs)])
    except KeyError:
        # a key with no corresponding gym action ends the session
        break

In [None]:
# NOTE(review): presumably a deliberate stopper, so that "Run All" halts here
#  and never reaches the cells below -- confirm before removing.
assert False

<br>

### A dangerous freezing combination

```python
# well, this is awkward...
env.reset()
env.step(42)
env.step(51)
```

<br>

# Trunk