__[nethack gym exploration](https://github.com/apowers313/roc/blob/master/experiments/2024.08.19-06.53.51-nethack-gym-exploration/2024.08.19-06.53.51-nethack-gym-exploration.ipynb)__

In [1]:
# Shell escape: print the current date/time so the recorded outputs can be dated.
!date

Mon Aug 19 06:54:13 PDT 2024


# Initialization

In [3]:
import gym
import nle

# Keep the environment id in a named constant so the task is easy to swap.
ENV_ID = "NetHackScore-v0"
# Build the environment via gym.make; `env` is used by every cell that follows.
env = gym.make(ENV_ID)

## Init Options

In [16]:
# IPython introspection: show the type, defining file and docstring of `env`.
?env

Type:           NetHackScore
String form:    <NetHackScore<NetHackScore-v0>>
File:           ~/Projects/roc/.venv/lib/python3.11/site-packages/nle/env/tasks.py
Docstring:     
Environment for "score" task.

The task is an augmentation of the standard NLE task. The return function is
defined as:
:math:`\text{score}_t - \text{score}_{t-1} + \text{TP}`,
where the :math:`\text{TP}` is a time penalty that grows with the amount of
environment steps that do not change the state (such as navigating menus).

Args:
    penalty_mode (str): name of the mode for calculating the time step
        penalty. Can be ``constant``, ``exp``, ``square``, ``linear``, or
        ``always``. Defaults to ``constant``.
    penalty_step (float): constant applied to amount of frozen steps.
        Defaults to -0.01.
    penalty_time (float): constant applied to amount of frozen steps.
        Defaults to -0.0.
Init docstring:
Constructs a new NLE environ

### TTY Recording

https://github.com/facebookresearch/nle/blob/862a439a84f52abca94d1f744d57061da12c5831/nle/env/base.py#L241

Init options that control TTY recording: `save_ttyrec_every`, `savedir`.

# Variables

In [14]:
# List every attribute name on the environment object, including dunder and
# underscore-prefixed private names (see output below).
dir(env)

['StepStatus',
 '__annotations__',
 '__class__',
 '__class_getitem__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__orig_bases__',
 '__parameters__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_allow_all_modes',
 '_allow_all_yn_questions',
 '_blstats_index',
 '_check_abort',
 '_close_nethack',
 '_episode',
 '_frozen_steps',
 '_get_observation',
 '_get_time_penalty',
 '_glyph_index',
 '_in_moveloop',
 '_internal_index',
 '_is_episode_end',
 '_is_protocol',
 '_max_episode_steps',
 '_message_index',
 '_np_random',
 '_observation_keys',
 '_original_indices',
 '_original_observation_keys',
 '_perform_known_steps',
 '_program_state_index',
 '_quit_game',
 '_random',

In [15]:
# List every attribute name on the object stored at `env.nethack`; its public
# names include reset/step/close and the seed getters/setters (see output below).
dir(env.nethack)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_copy',
 '_dl',
 '_finalizer',
 '_instances',
 '_nethackoptions',
 '_obs',
 '_obs_buffers',
 '_pynethack',
 '_step_return',
 '_tempdir',
 '_ttyrec',
 '_vardir',
 '_wizard',
 'close',
 'dlpath',
 'get_current_seeds',
 'how_done',
 'in_normal_game',
 'options',
 'reset',
 'set_current_seeds',
 'set_initial_seeds',
 'step']

## Actions

In [5]:
# Action tuple exposed by this environment: 23 entries, so valid action
# indices for env.step() are presumably 0-22 (position in this tuple).
env.actions

(<MiscAction.MORE: 13>,
 <CompassDirection.N: 107>,
 <CompassDirection.E: 108>,
 <CompassDirection.S: 106>,
 <CompassDirection.W: 104>,
 <CompassDirection.NE: 117>,
 <CompassDirection.SE: 110>,
 <CompassDirection.SW: 98>,
 <CompassDirection.NW: 121>,
 <CompassDirectionLonger.N: 75>,
 <CompassDirectionLonger.E: 76>,
 <CompassDirectionLonger.S: 74>,
 <CompassDirectionLonger.W: 72>,
 <CompassDirectionLonger.NE: 85>,
 <CompassDirectionLonger.SE: 78>,
 <CompassDirectionLonger.SW: 66>,
 <CompassDirectionLonger.NW: 89>,
 <MiscDirection.UP: 60>,
 <MiscDirection.DOWN: 62>,
 <MiscDirection.WAIT: 46>,
 <Command.KICK: 4>,
 <Command.EAT: 101>,
 <Command.SEARCH: 115>)

In [4]:
# https://github.com/facebookresearch/nle/blob/main/nle/env/base.py#L332
# Prints one "<action index> <key code>" pair per line; the codes match the
# enum values shown by env.actions (e.g. index 1 -> 107, CompassDirection.N).
env.print_action_meanings()

0 13
1 107
2 108
3 106
4 104
5 117
6 110
7 98
8 121
9 75
10 76
11 74
12 72
13 85
14 78
15 66
16 89
17 60
18 62
19 46
20 4
21 101
22 115


In [17]:
# Action tuple defined at the nle.nethack level. Compared with env.actions it
# is ordered differently (MiscAction.MORE follows the movement actions here,
# but comes first in env.actions) and includes commands absent from
# env.actions, e.g. Command.EXTCMD and Command.APPLY.
nle.nethack.ACTIONS

(<CompassDirection.N: 107>,
 <CompassDirection.E: 108>,
 <CompassDirection.S: 106>,
 <CompassDirection.W: 104>,
 <CompassDirection.NE: 117>,
 <CompassDirection.SE: 110>,
 <CompassDirection.SW: 98>,
 <CompassDirection.NW: 121>,
 <CompassDirectionLonger.N: 75>,
 <CompassDirectionLonger.E: 76>,
 <CompassDirectionLonger.S: 74>,
 <CompassDirectionLonger.W: 72>,
 <CompassDirectionLonger.NE: 85>,
 <CompassDirectionLonger.SE: 78>,
 <CompassDirectionLonger.SW: 66>,
 <CompassDirectionLonger.NW: 89>,
 <MiscDirection.UP: 60>,
 <MiscDirection.DOWN: 62>,
 <MiscDirection.WAIT: 46>,
 <MiscAction.MORE: 13>,
 <Command.EXTCMD: 35>,
 <Command.EXTLIST: 191>,
 <Command.ADJUST: 225>,
 <Command.ANNOTATE: 193>,
 <Command.APPLY: 97>,
 <Command.ATTRIBUTES: 24>,
 <Command.AUTOPICKUP: 64>,
 <Command.CALL: 67>,
 <Command.CAST: 90>,
 <Command.CHAT: 227>,
 <Command.CLOSE: 99>,
 <Command.CONDUCT: 195>,
 <Command.DIP: 228>,
 <Command.DROP: 100>,
 <Command.DROPTYPE: 68>,
 <Command.EAT: 101>,
 <Command.ENGRAVE: 69>,
 <Co

## Observations

In [22]:
# Bind the observation space once, then inspect it three ways.
obs_space = env.observation_space
print(obs_space.keys())    # names of all observation components
print(obs_space["chars"])  # the Box describing the `chars` component
obs_space                  # cell result: repr of the full Dict space

KeysView(Dict(blstats:Box(-2147483648, 2147483647, (27,), int64), chars:Box(0, 255, (21, 79), uint8), colors:Box(0, 15, (21, 79), uint8), glyphs:Box(0, 5976, (21, 79), int16), inv_glyphs:Box(0, 5976, (55,), int16), inv_letters:Box(0, 127, (55,), uint8), inv_oclasses:Box(0, 18, (55,), uint8), inv_strs:Box(0, 255, (55, 80), uint8), message:Box(0, 255, (256,), uint8), screen_descriptions:Box(0, 127, (21, 79, 80), uint8), specials:Box(0, 255, (21, 79), uint8), tty_chars:Box(0, 255, (24, 80), uint8), tty_colors:Box(0, 31, (24, 80), int8), tty_cursor:Box(0, 255, (2,), uint8)))
Box(0, 255, (21, 79), uint8)


Dict(blstats:Box(-2147483648, 2147483647, (27,), int64), chars:Box(0, 255, (21, 79), uint8), colors:Box(0, 15, (21, 79), uint8), glyphs:Box(0, 5976, (21, 79), int16), inv_glyphs:Box(0, 5976, (55,), int16), inv_letters:Box(0, 127, (55,), uint8), inv_oclasses:Box(0, 18, (55,), uint8), inv_strs:Box(0, 255, (55, 80), uint8), message:Box(0, 255, (256,), uint8), screen_descriptions:Box(0, 127, (21, 79, 80), uint8), specials:Box(0, 255, (21, 79), uint8), tty_chars:Box(0, 255, (24, 80), uint8), tty_colors:Box(0, 31, (24, 80), int8), tty_cursor:Box(0, 255, (2,), uint8))

## Character

In [7]:
# Character spec string of this environment. The output is 'mon-hum-neu-mal',
# presumably role-race-alignment-gender (monk, human, neutral, male) -- TODO confirm.
env.character

'mon-hum-neu-mal'

# Running

In [23]:
# Start an episode, take a single step, then draw the resulting screen.
env.reset()
# Action index 1 is CompassDirection.N (key code 107) per env.actions above.
env.step(1)
env.render()


[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m 
[0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30m [0;30