# Rendering OpenAI Gym Envs on Binder and Google Colab

> Adapted from
> https://towardsdatascience.com/rendering-openai-gym-envs-on-binder-and-google-colab-536f99391cc7
> by [@davidrpugh](https://github.com/davidrpugh).

In [None]:
import typing

import gym
import IPython.display as display
import matplotlib.pyplot as plt
import numpy as np
import pyvirtualdisplay

## Create the virtual display

In [None]:
# Start an invisible (visible=False) 1400x900 virtual display. The handle is kept in
# `virtual_display` so it can be shut down later with `virtual_display.stop()` rather
# than being discarded right after start-up.
virtual_display = pyvirtualdisplay.Display(visible=False, size=(1400, 900))
virtual_display.start()

## Represent states as NumPy `array`s and actions as `int`s

In [None]:
# Type aliases used by the annotations in the cells below:
# a state is a NumPy array and an action is a plain int.
State = np.ndarray
Action = int

## An agent is just a function!

In [None]:
# An agent is any callable that takes a State and returns an Action.
Agent = typing.Callable[[State], Action]

In [None]:
def uniform_random_policy(
    state: State, number_actions: int, random_state: np.random.RandomState
) -> Action:
    """Draw one of ``number_actions`` actions, each with equal probability.

    ``state`` is part of the signature but is not consulted; the draw is made
    with ``random_state`` over the action indices ``0 .. number_actions - 1``.
    """
    candidates = np.arange(number_actions)
    # Equal weight for every candidate action.
    weights = np.ones(number_actions)
    weights /= number_actions
    return random_state.choice(candidates, p=weights)

In [None]:
def make_random_agent(
    number_actions: int, random_state: np.random.RandomState = None
) -> Agent:
    """Build an Agent that chooses uniformly among ``number_actions`` actions.

    When ``random_state`` is None a new ``np.random.RandomState()`` is created.
    Either way, the same generator is reused on every call of the returned agent.
    """
    if random_state is None:
        rng = np.random.RandomState()
    else:
        rng = random_state

    def agent(state: State) -> Action:
        return uniform_random_policy(state, number_actions, rng)

    return agent

In [None]:
def simulate(agent: Agent, env: gym.Env, ax: plt.Axes = None) -> None:
    """Run one episode of ``agent`` in ``env``, redrawing each frame in place.

    Parameters
    ----------
    agent
        Callable mapping the current state to the next action.
    env
        Environment to run. It is reset at the start and closed when this
        function exits, including when the episode is interrupted or raises.
    ax
        Axes to draw the frames on. When None, new axes are created with
        ``plt.axes()``.
    """
    try:
        state = env.reset()
        if ax is None:
            ax = plt.axes()
        # Display the figure that owns `ax`; a caller-supplied axes need not
        # belong to the current pyplot figure.
        fig = ax.figure
        img = ax.imshow(env.render(mode="rgb_array"))
        ax.axis("off")  # does not change between frames, so set it once
        done = False
        while not done:
            action = agent(state)
            # The frame is rendered before stepping, so it shows `state`.
            img.set_data(env.render(mode="rgb_array"))
            display.display(fig)
            # wait=True: the old frame is cleared only when the next one arrives.
            display.clear_output(wait=True)
            state, _reward, done, _ = env.step(action)
    finally:
        env.close()

## Create the Gym environment

In [None]:
# Create the LunarLander-v2 environment and seed it with a fixed value (42),
# presumably so that episodes are repeatable from run to run.
lunar_lander_v2 = gym.make("LunarLander-v2")
lunar_lander_v2.seed(42)

## Create an agent

In [None]:
# The number of actions is read from the environment's action space. The agent gets
# its own seeded generator so the sampled actions are repeatable across runs; with
# an unseeded generator the episode differs on every run even though the
# environment itself is seeded.
random_agent = make_random_agent(
    lunar_lander_v2.action_space.n, random_state=np.random.RandomState(42)
)

## Simulate the agent interacting with the environment

In [None]:
# Run a single episode with the random agent; frames are drawn inline as it plays.
simulate(agent=random_agent, env=lunar_lander_v2)