# Import `gym` first

In [1]:
import gym

# To set up a Reinforcement Learning problem in `gym`, call `gym.make()` with the name of the problem
- Returns an environment.

In [2]:
# Build the environment from its name; `env` is reused by every later cell.
env = gym.make("CartPole-v0")

# To initialize the problem, call `env.reset()`
- Returns the initial observation of the Agent once the environment is initialized.
- 1st number: cart position (initialized to nearly the center)
- 2nd number: cart velocity (initialized to nearly zero; hardly moving)
- 3rd number: pole angle with the vertical (initialized to nearly vertical)
- 4th number: pole velocity at tip (initialized to nearly zero; hardly swinging)

In [3]:
# Initialize the environment and keep the initial observation that reset() returns.
observation = env.reset()
# print() shows the plain array text rather than the notebook's repr of the value.
print(observation)

[-0.01323823 -0.03179709  0.04452206  0.04445223]


# To query the general nature of the observation, inspect the `env.observation_space` attribute
- `Box(2,)` would look like `[0.1234567, 0.7654321]`
- `Box(n,)` means a sequence (`numpy` array) of `n` floating point numbers

In [6]:
# Attribute access (not a call): the cell displays the space's repr.
env.observation_space

Box(4,)

# To query the general nature of the Agent's actions, inspect the `env.action_space` attribute
- `Discrete` means that the Agent's actions can be represented by a variable that takes discrete values, as opposed to continuous ones.
- `Discrete(2)` means that the agent can take two discrete actions: `0` and `1`
- [Example] `Discrete(3)` means that the agent can take three actions: `0`, `1` and `2`.

In [4]:
# Attribute access (not a call): the cell displays the space's repr.
env.action_space

Discrete(2)

# To take an action, call `env.step()` with the action as argument

In [15]:
# step() returns a 4-tuple; keep only the first element (the new observation)
# and discard the other three.
observation, _, _, _ = env.step(0)
print(observation)

[-0.02366462 -0.42475741  0.06151576  0.69074498]


In [7]:
# Left as the cell's last expression so the whole return tuple is displayed.
env.step(1)

(array([-0.01842474, -0.03308054,  0.05242797,  0.07281859]), 1.0, False, {})

# `2` is an invalid action in this environment and leads to an error

In [8]:
# Action 2 is outside Discrete(2), so step() rejects it. Catch the expected
# AssertionError and print it, so the failure is still shown without aborting
# a "Restart & Run All" of the notebook.
try:
    env.step(2)
except AssertionError as err:
    print("AssertionError:", err)

AssertionError: 2 (<class 'int'>) invalid

# To get a random valid action sampled with equal probability, call `env.action_space.sample()`

In [13]:
# Randomly picks one of the valid actions, so the displayed value can differ between runs.
env.action_space.sample()

1

In [14]:
# Draw a random valid action first, then apply it; the step result is the
# cell's last expression and is therefore displayed.
random_action = env.action_space.sample()
env.step(random_action)

(array([-0.01908635, -0.22891339,  0.05388434,  0.381571  ]), 1.0, False, {})

# Taking multiple actions in a Python loop. Call `env.render()` after each action to redraw the display and visualize the dynamics in real time
- Use `time.sleep()` to get a slow motion version.

In [21]:
import time

# Start from a freshly initialized environment so the animation begins at the
# initial state.
observation = env.reset()
for _ in range(30):
    # Always apply action 0; keep the `done` flag so the loop can tell when
    # the episode has ended.
    observation, _, done, _ = env.step(0)
    env.render()
    time.sleep(0.1)  # pause between frames for a slow-motion view
    if done:
        # Calling step() after the episode has finished is undefined in Gym
        # (it logs a warning), so stop instead of stepping further.
        break

# To get a visual representation of the problem after setup, call `env.render()`

In [16]:
# Show the current state of the environment; the cell also displays the call's return value.
env.render()

True

# To close the visual representation, call `env.close()`

In [4]:
# Shut down the visual representation opened by env.render().
env.close()