# Import `gym` first

In [1]:
import gym

# To set up a Reinforcement Learning problem in `gym`, call `gym.make()` with the name of the problem
- Returns an environment.

In [2]:
# Build the "CartPole-v0" environment by name; the resulting `env` object is
# the handle the following cells call reset/step/render/close on.
env = gym.make("CartPole-v0")

# To initialize the problem, call `env.reset()`
- Returns the initial observation of the Agent once the environment is initialized.
- 1st number: cart position (initialized to nearly the center)
- 2nd number: cart velocity (initialized to nearly zero; hardly moving)
- 3rd number: pole angle with the vertical (initialized to nearly vertical)
- 4th number: pole velocity at tip (initialized to nearly zero; hardly swinging)

In [3]:
# Reset the environment and keep the initial observation it returns.
observation = env.reset()
# Show the starting observation values.
print(observation)

[0.00870804 0.04742115 0.02188616 0.00178741]


# To take an action, call `env.step()` with the action as argument
- The first element returned by the `env.step()` function is the new environment state.
- The second element returned by `env.step()` is called the reward. The reward is the environment's judgement of the state and the action taken at that time step.

In [4]:
# Apply action 0 once. Of the four values env.step() hands back, only the
# first two (new observation, reward) are kept; the rest are discarded.
step_outcome = env.step(0)
observation, reward, _, _ = step_outcome

# Build both report lines first, then emit them together.
report_lines = [
    "observation is {}".format(observation),
    "reward is {}".format(reward),
]
print("\n".join(report_lines))

observation is [ 0.00965647 -0.14800773  0.02192191  0.30129455]
reward is 1.0


# The learning goal (balancing the pole without crashing) is equivalent to maximizing the reward function over time

# Taking multiple actions in a Python loop. Call `env.render()` after each action to draw the updated environment state and visualize the dynamics in real time
- Use `time.sleep()` to get a slow motion version.
- Check that reward is `0` when pole angle is greater than 12 degrees.

In [5]:
from math import degrees, pi  # `pi` stays imported in case other cells rely on it
import time

NUM_STEPS = 30             # number of actions taken in this demo
ACTION = 0                 # the same action is applied on every step
FRAME_DELAY_SECONDS = 0.1  # pause after each frame for a slow-motion view

# Start a fresh episode, then repeatedly apply ACTION while rendering each frame.
observation = env.reset()
for _ in range(NUM_STEPS):
    # The pole angle (third observation entry, in radians) is read BEFORE the
    # step, so each printed angle is the state the action was applied to,
    # shown next to the reward returned for that action.
    pole_angle_in_radians = observation[2]
    pole_angle_in_degrees = degrees(pole_angle_in_radians)
    # Only the observation and reward are kept; the other two values returned
    # by env.step() are discarded, so the loop always runs all NUM_STEPS steps.
    observation, reward, _, _ = env.step(ACTION)
    print(pole_angle_in_degrees, reward)
    env.render()
    time.sleep(FRAME_DELAY_SECONDS)

-0.9133875525328697 1.0
-0.8808171123059392 1.0
-0.5186689201043438 1.0
0.17326848975626852 1.0
1.197312747080096 1.0
2.557834489342066 1.0
4.261167382640635 1.0
6.315474910404569 1.0
8.730564390075116 1.0
11.517636185214313 1.0
14.68895474570218 0.0




18.25742873307078 0.0
22.236091135214007 0.0
26.63747810638965 0.0
31.472918137193552 0.0
36.75176089290628 0.0
42.48059550909299 0.0
48.66252634536548 0.0
55.29658240747022 0.0
62.377325950823874 0.0
69.89468979503067 0.0
77.8340123689323 0.0
86.1761654132462 0.0
94.89760163824336 0.0
103.97011236952248 0.0
113.3601002312336 0.0
123.02725711559171 0.0
132.92270843671957 0.0
142.98694976548154 0.0
153.14823921091192 0.0


# To close the visual representation, call `env.close()`

In [6]:
# Close the environment's visual representation (the window drawn by env.render()).
env.close()