<h1 style="text-align:center;"><span style="color:blue;">Reinforcement Learning with OpenAI Gym</span></h1><br />

<center><img src ="area-51.jpg" width="500" /></center>

- **A** - Action
- **R** - Reward
- **E** - Environment
- **A** - Agent

<img src="RL_illustration.png" />

- [Cart Pole Control Environment in OpenAI Gym - Introduction to OpenAI Gym](https://aleksandarhaber.com/cart-pole-control-environment-in-openai-gym-gymnasium-introduction-to-openai-gym/)


In [36]:
# Import the libraries {!pip install as needed}
import gym
import numpy as np
import time

## Create an Environment for our Cart Pole

[gymnasium.Env](https://gymnasium.farama.org/api/env/)

In [37]:
# Build the CartPole-v1 environment; render_mode='human' asks gym to
# display the simulation on screen.
env = gym.make('CartPole-v1', render_mode='human')

# reset() hands back a pair; only the first element (the initial state) is kept.
state, _ = env.reset()

### The States - what is happening in each frame

<img src="cart-states.png" />

#### Four States

1. x Position of the cart
2. &#7819; Velocity of the cart
3. &#952; Pole angle
4. &#952;&#775; Angular velocity of the pole (theta dot)

In [38]:
# Render the current frame of the environment (no simulation step is taken here)
env.render()

In [39]:
# Push the cart in one direction (0 = left).
# step() returns a 5-tuple: (observation, reward, terminated, truncated, info).
env.step(0)


(array([-0.00814035, -0.24315004, -0.0466187 ,  0.285461  ], dtype=float32),
 1.0,
 False,
 False,
 {})

In [40]:
# Observation space: lower/upper bounds, shape and dtype of the 4-element state vector
env.observation_space

Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)

In [41]:
# Upper limit of each of the four state variables
env.observation_space.high

array([4.8000002e+00, 3.4028235e+38, 4.1887903e-01, 3.4028235e+38],
      dtype=float32)

In [42]:
# Lower limit of each of the four state variables
env.observation_space.low

array([-4.8000002e+00, -3.4028235e+38, -4.1887903e-01, -3.4028235e+38],
      dtype=float32)

In [43]:
# Action space: the set of actions the agent may pass to env.step()
env.action_space

Discrete(2)

In [44]:
# All specifications the environment was registered with (id, step limit, reward threshold, ...)
env.spec

EnvSpec(id='CartPole-v1', entry_point='gym.envs.classic_control.cartpole:CartPoleEnv', reward_threshold=475.0, nondeterministic=False, max_episode_steps=500, order_enforce=True, autoreset=False, disable_env_checker=False, apply_api_compatibility=False, kwargs={'render_mode': 'human'}, namespace=None, name='CartPole', version=1)

In [45]:
# Maximum number of steps per episode, as given by the environment spec
env.spec.max_episode_steps

500

In [46]:
# Reward threshold per episode, as given by the environment spec
env.spec.reward_threshold

475.0

In [47]:
# Simulation parameters
episodeNumber = 10_000  # how many episodes to play
timeSteps = 100         # upper bound on the number of steps taken in one episode

In [48]:
# Run the simulation: play `episodeNumber` episodes, each consisting of at
# most `timeSteps` randomly sampled actions. The episode index and every step
# index are printed as the run progresses.
try:
    for episodeIndex in range(episodeNumber):
        # reset() returns an (observation, info) pair; keep only the observation.
        initial_state, _ = env.reset()
        print(episodeIndex)
        # Observations collected during the current episode (re-created each episode).
        appendedObservations = []
        for timeIndex in range(timeSteps):
            print(timeIndex)
            random_action = env.action_space.sample()
            observation, reward, terminated, truncated, info = env.step(random_action)
            appendedObservations.append(observation)
            # Short pause between steps so the rendered animation can be followed.
            time.sleep(0.01)
            # The episode is over when the environment reports a terminal state
            # (terminated) or cuts the episode short (truncated); stepping
            # further without a reset is not meaningful in either case.
            if terminated or truncated:
                time.sleep(0.1)
                break
finally:
    # Always release the environment, even if the run is interrupted
    # (e.g. by a KeyboardInterrupt) before all episodes have finished.
    env.close()

0
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
1
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
2
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
3
0
1
2
3
4
5
6
7
8
9
10
11
12
4
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
5
0
1
2
3
4
5
6
7
8
9
10
11
12
6
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
7
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
8
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
9
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
10
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
11
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
12
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27


136
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
137
0
1
2
3
4
5
6
7
8
9
10
11
138
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
139
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
140
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
141
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
142
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
143
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
144
0
1
2
3
4
5
6
7
8
9
10
11
145
0
1
2
3
4
5
6
7
8
9
10
11
146
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
147
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
148
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
149
0
1
2
3
4
5
6
7
8
9
10
11
12
150
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
151
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
152
0
1
2
3
4
5
6
7
8
9
10
11
153
0
1
2
3
4
5
6
7
8
9
10
11
12
154
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
155


10
11
285
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
286
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
287
0
1
2
3
4
5
6
7
8
9
10
11
12
288
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
289
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
290
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
291
0
1
2
3
4
5
6
7
8
9
10
292
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
293
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
294
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
295
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
296
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
297
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
298
0
1
2
3
4
5
6
7
8
9
10
299
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
300
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
301
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
1

11
12
13
14
15
16
17
18
19
425
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
426
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
427
0
1
2
3
4
5
6
7
8
9
10
11
12
13
428
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
429
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
430
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
431
0
1
2
3
4
5
6
7
8
9
432
0
1
2
3
4
5
6
7
8
9
10
11
433
0
1
2
3
4
5
6
7
8
9
10
11
12
13
434
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
435
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
436
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
437
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
438
0
1
2
3
4
5
6
7
8
9
10
11
12
439
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
440
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
2

568
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
569
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
570
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
571
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
572
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
573
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
574
0
1
2
3
4
5
6
7
8
9
10
11
575
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
576
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
577
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
578
0
1
2
3
4
5
6
7
8
9
10
11
12
579
0
1
2
3
4
5
6
7
8
9
10
11
12
1

10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
693
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
694
0
1
2
3
4
5
6
7
8
9
10
11
12
695
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
696
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
697
0
1
2
3
4
5
6
7
8
9
10
11
12
698
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
699
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
700
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
701
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
702
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
703
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
704
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
705
0
1
2
3
4
5
6
7
8
9
10
11
12
13
706
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
707
0
1
2
3
4
5


21
22
23
24
25
26
27
28
29
30
31
32
33
34
832
0
1
2
3
4
5
6
7
8
9
833
0
1
2
3
4
5
6
7
8
9
10
11
12
834
0
1
2
3
4
5
6
7
8
9
10
11
12
835
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
836
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
837
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
838
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
839
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
840
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
841
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
842
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
843
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
844
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
845
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
846
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
847
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
8

KeyboardInterrupt: 

In [None]:
# Make sure the environment is closed; the run above ended with a KeyboardInterrupt.
env.close()