<h1 style="text-align:center;"><span style="color:blue;">Reinforcement Learning with OpenAI Gym</span></h1><br />

<center><img src ="area-51.jpg" width="500" /></center>

- **A** - Action
- **R** - Reward
- **E** - Environment
- **A** - Agent

<img src="RL_illustration.png" />

- [Cart Pole Control Environment in OpenAI Gym - Introduction to OpenAI Gym](https://aleksandarhaber.com/cart-pole-control-environment-in-openai-gym-gymnasium-introduction-to-openai-gym/)


In [1]:
# Import the libraries {!pip install as needed}
import gym
import numpy as np
import time

## Create an Environment for our Cart Pole

[gymnasium.Env](https://gymnasium.farama.org/api/env/)

In [2]:
# Create the CartPole environment; render_mode='human' opens a window
# that displays the simulation.
env = gym.make(
    'CartPole-v1',
    render_mode='human',
)

# reset() returns (observation, info); keep the initial observation as `state`.
state, _ = env.reset()

### The States - what is happening in each frame

<img src="cart-states.png" />

#### Four States

1. x Position of the cart
2. &#7819; Velocity of the cart
3. &#952; Pole angle
4. &#952;&#775; Angular velocity of the pole (theta dot)

In [3]:
# Render the environment's current frame (this call alone does not step the simulation)
env.render()

In [4]:
# Push the cart in one direction (0 = left, 1 = right).
# step() returns the tuple (observation, reward, terminated, truncated, info).
env.step(0)


(array([-0.01478788, -0.20440379, -0.02050955,  0.24124734], dtype=float32),
 1.0,
 False,
 False,
 {})

In [5]:
# Observation space: a Box giving the lower/upper limits of the four state
# variables (cart position, cart velocity, pole angle, pole angular velocity)
env.observation_space

Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)

In [6]:
# Upper limit of each observation component
env.observation_space.high

array([4.8000002e+00, 3.4028235e+38, 4.1887903e-01, 3.4028235e+38],
      dtype=float32)

In [7]:
# Lower limit of each observation component
env.observation_space.low

array([-4.8000002e+00, -3.4028235e+38, -4.1887903e-01, -3.4028235e+38],
      dtype=float32)

In [8]:
# Action space: Discrete(2), i.e. the two actions 0 (push left) and 1 (push right)
env.action_space

Discrete(2)

In [9]:
# Full environment specification (id, entry point, reward threshold, step limit, kwargs, ...)
env.spec

EnvSpec(id='CartPole-v1', entry_point='gym.envs.classic_control.cartpole:CartPoleEnv', reward_threshold=475.0, nondeterministic=False, max_episode_steps=500, order_enforce=True, autoreset=False, disable_env_checker=False, apply_api_compatibility=False, kwargs={'render_mode': 'human'}, namespace=None, name='CartPole', version=1)

In [10]:
# Maximum number of steps per episode, as recorded in the environment spec
env.spec.max_episode_steps

500

In [11]:
# Reward threshold recorded in the environment spec
# (presumably the episode return at which the task counts as solved -- confirm in the gym docs)
env.spec.reward_threshold

475.0

In [12]:
# Settings for the random-action simulation below
episodeNumber = 10_000  # number of episodes to run
timeSteps = 100         # upper bound on steps taken within one episode

In [None]:
# Run random-action episodes, printing the episode index and then each step index.
# The try/finally guarantees the environment (and its render window) is closed
# even if the cell is interrupted before all episodes have finished.
try:
    for episodeIndex in range(episodeNumber):
        # reset() returns (observation, info); keep only the initial observation
        initial_state, _ = env.reset()
        print(episodeIndex)
        appendedObservations = []  # observations collected during the current episode
        for timeIndex in range(timeSteps):
            print(timeIndex)
            random_action = env.action_space.sample()
            observation, reward, terminated, truncated, info = env.step(random_action)
            appendedObservations.append(observation)
            time.sleep(0.01)  # slow the loop down so the rendering can be followed
            # The episode is over when it is terminated OR truncated (e.g. by the
            # step limit); the environment must be reset before stepping again.
            if terminated or truncated:
                time.sleep(0.1)  # brief pause between episodes
                break
finally:
    env.close()

0
0
1
2
3
4
5
6
7
8
9
10
1
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
2
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
3
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
4
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
5
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
6
0
1
2
3
4
5
6
7
8
9
10
7
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
8
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
9
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
10
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
11
0
1
2
3
4
5
6
7
8
9
10
12
0
1
2
3
4
5
6
7
8
9
10
13
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
14
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
15
0
1
2
3
4
5
6
7
8
9
10
11
16
0
1
2
3
4
5
6
7
8
9
10
11
12
17
0
1
2
3
4
5
6
7
8
9
10
11
12
13
18
0
1
2
3
4
5
6
7
8
9
10
11
19
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
1

In [None]:
# Close the environment explicitly; useful if the simulation cell above was
# interrupted before reaching its own env.close()
env.close()