# Cart-Poly Policy Training

A simple algorithm to optimize the policy function for the [CartPole](https://gymnasium.farama.org/environments/classic_control/cart_pole/) problem in gymnasium.


In [1]:
import gymnasium as gym

# Attach to the cart pole environment and see what it is
env = gym.make('CartPole-v1', render_mode='human')

In [2]:
# Create the base variables that are being observed
observation = []
info = {}
terminated = False
truncated = False
reward = 0
# These values will get updated in every step of the environment

In [3]:
# Grab the action space, which we expect to be discrete
action_space = env.action_space
print("Action space: ", action_space)
num_actions = action_space.n

# Grab the observation space, which we expect to be discrete
observation_space = env.observation_space
print("Observation space: ", observation_space)
num_observations = observation_space.shape[0]

print(f"There are {num_actions} actions and {num_observations} observations")

Action space:  Discrete(2)
Observation space:  Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)
There are 2 actions and 4 observations


In [4]:
# Create constants that will exist throughout the model
hidden_layer_size = 8


In [5]:
observation, info = env.reset(seed=42)

In [6]:
print(observation)
print(info)

[ 0.0273956  -0.00611216  0.03585979  0.0197368 ]
{}


In [7]:
# Setup inital values for running
terminated = False
truncated = False

In [8]:
# Render so we can see what is happening
# env.render() Doesn't need to be called if human is specified
# Make 1000 iterations on the environment to see how we do
for _ in range(1000):
    # Choose a random action from the action space
    action = env.action_space.sample()
    # Get back the observation, reward, and whether the episode is terminated
    observation, reward, terminated, truncated, info = env.step(action)
    print(observation, reward, terminated, truncated, info)

    if terminated or truncated:
        env.reset()
print('Resetting the environment...')
env.reset()
# print('Closing the environment...')
# env.close()


[ 0.02727336 -0.20172954  0.03625453  0.32351476] 1.0 False False {}
[ 0.02323877 -0.00714208  0.04272482  0.04248186] 1.0 False False {}
[ 0.02309593 -0.20284982  0.04357446  0.34833285] 1.0 False False {}
[ 0.01903893 -0.39856356  0.05054112  0.65443164] 1.0 False False {}
[ 0.01106766 -0.5943515   0.06362975  0.96259123] 1.0 False False {}
[-0.00081937 -0.40013957  0.08288158  0.6905575 ] 1.0 False False {}
[-0.00882216 -0.20625953  0.09669273  0.425075  ] 1.0 False False {}
[-0.01294735 -0.0126307   0.10519423  0.16437183] 1.0 False False {}
[-0.01319996  0.18084042  0.10848166 -0.09336053] 1.0 False False {}
[-0.00958315  0.37425378  0.10661445 -0.34994406] 1.0 False False {}
[-0.00209808  0.5677107   0.09961557 -0.6071965 ] 1.0 False False {}
[ 0.00925613  0.76130915  0.08747164 -0.8669159 ] 1.0 False False {}
[ 0.02448232  0.5651127   0.07013332 -0.54806256] 1.0 False False {}
[ 0.03578457  0.3690792   0.05917207 -0.23413287] 1.0 False False {}
[ 0.04316616  0.563308    0.054489

(array([ 0.02580877, -0.0145474 ,  0.0470698 ,  0.03931211], dtype=float32),
 {})

: 

In [None]:
import pygame

# Make sure the display closes
pygame.display.quit()