# Cart-Pole: Explicit Policy #

## Imports ##

In [1]:
import gym
import numpy as np

## Environment ##

In [2]:
# Build the Cart-Pole environment that the episodes below run against
ENV_ID = 'CartPole-v1'
env = gym.make(ENV_ID)

## Policy ##
The main challenge in keeping episodes from terminating is keeping the angle of the pole as close to zero as possible. To achieve this, we push the cart in the same direction that the pole is leaning, which forces the bottom of the pole back under the top of the pole and returns it towards vertical. The exception to this rule is when the pole is already moving towards the vertical position while the cart is moving in the direction of the lean, in which case we push the cart against its direction of movement. This avoids accelerating the cart excessively, which would make future corrections of the pole angle more difficult.

In [3]:
# Discrete action codes handed to env.step()
PUSH_LEFT = 0
PUSH_RIGHT = 1


# Hand-written (non-learned) control rule
def policy(state):
    """Return the push direction for one observation.

    `state` is unpacked as (cart_position, cart_velocity, pole_angle,
    pole_velocity). The result is either PUSH_LEFT or PUSH_RIGHT.
    """
    _, cart_velocity, pole_angle, pole_velocity = state

    if pole_angle < 0.0:
        # Negative angle: push left to get back under the pole, unless the
        # pole is already swinging back (positive angular velocity) while
        # the cart is moving left -- then brake the cart instead.
        braking = cart_velocity < 0.0 and pole_velocity > 0.0
        return PUSH_RIGHT if braking else PUSH_LEFT

    # Mirror image of the case above for a zero or positive angle.
    braking = cart_velocity > 0.0 and pole_velocity < 0.0
    return PUSH_LEFT if braking else PUSH_RIGHT

## Testing ##
Run a number of episodes to demonstrate the effectiveness of the explicit policy. Print out the result of each episode and the average reward for all episodes.

In [4]:
NUM_EPISODES = 100
MAX_STEPS = 200

total_reward = 0

# Plain counting loops: the builtin range is enough, no NumPy array needed.
for episode in range(1, NUM_EPISODES + 1):

    # gym >= 0.26 / gymnasium return (observation, info) from reset();
    # older gym releases return the observation on its own.
    reset_result = env.reset()
    state = reset_result[0] if isinstance(reset_result, tuple) else reset_result
    episode_reward = 0
    steps = 0  # counted explicitly so it is defined even if MAX_STEPS is 0

    for _step in range(MAX_STEPS):
        # Determine the next action to take from the policy
        action = policy(state)

        # Take next step, accepting both step() return shapes
        step_result = env.step(action)
        if len(step_result) == 5:
            # Newer API: (observation, reward, terminated, truncated, info)
            state, reward, terminated, truncated, _info = step_result
            done = terminated or truncated
        else:
            # Legacy API: (observation, reward, done, info)
            state, reward, done, _info = step_result
        episode_reward += reward
        steps += 1

        if done:
            break

    print("Episode", episode, ": Terminated in", steps, "steps.")

    total_reward += episode_reward

print("Average reward =", total_reward / NUM_EPISODES)

Episode 1 : Terminated in 200 steps.
Episode 2 : Terminated in 170 steps.
Episode 3 : Terminated in 149 steps.
Episode 4 : Terminated in 200 steps.
Episode 5 : Terminated in 200 steps.
Episode 6 : Terminated in 200 steps.
Episode 7 : Terminated in 146 steps.
Episode 8 : Terminated in 200 steps.
Episode 9 : Terminated in 200 steps.
Episode 10 : Terminated in 200 steps.
Episode 11 : Terminated in 200 steps.
Episode 12 : Terminated in 200 steps.
Episode 13 : Terminated in 138 steps.
Episode 14 : Terminated in 200 steps.
Episode 15 : Terminated in 151 steps.
Episode 16 : Terminated in 200 steps.
Episode 17 : Terminated in 131 steps.
Episode 18 : Terminated in 177 steps.
Episode 19 : Terminated in 200 steps.
Episode 20 : Terminated in 200 steps.
Episode 21 : Terminated in 200 steps.
Episode 22 : Terminated in 200 steps.
Episode 23 : Terminated in 167 steps.
Episode 24 : Terminated in 178 steps.
Episode 25 : Terminated in 200 steps.
Episode 26 : Terminated in 182 steps.
Episode 27 : Terminat