# Mountain Car: Explicit Policy #

## Imports ##

In [1]:
import gym
import numpy as np

## Environment ##

In [None]:
# Create the Cart-Pole game environment
env = gym.make('MountainCar-v0')

## Policy ##
In order for the car to reach the top of the mountain, it must build up its energy. This can be achieved by always applying force in the direction of the car's movement. If the car is moving right, that is, the velocity is positive, we push the car to the right, and if the car is moving left, i.e. velocity is negative, we push the car to the left.

In [40]:
# Actions
PUSH_LEFT = 0
PUSH_RIGHT = 2
NO_PUSH = 1

# Explicit Policy: Always accelerate in the direction that the cart is moving.
def policy(state):
    _, velocity = state
    
    # Choose next action
    if velocity >= 0.0:
        action = PUSH_RIGHT
    else:
        action = PUSH_LEFT
    
    return action

## Testing ##
Run a number of episodes to demonstrate the effectiveness of the explicit policy. Print out the result of each episode.

In [41]:
NUM_EPISODES = 20
MAX_STEPS = 200

for i in np.arange(NUM_EPISODES):
    
    state = env.reset()
    
    for j in np.arange(MAX_STEPS):
        # Determine the next action to take from the policy
        action = policy(state)
        
        # Take next step
        state, reward, done, _ = env.step(action)
        
        if done:
            pos = state[0]
            if pos >= 0.5:
                print("Episode", i, ": Succeeded in", j, "steps. Position =", pos)
            else:
                print("Episode", i, ": Failed. Position =", pos)
            break


Episode 0 : Succeeded in 113 steps. Position = 0.5368577983788596
Episode 1 : Succeeded in 120 steps. Position = 0.5160596822401516
Episode 2 : Succeeded in 113 steps. Position = 0.5368577983788596
Episode 3 : Succeeded in 120 steps. Position = 0.5008020508729808
Episode 4 : Succeeded in 115 steps. Position = 0.5368577983788596
Episode 5 : Succeeded in 121 steps. Position = 0.5053060975802396
Episode 6 : Succeeded in 121 steps. Position = 0.5226358665010891
Episode 7 : Succeeded in 112 steps. Position = 0.5368577983788596
Episode 8 : Succeeded in 121 steps. Position = 0.5305009738823626
Episode 9 : Succeeded in 116 steps. Position = 0.5368577983788596
Episode 10 : Succeeded in 119 steps. Position = 0.5368577983788596
Episode 11 : Succeeded in 120 steps. Position = 0.5127520017739965
Episode 12 : Succeeded in 113 steps. Position = 0.5368577983788596
Episode 13 : Succeeded in 112 steps. Position = 0.5368577983788596
Episode 14 : Succeeded in 113 steps. Position = 0.5368577983788596
Episo