# Solve MountainCarContinuous-v0 using a Fixed Deterministic Policy

This notebook solves 'MountainCarContinuous-v0' using a fixed deterministic policy.

### Policy

The agent pushes right (action $= +1$) when

$\text{position} > -4 \times \text{velocity}$ or $\text{position} < 13 \times \text{velocity} - 0.6$,

otherwise it pushes left (action $= -1$).

In [1]:
import itertools
import numpy as np
import gym
# Seed NumPy's global random state so the run is repeatable.
np.random.seed(0)
# Create the continuous-action Mountain Car environment used by every cell below.
env = gym.make('MountainCarContinuous-v0')
# Seed the environment as well; this call is the cell's last expression,
# so its return value is what the notebook displays as the cell output.
env.seed(0)

[0]

In [2]:
class Agent:
    """Fixed, hand-written policy that maps an observation to a push force."""

    def decide(self, observation):
        """Choose the force to apply for the given observation.

        Args:
            observation: a two-element sequence unpacked as
                ``(position, velocity)``.

        Returns:
            A one-element NumPy array holding ``1.`` (push right) when
            ``position > -4 * velocity`` or
            ``position < 13 * velocity - 0.6``, and ``-1.`` (push left)
            otherwise.
        """
        position, velocity = observation
        # The two linear boundaries in the (position, velocity) plane
        # that select the "push right" region.
        push_right = (position > -4 * velocity
                      or position < 13 * velocity - 0.6)
        return np.array([1. if push_right else -1.])


# Single shared policy instance used by the evaluation cells.
agent = Agent()

In [3]:
def play_once(env, agent, render=False, verbose=False):
    """Run a single episode and return the accumulated reward.

    Args:
        env: environment object providing ``reset()``, ``render()`` and
            ``step(action)``; ``step`` must return a 4-tuple
            ``(observation, reward, done, info)``.
        agent: object providing ``decide(observation)`` that returns the
            action to pass to ``env.step``.
        render: when true, call ``env.render()`` before every step.
        verbose: when true, print the episode reward and the step count
            once the episode ends.

    Returns:
        The sum of the rewards received during the episode.
    """
    state = env.reset()
    total_reward = 0.
    steps_taken = 0
    finished = False
    while not finished:
        if render:
            env.render()
        state, reward, finished, _ = env.step(agent.decide(state))
        total_reward += reward
        steps_taken += 1
    if verbose:
        print('get {} rewards in {} steps'.format(
                total_reward, steps_taken))
    return total_reward

Test 100 episodes

In [4]:
# Roll out the fixed policy for a small batch of episodes and report the mean return.
n_episodes = 100
episode_rewards = [play_once(env=env, agent=agent) for _ in range(n_episodes)]
print('average episode rewards = {}'.format(np.mean(episode_rewards)))

average episode rewards = 93.35499999999998


Test 100000 episodes

In [5]:
# Repeat the evaluation over many more episodes for a tighter estimate of the mean return.
n_episodes = 100000
episode_rewards = [play_once(env=env, agent=agent) for _ in range(n_episodes)]
print('average episode rewards = {}'.format(np.mean(episode_rewards)))

average episode rewards = 93.357326


In [6]:
# Close the environment now that all evaluation runs are finished.
env.close()