https://github.com/openai/gym/wiki/MountainCarContinuous-v0

In [1]:
import gym
from IPython.display import HTML
import numpy as np
from matplotlib.animation import ArtistAnimation
import matplotlib.pyplot as plt

# Create the MountainCarContinuous-v0 environment used by every cell below.
env = gym.make("MountainCarContinuous-v0")
# Trailing expression: the notebook echoes the (low, high) action bounds.
env.action_space.low, env.action_space.high

(array([-1.], dtype=float32), array([1.], dtype=float32))

### Naive policy: apply the maximum action (+1) at every step

In [2]:
# Number of control steps to simulate (also the length of the policy array).
N = 100
# Open-loop policy: one row per step, every action equal to +1.
naive_policy = np.ones((N, 1))

# Seed the environment with a fixed value before resetting it.
env.seed(2019)
env.reset()
frames = []
try:
    for action in naive_policy:
        # Render *before* stepping so that frames[t] shows the state at time t.
        frames.append(env.render(mode="rgb_array"))
        observation, reward, done, info = env.step(action)
        if done:
            break
    # One extra frame for the state reached after the last step taken.
    frames.append(env.render(mode="rgb_array"))
finally:
    # Close the environment even if render()/step() raised part-way through.
    env.close()

In [3]:
# Turn the recorded frames into an animation, one image + time label per frame.
fig = plt.figure(figsize=(9, 6))
ims = []
for t, frame in enumerate(frames):
    image = plt.imshow(frame, animated=True)
    label = plt.text(50, 50, "t={}".format(t), fontsize=14)
    # ArtistAnimation expects, for each frame, the list of artists to draw.
    ims.append([image, label])
ani = ArtistAnimation(fig, ims, interval=100)
# Close the current figure; the cell's output is the HTML video below.
plt.close()
HTML(ani.to_html5_video())

### Domain knowledge: apply -1 for the first 20 steps, then +1 for the rest

In [4]:
# Hand-crafted open-loop policy: action -1 for the first 20 steps, then +1
# for the remaining N - 20 steps. Float fill values are used so the actions
# are floating point rather than integers, in line with the float-valued
# action bounds of the environment.
policy = np.concatenate([np.full((20, 1), -1.0), np.full((N - 20, 1), 1.0)])

# Seed the environment with a fixed value before resetting it.
env.seed(2019)
env.reset()
frames = []
try:
    for action in policy:
        # Render *before* stepping so that frames[t] shows the state at time t.
        frames.append(env.render(mode="rgb_array"))
        observation, reward, done, info = env.step(action)
        if done:
            break
    # One extra frame for the state reached after the last step taken.
    frames.append(env.render(mode="rgb_array"))
finally:
    # Close the environment even if render()/step() raised part-way through.
    env.close()

In [5]:
# Turn the recorded frames into an animation, one image + time label per frame.
fig = plt.figure(figsize=(9, 6))
ims = []
for t, frame in enumerate(frames):
    image = plt.imshow(frame, animated=True)
    label = plt.text(50, 50, "t={}".format(t), fontsize=14)
    # ArtistAnimation expects, for each frame, the list of artists to draw.
    ims.append([image, label])
ani = ArtistAnimation(fig, ims, interval=100)
# Close the current figure; the cell's output is the HTML video below.
plt.close()
HTML(ani.to_html5_video())