# Use a Closed-Form Policy to Play LunarLander-v2

In [1]:
import sys
import logging
import itertools

import numpy as np
np.random.seed(0)
import gym

# Configure the root logger: emit INFO-and-above records to stdout, each
# prefixed with an HH:MM:SS timestamp and the level name.
logging.basicConfig(level=logging.INFO,
        format='%(asctime)s [%(levelname)s] %(message)s',
        stream=sys.stdout, datefmt='%H:%M:%S')

In [2]:
# Create the environment, then log every instance attribute of the
# environment object and of its spec so the configuration is on record.
env = gym.make('LunarLander-v2')
# Iterate key/value pairs directly rather than calling vars() again for
# each key just to look the value back up.
for key, value in vars(env).items():
    logging.info('%s: %s', key, value)
for key, value in vars(env.spec).items():
    logging.info('%s: %s', key, value)

00:00:00 [INFO] env: <LunarLander<LunarLander-v2>>
00:00:00 [INFO] action_space: Discrete(4)
00:00:00 [INFO] observation_space: Box(-inf, inf, (8,), float32)
00:00:00 [INFO] reward_range: (-inf, inf)
00:00:00 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}
00:00:00 [INFO] _max_episode_steps: 1000
00:00:00 [INFO] _elapsed_steps: None
00:00:00 [INFO] id: LunarLander-v2
00:00:00 [INFO] entry_point: gym.envs.box2d:LunarLander
00:00:00 [INFO] reward_threshold: 200
00:00:00 [INFO] nondeterministic: False
00:00:00 [INFO] max_episode_steps: 1000
00:00:00 [INFO] _kwargs: {}
00:00:00 [INFO] _env_name: LunarLander


In [3]:
class ClosedFormAgent:
    """Rule-based agent that maps an 8-element observation to an action.

    The agent holds no state: the constructor ignores its argument, and
    ``reset`` and ``close`` do nothing.
    """

    def __init__(self, _):
        pass

    def reset(self, mode=None):
        """Do nothing; present so callers can reset the agent per episode."""
        pass

    def step(self, observation, reward, terminated):
        """Return the action (0, 1, 2 or 3) chosen for ``observation``.

        ``observation`` is unpacked into eight values: x, y, v_x, v_y,
        angle, v_angle and two leg-contact flags. ``reward`` and
        ``terminated`` are accepted but not used.
        """
        (x, y, v_x, v_y, angle, v_angle,
         contact_left, contact_right) = observation

        on_ground = contact_left or contact_right
        if on_ground:
            # A leg is touching: only react to vertical speed, no rotation.
            lift = -10. * v_y - 1.
            turn = 0.
        else:
            # In flight: demand more lift when far off-centre, low, or
            # descending; steer using a clipped position/velocity term
            # plus angle and angular-velocity terms.
            lift = 5.5 * np.abs(x) - 10. * y - 10. * v_y - 1.
            turn = (-np.clip(5. * x + 10. * v_x, -4, 4)
                    + 10. * angle
                    + 20. * v_angle)

        turn_size = np.abs(turn)
        if turn_size <= 1 and lift <= 0:
            return 0  # do nothing
        if turn_size < lift:
            return 2  # main engine
        if turn < 0.:
            return 1  # left engine
        return 3  # right engine

    def close(self):
        """Do nothing; present so callers can close the agent."""
        pass


# Instantiate the agent; its constructor accepts one argument and ignores it.
agent = ClosedFormAgent(env)

In [4]:
def play_episode(env, agent, seed=None, mode=None, render=False):
    """Run one episode of ``agent`` in ``env`` and report the outcome.

    Args:
        env: Environment whose ``reset(seed=...)`` returns a 2-tuple and
            whose ``step(action)`` returns a 5-tuple of observation,
            reward, terminated, truncated and one further value.
        agent: Object providing ``reset(mode=...)``,
            ``step(observation, reward, terminated)`` and ``close()``.
        seed: Passed through to ``env.reset``.
        mode: Passed through to ``agent.reset``.
        render: When true, ``env.render()`` is called once per loop pass.

    Returns:
        A tuple ``(total_reward, n_steps)``: the summed reward and the
        number of ``env.step`` calls made.
    """
    obs, _ = env.reset(seed=seed)
    last_reward = 0.
    done = False
    cut_short = False
    agent.reset(mode=mode)

    total_reward = 0.
    n_steps = 0
    finished = False
    while not finished:
        # The agent is queried before the termination check, so it also
        # sees the final observation, reward and terminated flag.
        action = agent.step(obs, last_reward, done)
        if render:
            env.render()
        finished = done or cut_short
        if not finished:
            obs, last_reward, done, cut_short, _ = env.step(action)
            total_reward += last_reward
            n_steps += 1

    agent.close()
    return total_reward, n_steps


# Evaluate the agent: play 100 episodes, log each episode's total reward
# and step count, then log the mean and standard deviation of the rewards.
logging.info('==== test ====')
episode_rewards = []
for episode in range(100):
    # No seed is passed, so play_episode resets the environment with
    # seed=None.
    episode_reward, elapsed_steps = play_episode(env, agent)
    episode_rewards.append(episode_reward)
    logging.info('test episode %d: reward = %.2f, steps = %d',
            episode, episode_reward, elapsed_steps)
logging.info('average episode reward = %.2f ± %.2f',
        np.mean(episode_rewards), np.std(episode_rewards))

00:00:00 [INFO] ==== test ====
00:00:00 [INFO] test episode 0: reward = 251.10, steps = 175
00:00:00 [INFO] test episode 1: reward = 263.86, steps = 158
00:00:00 [INFO] test episode 2: reward = 296.23, steps = 228
00:00:00 [INFO] test episode 3: reward = 290.20, steps = 201
00:00:00 [INFO] test episode 4: reward = 309.30, steps = 199
00:00:00 [INFO] test episode 5: reward = 304.04, steps = 171
00:00:00 [INFO] test episode 6: reward = 260.22, steps = 199
00:00:00 [INFO] test episode 7: reward = 277.66, steps = 170
00:00:00 [INFO] test episode 8: reward = 255.32, steps = 181
00:00:00 [INFO] test episode 9: reward = 261.03, steps = 168
00:00:00 [INFO] test episode 10: reward = 296.68, steps = 208
00:00:00 [INFO] test episode 11: reward = 300.02, steps = 222
00:00:00 [INFO] test episode 12: reward = 303.89, steps = 234
00:00:00 [INFO] test episode 13: reward = 255.41, steps = 178
00:00:00 [INFO] test episode 14: reward = 267.43, steps = 180
00:00:00 [INFO] test episode 15: reward = 309.62,

In [5]:
# Close the environment now that evaluation is finished.
env.close()