# COMP47590 Assignment 2 LunarEirLander
## Evaluate Image Based Classifier
Load a pre-trained model for the LunarEirLander game and deploy it into the world

Load packages

In [1]:
import sys, math
import numpy as np

import cv2

# MOD Extra imports for image handling
from PIL import Image
import os
import time
import datetime
from tensorflow import keras

import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)

import gym
from gym import spaces
from gym.utils import seeding

import LunarEirLander


Load the LunarEirLander environment and deploy the agent into it

In [13]:
# Evaluate the "balanced" CNN controller: render each frame, preprocess it the
# same way as at training time, let the model pick an action, and step the game.

# Input geometry fed to the network - must match how the model was trained
ROWS = 32
COLS = 32
CHANNELS = 1

# Number of evaluation episodes to play
N_EPISODES = 20

# Load a pre-trained model
model = keras.models.load_model("cnn_balanced.mod")

# Load the Lunar Lander environment and initialise it
#env = LunarEirLander.LunarEirLander()
env = gym.make('LunarLander-v2')

# Run the game loop. The environment is closed exactly once, after the last
# episode (or on error), rather than being closed and reused between episodes.
try:
    for episode in range(N_EPISODES):
        s = env.reset()
        total_reward = 0
        steps = 0
        done = False
        while not done:

            # Access the rendered screen image
            raw_image = env.render(mode='rgb_array')

            # Prepare the image for presentation to the network - ensure this matches how the model was trained.
            # cv2.resize expects dsize as (width, height), i.e. (COLS, ROWS).
            processed_image = cv2.resize(raw_image, (COLS, ROWS), interpolation=cv2.INTER_CUBIC)
            processed_image = cv2.cvtColor(processed_image, cv2.COLOR_RGB2GRAY)
            # np.float was removed from NumPy; np.float64 is the dtype it aliased.
            processed_image = np.array(processed_image, dtype=np.float64)
            processed_image = processed_image.reshape((1, ROWS, COLS, CHANNELS))
            processed_image = processed_image / 255

            # Get the model to make a prediction (verbose=0 avoids a progress bar per frame)
            action_scores = model.predict(processed_image, verbose=0)
            a = int(np.argmax(action_scores, axis=-1)[0])

            # Step on the game and update the on-screen window
            s, r, done, info = env.step(a)
            env.render()
            total_reward += r
            steps += 1

        # Report the final observation and the episode summary
        print(["{:+0.2f}".format(x) for x in s])
        print("step {} total_reward {:+0.2f}".format(steps, total_reward))
finally:
    env.close()


['+0.60', '-0.24', '+1.26', '+0.24', '-3.41', '-1.42', '+0.00', '+0.00']
step 102 total_reward -571.62
['+0.76', '-0.15', '+1.40', '-0.36', '-2.51', '-6.60', '+0.00', '+0.00']
step 77 total_reward -507.47
['-0.17', '+0.01', '+0.49', '+0.15', '-4.14', '+2.47', '+0.00', '+1.00']
step 83 total_reward -546.50
['+1.01', '+0.07', '+2.67', '-1.69', '-2.11', '-0.78', '+0.00', '+0.00']
step 94 total_reward -560.32
['-1.00', '+0.02', '-1.94', '+0.05', '+3.22', '+8.20', '+0.00', '+0.00']
step 92 total_reward -587.03
['+0.47', '+0.09', '+0.22', '-0.06', '-4.39', '-0.71', '+0.00', '+1.00']
step 71 total_reward -530.53
['-0.59', '+0.03', '-0.89', '-0.66', '+0.37', '-2.80', '+1.00', '+1.00']
step 80 total_reward -93.37
['+0.96', '-0.02', '+2.07', '-0.44', '-2.96', '-6.53', '+0.00', '+0.00']
step 75 total_reward -549.36
['-1.01', '+5.02', '-2.72', '+2.16', '+0.76', '+0.14', '+0.00', '+0.00']
step 127 total_reward -872.49
['+0.09', '+0.01', '-1.04', '-0.11', '+2.11', '-2.35', '+0.00', '+0.00']
step 80 

In [2]:
# Evaluate the "unbalanced" CNN controller: render each frame, preprocess it the
# same way as at training time, let the model pick an action, and step the game.

# Input geometry fed to the network - must match how the model was trained
ROWS = 32
COLS = 32
CHANNELS = 1

# Number of evaluation episodes to play
N_EPISODES = 20

# Load a pre-trained model
model = keras.models.load_model("cnn_unbalanced.mod")

# Load the Lunar Lander environment and initialise it
#env = LunarEirLander.LunarEirLander()
env = gym.make('LunarLander-v2')

# Run the game loop. The environment is closed exactly once, after the last
# episode (or on error), rather than being closed and reused between episodes.
try:
    for episode in range(N_EPISODES):
        s = env.reset()
        total_reward = 0
        steps = 0
        done = False
        while not done:

            # Access the rendered screen image
            raw_image = env.render(mode='rgb_array')

            # Prepare the image for presentation to the network - ensure this matches how the model was trained.
            # cv2.resize expects dsize as (width, height), i.e. (COLS, ROWS).
            processed_image = cv2.resize(raw_image, (COLS, ROWS), interpolation=cv2.INTER_CUBIC)
            processed_image = cv2.cvtColor(processed_image, cv2.COLOR_RGB2GRAY)
            # np.float was removed from NumPy; np.float64 is the dtype it aliased.
            processed_image = np.array(processed_image, dtype=np.float64)
            processed_image = processed_image.reshape((1, ROWS, COLS, CHANNELS))
            processed_image = processed_image / 255

            # Get the model to make a prediction (verbose=0 avoids a progress bar per frame)
            action_scores = model.predict(processed_image, verbose=0)
            a = int(np.argmax(action_scores, axis=-1)[0])

            # Step on the game and update the on-screen window
            s, r, done, info = env.step(a)
            env.render()
            total_reward += r
            steps += 1

        # Report the final observation and the episode summary
        print(["{:+0.2f}".format(x) for x in s])
        print("step {} total_reward {:+0.2f}".format(steps, total_reward))
finally:
    env.close()


['-0.08', '-0.01', '-1.28', '+0.01', '+3.15', '+0.08', '+0.00', '+0.00']
step 77 total_reward -459.81
['-0.54', '-0.06', '-1.21', '-0.24', '+4.32', '+3.80', '+0.00', '+0.00']
step 71 total_reward -605.27
['-0.40', '+0.12', '-1.42', '-0.66', '+2.64', '+7.72', '+0.00', '+0.00']
step 75 total_reward -476.34
['-0.79', '+0.26', '-2.37', '-0.00', '+2.22', '+0.25', '+0.00', '+1.00']
step 89 total_reward -511.52
['-0.22', '-0.01', '-1.77', '-1.06', '+3.22', '+8.23', '+0.00', '+0.00']
step 77 total_reward -491.83
['-0.25', '+0.02', '-1.81', '-0.22', '+2.54', '+5.52', '+0.00', '+0.00']
step 79 total_reward -438.93
['-0.60', '+0.27', '-1.21', '-0.65', '+2.57', '+7.69', '+0.00', '+0.00']
step 66 total_reward -486.11
['-0.57', '-0.14', '-2.14', '-1.15', '+3.39', '+6.50', '+0.00', '+0.00']
step 75 total_reward -592.06
['-0.23', '+0.01', '-1.75', '-0.70', '+2.84', '+7.33', '+0.00', '+0.00']
step 84 total_reward -474.72
['+0.07', '-0.01', '-0.59', '+0.00', '+3.14', '+0.00', '+0.00', '+0.00']
step 74 t

In [5]:
# Smoke test of the Gym installation: drive CartPole with random actions
# for a fixed number of rendered steps.
N_DEMO_STEPS = 1000

env = gym.make('CartPole-v0')
try:
    env.reset()
    for _ in range(N_DEMO_STEPS):
        env.render()
        # Take a random action
        _, _, done, _ = env.step(env.action_space.sample())
        if done:
            # Stepping a finished episode is undefined behaviour in Gym, so start a new one.
            env.reset()
finally:
    # Always release the render window, even if a step raises.
    env.close()