In [1]:
from nes_py.wrappers import JoypadSpace
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from gym import wrappers
from IPython.display import Video
import io
import base64
from IPython.display import HTML
import numpy as np
from collections import deque
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import copy

from eyenes.agent_model import AgentModel

class Agent:
    """One Super Mario Bros player: an emulator environment plus an ``AgentModel``.

    The agent keeps a rolling history of frames in ``self.state`` (a deque of
    ``buffer*fps + 1`` entries).  Every ``fps`` emulator steps a frame is stored
    and the model is asked for a new action; in between, the previous action is
    repeated.  ``run`` plays one episode and accumulates ``total_reward``, which
    ``get_reward`` exposes (running an episode first if none has been played
    since construction or the last ``mutate``).
    """

    # Class-level defaults; every instance overwrites the ones it uses.
    eyemodel = None
    model = None
    state = None
    total_reward = None
    reward = None
    done = None
    info = None
    next_state = None
    update = None
    buffer = None
    freq = None
    intensity = None
    env = None
    ID = None
    lineage = None
    max_steps = None
    fps = None
    lazy_penalty = None
    death_penalty = None

    def __init__(self, ID = -1, update = ['reward'], buffer = 3, max_steps = 500, freq = .25, intensity = .25, fps = 5):
        """Create the environment, the model and a blank frame history.

        Args:
            ID: identifier stored on the agent and recorded in descendants' lineage.
            update: stored on the agent as ``self.update`` (copied, so the shared
                default list is never aliased between instances).
            buffer: number of frames handed to the model per decision.
            max_steps: maximum number of emulator steps per ``run``.
            freq: forwarded to ``model.mutate`` as ``freq``.
            intensity: forwarded to ``model.mutate`` as ``intensity``.
            fps: a frame is stored / an action is chosen every ``fps`` steps.
        """
        self.buffer = buffer
        self.freq = freq
        self.intensity = intensity
        self.fps = fps
        self.env = self.make_env()
        self.start_model()
        # Copy so that mutating one agent's list cannot leak into the default
        # argument (and therefore into every other agent).
        self.update = copy.copy(update)
        self.state = deque(maxlen = buffer*fps + 1)
        self.ID = ID
        self.max_steps = max_steps
        self.lineage = []
        self.lazy_penalty = -30
        self.death_penalty = -50
        # Fill the *whole* history: get_buffered_images indexes up to
        # buffer*fps - 1, so fewer frames would raise IndexError before the
        # first run().
        self._fill_blank_frames()

    def _fill_blank_frames(self):
        """Push ``buffer*fps + 1`` all-zero frames, flushing the frame history."""
        frame_shape = self.env.observation_space.shape
        for _ in range(self.buffer*self.fps + 1):
            self.state.append(np.zeros(frame_shape))

    def make_env(self, mode = None, rom_id = 'SuperMarioBros-v0', directory = './gym-results/'):
        """Build a ``JoypadSpace``-wrapped environment restricted to SIMPLE_MOVEMENT.

        Args:
            mode: when ``'monitor'``, the environment is additionally wrapped in
                ``gym.wrappers.Monitor`` writing into ``directory``.
            rom_id: environment id passed to ``gym_super_mario_bros.make``.
            directory: output directory for the monitor wrapper (only used in
                monitor mode).

        Returns:
            The wrapped environment.
        """
        env = gym_super_mario_bros.make(rom_id)
        env = JoypadSpace(env, SIMPLE_MOVEMENT)
        if mode == 'monitor':
            env = wrappers.Monitor(env, directory, force = True)
        return env

    def start_model(self):
        """Instantiate ``self.model`` sized from the environment's spaces.

        Reuses ``self.env`` when it exists; otherwise a temporary environment is
        created just to read the shapes and is closed again afterwards.
        """
        env = self.env
        temporary = env is None
        if temporary:
            env = self.make_env()
        try:
            # NOTE(review): the recorded output of cell In [2] shows AgentModel
            # rejecting 'eye_output_dim' -- confirm against the installed
            # eyenes.agent_model version.
            self.model = AgentModel(buffer = self.buffer, input_shape = env.observation_space.shape,
                                    output_dim = env.action_space.n, eye_output_dim = 64)
        finally:
            if temporary:
                env.close()

    def get_buffered_images(self):
        """Return ``buffer`` frames taken from the history, one every ``fps`` slots."""
        return [self.state[(i + 1)*self.fps - 1] for i in range(self.buffer)]

    def take_action(self, verbose):
        """Ask the model for an action given the buffered frames.

        Args:
            verbose: when truthy, print the raw model prediction.

        Returns:
            Index of the largest entry of the prediction.
        """
        # Each frame gets a leading axis of size 1 before being passed on.
        input_imgs = [np.expand_dims(img, axis = 0) for img in self.get_buffered_images()]
        prediction = self.model.predict(input_imgs)
        if verbose:
            print(prediction)
        return np.argmax(prediction)

    def reset_data(self):
        """Blank the frame history and reset the per-episode bookkeeping."""
        self._fill_blank_frames()
        self.total_reward = 0
        self.reward = []
        self.done = []
        self.info = dict()
        self.next_state = []

    def gather_data(self, step, state, reward, done, info, next_state):
        """Accumulate ``reward`` and store ``state`` every ``fps`` steps.

        ``done``, ``info`` and ``next_state`` are accepted but not used here.
        """
        self.total_reward += reward
        if step%self.fps == 0:
            self.state.append(np.array(state))

    def run(self, mode = None, verbose = False, directory = './gym-results/'):
        """Play one episode of at most ``max_steps`` steps and score it.

        The episode ends early when the agent stalls (``lazy_penalty`` is
        added), when ``info['life']`` drops below 2 (``death_penalty`` is
        added), or when the environment reports ``done``.

        Args:
            mode: ``None`` (plain run), ``'print'`` (print each chosen action),
                ``'render'`` (render every step) or ``'monitor'`` (record a
                video into ``directory`` and display it afterwards).
            verbose: forwarded to ``take_action``.
            directory: where monitor mode writes and reads the video.
        """
        env = self.make_env(mode = mode, directory = directory)
        self.reset_data()

        patience = 3
        resting = 0
        x_pos = 0
        try:
            state = env.reset()
            for step in range(self.max_steps):

                # A new action is chosen only every `fps` steps; step 0 always
                # qualifies, so `action` is defined before its first use.
                if step%self.fps == 0:
                    action = self.take_action(verbose)
                    if mode == 'print':
                        print(action)

                next_state, reward, done, info = env.step(action)

                # Track the furthest x position reached so far.
                if info['x_pos'] > x_pos:
                    x_pos = info['x_pos']
                    resting = 0

                # Count steps spent within 5 units of the furthest position.
                if abs(info['x_pos'] - x_pos) < 5:
                    resting += 1

                if resting > patience*60:
                    self.total_reward += self.lazy_penalty
                    break

                if info['life'] < 2:
                    self.total_reward += self.death_penalty
                    break

                self.gather_data(step, state, reward, done, info, next_state)
                state = next_state

                if mode == 'render':
                    env.render()

                # Never step an environment that has reported the episode over.
                if done:
                    break
        finally:
            # Always release the environment; in monitor mode this also lets
            # the wrapper finish the recording before it is read below.
            env.close()

        if mode == 'monitor':
            import os
            from IPython.display import display
            file_name = os.path.join(directory, 'openaigym.video.%s.video000000.mp4' % env.file_infix)
            mp4 = Video(file_name, width = 600, height = 450)
            display(mp4)

    def get_reward(self):
        """Return ``total_reward``, playing an episode first if none is stored."""
        if self.total_reward is None:
            self.run()
        return self.total_reward

    def itsame(self):
        """Return the string ``'Mario!'``."""
        return 'Mario!'

    def copy_model(self, other, new_ID):
        """Become a copy of ``other`` under a new identifier.

        Takes over ``other``'s lineage (adding ``other.ID`` if missing) and a
        deep copy of its model weights.
        """
        self.ID = new_ID
        self.lineage = copy.copy(other.lineage)
        if other.ID not in self.lineage:
            self.lineage.append(other.ID)

        self.model.set_weights(copy.deepcopy(other.model.get_weights()))

    def mutate(self):
        """Mutate the model and invalidate the stored reward."""
        self.model.mutate(freq = self.freq, intensity = self.intensity)
        # Force get_reward() to re-evaluate the mutated model.
        self.total_reward = None

    def print_state(self):
        """Plot the frames the model would currently receive, side by side."""
        fig = plt.figure(figsize=(16., 12.))
        grid = ImageGrid(fig, 111,  # similar to subplot(111)
                         nrows_ncols=(1, self.buffer),  # one row, one column per buffered frame
                         axes_pad=0.1,)  # pad between axes in inch.

        for ax, im in zip(grid, self.get_buffered_images()):
            # Iterating over the grid returns the Axes.
            ax.imshow(im)
            ax.set_xticklabels([])
            ax.set_yticklabels([])
        plt.show()


Using TensorFlow backend.


In [2]:
agent = Agent()

TypeError: __init__() got an unexpected keyword argument 'eye_output_dim'

In [7]:
agent.run(mode = 'render', verbose = True)

[[0.00783022 0.02744115 0.05094692 0.05776041 0.         0.
  0.04162228]]
[[0.00783022 0.02744115 0.05094692 0.05776041 0.         0.
  0.04162228]]
[[0.00707317 0.02866959 0.05108013 0.0575881  0.         0.
  0.041514  ]]
[[0.00707317 0.02866959 0.05108013 0.0575881  0.         0.
  0.041514  ]]
[[0.0072532  0.02850747 0.05097101 0.05748655 0.         0.
  0.04162948]]
[[0.00724782 0.02820185 0.05090487 0.05766098 0.         0.
  0.04122948]]
[[0.00715589 0.02842401 0.05099942 0.05761083 0.         0.
  0.04137347]]
[[0.00708774 0.0283042  0.05089481 0.05754652 0.         0.
  0.04129175]]
[[0.00701418 0.02829759 0.05085323 0.05758335 0.         0.
  0.04110042]]
[[0.00733273 0.0279226  0.05080426 0.05764885 0.         0.
  0.04122745]]
[[0.00717413 0.0281424  0.05089784 0.05783534 0.         0.
  0.04112394]]
[[0.00690482 0.02877759 0.05120469 0.05776175 0.         0.
  0.0415271 ]]
[[0.00701619 0.02851313 0.05104012 0.05768705 0.         0.
  0.04155316]]
[[0.00724784 0.02778555 0

  return (self.ram[0x86] - self.ram[0x071c]) % 256


In [8]:
agent.model.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_10 (InputLayer)           (None, 240, 256, 3)  0                                            
__________________________________________________________________________________________________
input_11 (InputLayer)           (None, 240, 256, 3)  0                                            
__________________________________________________________________________________________________
input_12 (InputLayer)           (None, 240, 256, 3)  0                                            
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, 240, 256, 3)  0           input_10[0][0]                   
                                                                 input_11[0][0]             