In [17]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import cv2 as cv
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD , Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
import os
import numpy as np
import random

In [18]:
# Define variable
game_url = "file:///Users/shuweizhang/Documents/Studies_Local/527/CrossyRoad/web/index.html"
# screen_shot_output = "../screenshot/input_state.png"
screen_shot_output = "input_state.png"
window_size_w = 1200
window_size_h = 600

In [19]:
# #game parameters
ACTIONS = 4 # possible actions: forward, backward,left, right, do nothing
# GAMMA = 0.99 # decay rate of past observations original 0.99
# OBSERVATION = 100. # timesteps to observe before training
# EXPLORE = 100000  # frames over which to anneal epsilon
# FINAL_EPSILON = 0.0001 # final value of epsilon
# INITIAL_EPSILON = 0.1 # starting value of epsilon
# REPLAY_MEMORY = 50000 # number of previous transitions to remember
# BATCH = 16 # size of minibatch
# FRAME_PER_ACTION = 1
LEARNING_RATE = 1e-4
img_rows , img_cols = 80,80
img_channels = 4 #We stack 4 frames

In [20]:
class Game:
    def __init__(self):
        self._driver = webdriver.Chrome()
        self._driver.set_window_size(window_size_w,window_size_h)
        self._driver.get(game_url)

    def press_up(self):
        self._driver.find_element_by_id("forward").send_keys(Keys.UP)

    def press_down(self):
        self._driver.find_element_by_id("backward").send_keys(Keys.DOWN)

    def press_left(self):
        self._driver.find_element_by_id('left').send_keys(Keys.LEFT)

    def press_right(self):
        self._driver.find_element_by_id('right').send_keys(Keys.RIGHT)

    def get_score(self):
        return self._driver.find_element_by_id('counter').text

    def get_crashed(self):
        self.element = self._driver.find_element_by_id('retry')
        return self.element.is_displayed()
           
    def restart_game(self):
        WebDriverWait(self._driver, 5).until(EC.element_to_be_clickable((By.ID, "retry"))).click()
    
    def end(self):
        self._driver.close()

In [21]:
class MyChicken:
    def __init__(self,game):
        self._game = game

    def forward(self):
        self._game.press_up()
   
    def backward(self):
        self._game.press_down()

    def left(self):
        self._game.press_left()

    def right(self):
        self._game.press_right()

    def is_crashed(self):
        return self._game.get_crashed()


In [22]:
class Game_state:
    def __init__(self,agent,game):
        self._agent = agent
        self._game = game

    def get_state(self,actions):
        reward = 0.1
        is_over = False #game over
        
        # Implement with action for our chicken
        if actions == 0:
            self._agent.forward()
            
        elif actions == 1:
            self._agent.left()
            
        elif actions == 2:
            self._agent.right()
            
        elif actions == 3:
            self._agent.backward()

        
        if self._agent.is_crashed():
#             scores_df.loc[len(loss_df)] = score # log the score when game is over
            self._game.restart_game()
#             self._game.end()
            reward = -1
            is_over = True
              
        # Capture screen, crop put of regoin  of interest, and resize to 80x80
        image = grab_screen(self._game._driver)
        image = processing_image()
        
        return image, reward, is_over
    

In [23]:
def grab_screen(_driver):
    # Capture the browser
    _driver.save_screenshot(screen_shot_output)

def processing_image():
    input_shot = cv.imread(screen_shot_output)
    image = cv.cvtColor(input_shot, cv.COLOR_RGBA2GRAY)
    image = image[50:850, 750:1850]
    image = cv.resize(image, (80,80))
    return image

In [24]:
def buildmodel():

    model = Sequential()
    
    
    # input shape : out input to the first convolution layer
    #   --> resize the original image to 80x80 pixels (img_cols x img_row), which is the format to feed into the Neural Network
    #   --> we stack the last 4 frames from the screen, so it becomes 80x80x4
    #  We applied Conv2D
    #   --> Take 8 by 8 at the time then Stripes by using 4x4 matrix
    #   --> The output shape from this layer becomes 20x20x32, where 32 is the features that we would like to extract
    #   --> For your info, we do not have an absolute number for the filters fo Conv2D(32), we need to test and see the result
    #   --> 32 is the hyperparameter that we need to figure out 
    model.add(Conv2D(32, (8, 8), padding='same',strides=(4, 4),input_shape=(img_cols,img_rows,img_channels)))
    # We do "Max pooling by setting the pool size to 2x2 matrix"
    #   --> the remain shape for this layer becomes 10x10x32, where 32 is still the number of filer
    model.add(MaxPooling2D(pool_size=(2,2)))
    # Then we apply the activation
    #   --> activation layer or Relu is trying to detect the edge
    #   --> after this activation layer, the output shape is still the same since we did not stripe or anything
    #   --> the main point for the activation layer is try to detect the edge on the image
    model.add(Activation('relu'))
    
    
    # --> since we set the strides to 2x2 matrix
    # --> input before this layer is 10x10x32--> becomes 5x5x64
    model.add(Conv2D(64, (4, 4),strides=(2, 2),  padding='same'))
    # --> we do a max pooling by using 2x2 matrix --> become 2x2x64
    # --> why is it not 3x3? 
    # --> ans: since the size of matrix in 5x5 now, so it will run out of the shape
    model.add(MaxPooling2D(pool_size=(2,2)))
    # --> the output shape is still the same 5x5x64
    model.add(Activation('relu'))
    
    
    # --> input is 2x2x64
    # --> after applying this layer, it becomes 2x2x64
    model.add(Conv2D(64, (3, 3),strides=(1, 1),  padding='same'))
    # --> after applying this maxpooling, it becomes 1x1x64
    model.add(MaxPooling2D(pool_size=(2,2)))
    # --> become 1x1x64
    model.add(Activation('relu'))
    
    # we apply flatten to have 1D array
    # The result should be 64 --> comes from 64x1x1
    model.add(Flatten())
    
    # Fully connect 
    # 512 is the number of unit --> denote output size of the layer
    # Hyperparameter that we need to try it out
    model.add(Dense(512))
    # output shape = 512
    model.add(Activation('relu'))
    
    
    # output layer, which have 3 possible actions now
    # forward, left, and right
    model.add(Dense(ACTIONS))
    
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)
    
    #create model file if not present
    model.save_weights('../object/model.h5')
    #print("We finish building the model")
    return model

In [25]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [9]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

In [21]:
game = Game()
chicken = MyChicken(game)
game_state = Game_state(chicken,game)
model = buildmodel()
guess_action = random.randint(0,3)
image_screen, reward, is_dead = game_state.get_state(int(guess_action))
dqn = build_agent(model, guess_action)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

Now we build the model
We finish building the model


ValueError: Model output "Tensor("dense_4/BiasAdd:0", shape=(?, 4), dtype=float32)" has invalid shape. DQN expects a model that has one dimension for each action, in this case 2.

In [None]:
episodes = 10
for episode in range(1, episodes+1):
    
    done = False
    score = 0 
    game = Game()
    chicken = MyChicken(game)
    game_state = Game_state(chicken,game)
    model = buildmodel()

    
    while not done:
        # Random actions 
        # 0: move forward
        # 1: move left
        # 2: move right
        # 3: move backward
        guess_action = random.randint(0,3)
        image_screen, reward, is_dead = game_state.get_state(int(guess_action))
        score+=reward
        
        if is_dead is True:
            done = True

        
    game.end()

         
    print('Episode:{} Score:{}'.format(episode, score))

In [None]:
# s_t = np.stack((image_screen, image_screen, image_screen, image_screen), axis=2)
# s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])

In [None]:
# q = model.predict(s_t)       #input a stack of 4 images, get the prediction
# max_Q = np.argmax(q)

In [None]:
# max_Q

In [None]:
# s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])

In [None]:
# cv.imshow('new image', image_screen)

In [None]:
# episodes = 5
# for episode in range(1,episodes+1):
#     print(episode)
    

In [None]:
# cv.imshow('new image', temp)
# cv.waitKey()

In [None]:
# model.summary()