In [35]:
from collections import deque

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

import cv2 as cv

from keras.models import Sequential
from keras.layers.core import Dense, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import Adam

import numpy as np
import pandas as pd
import random
import pickle
import time
import os
import base64
from io import BytesIO
from PIL import Image

In [65]:
# Root directory of the CrossyRoad project. The default is the location the
# notebook was originally written against; set the CROSSYROAD_ROOT environment
# variable to run from another checkout without editing this cell.
PROJECT_ROOT = os.environ.get(
    'CROSSYROAD_ROOT',
    '/Users/shuweizhang/Documents/Studies_Local/527/CrossyRoad',
).rstrip('/')

# Local HTML page that the browser loads.
GAME_URL = 'file://' + PROJECT_ROOT + '/web/index.html'

# Directory (trailing slash included) holding pickles, weights and CSV logs.
OBJ_PATH = PROJECT_ROOT + '/objects/'

# Weights file for the network.
MODEL_FILE_PATH = OBJ_PATH + 'model.h5'

# CSV logs written during training.
LOSS_FILE_PATH = OBJ_PATH + 'loss_df.csv'
ACTIONS_FILE_PATH = OBJ_PATH + 'actions_df.csv'
Q_VALUES_FILE_PATH = OBJ_PATH + 'q_values.csv'
SCORES_FILE_PATH = OBJ_PATH + 'scores_df.csv'

# Set the browser size
WINDOW_SIZE_W = 1200
WINDOW_SIZE_H = 600

In [66]:
# --- Q-learning hyperparameters -------------------------------------------

# Length of the one-hot action vector and number of network outputs.
ACTIONS = 4

# Step size handed to the Adam optimizer.
LEARNING_RATE = 0.0001

# Frame size used for the network input, and how many frames are stacked
# into one state.
IMG_ROWS = IMG_COLS = 80
IMG_CHANNELS = 4

# Epsilon schedule: each step past OBSERVE lowers epsilon by
# (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE until it reaches FINAL_EPSILON.
INITIAL_EPSILON = 0.2
FINAL_EPSILON = 0.001
EXPLORE = 100000

# Number of initial timesteps played with random actions before training.
OBSERVE = 35

# Maximum number of transitions kept in the replay memory.
REPLAY_MEMORY = 50000

# Number of transitions sampled per training step.
BATCH = 32

# Discount factor applied to the best next-state Q-value.
GAMMA = 0.99

In [67]:
class Game:
    """Selenium wrapper around the Chrome window that runs the game page.

    Attributes:
        driver: the Chrome WebDriver controlling the browser.
        element: the page element with id ``retry``; its visibility is what
            ``get_crashed`` reports.
    """

    def __init__(self):
        """Start Chrome, size the window and load ``GAME_URL``."""
        chrome_options = Options()
        chrome_options.add_argument('--no-sandbox')
        # chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-dev-shm-usage')
        # `options=` is the supported keyword; `chrome_options=` is deprecated.
        self.driver = webdriver.Chrome(options=chrome_options)
        try:
            self.driver.set_window_size(WINDOW_SIZE_W, WINDOW_SIZE_H)
            self.driver.get(GAME_URL)
            self._paused = False
            self.element = self.driver.find_element(By.ID, 'retry')
        except Exception:
            # Do not leave an orphaned browser behind if page setup fails.
            self.driver.quit()
            raise

    def _send_key(self, element_id, key):
        """Send ``key`` to the page element whose id is ``element_id``."""
        self.driver.find_element(By.ID, element_id).send_keys(key)

    def press_up(self):
        """Send the UP arrow key to the ``forward`` element."""
        self._send_key('forward', Keys.UP)

    def press_down(self):
        """Send the DOWN arrow key to the ``backward`` element."""
        self._send_key('backward', Keys.DOWN)

    def press_left(self):
        """Send the LEFT arrow key to the ``left`` element."""
        self._send_key('left', Keys.LEFT)

    def press_right(self):
        """Send the RIGHT arrow key to the ``right`` element."""
        self._send_key('right', Keys.RIGHT)

    def get_score(self):
        """Return the text of the ``counter`` element parsed as an int.

        Raises:
            ValueError: if the counter text is not an integer.
        """
        text_score = self.driver.find_element(By.ID, 'counter').text
        return int(text_score)

    def get_crashed(self):
        """Return whether the ``retry`` element is currently displayed."""
        return self.element.is_displayed()

    def restart_game(self):
        """Wait up to 5 seconds for ``retry`` to be clickable, then click it."""
        WebDriverWait(self.driver, 5).until(
            EC.element_to_be_clickable((By.ID, 'retry'))
        ).click()

    def end(self):
        """Shut the browser down and end the WebDriver session.

        ``quit()`` is used instead of ``close()``: ``close()`` only closes
        the current window and leaves the driver session running.
        """
        self.driver.quit()

    def pause_or_resume(self):
        """Send ENTER to the page and flip the locally tracked paused flag.

        Presumably the page toggles its paused state on ENTER; the flag here
        only mirrors the number of toggles sent.
        """
        action = ActionChains(self.driver)
        action.send_keys(Keys.ENTER).perform()
        self._paused = not self._paused

    def pause(self):
        """Toggle to paused unless already marked as paused."""
        if not self._paused:
            self.pause_or_resume()

    def resume(self):
        """Toggle to running unless already marked as running."""
        if self._paused:
            self.pause_or_resume()

    def end_game(self):
        """Alias of ``end``, kept for existing callers."""
        self.end()

In [68]:
class Chicken:
    """The player character: forwards each request to the wrapped game."""

    def __init__(self, game):
        """Keep a reference to the game object that receives the key presses."""
        self._game = game

    def forward(self):
        """Move forward by asking the game to press UP."""
        self._game.press_up()

    def backward(self):
        """Move backward by asking the game to press DOWN."""
        self._game.press_down()

    def left(self):
        """Move left by asking the game to press LEFT."""
        self._game.press_left()

    def right(self):
        """Move right by asking the game to press RIGHT."""
        self._game.press_right()

    def is_crashed(self):
        """Return the game's crash status unchanged."""
        crashed = self._game.get_crashed()
        return crashed

    def pause_or_resume(self):
        """Toggle the game's paused state."""
        self._game.pause_or_resume()

In [69]:
class Game_State:
    """Environment wrapper: applies an action and returns frame and reward."""

    def __init__(self, agent, game):
        """Store the agent (moves, crash check) and the game (score, frames)."""
        self._agent = agent
        self._game = game
        # Score read at the moment of the most recent crash; None until then.
        self._game_score = None

    def processing_image(self):
        """Return the current game frame as an edge map.

        The text of the page element ``imgURL`` is base64-decoded and opened
        as an image, converted to grayscale, cropped to rows 200-400 and
        columns 650-850, resized to ``IMG_COLS`` x ``IMG_ROWS`` and passed
        through Canny edge detection.
        """
        base = self._game.driver.find_element(By.ID, "imgURL").text
        screen = np.array(Image.open(BytesIO(base64.b64decode(base))))
        # assumes the decoded screenshot is RGBA -- TODO confirm
        image = cv.cvtColor(screen, cv.COLOR_RGBA2GRAY)
        image = image[200:400, 650:850]
        # cv.resize takes (width, height), i.e. (columns, rows).
        image = cv.resize(image, (IMG_COLS, IMG_ROWS))
        image = cv.Canny(image, threshold1=100, threshold2=200)

        return image

    def pause(self):
        """Pause the underlying game."""
        self._game.pause()

    def resume(self):
        """Resume the underlying game."""
        self._game.resume()

    def get_state(self, actions, score_dep = True, reward_list = (1, 1, 1, 1, 10)):
        """Apply one action and report the resulting transition.

        Args:
            actions: indexable of four flags in the order forward, left,
                right, backward. The first entry equal to 1 is executed; if
                none is, no move is made.
            score_dep: when True the reward is the non-negative score gain of
                this step; when False the reward is the per-action weight.
            reward_list: five weights -- indices 0-3 scale the base reward of
                forward/left/right/backward, index 4 is the reward returned
                when the agent has crashed.

        Returns:
            Tuple ``(image, reward, is_over, new_score)``. ``new_score`` is 0
            when the agent crashed or when ``score_dep`` is False.
        """
        reward = 0 if score_dep else 1

        old_score = self._game.get_score()
        is_over = False

        moves = (
            self._agent.forward,
            self._agent.left,
            self._agent.right,
            self._agent.backward,
        )
        for index, move in enumerate(moves):
            if int(actions[index]) == 1:
                move()
                reward *= reward_list[index]
                break

        new_score = 0
        if self._agent.is_crashed():
            self._game_score = self._game.get_score()
            self._game.restart_game()
            is_over = True
            # NOTE(review): with the default weights a crash yields +10, which
            # is larger than the reward of a typical step -- confirm that a
            # positive terminal reward is intended.
            reward = reward_list[4]
        elif score_dep:
            new_score = self._game.get_score()
            reward += max(new_score - old_score, 0)

        image = self.processing_image()

        return image, reward, is_over, new_score

In [70]:
def build_model():
    """Build and compile the convolutional Q-network.

    Three Conv2D -> MaxPooling2D -> ReLU stages are followed by a 512-unit
    dense layer and a linear output of ``ACTIONS`` units. The model is
    compiled with mean-squared-error loss and Adam at ``LEARNING_RATE``.

    Side effect: when ``MODEL_FILE_PATH`` does not exist yet, the freshly
    initialised weights are written there.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    model = Sequential()

    # Keras expects channels-last input as (rows, cols, channels).
    model.add(Conv2D(32, (8, 8), padding='same', strides=(4, 4),
                     input_shape=(IMG_ROWS, IMG_COLS, IMG_CHANNELS)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Activation('relu'))

    model.add(Conv2D(64, (4, 4), strides=(2, 2), padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Activation('relu'))

    model.add(Conv2D(64, (3, 3), strides=(1, 1), padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Activation('relu'))

    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))

    # Linear output: one value per action.
    model.add(Dense(ACTIONS))

    # `learning_rate` is the current keyword (`lr` is deprecated) and matches
    # how train_nn builds its optimizer.
    adam = Adam(learning_rate=LEARNING_RATE)
    model.compile(loss='mse', optimizer=adam)

    if not os.path.isfile(MODEL_FILE_PATH):
        model.save_weights(MODEL_FILE_PATH)
    return model

In [71]:
def save_obj(obj, name):
    """Pickle ``obj`` into ``OBJ_PATH`` as ``<name>.pkl`` (highest protocol)."""
    target = ''.join((OBJ_PATH, name, '.pkl'))
    with open(target, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    """Return the object unpickled from ``<name>.pkl`` inside ``OBJ_PATH``.

    Note: unpickling can execute arbitrary code, so only load files that
    this notebook wrote itself.
    """
    source = ''.join((OBJ_PATH, name, '.pkl'))
    with open(source, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

In [72]:
def init_cache():
    """Create the pickled training state on first use.

    Only the presence of ``time.pkl`` is checked. When it is missing, the
    starting epsilon, a timestep of 0 and an empty replay memory are saved.
    """
    if os.path.isfile(OBJ_PATH + 'time.pkl'):
        return

    print('Initialize the cache')
    save_obj(INITIAL_EPSILON, 'epsilon')
    save_obj(0, 'time')
    save_obj(deque(), 'memory')


init_cache()

In [73]:
# Initialize the log structures: read each one from its CSV file when it
# exists, otherwise start from an empty single-column frame.
def _read_log(csv_path, column):
    """Return the CSV at ``csv_path`` as a frame, or an empty frame with ``column``."""
    if os.path.isfile(csv_path):
        return pd.read_csv(csv_path)
    return pd.DataFrame(columns=[column])


loss_df = _read_log(LOSS_FILE_PATH, 'loss')
scores_df = _read_log(SCORES_FILE_PATH, 'scores')
actions_df = _read_log(ACTIONS_FILE_PATH, 'actions')
q_values_df = _read_log(Q_VALUES_FILE_PATH, 'qvalues')

In [81]:
def _fit_minibatch(model, minibatch):
    """Run one Q-learning update on ``minibatch``.

    Each sample is ``(state, action_index, reward, next_state, is_dead)``
    with states shaped ``(1, rows, cols, channels)``.

    Returns:
        ``(loss, q_max)`` where ``loss`` is the value returned by
        ``train_on_batch`` and ``q_max`` is the largest predicted Q-value of
        the last sample's next state (the quantity that is logged).
    """
    # Predict the whole batch in two calls instead of two calls per sample.
    states = np.concatenate([sample[0] for sample in minibatch], axis=0).astype(float)
    next_states = np.concatenate([sample[3] for sample in minibatch], axis=0).astype(float)

    targets = np.array(model.predict(states), dtype=float)
    next_q = model.predict(next_states)

    for row, (_, action, reward, _, dead) in enumerate(minibatch):
        if dead:
            # Terminal transition: there is no future value to bootstrap from.
            targets[row, action] = reward
        else:
            targets[row, action] = reward + GAMMA * np.max(next_q[row])

    loss = model.train_on_batch(states, targets)
    return loss, np.max(next_q[-1])


def train_nn(model, game_state):
    """Play the game and train ``model`` with experience replay.

    Replay memory, timestep and epsilon are restored through ``load_obj`` and
    written back, together with the weights and CSV logs, every 200
    timesteps. The loop has no exit condition: it runs until an exception
    (for example from the browser) or an interrupt stops it.

    Args:
        model: compiled Keras model with ``ACTIONS`` outputs.
        game_state: object exposing ``get_state(actions)``, ``pause()`` and
            ``resume()``.
    """
    memo = load_obj('memory')
    do_nothing = np.zeros(ACTIONS)
    t = load_obj('time')
    epsilon = load_obj('epsilon')

    if os.path.isfile(MODEL_FILE_PATH):
        model.load_weights(MODEL_FILE_PATH)
        adam = Adam(learning_rate=LEARNING_RATE)
        model.compile(loss='mse', optimizer=adam)

    # The initial state is the first frame repeated in all four channels.
    image_t, reward_0, is_dead, game_score = game_state.get_state(do_nothing)
    state_t = np.stack((image_t, image_t, image_t, image_t), axis=2)
    state_t = state_t.reshape(1, state_t.shape[0], state_t.shape[1], state_t.shape[2])

    # NOTE(review): scores_df and actions_df are saved below but nothing in
    # this function appends to them -- confirm where they should be filled.
    while True:
        loss = 0
        Q_sa = 0
        action_index = 0
        action_t = np.zeros([ACTIONS])

        game_state.pause()
        # Epsilon-greedy: act randomly during the observe phase and, after
        # it, with probability epsilon; otherwise take the greedy action.
        # (Previously epsilon was annealed and saved but never consulted.)
        if t < OBSERVE or random.random() <= epsilon:
            action_index = random.randrange(ACTIONS)
        else:
            action_index = int(np.argmax(model.predict(state_t)))
        action_t[action_index] = 1
        game_state.resume()

        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        image_t1, reward_t, is_dead, game_score = game_state.get_state(action_t)
        image_t1 = image_t1.reshape(1, image_t1.shape[0], image_t1.shape[1], 1)
        # Newest frame goes first; the oldest of the four frames is dropped.
        state_t1 = np.append(image_t1, state_t[:, :, :, :3], axis=3)

        memo.append((state_t, action_index, reward_t, state_t1, is_dead))
        if len(memo) > REPLAY_MEMORY:
            memo.popleft()

        game_state.pause()

        # Guard on len(memo) as well: random.sample raises when asked for
        # more items than the memory holds.
        if t > OBSERVE and len(memo) >= BATCH:
            minibatch = random.sample(memo, BATCH)
            # The helper has its own scope, so replayed samples can no longer
            # overwrite state_t / state_t1 / reward_t of the live game.
            batch_loss, Q_sa = _fit_minibatch(model, minibatch)
            loss += batch_loss
            loss_df.loc[len(loss_df)] = loss
            q_values_df.loc[len(q_values_df)] = Q_sa

        state_t = state_t1
        t += 1

        if t % 200 == 0:
            print('Now we save model')
            model.save_weights(MODEL_FILE_PATH, overwrite=True)
            save_obj(memo, 'memory')
            save_obj(t, 'time')
            save_obj(epsilon, 'epsilon')
            loss_df.to_csv(LOSS_FILE_PATH, index=False)
            scores_df.to_csv(SCORES_FILE_PATH, index=False)
            actions_df.to_csv(ACTIONS_FILE_PATH, index=False)
            q_values_df.to_csv(Q_VALUES_FILE_PATH, index=False)
            print('Finished Saving')

        if t <= OBSERVE:
            state = "observe"
        elif OBSERVE < t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t,
              "/ STATE", state,
              "/ EPSILON", epsilon,
              "/ ACTION", action_index,
              "/ REWARD", reward_t,
              "/ SCORE", game_score,
              "/ Q_MAX " , np.max(Q_sa),
              "/ Loss ", loss)

        game_state.resume()

In [90]:
# Start the browser-backed game and wrap it in the agent that issues moves.
game = Game()
chicken = Chicken(game)

  import sys


In [91]:
# Build the environment wrapper around the agent and game, then the network.
game_state = Game_State(chicken, game)
model = build_model()

In [92]:
# Start training. train_nn loops without an exit condition, so this cell runs
# until it is interrupted or an exception is raised.
train_nn(model, game_state)

TIMESTEP 1 / STATE observe / EPSILON 0.2 / ACTION 1 / REWARD 0 / SCORE 0 / Q_MAX  0 / Loss  0
TIMESTEP 2 / STATE observe / EPSILON 0.2 / ACTION 0 / REWARD 1 / SCORE 1 / Q_MAX  0 / Loss  0
TIMESTEP 3 / STATE observe / EPSILON 0.2 / ACTION 2 / REWARD 1 / SCORE 5 / Q_MAX  0 / Loss  0
TIMESTEP 4 / STATE observe / EPSILON 0.2 / ACTION 1 / REWARD 0 / SCORE 5 / Q_MAX  0 / Loss  0
TIMESTEP 5 / STATE observe / EPSILON 0.2 / ACTION 0 / REWARD 10 / SCORE 0 / Q_MAX  0 / Loss  0
TIMESTEP 6 / STATE observe / EPSILON 0.2 / ACTION 1 / REWARD 0 / SCORE 0 / Q_MAX  0 / Loss  0
TIMESTEP 7 / STATE observe / EPSILON 0.2 / ACTION 0 / REWARD 1 / SCORE 1 / Q_MAX  0 / Loss  0
TIMESTEP 8 / STATE observe / EPSILON 0.2 / ACTION 0 / REWARD 2 / SCORE 6 / Q_MAX  0 / Loss  0
TIMESTEP 9 / STATE observe / EPSILON 0.2 / ACTION 3 / REWARD 10 / SCORE 0 / Q_MAX  0 / Loss  0
TIMESTEP 10 / STATE observe / EPSILON 0.2 / ACTION 1 / REWARD 0 / SCORE 0 / Q_MAX  0 / Loss  0
TIMESTEP 11 / STATE observe / EPSILON 0.2 / ACTION 3 / RE

NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=89.0.4389.90)


In [85]:
# Close the browser window that Game() opened.
game.end()

WebDriverException: Message: chrome not reachable
  (Session info: chrome=89.0.4389.90)
