# ML-Dinosaur (W.I.P.)

CNN trained to play Google's Dinosaur Run game, based on the following [article](https://blog.paperspace.com/dino-run/).

## 0. Imports

In [1]:
import base64
import io
import json
import os
import pickle
import random
import time
from collections import deque
from io import BytesIO
from random import randint

import cv2
import numpy as np
import pandas as pd
from IPython.display import clear_output
from keras.callbacks import TensorBoard
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Activation, Dense, Dropout, Flatten
from keras.models import Sequential, model_from_json
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from tensorflow.keras.optimizers import SGD, Adam

## 1. Variables Setup

In [2]:
# Paths
game_url = "chrome://dino"
chrome_driver_path = "./drivers/chromedriver_mac" # Select corresponding OS
loss_file_path = "./objects/loss_df.csv"
actions_file_path = "./objects/actions_df.csv"
q_value_file_path = "./objects/q_values.csv"
scores_file_path = "./objects/scores_df.csv"

# Scripts
# Creates an id for the canvas for faster selection from DOM
init_script = "document.getElementsByClassName('runner-canvas')[0].id = 'runner-canvas'"

# Canvas: returns the canvas content as a data URL with its first 22 characters stripped
getbase64Script = (
    "canvasRunner = document.getElementById('runner-canvas'); "
    "return canvasRunner.toDataURL().substring(22)"
)

# Log Structures
def _load_log(csv_path, column):
    """Return the log stored at csv_path, or an empty one-column frame when the file is absent.

    The same path is used for the existence check and for the read, so a log
    can never be gated on, or loaded from, another log's file.
    """
    if os.path.isfile(csv_path):
        return pd.read_csv(csv_path)
    return pd.DataFrame(columns=[column])

loss_df = _load_log(loss_file_path, 'loss')
scores_df = _load_log(scores_file_path, 'scores')
actions_df = _load_log(actions_file_path, 'actions')
q_values_df = _load_log(q_value_file_path, 'qvalues')

# Game Parameters
ACTIONS = 2                   # Possible actions: (i) jump, (ii) do nothing
GAMMA = 0.99                  # Discount factor applied to the next state's max Q-value
OBSERVATION = 100             # Timesteps to observe before training
EXPLORE = 100000              # Frames over which to anneal epsilon
FINAL_EPSILON = 0.0001        # Final value of epsilon
INITIAL_EPSILON = 0.1         # Starting value of epsilon
REPLAY_MEMORY = 50000         # Number of previous transitions to remember
BATCH = 16                    # Size of minibatch
FRAME_PER_ACTION = 1          # An action is chosen every FRAME_PER_ACTION timesteps
LEARNING_RATE = 1e-4          # Learning rate passed to the Adam optimizer
img_rows , img_cols = 80, 80  # Size of each processed frame
img_channels = 4              # Stack 4 frames

## 2. Training setup

In [3]:
class Game:
    """Selenium wrapper around the Chrome dinosaur game page.

    Game state is read and changed by executing JavaScript against the page's
    ``Runner`` object; key presses are sent to the document body.
    """

    def __init__(self,custom_config=True):
        """Launch Chrome, open the game page and tag the canvas with an id.

        custom_config is kept for backward compatibility; nothing in this
        class consults it.
        """
        chrome_options = Options()
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--mute-audio")
        self._driver = webdriver.Chrome(executable_path = chrome_driver_path,chrome_options=chrome_options)
        self._driver.set_window_position(x=-10,y=0)
        self._driver.get(game_url)
        # NOTE(review): presumably keeps the game speed constant - confirm against the game source
        self._driver.execute_script("Runner.config.ACCELERATION=0")
        self._driver.execute_script(init_script)

    def _press(self, key):
        """Send a single key press to the page body."""
        self._driver.find_element_by_tag_name("body").send_keys(key)

    def get_crashed(self):
        """Return the page's ``Runner.instance_.crashed`` value."""
        return self._driver.execute_script("return Runner.instance_.crashed")

    def get_playing(self):
        """Return the page's ``Runner.instance_.playing`` value."""
        return self._driver.execute_script("return Runner.instance_.playing")

    def restart(self):
        """Restart the game through ``Runner.instance_.restart()``."""
        self._driver.execute_script("Runner.instance_.restart()")

    def press_up(self):
        """Press the up arrow key."""
        self._press(Keys.ARROW_UP)

    def press_down(self):
        """Press the down arrow key."""
        self._press(Keys.ARROW_DOWN)

    def get_score(self):
        """Return the current score as an int (0 when no digits are available).

        The page exposes the score as an array of digits, e.g. [1,0,0] for 100.
        Each digit is converted with str() so both string and numeric entries
        are accepted, and an empty or missing array yields 0 instead of
        raising ValueError from int('').
        """
        score_array = self._driver.execute_script("return Runner.instance_.distanceMeter.digits")
        if not score_array:
            return 0
        return int(''.join(str(digit) for digit in score_array))

    def pause(self):
        """Pause the game through ``Runner.instance_.stop()``."""
        return self._driver.execute_script("return Runner.instance_.stop()")

    def resume(self):
        """Resume the game through ``Runner.instance_.play()``."""
        return self._driver.execute_script("return Runner.instance_.play()")

    def end(self):
        """Shut the browser down.

        quit() ends the whole WebDriver session, whereas close() only closes
        the current window and can leave the driver process running.
        """
        self._driver.quit()

In [4]:
class DinoAgent:
    """The player: forwards actions and status queries to a game object."""

    def __init__(self,game):
        """Keep a handle on the game and jump once, which starts the game."""
        self._game = game
        self.jump()

    def is_running(self):
        """Return whatever the game reports for get_playing()."""
        playing = self._game.get_playing()
        return playing

    def is_crashed(self):
        """Return whatever the game reports for get_crashed()."""
        crashed = self._game.get_crashed()
        return crashed

    def jump(self):
        """Jump by pressing the up key on the game."""
        self._game.press_up()

    def duck(self):
        """Duck by calling press_down() on the game (the game object must provide it)."""
        self._game.press_down()

In [5]:
class Game_sate:
    """Applies one action to the game and reports the resulting experience."""

    def __init__(self,agent,game):
        """Store the agent and game, and start the preview-window coroutine."""
        self._agent = agent
        self._game = game
        self._display = show_img() # Display the processed image on screen using openCV,
                                   # implemented using python coroutine
        next(self._display)        # Advance the coroutine to its first yield so send() works

    def get_state(self,actions):
        """Perform the action and return (image, reward, is_over).

        actions is indexed as [do nothing, jump]; the agent jumps only when
        actions[1] == 1. The reward is 0.1 for a step the dinosaur survives
        and -1 for a crash. A crash also logs the score and restarts the game.
        """
        actions_df.loc[len(actions_df)] = actions[1] # Storing actions in a dataframe
        reward = 0.1
        is_over = False # Game over

        if actions[1] == 1:
            self._agent.jump()

        image = grab_screen(self._game._driver)
        self._display.send(image) # Display the image on screen

        if self._agent.is_crashed():
            # The score is only needed here, so it is fetched on crash instead of on
            # every frame (one fewer browser round trip per step). The row index
            # comes from scores_df itself, so each game gets its own row.
            scores_df.loc[len(scores_df)] = self._game.get_score()
            self._game.restart()
            reward = -1
            is_over = True

        return image, reward, is_over # Return the Experience tuple

In [6]:
def save_obj(obj, name ):
    """Pickle obj to objects/<name>.pkl using the highest available protocol."""
    target_path = "".join(('objects/', name, '.pkl')) # Files are dumped into the objects folder
    with open(target_path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

def load_obj(name ):
    """Return the object unpickled from objects/<name>.pkl."""
    source_path = "".join(('objects/', name, '.pkl'))
    # pickle can execute arbitrary code on load; only read files this notebook wrote itself
    with open(source_path, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

def grab_screen(_driver):
    """Capture the game canvas through the driver and return it processed by process_img."""
    encoded_canvas = _driver.execute_script(getbase64Script)
    canvas_bytes = base64.b64decode(encoded_canvas)
    # Decode the image bytes into a numpy array before handing it to the preprocessing step
    raw_frame = np.array(Image.open(BytesIO(canvas_bytes)))
    return process_img(raw_frame)

def process_img(image):
    """Convert a captured frame to grey scale, crop it and shrink it to the model input size.

    image is the array built from the PIL-decoded canvas, so its colour
    channels are in RGB(A) order rather than OpenCV's BGR order; the
    conversion code is picked to match. A frame that is already 2-D is used
    as is. The result is a 2-D array of img_rows x img_cols pixels.
    """
    if image.ndim == 3:
        channels = image.shape[2]
        if channels == 4:
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
        elif channels == 3:
            image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        else:
            # NOTE(review): assumes channel 0 carries the intensity for other layouts - confirm
            image = image[:, :, 0]
    image = image[:300, :500] # Crop Region of Interest (RoI)
    # cv2.resize takes the target size as (width, height)
    image = cv2.resize(image, (img_cols, img_rows))
    return image

def show_img(graphs = False):
    """Coroutine that shows every frame sent to it in an OpenCV window.

    Prime it with next(), then push frames with .send(frame). Each frame is
    enlarged to 800x400 for display. Pressing 'q' in the window closes all
    OpenCV windows and ends the coroutine, so the pending .send() call raises
    StopIteration in the caller.
    """
    window_title = "logs" if graphs else "game_play"
    while True:
        screen = (yield)
        cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
        # Show the enlarged copy; previously it was computed and then discarded
        enlarged = cv2.resize(screen, (800, 400))
        cv2.imshow(window_title, enlarged)

        if (cv2.waitKey(1) & 0xFF == ord('q')):
            cv2.destroyAllWindows()
            break

In [7]:
# Checkpointed training variables live on the filesystem so that
# a later run can resume training from the same step
# NOTE: Should only be done as an initial setup
def init_cache():
    """Write fresh starting values for epsilon, the timestep counter and the replay memory.

    Each value goes through save_obj, so any existing checkpoint with the
    same name is replaced.
    """
    starting_values = (
        ("epsilon", INITIAL_EPSILON),
        ("time", 0),
        ("D", deque()),
    )
    for cache_name, value in starting_values:
        save_obj(value, cache_name)

# init_cache()

In [8]:
def buildmodel():
    """Build and compile the convolutional Q-network.

    The network takes a stack of img_channels frames of img_rows x img_cols
    pixels and outputs one value per action (ACTIONS outputs). It is compiled
    with mean squared error loss and the Adam optimizer.

    If ./models/model.h5 does not exist yet, the freshly initialised weights
    are written there so that a later load_weights call finds a file.
    Existing weights are never overwritten here.
    """
    print("Now we build the model")
    model = Sequential()
    model.add(Conv2D(32, (8, 8), padding='same',strides=(4, 4),input_shape=(img_rows,img_cols,img_channels)))  # rows x cols x stacked frames
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (4, 4),strides=(2, 2),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3),strides=(1, 1),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(ACTIONS))
    # `learning_rate` is the current keyword; `lr` is a deprecated alias
    adam = Adam(learning_rate=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)

    # Create the weights file only if it does not exist yet. The check is made
    # on the weights file itself rather than on the loss log, so trained
    # weights are kept even when the log is missing.
    weights_path = './models/model.h5'
    if not os.path.isfile(weights_path):
        os.makedirs(os.path.dirname(weights_path), exist_ok=True)
        model.save_weights(weights_path)

    print("We finish building the model")
    return model

In [9]:
def _load_checkpoint(name, default):
    """Return the cached object called name, or default when no cache file exists yet."""
    try:
        return load_obj(name)
    except FileNotFoundError:
        return default


def train_network(model,game_state,observe=False):
    """Play the game in an endless loop and, unless observing, train the Q-network.

    Parameters
    ----------
    model : compiled Keras model
        Its weights are reloaded from ./models/model.h5 and it is recompiled.
    game_state : object
        Must provide get_state(actions) returning (image, reward, is_over)
        and a _game attribute with pause() and resume().
    observe : bool
        When True, epsilon is fixed at FINAL_EPSILON and the training
        threshold is set so high that no training step is taken.

    Each step picks an action epsilon-greedily, stores the transition in the
    replay memory and, once past the observation phase, fits the model on a
    random minibatch. Every 1000 timesteps the weights, replay memory,
    timestep, epsilon and logs are written to disk.

    The loop has no exit condition of its own: the function only ends when an
    exception propagates out of it (for example StopIteration from the
    display coroutine).
    """
    last_time = time.time()

    # Replay memory from the filesystem; a missing checkpoint starts empty
    D = _load_checkpoint("D", deque())

    # Get the first state by doing nothing
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1 # 0 => do nothing,
                      # 1 => jump

    # Only the frame of this warm-up step is used
    x_t, _, _ = game_state.get_state(do_nothing)

    # Repeat the first frame 4 times to create the initial input stack
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2]) # 1 x rows x cols x 4

    initial_state = s_t

    if observe:
        # Only observes, doesn't train: the threshold is far beyond any reachable timestep
        OBSERVE = 999999999
        epsilon = FINAL_EPSILON
        print ("Now we load weight")
    else:
        # Training mode: resume epsilon from the cache, or start from its initial value
        OBSERVE = OBSERVATION
        epsilon = _load_checkpoint("epsilon", INITIAL_EPSILON)

    model.load_weights("./models/model.h5")
    # `learning_rate` is the current keyword; `lr` is a deprecated alias
    adam = Adam(learning_rate=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)
    if observe:
        print ("Weight load successfully")

    # Resume from the previous stored time step, or start at 0
    t = _load_checkpoint("time", 0)

    # Endless running
    while (True):

        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0                   # Reward at t
        a_t = np.zeros([ACTIONS]) # Action at t

        # Choose an action epsilon greedy
        if t % FRAME_PER_ACTION == 0: # Parameter to skip frames for actions

            if  random.random() <= epsilon: # Randomly explore an action
                print("----------Random Action----------")
                action_index = random.randrange(ACTIONS)
                a_t[action_index] = 1

            else: # Predict the output
                q = model.predict(s_t)       # Input a stack of 4 images, get the prediction
                max_Q = np.argmax(q)         # Choosing index with maximum q value
                action_index = max_Q
                a_t[action_index] = 1        # 0 => do nothing, 1 => jump

        # Reduce epsilon (exploration parameter) gradually
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # Run the selected action and observe next state and reward
        x_t1, r_t, terminal = game_state.get_state(a_t)
        print('fps: {0}'.format(1 / (time.time()-last_time)))   # Helpful for measuring frame rate
        last_time = time.time()
        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1) # 1 x rows x cols x 1
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)        # New frame goes first; the last frame of the old stack is dropped

        # Store the transition in D
        D.append((s_t, action_index, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        # Only train if done observing
        if t > OBSERVE:

            # Sample a minibatch to train on
            minibatch = random.sample(D, BATCH)
            inputs = np.zeros((BATCH, s_t.shape[1], s_t.shape[2], s_t.shape[3])) # BATCH x rows x cols x 4
            targets = np.zeros((inputs.shape[0], ACTIONS))                       # BATCH x ACTIONS

            # Now we do the experience replay
            for i in range(0, len(minibatch)):
                state_t = minibatch[i][0]    # 4D stack of images
                action_t = minibatch[i][1]   # Action index
                reward_t = minibatch[i][2]   # Reward at state_t due to action_t
                state_t1 = minibatch[i][3]   # Next state
                # Kept in its own name: reusing `terminal` here would overwrite the
                # current step's flag, which is needed below to reset the frame stack
                terminal_t = minibatch[i][4] # Whether the agent died or survived due to the action

                inputs[i : i + 1] = state_t

                targets[i] = model.predict(state_t)  # Predicted q values
                Q_sa = model.predict(state_t1)       # Predict q values for next step

                if terminal_t:
                    targets[i, action_t] = reward_t # If terminated, only equals reward

                else:
                    targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)

            loss += model.train_on_batch(inputs, targets)
            loss_df.loc[len(loss_df)] = loss
            q_values_df.loc[len(q_values_df)] = np.max(Q_sa)

        s_t = initial_state if terminal else s_t1 # Reset to the initial stack if the current step ended the game
        t = t + 1

        # Save progress every 1000 iterations
        if t % 1000 == 0:
            print("Now we save model")
            game_state._game.pause() # Pause game while saving to filesystem
            model.save_weights("./models/model.h5", overwrite=True)
            save_obj(D,"D") # Saving episodes
            save_obj(t,"time") # Caching time steps
            save_obj(epsilon,"epsilon") # Cache epsilon to avoid repeated randomness in actions
            loss_df.to_csv(loss_file_path,index=False)
            scores_df.to_csv(scores_file_path,index=False)
            actions_df.to_csv(actions_file_path,index=False)
            q_values_df.to_csv(q_value_file_path,index=False)
            with open("./models/model.json", "w") as outfile:
                json.dump(model.to_json(), outfile)
            clear_output()
            game_state._game.resume()

        # Print info
        if t <= OBSERVE:
            state = "observe"
        elif t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state, "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, "/ Q_MAX " , np.max(Q_sa), "/ Loss ", loss)

## 3. Execute training/playing

In [10]:
# Let the game begin!
def playGame(observe=False):
    """Open the game, build the model and run train_network until it stops.

    observe is passed straight through to train_network.

    A StopIteration coming out of train_network is treated as a normal stop.
    The browser is shut down on every exit path, including interrupts and
    errors, so no browser is left behind.
    """
    game = Game()
    try:
        dino = DinoAgent(game)
        game_state = Game_sate(dino,game)
        model = buildmodel()
        train_network(model,game_state,observe=observe)
    except StopIteration:
        # Raised when the display coroutine ends; nothing more to do
        pass
    finally:
        game.end()

In [12]:
# Opens the game and starts training. train_network loops without an exit condition,
# so this cell keeps running until it is interrupted or the preview window is closed with 'q'.
playGame(observe=False)