In [31]:
import numpy as np
from PIL import Image

import cv2
import io
import time
import random
import pickle
import os
from io import BytesIO
import base64
import json
import pandas as pd
from time import sleep

from collections import deque
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys

from webdriver_manager.chrome import ChromeDriverManager

In [32]:
# https://github.com/Paperspace/DinoRunTutorial/blob/master/Reinforcement%20Learning%20Dino%20Run.ipynb

In [33]:
# --- Browser / game -------------------------------------------------------
# Page hosting the dinosaur game and the chromedriver binary used to drive it.
game_url = "chrome://dino"
chrome_driver_path = ChromeDriverManager().install()

# --- CSV logs written/read by the training run ----------------------------
loss_file_path = "./objects/loss.csv"
actions_file_path = "./objects/actions.csv"
q_value_file_path = "./objects/q_values.csv"
scores_file_path = "./objects/scores.csv"

# --- JavaScript snippets executed inside the page --------------------------
# Gives the game canvas (looked up by class name) a stable id so that it can
# later be fetched with getElementById.
init_script = (
    "document.getElementsByClassName('runner-canvas')[0].id"
    " = 'runner-canvas'"
)
# Returns the canvas contents as a data URL with its first 22 characters
# removed -- presumably the "data:image/png;base64," prefix, leaving only the
# base64 payload (TODO confirm the canvas always exports PNG).
getbase64Script = (
    "canvasRunner = document.getElementById('runner-canvas'); "
    "return canvasRunner.toDataURL().substring(22)"
)

In [34]:
def grab_screen(_driver):
    """Capture the current game canvas and return it pre-processed.

    Args:
        _driver: Object exposing ``execute_script``; it is asked to run
            ``getbase64Script`` and must return a base64-encoded image.

    Returns:
        Whatever ``process_img`` returns for the decoded frame.
    """
    encoded_frame = _driver.execute_script(getbase64Script)
    raw_bytes = base64.b64decode(encoded_frame)
    decoded_frame = Image.open(BytesIO(raw_bytes))
    pixels = np.array(decoded_frame)
    return process_img(pixels)

def process_img(image):
    """Reduce a captured frame to an 80x80 single-channel image.

    Args:
        image: Frame as a NumPy array. Either ``(H, W)`` (already
            single-channel) or ``(H, W, C)`` with channels in RGB / RGBA
            order, which is what ``grab_screen`` produces by decoding the
            canvas with PIL.

    Returns:
        The grayscale frame resized to 80x80.
    """
    if image.ndim == 3:
        # The frame is decoded by PIL, i.e. red comes first. Using a BGR
        # conversion code here would swap the red and blue luminance weights.
        has_alpha = image.shape[2] == 4
        code = cv2.COLOR_RGBA2GRAY if has_alpha else cv2.COLOR_RGB2GRAY
        image = cv2.cvtColor(image, code)
    # A 2-D input is already grayscale; cvtColor would reject it, so it goes
    # straight to the resize.
    image = cv2.resize(image, (80, 80))
    return image

def show_img(graphs = False):
    """Coroutine that displays every frame sent to it in an OpenCV window.

    Usage: create the generator, advance it once (``next(gen)``) so that it
    is waiting at the ``yield``, then call ``gen.send(frame)`` per frame.

    Args:
        graphs: Selects the window title: ``"logs"`` when true, otherwise
            ``"game_play"``.

    Pressing ``q`` while the window has focus destroys all OpenCV windows and
    ends the generator, so the ``send`` call that delivered that frame raises
    ``StopIteration``.
    """
    # The title never changes for the lifetime of the generator.
    window_title = "logs" if graphs else "game_play"
    while True:
        screen = (yield)
        # WINDOW_NORMAL makes the window user-resizable, so the frame is shown
        # as received; the previously computed (and never displayed) 800x400
        # copy was dead work on every frame and has been dropped.
        cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
        cv2.imshow(window_title, screen)
        if (cv2.waitKey(1) & 0xFF) == ord('q'):
            cv2.destroyAllWindows()
            break

In [35]:
class Game:
    """Selenium-driven handle on the dinosaur game running in Chrome.

    Every method is a thin wrapper that either executes a JavaScript snippet
    against the page's ``Runner`` object or sends a key press to the page.
    """

    def __init__(self, custom_config=True):
        """Start Chrome, open the game page and prepare it for capture.

        Args:
            custom_config: Accepted for backward compatibility; it is not
                used by this implementation.
        """
        # Local import: keeps this class self-contained without touching the
        # notebook's import cell.
        from selenium.common.exceptions import WebDriverException

        chrome_options = Options()
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--mute-audio")
        service = Service(chrome_driver_path)
        self._driver = webdriver.Chrome(service=service, options=chrome_options)
        self._driver.set_window_position(x=300, y=300)
        self._driver.set_window_size(900, 600)

        try:
            self._driver.get(game_url)
        except WebDriverException:
            # Deliberate best effort: navigation errors are ignored here and
            # the scripts below are attempted regardless. Presumably the
            # driver reports the dino page as a failed navigation even though
            # the game loads -- TODO confirm on the Chrome version in use.
            # Only driver errors are ignored; KeyboardInterrupt and unrelated
            # failures now propagate.
            pass

        # Freeze the game speed, then tag the canvas so it can be found by id.
        self._driver.execute_script("Runner.config.ACCELERATION=0")
        self._driver.execute_script(init_script)

    def get_crashed(self):
        """Return the page's ``Runner.instance_.crashed`` value."""
        return self._driver.execute_script("return Runner.instance_.crashed")

    def get_playing(self):
        """Return the page's ``Runner.instance_.playing`` value."""
        return self._driver.execute_script("return Runner.instance_.playing")

    def restart(self):
        """Ask the game to restart."""
        self._driver.execute_script("Runner.instance_.restart()")

    def press_up(self):
        """Send an UP-arrow key press to the page body."""
        self._driver.find_element("tag name", "body").send_keys(Keys.ARROW_UP)

    def press_down(self):
        """Send a DOWN-arrow key press to the page body."""
        self._driver.find_element("tag name", "body").send_keys(Keys.ARROW_DOWN)

    def get_score(self):
        """Return the score shown by the game's distance meter as an int.

        The page reports the score as a sequence of digits. When that
        sequence is empty or missing, 0 is returned instead of failing on
        ``int('')``.
        """
        score_array = self._driver.execute_script("return Runner.instance_.distanceMeter.digits")
        if not score_array:
            return 0
        # str() guards against the digits arriving as numbers, not strings.
        return int(''.join(str(digit) for digit in score_array))

    def pause(self):
        """Call ``Runner.instance_.stop()`` and return its result."""
        return self._driver.execute_script("return Runner.instance_.stop()")

    def resume(self):
        """Call ``Runner.instance_.play()`` and return its result."""
        return self._driver.execute_script("return Runner.instance_.play()")

    def end(self):
        """Shut the browser session down.

        ``quit()`` is used rather than ``close()``: ``close()`` only closes
        the current window and leaves the driver session (and its
        chromedriver process) running.
        """
        self._driver.quit()

In [36]:
class DinoAgent:
    """Player-side facade over a game object.

    The wrapped object must provide ``get_playing``, ``get_crashed``,
    ``press_up`` and ``press_down``.
    """

    def __init__(self, game):
        """Remember ``game``, wait one second, then perform a first jump."""
        self._game = game
        sleep(1)
        self.jump()

    def is_running(self):
        """Return what the game reports for ``get_playing()``."""
        playing = self._game.get_playing()
        return playing

    def is_crashed(self):
        """Return what the game reports for ``get_crashed()``."""
        crashed = self._game.get_crashed()
        return crashed

    def jump(self):
        """Press the up key in the game."""
        self._game.press_up()

    def duck(self):
        """Press the down key in the game."""
        self._game.press_down()

In [37]:
class Game_state:
    """Environment step function: apply an action, return frame and reward."""

    def __init__(self, agent, game):
        """Store the agent/game pair and start the frame viewer.

        Args:
            agent: Object providing ``jump()`` and ``is_crashed()``.
            game: Object providing ``restart()`` and a ``_driver`` attribute
                that ``grab_screen`` can capture from.
        """
        self._agent = agent
        self._game = game
        self._display = show_img()
        # Advance the generator to its first ``yield`` so it accepts send().
        next(self._display)

    def get_state(self, actions):
        """Perform one step and observe the result.

        Args:
            actions: Indexable action vector; a jump is performed when
                ``actions[1] == 1``, anything else means "do nothing".

        Returns:
            ``(image, reward, is_over)`` where ``image`` is the captured
            frame, ``reward`` is 0.1 for doing nothing, 0 for jumping and -5
            when the agent has crashed, and ``is_over`` is True on a crash.

        Raises:
            StopIteration: propagated from the viewer when it has ended.
        """
        # The score used to be fetched here on every step but was never used;
        # that was one extra browser round-trip per frame. Score/action
        # logging is currently disabled; call self._game.get_score() at the
        # point of logging if it is re-enabled.
        reward = 0.1
        is_over = False

        if actions[1] == 1:
            self._agent.jump()
            reward = 0

        image = grab_screen(self._game._driver)
        self._display.send(image)

        if self._agent.is_crashed():
            # Restart immediately so the next step starts a fresh run.
            self._game.restart()
            reward = -5
            is_over = True

        return image, reward, is_over

In [38]:
# Each training log resumes from its CSV when the file exists; otherwise it
# starts as an empty frame with a single named column.
if os.path.isfile(loss_file_path):
    loss_df = pd.read_csv(loss_file_path)
else:
    loss_df = pd.DataFrame(columns=['loss'])

if os.path.isfile(scores_file_path):
    scores_df = pd.read_csv(scores_file_path)
else:
    scores_df = pd.DataFrame(columns=['scores'])

if os.path.isfile(actions_file_path):
    actions_df = pd.read_csv(actions_file_path)
else:
    actions_df = pd.DataFrame(columns=['actions'])

if os.path.isfile(q_value_file_path):
    q_values_df = pd.read_csv(q_value_file_path)
else:
    q_values_df = pd.DataFrame(columns=['qvalues'])

In [39]:
# Hyper-parameters and run switches
PRETRAINED = False          # when True, saved weights are loaded before training

ACTIONS = 2                 # size of the action vector / number of network outputs
GAMMA = 0.99                # discount applied to the next state's best Q-value
LEARNING_RATE = 0.0001      # learning rate handed to the optimizer

INITIAL_EPSILON = 0.01      # starting value of epsilon
FINAL_EPSILON = 0.0001      # final value of epsilon
EXPLORE = 100_000.0         # frames over which to anneal epsilon

# The three values below are not referenced by the cells shown in this
# notebook; their original descriptions are kept.
OBSERVATION = 100.0         # timesteps to observe before training
REPLAY_MEMORY = 50_000      # number of previous transitions to remember
FRAME_PER_ACTION = 1

### Model

In [40]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class DinoNet(nn.Module):
    """Small convolutional Q-network: one frame in, ``ACTIONS`` values out.

    Three conv -> batch-norm -> ReLU -> 2x2 max-pool stages are followed by
    two fully connected layers. ``fc1`` expects 1024 flattened features,
    which is what a 1x80x80 input yields after the three stages (64 channels
    of 4x4).
    """

    def __init__(self):
        super().__init__()
        # Stage 1
        self.conv1 = nn.Conv2d(1, 32, kernel_size=8, stride=2, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        # Stage 2
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        # Stage 3
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        # Parameter-free layers shared by all stages
        self.relu = nn.ReLU()
        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        # Head
        self.fc1 = nn.Linear(1024, 64)
        self.dropout = nn.Dropout(0.2)
        self.fc2 = nn.Linear(64, ACTIONS)

    def forward(self, x):
        """Map a ``(batch, 1, H, W)`` tensor to ``(batch, ACTIONS)`` values."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in stages:
            x = self.max_pool2d(self.relu(norm(conv(x))))
        features = torch.flatten(x, 1)
        hidden = self.dropout(self.relu(self.fc1(features)))
        return self.fc2(hidden)


In [41]:
# Network, optimizer and loss shared by the training cells below.
model = DinoNet()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss_fn = nn.MSELoss()

# Make sure the directory that receives periodic checkpoints exists.
os.makedirs("./model", exist_ok=True)

In [42]:
def load_model(path="./latest.pth"):
    """Load saved weights into the module-level ``model``.

    Args:
        path: Checkpoint file holding a ``state_dict``. Defaults to
            ``./latest.pth``, the file the training loop overwrites on every
            periodic save.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
    """
    # map_location="cpu": the rest of this notebook keeps the model on the CPU
    # (it converts network outputs straight to NumPy), so tensors saved from
    # another device are remapped instead of failing to deserialize.
    # NOTE(review): torch.load unpickles the file -- only load checkpoints
    # from a trusted source.
    state_dict = torch.load(path, map_location="cpu")
    model.load_state_dict(state_dict)

if PRETRAINED:
    load_model()

In [43]:
def trainNetwork(model, game_state):
    """Play the game and update ``model`` after every single step.

    Each iteration picks an action epsilon-greedily, applies it through
    ``game_state.get_state``, builds a one-step Q-learning target and takes
    one optimizer step on that single transition. The module-level
    ``optimizer`` and ``loss_fn`` are used for the update.

    Args:
        model: Network mapping a ``(1, 1, H, W)`` float tensor to one value
            per action.
        game_state: Object whose ``get_state(action_vector)`` returns
            ``(frame, reward, terminal)``; its ``_game`` attribute must offer
            ``pause()`` and ``resume()``.

    The loop has no exit condition of its own: it runs until an exception
    propagates out of it (for example ``StopIteration`` from ``get_state``).
    Every 1000 steps the weights are written to ``./model/episode_<t>.pth``
    and ``./latest.pth``.
    """
    epsilon = INITIAL_EPSILON
    t = 0

    # Prime the loop with one frame obtained by "doing nothing".
    s_t, _, _ = game_state.get_state(np.array([1, 0]))

    while True:
        # choose an action epsilon greedy
        random_action = np.random.rand() <= epsilon
        s_t_tensor = torch.tensor(s_t).float().unsqueeze(0).unsqueeze(0)
        if random_action:
            action_index = np.random.randint(ACTIONS)
        else:
            # Inference only: no autograd graph is needed to pick an action.
            with torch.no_grad():
                action_index = model(s_t_tensor).argmax().item()
        a_t = np.zeros([ACTIONS])
        a_t[action_index] = 1

        # reduce epsilon gradually
        # NOTE(review): the decrement only fires on steps where
        # t % 1000 == 0, so a full anneal takes about 1000 * EXPLORE steps --
        # confirm this is intended.
        if epsilon > FINAL_EPSILON and t % 1000 == 0:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # observe outcome
        s_t1, r_t, terminal = game_state.get_state(a_t)

        # One-step target: the reward alone on a terminal transition,
        # otherwise reward plus the discounted best next-state value.
        # q_max is reset on every step so the log line below never reads an
        # unset or stale value after a crash.
        q_max = 0.0
        target = r_t
        if not terminal:
            s_t1_tensor = torch.tensor(s_t1).float().unsqueeze(0).unsqueeze(0)
            with torch.no_grad():
                q_max = float(model(s_t1_tensor).max())
            target = r_t + GAMMA * q_max

        # single step update: only the chosen action's value is moved toward
        # the target, the other outputs are regressed onto themselves
        q_val = model(s_t_tensor)
        target_f = q_val.clone().detach()
        target_f[0, action_index] = target

        loss = loss_fn(q_val, target_f)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_value = loss.item()

        # Transition to new state
        s_t = s_t1
        t += 1

        # save progress every 1000 iterations
        if t % 1000 == 0:
            game_state._game.pause()  # pause game while saving to filesystem
            torch.save(model.state_dict(), f"./model/episode_{t}.pth")
            torch.save(model.state_dict(), "./latest.pth")
            game_state._game.resume()

        # q_max is a plain Python float, so round() prints e.g. 0.523 rather
        # than the long float32 tail.
        print(f'timestep: {t}, random: {random_action}, epsilon: {round(epsilon, 3)}, action: {action_index}, reward: {r_t}, Q_max: {round(q_max, 3)}, loss: {round(loss_value, 3)}')
        

In [44]:
def playGame():
    """Open the game, train the module-level ``model`` on it, then clean up.

    ``StopIteration`` escaping the training loop is treated as a normal stop
    request and is not re-raised. Any other exception propagates to the
    caller, but the game is ended first in every case so the browser does not
    outlive the run.
    """
    game = Game()
    try:
        dino = DinoAgent(game)
        game_state = Game_state(dino, game)
        trainNetwork(model, game_state)
    except StopIteration:
        # Normal termination signal; nothing to report.
        pass
    finally:
        game.end()

In [45]:
# Start a browser-backed game and run the training loop on it.
playGame()

timestep: 1, random: False, epsilon: 0.01, action: 0, reward: 0.1, Q_max: 0.5230000019073486, loss: 0.0
timestep: 2, random: False, epsilon: 0.01, action: 1, reward: -0.1, Q_max: 0.3720000088214874, loss: 0.091
timestep: 3, random: False, epsilon: 0.01, action: 1, reward: -0.1, Q_max: 0.47999998927116394, loss: 0.0
timestep: 4, random: False, epsilon: 0.01, action: 0, reward: 0.1, Q_max: 0.13099999725818634, loss: 0.0
timestep: 5, random: False, epsilon: 0.01, action: 0, reward: 0.1, Q_max: 0.08500000089406967, loss: 0.037
timestep: 6, random: False, epsilon: 0.01, action: 1, reward: -0.1, Q_max: 0.4020000100135803, loss: 0.001
timestep: 7, random: False, epsilon: 0.01, action: 0, reward: 0.1, Q_max: 0.5009999871253967, loss: 0.002
timestep: 8, random: False, epsilon: 0.01, action: 1, reward: -0.1, Q_max: 0.3070000112056732, loss: 0.0
timestep: 9, random: False, epsilon: 0.01, action: 1, reward: -0.1, Q_max: 0.492000013589859, loss: 0.023
timestep: 10, random: False, epsilon: 0.01, act

NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=123.0.6312.107)
Stacktrace:
	GetHandleVerifier [0x00A94CA3+225091]
	(No symbol) [0x009C4DF1]
	(No symbol) [0x00869A7A]
	(No symbol) [0x0084E312]
	(No symbol) [0x008C517B]
	(No symbol) [0x008D55A6]
	(No symbol) [0x008BF2F6]
	(No symbol) [0x008979B9]
	(No symbol) [0x0089879D]
	sqlite3_dbdata_init [0x00F09A43+4064547]
	sqlite3_dbdata_init [0x00F1104A+4094762]
	sqlite3_dbdata_init [0x00F0B948+4072488]
	sqlite3_dbdata_init [0x00C0C9A9+930953]
	(No symbol) [0x009D07C4]
	(No symbol) [0x009CACE8]
	(No symbol) [0x009CAE11]
	(No symbol) [0x009BCA80]
	BaseThreadInitThunk [0x77217BA9+25]
	RtlInitializeExceptionChain [0x77A3BDAB+107]
	RtlClearBits [0x77A3BD2F+191]
