In [2]:
from mss import mss
import pydirectinput 
import cv2
import time
import numpy as np
import pytesseract
import matplotlib.pyplot as plt
from gym import Env
from gym.spaces import Box, Discrete
import re

# Tell pytesseract which Tesseract executable to invoke for OCR.
# NOTE(review): this is a hard-coded Windows install path; it has to be adjusted on
# any machine where Tesseract lives somewhere else.
pytesseract.pytesseract.tesseract_cmd = "C:/Program Files/Tesseract-OCR/tesseract"

In [3]:
class WebGame(Env):
    """Gym environment that plays an on-screen game via screen capture and key presses.

    Observations are grayscale captures of the game region, resized to 100x83 and
    returned with a leading channel axis, i.e. shape ``(1, 83, 100)`` and dtype
    ``uint8`` (matching ``observation_space``). The four discrete actions map to the
    arrow keys. The end of an episode is detected by running OCR over a fixed screen
    region and comparing the first four recognised characters against known spellings.

    NOTE(review): all screen coordinates below are hard-coded, so they presumably only
    fit one particular window/monitor layout -- confirm before running elsewhere.
    """

    # Action index -> key name sent through pydirectinput.
    _ACTION_KEYS = {
        0: "up",
        1: "down",
        2: "left",
        3: "right",
    }

    # Leading OCR characters that are treated as the end-of-game banner
    # (includes misreadings that Tesseract has produced for "Game").
    _DONE_STRINGS = ("Game", "Gahe", "Gaam")

    def __init__(self):
        super().__init__()

        # Step spaces: single-channel 83x100 image in, one of four key presses out.
        self.observation_space = Box(0, 255, shape=(1, 83, 100), dtype=np.uint8)
        self.action_space = Discrete(4)

        # Screen-capture handle and the regions (in screen pixels) that get grabbed.
        self.cap = mss()
        self.game_location = {"top": 280, "left": 200, "width": 540, "height": 530}
        self.done_location = {"top": 470, "left": 260, "width": 420, "height": 70}
        self.score_location = {"top": 130, "left": 518, "width": 80, "height": 30}

    def step(self, action):
        """Press the key for ``action`` and return ``(observation, reward, done, info)``.

        Every action index maps to a key, so a key is always pressed. (A truthiness
        check on ``action`` would silently drop action ``0``, i.e. "up".)
        The reward is a constant 1 per step and ``info`` is always an empty dict.
        """
        # int() normalises numpy integer scalars coming from the RL library.
        pydirectinput.press(self._ACTION_KEYS[int(action)])

        done = self.get_done()
        new_observation = self.get_observation()

        reward = 1
        info = {}

        return new_observation, reward, done, info

    def render(self):
        """Show the current capture of the game region in an OpenCV window.

        Pressing ``q`` while the window has focus closes all OpenCV windows.
        """
        frame = np.array(self.cap.grab(self.game_location))[:, :, :3]
        cv2.imshow("Game", frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            self.close()

    def reset(self):
        """Click at screen position (650, 230) to restart and return the first observation.

        The half-second pauses before and after the click give the screen time to
        settle before the observation is captured.
        """
        time.sleep(0.5)
        pydirectinput.click(650, 230)
        time.sleep(0.5)
        return self.get_observation()

    def close(self):
        """Destroy any OpenCV windows opened by ``render``."""
        cv2.destroyAllWindows()

    def get_observation(self):
        """Capture the game region and return it as a ``(1, 83, 100)`` grayscale array.

        The processed array is returned (not the raw capture) so that the result has
        the shape declared in ``observation_space``.
        """
        # Keep the first three channels of the capture and treat them as BGR.
        raw = np.array(self.cap.grab(self.game_location))[:, :, :3]
        gray = cv2.cvtColor(raw, cv2.COLOR_BGR2GRAY)
        # cv2.resize takes (width, height); the resulting array is 83 rows x 100 columns.
        resized = cv2.resize(gray, (100, 83))
        channel = np.reshape(resized, (1, 83, 100))

        return channel

    def get_done(self):
        """Return True when OCR over the "done" region reads like the end-of-game text.

        Only the first four recognised characters are compared against the accepted
        spellings.
        """
        done_cap = np.array(self.cap.grab(self.done_location))[:, :, :3]
        res = pytesseract.image_to_string(done_cap)[:4]

        return res in self._DONE_STRINGS
    
    # def get_score(self):
    #     score_cap = np.array(self.cap.grab(self.score_location))[:,:,:3]
    #     score = (pytesseract.image_to_string(score_cap))
    #     match = re.search(r'\d+', score)
    #     if match:
    #         int_score = int(match.group())
    #     else:
    #         int_score = score

    #     return int_score, score_cap


In [4]:
# Instantiate the screen-capture game environment; this creates the mss capture handle.
env = WebGame()

In [5]:
# # Testing loop: play `episodes` episode(s) with random actions (currently 1)

# episodes = 1

# for episode in range(1, episodes + 1):
#     obs = env.reset()
#     done = False
#     total_reward = 0

#     while not done:
#         obs, reward, done, info = env.step(env.action_space.sample())
#         total_reward += reward

#     print(f"Total reward for episode {episode} is {total_reward}")

In [187]:
# Imports for the checkpoint callback, plus a sanity check of the custom environment

import os
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common import env_checker

# Validate `env` with Stable-Baselines3's environment checker before training on it.
env_checker.check_env(env)

In [188]:
# For saving models (callback)
class TrainAndLoggingCallback(BaseCallback):
    """Callback that saves the model being trained every ``check_freq`` calls.

    Args:
        check_freq: Save a checkpoint whenever the callback's call counter
            (``n_calls``) is a multiple of this value.
        save_path: Directory the checkpoints are written to. ``None`` disables
            both directory creation and saving.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose = 1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory, if one was configured."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save a checkpoint on every ``check_freq``-th call; always keep training.

        The ``save_path`` guard mirrors ``_init_callback``: without it a ``None``
        path would reach ``os.path.join`` and raise ``TypeError`` in the middle of
        training.
        """
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, "best_model_{}".format(self.n_calls))
            self.model.save(model_path)

        # Returning True lets training continue.
        return True
    
# Directory that receives the model checkpoints written by the callback.
CHECKPOINT_DIR = "./train/"
# Directory handed to the model as its TensorBoard log location.
LOG_DIR = "./logs/"
# Save a checkpoint every 1000 callback calls.
callback = TrainAndLoggingCallback(check_freq=1000, save_path=CHECKPOINT_DIR)

In [195]:
# Build DQN
from stable_baselines3 import DQN

# CNN policy over the observations produced by `env`; TensorBoard logs go to LOG_DIR
# and the replay buffer size is set to 100000.
model = DQN("CnnPolicy", env = env, tensorboard_log=LOG_DIR, verbose=1, buffer_size=100000)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [386]:
# Train for 1000 timesteps; `callback` is invoked during training and saves a
# checkpoint whenever its call counter reaches a multiple of its check_freq.
model.learn(total_timesteps=1000, callback=callback)