In [1]:
# %pip install tensorflow
# %pip install gym
# %pip install keras
# %pip install keras-rl2
# %pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117

In [2]:
from gym import Env
from gym.spaces import Discrete, Box
from matplotlib import pyplot as plt
from mss import mss
import numpy as np 
import random
import threading
import time
import pydirectinput
import pygetwindow
import cv2
import win32gui
import pytesseract
from image_analysis_test.take_screenshot import Screenshot
from input_sending_test.input_sending import SendInput 
from number_recognition_test.number_recog import RecognizeText 
from pattern_recognition_test.pattern_recog import RecognizePattern 

In [3]:
class StepManiaEnv(Env):
    """Gym environment that drives StepMania via screen capture and simulated key taps.

    Observations are downscaled screenshots of the ``game_location`` screen
    region, actions are single key taps, and the reward is the number that the
    text-recognition helper reads from the ``combo_location`` region.

    Args:
        tesseract_cmd: Path to the Tesseract executable that pytesseract should
            use. Defaults to the path this notebook was written with; pass
            ``None`` to leave pytesseract's current setting untouched.
    """

    # Title used to look up the game window.
    WINDOW_TITLE = 'StepMania'

    # Discrete action index -> key to tap. Index 0 means "press nothing".
    ACTION_MAP = {
        0: 'no_op',
        1: 'a',
        2: 'd',
        3: 'w',
        4: 's',
    }

    # Text whose presence in the ``done_location`` region ends an episode.
    DONE_STRINGS = ('Your Results',)

    # Setup
    def __init__(self, tesseract_cmd=r'G:\Programme\Tesseract\tesseract.exe'):
        super().__init__()
        self.action_space = Discrete(len(self.ACTION_MAP))

        # Observation Array: one 170x100 uint8 plane.
        # (The capture region itself is 495x290, i.e. shape=(1, 495, 290) without downscaling.)
        self.observation_space = Box(
            low=0, high=255, shape=(1, 170, 100), dtype=np.uint8)

        # Define extraction parameters for the game
        self.screenshot_helper = Screenshot()
        self.text_recog_helper = RecognizeText()
        self.input_sending_helper = SendInput()
        self.capture = mss()
        # Hard-coded capture regions (pixels).
        self.game_location = {'top': 35, 'left': 110, 'width': 290, 'height': 495}
        # Previously tried: 'top': 450, 'left': 590, 'width': 285, 'height': 30
        self.combo_location = {'top': 450, 'left': 570, 'width': 285, 'height': 30}
        self.done_location = {'top': 40, 'left': 25, 'width': 150, 'height': 30}
        if tesseract_cmd is not None:
            pytesseract.pytesseract.tesseract_cmd = tesseract_cmd

    def _require_game_window(self):
        """Return the game window's rectangle, failing fast if the game is not running.

        Raises:
            IndexError: If no window matching ``WINDOW_TITLE`` is found.
        """
        windows = pygetwindow.getWindowsWithTitle(self.WINDOW_TITLE)
        if not windows:
            raise IndexError(f"No window titled '{self.WINDOW_TITLE}' was found")
        return win32gui.GetWindowRect(windows[0]._hWnd)

    def _grab(self, region):
        """Capture ``region`` (a top/left/width/height dict) and return it as an array."""
        return np.array(self.capture.grab(region))

    # What is called to do something in the game
    def step(self, action):
        """Tap the key for ``action`` and read back the resulting game state.

        Args:
            action: Index into ``ACTION_MAP``; 0 sends no input.

        Returns:
            Tuple ``(observation, reward, done, info)``; ``info`` is always an
            empty dict.
        """
        # TODO: Does this work with held notes?
        # TODO: Change action from single input to array with inputs
        if action != 0:
            self.input_sending_helper.tapKey(self.ACTION_MAP[action])

        # Checking if the game is over
        done = self.get_over()
        # Get the next observation
        new_observation = self.get_observation()
        # Use score as reward
        reward = self.get_reward()
        info = {}

        return new_observation, reward, done, info

    # Restart the game
    def reset(self):
        """Send the key sequence that starts the next round and return the first observation.

        The sleeps and key presses (enter, d, enter) are fixed; presumably they
        navigate from the results screen to the next song — TODO confirm.
        """
        time.sleep(5)
        pydirectinput.press('enter')
        time.sleep(2)
        pydirectinput.press('d')
        pydirectinput.press('enter')
        return self.get_observation()

    # Get the part of observation of the game that we want
    def get_observation(self):
        """Capture ``game_location`` and return it downscaled by the screenshot helper.

        Raises:
            IndexError: If the game window cannot be found.
        """
        # The rectangle is not used (capture regions are hard-coded); the lookup
        # only guarantees the game window exists before grabbing pixels.
        self._require_game_window()

        # Drop the last channel of the capture before downscaling.
        raw = self._grab(self.game_location)[:, :, :-1].astype(np.uint8)
        # NOTE(review): the arguments look like a (width, height) target size and
        # the output shape — confirm against Screenshot.downscaleImage.
        return self.screenshot_helper.downscaleImage(raw, (100, 170), (1, 170, 100))

    # Get the current score as a reward
    def get_reward(self):
        """Read the number shown in ``combo_location`` and return it as the reward.

        Raises:
            IndexError: If the game window cannot be found.
        """
        self._require_game_window()

        reward_img = self._grab(self.combo_location)[:, :, :-1].astype(np.uint8)
        channel = self.screenshot_helper.downscaleImage(reward_img, (285, 30), (1, 30, 285))
        # Use this instance's helper; the original reached for the global `env`,
        # which breaks (or reads the wrong instance) when no such global exists.
        return self.text_recog_helper.get_number_from_image(channel[0])

    # Get if the game is over
    def get_over(self):
        """Return whether the done text is detected in ``done_location``.

        Raises:
            IndexError: If the game window cannot be found.
        """
        self._require_game_window()

        # NOTE(review): `::-1` reverses the channel axis instead of dropping the
        # last channel like the other two captures (`:-1`), so every channel is
        # still present, in reversed order, when converted to gray. This looks
        # like a typo; it is kept as-is because changing it may shift OCR results.
        done_capture = self._grab(self.done_location)[:, :, ::-1].astype(np.uint8)
        gray = cv2.cvtColor(done_capture, cv2.COLOR_BGR2GRAY)
        # Pass a fresh list so the helper can never mutate the class constant.
        # The meaning of the third argument (12) is defined by the helper — TODO confirm.
        return self.text_recog_helper.is_text_in_image(gray, list(self.DONE_STRINGS), 12)[0]


In [4]:
# Create the environment. StepManiaEnv.__init__ only builds helpers and capture
# regions; it does not itself send any key presses to the game.
env = StepManiaEnv()

In [5]:
# Baseline run: 10 episodes driven by randomly sampled actions, with timing
# statistics so the achievable step rate can be judged.
for episode in range(10):
    obs = env.reset()
    done = False
    final_reward = 0
    amount_of_screenshots = 0
    start = time.perf_counter()
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
        amount_of_screenshots += 1
        # Track the highest reward value seen during the episode.
        final_reward = max(final_reward, reward)

    stop = time.perf_counter()
    final_time = stop - start
    print(f'Total Reward for episode {episode} is {final_reward}')
    print(f'Total screenshots for episode {episode} is {amount_of_screenshots}')
    print(f'Total duration of episode {episode} is {final_time:0.4f} seconds')
    print(f'This equals an average of {amount_of_screenshots / final_time} images per second')

Total Reward for episode 0 is 2715
Total screenshots for episode 0 is 167
Total duration of episode 0 is 92.4896 seconds
This equals an average of 1.805609124962938 images per second
Total Reward for episode 1 is 8000
Total screenshots for episode 1 is 35
Total duration of episode 1 is 19.9062 seconds
This equals an average of 1.7582484357215307 images per second
Total Reward for episode 2 is 135
Total screenshots for episode 2 is 191
Total duration of episode 2 is 102.4751 seconds
This equals an average of 1.8638681978348468 images per second


KeyboardInterrupt: 

In [17]:
import os
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common import env_checker

In [None]:
# Check if the environment is valid
# NOTE(review): the checker presumably calls reset()/step() on the env, which
# send key presses and grab the screen — confirm the game is open before running.
env_checker.check_env(env)

In [20]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that saves the model being trained at a fixed interval.

    Args:
        checking_freq: Number of callback calls between two saved checkpoints.
        save_path: Directory the checkpoints are written to. ``None`` disables
            both directory creation and saving.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, checking_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.checking_freq = checking_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory if a save path was configured."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save the model every ``checking_freq`` calls.

        Returns:
            Always ``True``.
        """
        # Mirror the guard in _init_callback: with no save path there is nowhere
        # to write, and os.path.join(None, ...) would raise a TypeError.
        if self.save_path is not None and self.n_calls % self.checking_freq == 0:
            model_path = os.path.join(self.save_path, f'best_model_{self.n_calls}')
            self.model.save(model_path)

        return True

In [21]:
# Directory handed to TrainAndLoggingCallback as save_path (model checkpoints)
CHECKPOINT_DIR = './training/'
# Directory handed to DQN as tensorboard_log
LOG_DIR = './logs/'

In [22]:
# Save the model into CHECKPOINT_DIR every 1,000 callback calls
callback = TrainAndLoggingCallback(checking_freq=1_000, save_path=CHECKPOINT_DIR)

In [23]:
from stable_baselines3 import DQN

In [24]:
# Fresh environment instance for training; this rebinds the `env` name that was
# already created earlier in the notebook.
env = StepManiaEnv()

In [25]:
# Build the DQN agent around the StepMania environment
model = DQN(
  'CnnPolicy',              # Policy identifier; presumably the CNN policy for image observations
  env,                      # Used environment
  tensorboard_log=LOG_DIR,  # Log directory
  verbose=1,                # Enables logging
  buffer_size=120_000,      # Buffer size depending on amount of ram
  learning_starts=1_000,    # Learning starts after 1000 steps
  #device='cpu'             # Training on cpu or gpu
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [26]:
# Start training
# NOTE(review): total_timesteps here equals the model's learning_starts (both
# 1_000), so this run is only as long as the warm-up phase; presumably little or
# no actual learning happens — confirm and raise total_timesteps for a real run.
model.learn(total_timesteps=1_000, callback=callback)

Logging to ./logs/DQN_4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1        |
|    ep_rew_mean      | 3e+08    |
|    exploration_rate | 0.962    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 0        |
|    time_elapsed     | 42       |
|    total_timesteps  | 4        |
----------------------------------


KeyboardInterrupt: 

In [None]:
# Load past model (load returns a new model instead of updating `model` in place, so rebind it)
# model = DQN.load(path, env=env)

In [None]:
# Let the trained model play one episode and report the highest reward it saw.
for episode in range(1):
    obs = env.reset()
    done = False
    final_reward = 0
    while not done:
        action, _ = model.predict(obs)
        # predict returns the action wrapped in an array-like; step gets a plain int.
        obs, reward, done, info = env.step(int(action))
        final_reward = max(final_reward, reward)

    print(f'Total Reward for episode {episode} is {final_reward}')