In [2]:
# RL dependencies
from gym.spaces import Discrete, Box
import numpy as np
from gym import Env
from stable_baselines3 import PPO
from stable_baselines3.common import env_checker
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack

In [3]:
# memory reading dependencies
import pymem
from pymem.process import module_from_name
from pymem.ptypes import RemotePointer

In [4]:
# Extra dependencies
import os
import time

import cv2
import matplotlib.pyplot as plt
import mss
import pydirectinput
from PIL.Image import fromarray

Read game variables from memory - https://stackoverflow.com/a/73538848

In [5]:
# Attach to the running game process and locate UnityPlayer.dll inside it.
pm = pymem.Pymem("BitBlasterXL.exe")
# module_from_name yields None when the module is not (yet) loaded; fail with a
# clear message instead of an AttributeError on `.lpBaseOfDll`.
_unityPlayerModule = module_from_name(pm.process_handle, "UnityPlayer.dll")
if _unityPlayerModule is None:
    raise RuntimeError(
        "UnityPlayer.dll was not found in BitBlasterXL.exe; make sure the game has finished starting."
    )
# Base address of UnityPlayer.dll; the health/score pointer chains start relative to it.
gameModule = _unityPlayerModule.lpBaseOfDll

In [6]:
# Despite the names, these are plain integer addresses: the UnityPlayer.dll base
# plus a fixed offset, used as the starting point of each pointer chain.
healthModule, scoreModule = (
    int(gameModule + staticOffset) for staticOffset in (0x01ACA468, 0x01993010)
)

In [7]:
# Create a custom gym Env
class BitBlaster(Env):
    """Gym environment that plays Bit Blaster XL via screen capture, key presses and memory reads.

    Observations are 80x80 grayscale screenshots with a leading channel axis
    (shape ``(1, 80, 80)``, dtype ``uint8``). The two discrete actions map to
    the "right" and "ctrl" keys. Health and score are read from the running
    game through the module-level ``pm`` handle, starting from the
    module-level ``healthModule`` / ``scoreModule`` addresses.
    """

    # Health values reported by the game: 28 when fully healed, 27 when the
    # shield pops, and 26 when dead.
    FULL_HEALTH = 28
    DEAD_HEALTH = 26

    # Pointer-chain offsets followed from healthModule / scoreModule.
    HEALTH_OFFSETS = (0x490, 0x10, 0xE0, 0x0, 0xB8, 0xF0, 0xE10)
    SCORE_OFFSETS = (0x10, 0x108, 0x0, 0xD0, 0x8, 0x60, 0xC8)

    def __init__(self):
        """Set up the spaces, the capture region and the reward baselines."""
        self.observation_space = Box(0, 255, shape=(1, 80, 80), dtype=np.uint8)
        # low number of action_space actions for testing
        self.action_space = Discrete(2)
        # the game is in windowed mode, and placed on the top left of my screen to be captured
        self.monitor = {"top": 35, "left": 0, "width": 644, "height": 484}
        self.actionsDict = {0: "right", 1: "ctrl"}
        self.previousScore = 0
        self.previousHealth = self.FULL_HEALTH

    def step(self, action: int):
        """Press the key for ``action`` and return ``(obs, reward, done, info)``.

        Args:
            action: index into ``actionsDict`` (0 or 1).

        Returns:
            The frame captured after the key press, the reward from
            ``reward_fn``, ``done`` (True once health is at or below
            ``DEAD_HEALTH``) and an empty info dict.
        """
        # Send the input first so the health, frame and score sampled below
        # are taken after the action rather than before it.
        # int() also accepts NumPy integer scalars as dictionary keys.
        pydirectinput.press(self.actionsDict[int(action)])

        life = pm.read_int(self.getPointerAddress(base=healthModule, offsets=list(self.HEALTH_OFFSETS)))
        obs = self.get_observation()
        done = life <= self.DEAD_HEALTH
        reward = self.reward_fn(life)

        info = {}
        return obs, reward, done, info

    def reward_fn(self, life):
        """Compute the step reward from the score and health changes since the last call.

        Args:
            life: current health value read from the game.

        Returns:
            ``scoreDelta / 100 + healthDelta * 100.5 + 0.5``. Also updates
            ``previousScore`` and ``previousHealth`` for the next call.
        """
        gameScore = pm.read_int(self.getPointerAddress(base=scoreModule, offsets=list(self.SCORE_OFFSETS)))

        # Calculate score reward
        scoreDelta = gameScore - self.previousScore
        self.previousScore = gameScore

        # Calculate health reward (negative when health was lost)
        healthDelta = life - self.previousHealth
        self.previousHealth = life

        # Calculate overall reward; the constant 0.5 is added on every step
        score = (scoreDelta / 100) + (healthDelta * 100.5) + 0.5
        return score

    # function to read memory from game
    def getPointerAddress(self, base, offsets: list):
        """Follow a multi-level pointer chain and return the final address.

        Every offset except the last is added to the current pointer value and
        dereferenced; the last offset is only added, giving the address to read.

        Args:
            base: address holding the first pointer of the chain.
            offsets: non-empty sequence of offsets.

        Returns:
            The resolved integer address.

        Raises:
            ValueError: if ``offsets`` is empty.
        """
        if not offsets:
            raise ValueError("offsets must contain at least one entry")
        # Split by position, not by value: an intermediate offset may legitimately
        # be equal to the final one.
        *chain, finalOffset = offsets
        remote_pointer = RemotePointer(pm.process_handle, base)
        for offset in chain:
            remote_pointer = RemotePointer(pm.process_handle, remote_pointer.value + offset)
        return remote_pointer.value + finalOffset

    def reset(self):
        """Restart the game by pressing enter and return the first observation."""
        # fixed waits before and after the key press
        time.sleep(0.9)
        pydirectinput.press("enter")
        time.sleep(1.3)
        obs = self.get_observation()
        # reset variables
        self.previousScore = 0
        self.previousHealth = self.FULL_HEALTH
        return obs

    def get_observation(self):
        """Grab the capture region and return it as a ``(1, 80, 80)`` grayscale array."""
        # Use the capture object as a context manager so it is released after
        # every grab instead of accumulating one per step.
        with mss.mss() as screenCapture:
            obs = np.array(screenCapture.grab(self.monitor), dtype=np.uint8)
        small_img = cv2.resize(obs, (80, 80))
        img_gray = cv2.cvtColor(small_img, cv2.COLOR_BGR2GRAY)

        # add the leading channel axis expected by observation_space
        np_img = np.expand_dims(img_gray, axis=0)
        return np_img

    # Visualize the game
    def render(self, mode="human"):
        """Show the current observation in an OpenCV window named "GAME".

        Args:
            mode: accepted for compatibility with Gym's ``render`` signature; unused.

        Pressing "q" while the window has focus closes it.
        """
        cv2.imshow("GAME", self.get_observation()[0, :, :])
        if cv2.waitKey(1) & 0xFF == ord("q"):
            self.close()

    # This closes down the observation
    def close(self):
        """Destroy all OpenCV windows."""
        cv2.destroyAllWindows()

In [8]:
# Output locations for saved models and training logs. Set the
# BITBLASTER_SAVE_DIR / BITBLASTER_LOG_DIR environment variables to run the
# notebook on another machine; the defaults are the original hard-coded paths.
SAVE_DIR = os.environ.get(
    "BITBLASTER_SAVE_DIR",
    "C:/Users/imnot/Source/Repos/2023/Python/RL/BitBlaster/Saved_Models/",
)
LOG_DIR = os.environ.get("BITBLASTER_LOG_DIR", "C:/Users/imnot/runs/")

In [9]:
# Create the environment; the constructor only defines the spaces, the capture
# region, the action-to-key map and the reward baselines.
env = BitBlaster()

In [9]:
# close() only destroys OpenCV windows, so `env` can still be used afterwards.
env.close()

In [10]:
# test code, might delete
#env = Monitor(env, LOG_DIR)
#env = DummyVecEnv([lambda:env])
#env = VecFrameStack(env, 4, channels_order="first")

In [10]:
# Build a PPO agent with the "CnnPolicy" policy on `env`; all other PPO settings are left at their defaults.
model = PPO("CnnPolicy", env=env)

In [None]:
# Pause for two seconds so the game window can be clicked (focused) before
# training starts sending key presses.
time.sleep(2)
# NOTE(review): PPO presumably collects whole rollouts (n_steps per update), so this
# may run more than 10 environment steps before stopping - confirm against the SB3 docs.
model.learn(total_timesteps=10)

In [None]:
# Pause so the game window can be clicked (focused) before random key presses are sent
time.sleep(2)
# open the render window and move it to a fixed screen position
env.render()
cv2.moveWindow('GAME',300,700)
# Play five episodes with uniformly sampled actions and print each episode's total reward
for episode in range(5):
    env.reset()
    total_reward = 0
    while True:
        # rendering every step shows how fast the agent can see the game
        env.render()
        obs, reward, done, info = env.step(env.action_space.sample())
        total_reward += reward
        if done:
            break
    print(total_reward)

In [29]:
# Tear down the OpenCV "GAME" preview window opened by render().
env.close()