# Gym Environment
For the agent to interact with the game, we need an environment for it to exist in. This environment simply adapts the visuals present in the game into the observations the agent receives and the actions it can take.

## Event Detector
The only events we need to detect are when the player dies or completes a level.

In [1]:
import cv2
import matplotlib.pyplot as plt

In [2]:
# manually created crop areas
# locations are at...
# complete level tag
#   plt.imshow(frame[110:150, 120:520])
#   120, 110, 400, 40
# retry button
#   plt.imshow(frame[375:425, 160:215])
#   160, 375, 55, 50

In [3]:
# Grab a single frame so the crop areas noted above can be inspected manually.
# NOTE(review): index 1 is assumed to be the screen-capture device on the
# author's machine - adjust it for your own setup.
cap = cv2.VideoCapture(1)  # Use your screen capture setup
# `ret` is the success flag returned by read(); `frame` is the captured image.
ret, frame = cap.read()
# Release the capture device right away; only this one frame is needed here.
cap.release()

In [4]:
class EventDetectorWithROI:
    """
    Detect "completion" and "death" events by template matching inside fixed ROIs.

    Two grayscale templates (the level-complete tag and the retry button) are
    each compared against their own region of interest of a frame using
    normalized cross-correlation (cv2.TM_CCOEFF_NORMED).

    ROIs are given as (x, y, width, height) tuples in frame pixel coordinates.
    """

    def __init__(self, completion_template_path, retry_template_path, completion_roi, retry_roi, match_threshold=0.8):
        """
        Load both templates and remember the ROIs.

        Args:
            completion_template_path: path to the level-complete template image.
            retry_template_path: path to the retry-button template image.
            completion_roi: (x, y, w, h) region searched for the completion template.
            retry_roi: (x, y, w, h) region searched for the retry template.
            match_threshold: minimum TM_CCOEFF_NORMED score counted as a match.

        Raises:
            OSError: if either template image cannot be read.
        """
        # templates
        self.completion_template = self._load_template(completion_template_path)
        self.retry_template = self._load_template(retry_template_path)

        # roi regions
        self.retry_roi = retry_roi
        self.completion_roi = completion_roi

        # score a match must reach (shared by both templates)
        self.match_threshold = match_threshold

    @staticmethod
    def _load_template(path):
        """
        Read a template image as grayscale, failing loudly if it is unreadable.
        """
        template = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising, which
        # would otherwise only surface later as an opaque matchTemplate error.
        if template is None:
            raise OSError(f"Could not read template image: {path}")
        return template

    def _crop_to_roi(self, frame, roi):
        """
        Crop the frame to the given ROI.

        `roi` is (x, y, w, h); the frame is indexed rows-first, hence [y, x].
        """
        x, y, w, h = roi
        return frame[y:y+h, x:x+w]

    def _matches(self, roi_frame, template):
        """
        Return True if `template` matches anywhere inside `roi_frame`.
        """
        scores = cv2.matchTemplate(roi_frame, template, cv2.TM_CCOEFF_NORMED)
        return bool((scores >= self.match_threshold).any())

    def detect_event(self, frame):
        """
        Detect if the player has died or completed the level.

        Args:
            frame: BGR image of the game screen.

        Returns:
            The string "death", "completion", or None when neither is detected.
        """
        # Convert the frame to grayscale (the templates are grayscale too)
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect completion (Completion screen). This is checked first because
        # the retry button is also shown after completion, so testing the retry
        # button first would report a completed level as a death.
        completion_roi_frame = self._crop_to_roi(gray_frame, self.completion_roi)
        if self._matches(completion_roi_frame, self.completion_template):
            return "completion"

        # Detect death (Retry button)
        retry_roi_frame = self._crop_to_roi(gray_frame, self.retry_roi)
        if self._matches(retry_roi_frame, self.retry_template):
            return "death"

        return None


In [5]:
# In (x, y, width, height)
completion_roi = (120, 110, 400, 40)
retry_roi = (160, 375, 55, 50)

# Initialize the detector with templates and ROIs
detector = EventDetectorWithROI(
    "./template_img/complete_template.png",
    "./template_img/retry_template.png",
    completion_roi,
    retry_roi,
)

# Test the detector on a single captured frame
cap = cv2.VideoCapture(1)  # Use your screen capture setup
ret, frame = cap.read()
cap.release()

# read() reports failure through `ret`; without this check a failed capture
# would be passed on to the detector and fail there with an unclear error.
if not ret:
    raise RuntimeError("Failed to capture a frame from the video source.")

event = detector.detect_event(frame)
if event == "death":
    print("Player died!")
elif event == "completion":
    print("Level completed!")
else:
    print("No event detected.")

No event detected.


## Input Controller
The first step is to create a framework for interacting with the Geometry Dash (GD) game.

In [6]:
import cv2
import win32gui
import win32api
import win32con
import time

In [7]:
def get_gd_hwnd():
    """
    Look up the top-level window titled "Geometry Dash" and return its handle.

    Returns:
        The window handle (hwnd) reported by win32gui.FindWindow.

    Raises:
        RuntimeError: if no window with that title exists.
    """
    window_name = "Geometry Dash"
    hwnd = win32gui.FindWindow(None, window_name)
    if hwnd == 0:
        # Raising a bare string is itself a TypeError in Python 3, which hid
        # this message; raise a real exception so the caller sees the cause.
        raise RuntimeError("Geometry Dash window not found. Make sure the game is running.")
    # print(f"Found Geometry Dash window: {hwnd}")
    return hwnd

def send_spacebar(hwnd):
    """
    Post a short spacebar press (key down, 50 ms pause, key up) to a window.

    A falsy `hwnd` is reported on stdout and nothing is sent.
    """
    if not hwnd:
        print("Invalid window handle. Cannot send input.")
        return

    space_key = win32con.VK_SPACE

    # Key goes down ...
    win32api.PostMessage(hwnd, win32con.WM_KEYDOWN, space_key, 0)
    # ... stays down for the press duration (50ms) ...
    time.sleep(0.05)
    # ... and comes back up; the key-up lParam has its two top bits set.
    win32api.PostMessage(hwnd, win32con.WM_KEYUP, space_key, 0xC0000000)

    print("Spacebar sent to Geometry Dash!")

## Gym env

In [8]:
import gym
import numpy as np
from gym import spaces

In [None]:
class GeometryDashEnv(gym.Env):
    """
    Custom Gym environment for Geometry Dash with death and completion detection.

    The environment delegates all real work to three collaborators:

    * ``frame_stacker`` - must expose ``stack_size``, ``reset_stack(embedding)``
      and ``add_frame(embedding)``; the last two return the stacked state.
    * ``resnet_extractor`` - must expose ``get_image_embeddings(frame)``.
    * ``input_controller`` - must expose ``reset_game()``, ``send_input(action)``,
      ``get_screen()`` (returning ``(ret, frame)``) and
      ``detect_death_or_completion()`` (returning "death", "completion" or None).
    """

    # Rewards handed out by _calculate_reward; class attributes so a subclass
    # can tune them without overriding the method.
    DEATH_REWARD = -100  # Penalty for death
    COMPLETION_REWARD = 500  # Reward for completing the level
    SURVIVAL_REWARD = 0.2  # Reward for every step without a terminal event

    # Events that end the episode
    TERMINAL_EVENTS = ("death", "completion")

    def __init__(self, frame_stacker, resnet_extractor, input_controller):
        super().__init__()

        # Frame stacker, ResNet feature extractor, and input controller
        self.frame_stacker = frame_stacker
        self.resnet_extractor = resnet_extractor
        self.input_controller = input_controller

        # Observation space (stacked embeddings, flattened to one vector).
        # NOTE(review): assumes each embedding has 2048 values - confirm
        # against the extractor actually used.
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(2048 * frame_stacker.stack_size,),  # Adjust for stacked embeddings
            dtype=np.float32,
        )

        # Action space (KeyUp or KeyDown)
        self.action_space = spaces.Discrete(2)

        # Initialize environment variables
        self.current_state = None
        self.done = False
        self.previous_action = None

    def reset(self):
        """
        Reset the environment and return the initial observation.
        """
        self.input_controller.reset_game()  # Reset the game

        # Capture the first frame and process it
        frame = self._capture_frame()
        initial_embedding = self.resnet_extractor.get_image_embeddings(frame)

        # Initialize frame stack
        self.current_state = self.frame_stacker.reset_stack(initial_embedding)
        self.done = False
        # Forget the last action so the first step() always sends its input
        self.previous_action = None

        return self.current_state

    def step(self, action):
        """
        Perform an action and return observation, reward, done, and info.

        Returns:
            Tuple ``(observation, reward, done, info)``; ``info`` is always an
            empty dict.
        """
        # Only send input when the action changes, so a repeated action is
        # not re-sent on every step.
        if action != self.previous_action:
            self.input_controller.send_input(action)
            self.previous_action = action

        # Capture the next frame and process it
        frame = self._capture_frame()
        next_embedding = self.resnet_extractor.get_image_embeddings(frame)
        self.current_state = self.frame_stacker.add_frame(next_embedding)

        # Check for death or completion
        event = self.input_controller.detect_death_or_completion()
        self.done = event in self.TERMINAL_EVENTS

        # Calculate reward
        reward = self._calculate_reward(event)

        return self.current_state, reward, self.done, {}

    def _capture_frame(self):
        """
        Capture the game screen and return the frame.

        Raises:
            RuntimeError: if the input controller reports a failed capture.
        """
        ret, frame = self.input_controller.get_screen()
        if not ret:
            raise RuntimeError("Failed to capture game screen.")
        return frame

    def _calculate_reward(self, event):
        """
        Calculate the reward based on the current event.

        Returns DEATH_REWARD for "death", COMPLETION_REWARD for "completion",
        and SURVIVAL_REWARD for anything else (including None).
        """
        if event == "death":
            return self.DEATH_REWARD
        if event == "completion":
            return self.COMPLETION_REWARD
        return self.SURVIVAL_REWARD
