# In [1]:
import gym
import numpy as np
import pygame
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecFrameStack

# Define the game environment using Gym
class MyEnv(gym.Env):
    """Tiny 2-D environment: move a dot around a square pygame window.

    The agent picks one of four discrete actions (left, right, up, down) and
    the dot moves ``STEP_SIZE`` pixels per step.  The episode ends with a
    reward of -1 as soon as the dot leaves the window; every other step
    yields a reward of 0.

    Observations are ``float32`` arrays ``[x, y]`` that always lie inside
    ``observation_space``.

    Args:
        fps: Frame-rate cap passed to ``pygame.time.Clock.tick`` after each
            rendered step.  Defaults to 30, the original behaviour; pass 0
            to let steps run without throttling.
    """

    WINDOW_SIZE = 400        # width and height of the window, in pixels
    STEP_SIZE = 10           # pixels travelled per action
    START_POS = (200, 200)   # position the player is reset to

    # Action index -> (dx, dy).  Screen coordinates: y grows downwards,
    # so "up" is a negative y offset.
    _MOVES = {
        0: (-STEP_SIZE, 0),  # left
        1: (STEP_SIZE, 0),   # right
        2: (0, -STEP_SIZE),  # up
        3: (0, STEP_SIZE),   # down
    }

    def __init__(self, fps=30):
        super().__init__()
        self.action_space = gym.spaces.Discrete(len(self._MOVES))
        self.observation_space = gym.spaces.Box(
            low=0, high=self.WINDOW_SIZE, shape=(2,), dtype=np.float32
        )
        self.fps = fps
        # Give the player a valid position even if step() runs before reset().
        self.player_pos = list(self.START_POS)

        self.screen = pygame.display.set_mode(
            (self.WINDOW_SIZE, self.WINDOW_SIZE)
        )
        pygame.display.set_caption("My Game")
        self.clock = pygame.time.Clock()

    def reset(self):
        """Put the player back at the start and return the first observation."""
        self.player_pos = list(self.START_POS)
        return self._get_obs()

    def step(self, action):
        """Apply ``action``, redraw the window and report the outcome.

        Args:
            action: Index into the four moves (0 left, 1 right, 2 up,
                3 down).  Any other integer leaves the player in place.

        Returns:
            ``(observation, reward, done, info)`` where ``reward`` is -1 and
            ``done`` is True once the player has left the window, otherwise
            0 and False.  ``info`` is always an empty dict.
        """
        # int() lets NumPy integer scalars be used as dictionary keys.
        dx, dy = self._MOVES.get(int(action), (0, 0))
        self.player_pos[0] += dx
        self.player_pos[1] += dy

        x, y = self.player_pos
        inside = 0 <= x <= self.WINDOW_SIZE and 0 <= y <= self.WINDOW_SIZE
        done = not inside
        reward = -1 if done else 0

        self._render()
        return self._get_obs(), reward, done, {}

    def close(self):
        """Shut down the pygame display; safe to call more than once."""
        if self.screen is not None:
            pygame.display.quit()
            self.screen = None

    def _get_obs(self):
        """Return a fresh array copy of the position, clipped to the space."""
        # A new array is built on every call so callers never alias the
        # internal list; clipping keeps the terminal (off-board) observation
        # inside the declared Box bounds.
        position = np.array(self.player_pos, dtype=np.float32)
        return np.clip(
            position, self.observation_space.low, self.observation_space.high
        )

    def _render(self):
        """Draw the current frame and throttle to ``self.fps``."""
        if self.screen is None:  # display already closed
            return
        # Let pygame service the window's event queue so the OS does not
        # treat the window as unresponsive.
        pygame.event.pump()
        self.screen.fill((255, 255, 255))
        pygame.draw.circle(self.screen, (0, 0, 255), self.player_pos, 10)
        pygame.display.flip()
        self.clock.tick(self.fps)

# Create the game environment and wrap it in a vectorized environment.
# VecFrameStack (n_stack=4) feeds the policy the 4 most recent observations.
env = make_vec_env(MyEnv, n_envs=1)
env = VecFrameStack(env, n_stack=4)

# Upper bound on the demo rollout: a policy that has learned to stay on the
# board never triggers `done`, so an uncapped loop could run forever.
MAX_PLAY_STEPS = 1000

try:
    # Create the PPO agent
    model = PPO('MlpPolicy', env, verbose=1)

    # Train the agent
    model.learn(total_timesteps=10000)

    # Play the game using the trained agent
    obs = env.reset()
    game_over = False
    for _step in range(MAX_PLAY_STEPS):
        action, _ = model.predict(obs, deterministic=True)
        # A vectorized env returns one entry per sub-env; there is exactly
        # one sub-env here, so read its flag explicitly instead of relying
        # on the truthiness of the whole array.
        obs, reward, dones, info = env.step(action)
        game_over = bool(dones[0])
        if game_over:
            break
finally:
    # Always release the environment (and its window), even on errors.
    env.close()