In [33]:
import pygame
import numpy as np
import pandas as pd
import tensorflow as tf
import gymnasium as gym
import math

from gymnasium import spaces

In [34]:
# Board geometry: a square grid of GRID_SIZE x GRID_SIZE cells,
# each drawn as a CELL_SIZE x CELL_SIZE pixel square.
GRID_SIZE = 15
CELL_SIZE = 40
WIDTH = HEIGHT = GRID_SIZE * CELL_SIZE  # window size in pixels
FPS = 10  # value handed to clock.tick() by the play loop

# RGB colour triples used for drawing
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)
RED = (255, 0, 0)
GREEN = (0, 255, 0)
BLUE = (0, 0, 255)

In [35]:
# Start pygame and open the game window. `screen` and `clock` are module-level
# objects: Game.render()/Game.final_screen() draw on `screen`, and the play
# loop calls clock.tick(FPS) once per frame.
pygame.init()
screen = pygame.display.set_mode((WIDTH, HEIGHT))  # WIDTH x HEIGHT pixel window
clock = pygame.time.Clock()
pygame.display.set_caption("Capture The Flag")  # window title

In [36]:
# Hyperparameters of the tabular Q-learning agent
alpha = 0.5     # learning rate
gamma = 0.9     # discount factor
epsilon = 0.3   # exploration rate

# Q-table indexed as Q[agent_x, agent_y, flag_x, flag_y, action]:
# four grid coordinates followed by one entry per action (4 actions).
Q = np.zeros((GRID_SIZE,) * 4 + (4,))

In [37]:
class Game():
    """Grid-world "Capture The Flag" played by a Q-learning agent and a human.

    The agent starts in cell (0, 0), the player in cell
    (GRID_SIZE-1, GRID_SIZE-1), and one flag sits on a random cell that is
    occupied by neither. Whoever stands on the flag when ``score()`` is called
    gets a point and the flag is moved to a new random cell.

    Positions are stored as ``[x, y]``; the "up" action decreases ``y``.
    """

    # Length of one round, measured in ticks of ``time`` (the play loop
    # decrements ``time`` once per frame).
    ROUND_TICKS = 300

    def __init__(self):
        super().__init__()

        # 4 actions: up, down, left, right
        self.action_space = spaces.Discrete(4)

        self.agent_score = 0
        self.player_score = 0
        self.has_flag = 0  # set to 1 once the agent captures a flag; cleared by reset()
        self.time = self.ROUND_TICKS

        self.reset()


    def reset(self):
        """Start a new round and return the initial state.

        Puts agent and player back on their start cells, spawns a new flag,
        and clears the scores, the ``has_flag`` marker and the round timer.

        Returns:
            The state tuple produced by ``_get_state()``.
        """
        self.agent_pos = np.array([0, 0])
        self.player_pos = np.array([GRID_SIZE-1, GRID_SIZE-1])
        self._respawn_flag()
        # The shaping reward in score() compares against this value, so it has
        # to be the real agent-flag distance and can only be measured once the
        # flag has been placed.
        self.prevdistance = self._distance_to_flag()
        self.agent_score = 0
        self.player_score = 0
        self.has_flag = 0
        self.time = self.ROUND_TICKS

        return self._get_state()


    def _get_state(self):
        """Return ``(agent_x, agent_y, flag_x, flag_y)`` as a tuple."""
        return tuple([*self.agent_pos, *self.flag_pos])


    def _distance_to_flag(self):
        """Return the Euclidean distance, in cells, from the agent to the flag."""
        return math.hypot(self.agent_pos[0] - self.flag_pos[0],
                          self.agent_pos[1] - self.flag_pos[1])


    def move_agent(self, action):
        """Move the agent one cell; moves that would leave the grid are clamped.

        Args:
            action: 0 = up, 1 = down, 2 = left, 3 = right. Any other value
                leaves the agent where it is.
        """
        if action == 0:  # up
            self.agent_pos[1] = max(self.agent_pos[1]-1, 0)
        elif action == 1:  # down
            self.agent_pos[1] = min(self.agent_pos[1]+1, GRID_SIZE-1)
        elif action == 2:  # left
            self.agent_pos[0] = max(self.agent_pos[0]-1, 0)
        elif action == 3:  # right
            self.agent_pos[0] = min(self.agent_pos[0]+1, GRID_SIZE-1)


    def move_player(self, key):
        """Move the player one cell according to an arrow-key constant.

        Args:
            key: one of ``pygame.K_UP``, ``K_DOWN``, ``K_LEFT``, ``K_RIGHT``.
                Other keys are ignored. Moves are clamped to the grid.
        """
        if key == pygame.K_UP:
            self.player_pos[1] = max(self.player_pos[1]-1, 0)
        elif key == pygame.K_DOWN:
            self.player_pos[1] = min(self.player_pos[1]+1, GRID_SIZE-1)
        elif key == pygame.K_LEFT:
            self.player_pos[0] = max(self.player_pos[0]-1, 0)
        elif key == pygame.K_RIGHT:
            self.player_pos[0] = min(self.player_pos[0]+1, GRID_SIZE-1)


    def score(self):
        """Evaluate the current positions and return the agent's reward.

        * Agent on the flag: agent scores, reward 100, flag respawns.
        * Otherwise player on the flag: player scores, reward -100, flag
          respawns.
        * Otherwise: reward -1, plus 2 if the agent is closer to the flag
          than at the previous call, minus 2 if it is farther away.

        Returns:
            ``(reward, done)`` where ``done`` is True when a flag was captured.
        """
        agent_on_flag = (self.agent_pos[0] == self.flag_pos[0]
                         and self.agent_pos[1] == self.flag_pos[1])
        player_on_flag = (self.player_pos[0] == self.flag_pos[0]
                          and self.player_pos[1] == self.flag_pos[1])

        if agent_on_flag or player_on_flag:
            if agent_on_flag:  # the agent wins a simultaneous capture
                self.has_flag = 1
                self.agent_score += 1
                reward = 100
            else:
                self.player_score += 1
                reward = -100
            done = True
            self._respawn_flag()
            # Re-base the shaping reference on the NEW flag. Keeping the
            # distance to the old flag would penalise the very next step even
            # when the agent moves straight towards the new flag.
            self.prevdistance = self._distance_to_flag()
        else:
            done = False
            distance = self._distance_to_flag()
            reward = -1  # small penalty
            if distance < self.prevdistance:
                reward += 2
            elif distance > self.prevdistance:
                reward -= 2
            self.prevdistance = distance

        return reward, done


    def _respawn_flag(self):
        """Place the flag on a random cell not occupied by agent or player."""
        while True:
            self.flag_pos = np.random.randint(0, GRID_SIZE, size=2)
            if ((self.flag_pos[0] != self.agent_pos[0] or self.flag_pos[1] != self.agent_pos[1])  # different positions
                and (self.flag_pos[0] != self.player_pos[0] or self.flag_pos[1] != self.player_pos[1])):
                break


    def render(self):
        """Draw flag, agent, player, grid lines and the score line, then flip."""
        screen.fill(WHITE)

        # flag
        fx, fy = self.flag_pos
        pygame.draw.rect(screen, GREEN, (fx*CELL_SIZE, fy*CELL_SIZE, CELL_SIZE, CELL_SIZE))

        # agent
        ax, ay = self.agent_pos
        pygame.draw.rect(screen, RED, (ax*CELL_SIZE, ay*CELL_SIZE, CELL_SIZE, CELL_SIZE))

        # player
        px, py = self.player_pos
        pygame.draw.rect(screen, BLUE, (px*CELL_SIZE, py*CELL_SIZE, CELL_SIZE, CELL_SIZE))

        # grid
        for x in range(0, WIDTH, CELL_SIZE):
            pygame.draw.line(screen, BLACK, (x, 0), (x, HEIGHT))

        for y in range(0, HEIGHT, CELL_SIZE):
            pygame.draw.line(screen, BLACK, (0, y), (WIDTH, y))

        # score; `time` counts frames, so dividing by FPS gives seconds
        font = pygame.font.SysFont("Arial", 20)
        score_text = font.render(f"Agent: {self.agent_score}  Player: {self.player_score}  Time left: {self.time/FPS} sec", True, BLACK)
        screen.blit(score_text, (10, 10))

        pygame.display.flip()


    def final_screen(self):
        """Show who won the round (or a tie) and block for 3 seconds."""
        screen.fill(WHITE)

        font = pygame.font.SysFont("Arial", 30)

        if self.agent_score > self.player_score:
            text = font.render("Agent wins", True, RED)
        elif self.agent_score < self.player_score:
            text = font.render("Player wins", True, BLUE)
        else:
            text = font.render("Tie", True, BLACK)

        screen.blit(text, (WIDTH//2 - text.get_width()//2, HEIGHT//2)) # copy the pixels from one surface onto another
        pygame.display.flip() # otherwise the modifications remain invisible
        pygame.time.wait(3000) # 3 seconds

In [38]:
# Q-Learning functions
def choose_action(state):
    """Pick an action for ``state`` with an epsilon-greedy rule.

    With probability ``epsilon`` (module-level) a random action in 0..3 is
    returned; otherwise the action with the largest Q-value for ``state``.

    Args:
        state: ``(agent_x, agent_y, flag_x, flag_y)``.

    Returns:
        The chosen action index.
    """
    explore = np.random.rand() < epsilon
    if explore:
        return np.random.choice(4)

    agent_x, agent_y, flag_x, flag_y = state
    action_values = Q[agent_x, agent_y, flag_x, flag_y]
    return np.argmax(action_values)

def update_Q(state, action, reward, next_state):
    """Apply one tabular Q-learning update to the module-level table ``Q``.

    The entry for ``(state, action)`` is moved by a fraction ``alpha`` towards
    ``reward + gamma * max(Q[next_state])``.

    Args:
        state: ``(agent_x, agent_y, flag_x, flag_y)`` before the action.
        action: index of the action that was taken.
        reward: reward received for the transition.
        next_state: state tuple observed after the action.
    """
    ax, ay, fx, fy = state
    nx, ny, nfx, nfy = next_state

    cell = (ax, ay, fx, fy, action)
    current = Q[cell]
    best_next = np.max(Q[nx, ny, nfx, nfy])
    td_target = reward + gamma * best_next
    Q[cell] = current + alpha * (td_target - current)


In [39]:
EPISODES = 5000
game = Game()

# Train the agent: each episode starts from a fresh board and runs until
# score() reports a capture.
for ep in range(EPISODES):
    state = game.reset()
    while True:
        action = choose_action(state)
        game.move_agent(action)
        reward, done = game.score()
        next_state = game._get_state()

        # update q-table
        update_Q(state, action, reward, next_state)

        if done:
            break
        state = next_state  # the board only changes through the calls above

np.save("q_table.npy", Q)
print("Training finished")

Training finished


In [None]:
# Play against the trained agent. Arrow keys move the player, R restarts the
# round, Q / Esc / closing the window quits.
Q = np.load("q_table.npy")

running = True
epsilon = 0  # no exploration in the game
# NOTE: choose_action() reads this module-level value, so it stays 0 if the
# training cell is re-run afterwards without re-running the hyperparameter cell.

try:
    while running and game.time > 0:
        just_reset = False

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                break
            elif event.type == pygame.KEYDOWN:
                if event.key == pygame.K_r:
                    game.reset()
                    just_reset = True # no longer move the agent in this frame
                if event.key == pygame.K_q or event.key == pygame.K_ESCAPE:
                    running = False
                    break

        if not running:
            break

        # Held arrow keys move the player once per frame, checked in the
        # order up, down, left, right.
        keys = pygame.key.get_pressed()
        for key in (pygame.K_UP, pygame.K_DOWN, pygame.K_LEFT, pygame.K_RIGHT):
            if keys[key]:
                game.move_player(key)

        if not just_reset:
            state = game._get_state()
            action = choose_action(state)
            game.move_agent(action)
            reward, done = game.score()
            next_state = game._get_state()

            # update q-table (the agent keeps learning while it plays)
            update_Q(state, action, reward, next_state)

        game.time -= 1

        game.render()
        clock.tick(FPS)

        if game.time <= 0:
            # Round over: show the result, then start a new round
            # (reset() refills the timer, so the loop keeps going).
            game.final_screen()
            game.reset()
finally:
    # Always shut pygame down, even if the cell raises or is interrupted;
    # otherwise the window is left open and unresponsive.
    pygame.quit()