In [3]:
import gym
from gym import spaces
import numpy as np


In [32]:
class HexEnv(gym.Env):
    """Two-player Hex on a ``size`` x ``size`` board, using the classic Gym API.

    Board cells hold ``0`` (empty), ``1`` (player 1) or ``-1`` (player -1).
    Player 1 wins by linking row 0 to row ``size - 1``; player -1 wins by
    linking column 0 to column ``size - 1``.

    Actions are flat cell indices in ``[0, size * size)`` and are decoded as
    ``row, col = divmod(action, size)``.

    ``reset`` returns only the observation and ``step`` returns the 4-tuple
    ``(observation, reward, done, info)``.
    """

    # (d_row, d_col) offsets to the six neighbours of a hex cell when the
    # board is stored as a square array.
    _NEIGHBOURS = ((-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0))

    def __init__(self, size=5):
        """Create the spaces for a ``size`` x ``size`` board and reset the game.

        Args:
            size: Number of rows (and columns) of the board.
        """
        self.size = size
        self.action_space = spaces.Discrete(size * size)
        self.observation_space = spaces.Box(low=-1, high=1, shape=(size, size), dtype=np.int8)
        self.reset()

    def reset(self):
        """Clear the board, give the move to player 1 and return the observation.

        Returns:
            A copy of the (all-zero) board as an ``int8`` array.
        """
        self.board = np.zeros((self.size, self.size), dtype=np.int8)
        self.current_player = 1
        return self.board.copy()

    def step(self, action):
        """Place a stone for the current player on the cell encoded by ``action``.

        Args:
            action: Flat cell index in ``[0, size * size)``.

        Returns:
            ``(observation, reward, done, info)``:
            * occupied cell: the unchanged board, reward ``-10``, ``done=True``
              and the player to move is not switched;
            * otherwise: reward ``1`` and ``done=True`` if this move wins for
              the player who made it, else reward ``0`` and ``done=False``.
              The player to move is switched in both cases.

        Raises:
            IndexError: If ``action`` is outside ``[0, size * size)``.
        """
        # Reject out-of-range actions explicitly. Without this check a negative
        # action is silently accepted: NumPy negative indexing wraps it onto a
        # cell on the opposite side of the board.
        if not 0 <= action < self.size * self.size:
            raise IndexError(
                f"action {action!r} is out of range for a "
                f"{self.size}x{self.size} board (expected 0..{self.size * self.size - 1})"
            )

        row, col = divmod(action, self.size)
        if self.board[row, col] != 0:
            return self.board.copy(), -10, True, {}  # illegal move penalty

        self.board[row, col] = self.current_player
        # check_win() inspects self.current_player, so it must run before the
        # turn is handed over.
        done = self.check_win()
        reward = 1 if done else 0
        self.current_player *= -1
        return self.board.copy(), reward, done, {}

    def check_win(self):
        """Return whether ``self.current_player`` has connected their two edges.

        Player 1 needs a chain of adjacent own stones from row 0 to row
        ``size - 1``; player -1 needs one from column 0 to column ``size - 1``.
        Any other ``current_player`` value yields ``False``.

        The search is a depth-first traversal with an explicit stack, so its
        depth is not limited by Python's recursion limit on large boards.
        """
        size = self.size
        player = self.current_player
        board = self.board
        far_edge = size - 1

        # `axis` selects which coordinate must reach `far_edge`; the stack is
        # seeded with the player's stones on the starting edge.
        if player == 1:
            axis = 0
            stack = [(0, col) for col in range(size) if board[0, col] == player]
        elif player == -1:
            axis = 1
            stack = [(row, 0) for row in range(size) if board[row, 0] == player]
        else:
            return False

        visited = set(stack)
        while stack:
            cell = stack.pop()
            if cell[axis] == far_edge:
                return True

            x, y = cell
            for dx, dy in self._NEIGHBOURS:
                nx, ny = x + dx, y + dy
                if (
                    0 <= nx < size
                    and 0 <= ny < size
                    and (nx, ny) not in visited
                    and board[nx, ny] == player
                ):
                    visited.add((nx, ny))
                    stack.append((nx, ny))
        return False


Sanity check: make sure everything is installed and the environment runs by creating a 5x5 board and playing one move.

In [13]:
# Smoke test: build a 5x5 environment and show the empty starting position.
env = HexEnv(size=5)
obs = env.reset()
print("Initial Board:", obs, sep="\n")

# Play a single move: flat action 12 decodes to (row 2, col 2), the centre of a 5x5 board.
action = 12
obs, reward, done, _ = env.step(action)
print("\nAfter Move:", obs, sep="\n")


Initial Board:
[[0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]

After Move:
[[0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 1 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]


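DFS win check: fill column 2 with Player 1 stones and verify that check_win detects the top-to-bottom connection.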

In [33]:
# Hand-build a position where player 1 owns all of column 2, i.e. an unbroken
# chain from the top row to the bottom row, then ask the win detector about it.
env = HexEnv(size=5)
env.reset()
env.board[:, 2] = 1
env.current_player = 1  # check_win() evaluates the position for this player
print(env.board)
print("Player 1 win?", env.check_win())


[[0 0 1 0 0]
 [0 0 1 0 0]
 [0 0 1 0 0]
 [0 0 1 0 0]
 [0 0 1 0 0]]
Player 1 win? True
