## Environment Setup ##

### Required Imports ###
1. chess: The python-chess library. It handles the basic chess board logic such as legal moves, checkmate, etc.
2. numpy: The numerical computing library. It has been used for the tensors representing the chess board.
3. AECEnv: The Agent Environment Cycle. It builds the environment to handle the cycle of agents' turn sequence.
4. AgentSelector: A utitlity which manages the turns of the agents.
5. spaces: It is a gym-style API used to define action and observation spaces (what agents can do and observe).
6. gym: It has been used for the general reinforcement learning compatibility.

In [10]:
import chess
import numpy as np
from pettingzoo import AECEnv
from pettingzoo.utils.agent_selector import AgentSelector
from gymnasium import spaces
import gym
import random

### Chess Environment Class ###
This defines the basic chess environment. The environment inherits from AECEnv, a part of the pettingzoo framework, for the multiagent interactions.
- The "metadata" specifies environment settings. 
- The "init(self)" constructor method that initializes the environment. 
- The "reset()" resets the environment for a new game. 
- The "observe()" returns the current board state in tensor format which is supposed to be used by the agent for the observation. 
- The "board_to_tensor()" converts the board to a tensor. 
- The "step()" handles the agent's action, checks for the end of game and returns the state. 
- The "render()" print the current board state. 

In [33]:
class ChessEnvironment(AECEnv):
    metadata = {'render_modes': ['human'], 'name': "Chess-v0"}

    def __init__(self):
        super().__init__()
        self.board = chess.Board()                                         # Chess board
        self.agents = ["white", "black"]                                   # The Agents for the game
        self.possible_agents = self.agents[:]                              # All the possible agents for the agent selection
        self.agent_selector = AgentSelector(self.agents)                   # The agent selector to choose the agent for the turn
        self.agent_order = self._agent_selector.reset()                    # Assigns the agent a turn in order starting from white
        self.rewards = {agent: 0 for agent in self.agents}                 # Stores the reward for each agent, starts from 0
        self.dones = {agent: False for agent in self.agents}               # Check for the game over state for the current agent
        self.infos = {agent: {} for agent in self.agents}
        self.current_agent = self.agent_order[0]

        # 4672 is upper bound of legal chess moves in UCI encoding
        self.action_space = {agent: spaces.Discrete(4672) for agent in self.agents}     # Sets the action space for each agent, all the legal moves
        
        # A tensor representing the board, with shape (8, 8, 12) for encoding all possible pieces and positions
        self.observation_space = {agent: spaces.Box(low=0, high=1, shape=(8, 8, 12), dtype=np.int8) for agent in self.agents}

    # This function resets everything, the agent turn order, their reward, game state, board
    def reset(self, seed=None, options=None):
        self.board.reset()
        self.agent_order = self._agent_selector.reset()
        self.rewards = {agent: 0 for agent in self.agents}
        self.dones = {agent: False for agent in self.agents}
        self.infos = {agent: {} for agent in self.agents}
        self.current_agent = self.agent_order[0]
        self._cumulative_rewards = {agent: 0 for agent in self.agents}

    # Calls the board to tensor function and returns the tensor of the current state of the board
    def observe(self, agent):
        return self._board_to_tensor()

    # Converts the board to a tensor format and return that tensor
    def _board_to_tensor(self):
        # Encode the board into (8, 8, 12) binary tensor
        # 6 pieces x 2 colors
        piece_map = self.board.piece_map()
        tensor = np.zeros((8, 8, 12), dtype=np.int8)

        # Iterating over each piece and assigning to a tensor
        # Square is a number between 0 and 63 (inclusive)
        for square, piece in piece_map.items():
            row = 7 - (square // 8)                                         # square // 8 gives row index, row 0 is the bottom row, 7 is the top
            col = square % 8                                                # gives the column index, column 0 starts from left and 7 is the last
            piece_type = piece.piece_type - 1  # 0 to 5                     # gives the type(pawn, knight), subtracting 1 for easier tensor indexing
            color_offset = 0 if piece.color == chess.WHITE else 6           # tells which agent the current piece belongs to
            tensor[row, col, piece_type + color_offset] = 1                 # encoding the piece into tensor
        return tensor

    # performs the agent action, checks the game state, updates the reward and the agent
    def step(self, action):
        agent = self.current_agent                                         # Gets the current agent whose turn it is to make a move
        legal_moves = list(self.board.legal_moves)                         # Checks the action space of the agent for all the valid moves left
        move = None

        # Checks if the action is a valid move
        if 0 <= action < len(legal_moves):
            move = legal_moves[action]
        else:
            # Invalid move: assign loss and return
            self.rewards[agent] = -1
            self.dones = {a: True for a in self.agents}
            return self.observe(agent), -1, True, {}

        # Updating the board according to the move made
        self.board.push(move)

        # Reward if game ended
        if self.board.is_game_over():
            result = self.board.result()
            if result == "1-0":
                self.rewards = {"white": 1, "black": -1}
            elif result == "0-1":
                self.rewards = {"white": -1, "black": 1}
            else:
                self.rewards = {"white": 0, "black": 0}
            self.dones = {agent: True for agent in self.agents}
        else:
            self.rewards = {agent: 0 for agent in self.agents}
            self.dones = {agent: False for agent in self.agents}

        # Changing the agent for the next turn
        self.current_agent = self._agent_selector.next()
        return self.observe(self.current_agent), self.rewards[self.current_agent], self.dones[self.current_agent], self.infos[self.current_agent]

    # prints the board in human understandable format
    def render(self):
        print(self.board)

    def close(self):
        pass

In [22]:
# # Testing
# env = ChessEnvironment()

# # Reset environment
# env.reset()

# # Test by picking a random legal action for the current agent
# legal_moves = list(env.board.legal_moves)
# action = random.choice(range(len(legal_moves)))

# # Perform the action in the environment
# obs, reward, done, info = env.step(action)

# # Print the board and the result
# env.render()
# print(f"Reward: {reward}, Done: {done}")

In [29]:
env = ChessEnvironment()

In [31]:
env.reset()
env.render()
print()

# Play until the game is over
while not all(env.dones.values()):
    agent = env.current_agent

    # Get all legal moves
    legal_moves = list(env.board.legal_moves)
    
    if not legal_moves:
        print(f"No legal moves for {agent}, skipping turn.")
        env.agent_selection = env._agent_selector.next()
        continue

    # Choose a random move
    move_index = random.choice(range(len(legal_moves)))
    
    # Step with the chosen action
    env.step(move_index)

    # Display the board
    print(f"\nMove by {agent}: {legal_moves[move_index]}")
    env.render()

# Final game result
result = env.board.result()
print("\nGame Over!")
print("Result:", result)

if result == "1-0":
    print("White wins")
elif result == "0-1":
    print("Black wins")
else:
    print("Draw")

r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R


Move by w: d2d4
r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . P . . . .
. . . . . . . .
P P P . P P P P
R N B Q K B N R

Move by black: e7e5
r n b q k b n r
p p p p . p p p
. . . . . . . .
. . . . p . . .
. . . P . . . .
. . . . . . . .
P P P . P P P P
R N B Q K B N R

Move by white: e1d2
r n b q k b n r
p p p p . p p p
. . . . . . . .
. . . . p . . .
. . . P . . . .
. . . . . . . .
P P P K P P P P
R N B Q . B N R

Move by black: g8h6
r n b q k b . r
p p p p . p p p
. . . . . . . n
. . . . p . . .
. . . P . . . .
. . . . . . . .
P P P K P P P P
R N B Q . B N R

Move by white: g1f3
r n b q k b . r
p p p p . p p p
. . . . . . . n
. . . . p . . .
. . . P . . . .
. . . . . N . .
P P P K P P P P
R N B Q . B . R

Move by black: e5d4
r n b q k b . r
p p p p . p p p
. . . . . . . n
. . . . . . . .
. . . p . . . .
. . . . . N . .
P P P K P P P

In [32]:
print("Game over due to:")
print("Stalemate:", env.board.is_stalemate())
print("Repetition:", env.board.is_repetition())
print("50-move rule:", env.board.can_claim_fifty_moves())
print("Insufficient material:", env.board.is_insufficient_material())


Game over due to:
Stalemate: False
Repetition: False
50-move rule: False
Insufficient material: True
