In [1]:
from stable_baselines3 import ppo
import gym
from pettingzoo.classic import chess_v5
import time
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Use gym to wrap the PettingZoo environment
class GymChessEnv(gym.Env):
    """Gym-style wrapper around the two-player PettingZoo ``chess_v5`` AEC env.

    The wrapper tracks whose turn it is with a move counter (``self.turn``):
    even turns belong to ``player_0`` and odd turns to ``player_1``. After
    every ``reset``/``step`` the legal-move mask of the player who moves next
    is exposed as ``self.action_mask``.
    """

    def __init__(self, **kwargs):
        """Create the underlying chess environment and reset it.

        Args:
            **kwargs: Forwarded unchanged to ``chess_v5.env``.
        """
        self.env = chess_v5.env(render_mode='ansi', **kwargs)
        self.turn = 0
        self.reset()

    def _current_agent(self):
        """Return the PettingZoo agent name of the player whose turn it is."""
        return f'player_{self.turn % 2}'

    def reset(self, seed=None):
        """Start a new game.

        Args:
            seed: Optional seed forwarded to the underlying environment's
                ``reset``. ``None`` (the default) keeps the previous behavior.

        Returns:
            The observation dict of the first player to move, as returned by
            the underlying env's ``observe`` (it is indexed below with the
            ``'action_mask'`` key).
        """
        self.env.reset(seed=seed)
        self.done = False
        self.turn = 0
        agent = self._current_agent()
        # Observe once and reuse the result instead of querying the env repeatedly.
        self.obs = self.env.observe(agent)
        self.action_space = self.env.action_space(agent)
        # Must be a *space* object, not a sample observation. The full space is
        # used because reset()/step() return the whole observation dict.
        self.observation_space = self.env.observation_space(agent)
        self.action_mask = self.obs['action_mask']
        return self.obs

    def step(self, action):
        """Play ``action`` for the current player and hand the turn over.

        Args:
            action: Action index for the player whose turn it is; callers are
                expected to pick one allowed by ``self.action_mask``.

        Returns:
            A ``(obs, reward, done, info)`` tuple where ``reward``, ``done``
            and ``info`` belong to the player who just moved and ``obs`` is
            the observation dict of the player who moves next.
        """
        mover = self._current_agent()
        self.env.step(action)
        # Treat truncation like termination so the episode is reported as over
        # in either case.
        done = self.env.terminations[mover] or self.env.truncations[mover]
        reward = self.env.rewards[mover]
        info = self.env.infos[mover]
        # Keep the attribute initialised in reset() in sync with the game state.
        self.done = done
        self.turn += 1
        obs = self.env.observe(self._current_agent())
        self.obs = obs
        self.action_mask = obs['action_mask']
        return obs, reward, done, info

    def render(self):
        """Return the underlying env's rendering (created with ``render_mode='ansi'``)."""
        return self.env.render()

In [3]:
# Build the wrapped chess environment and show the starting position
env = GymChessEnv()
env.reset()
initial_board = env.render()
print(initial_board)

r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R


In [4]:
# Play one full game in which both players pick uniformly random legal moves.
# Note: each agent's reward/done/info below are the values reported right after
# that agent's own most recent move.
_ = env.reset()
agent1_done = False
agent2_done = False
i = 0
while not (agent1_done or agent2_done):
    print(f"--- Step: {i:03d} ---")

    # Player 1: sample an action from the action mask
    legal_actions = np.where(env.action_mask > 0)[0]
    print("All possible actions", legal_actions)
    action = np.random.choice(legal_actions)
    print(f"Selected action: {action}")
    agent2_obs, agent1_reward, agent1_done, agent1_info = env.step(action)
    print("Reward: ", agent1_reward)
    print("Info", agent1_info)
    if agent1_done:
        # The game ended on player 1's move: player 2 has no legal move to
        # sample, so show the final position and stop immediately.
        print(env.render())
        break

    # Player 2: sample an action from the action mask
    legal_actions = np.where(env.action_mask > 0)[0]
    print("All possible actions", legal_actions)
    action = np.random.choice(legal_actions)
    print(f"Selected action: {action}")
    agent1_obs, agent2_reward, agent2_done, agent2_info = env.step(action)
    # Report player 2's own reward (previously printed player 1's by mistake)
    print("Reward: ", agent2_reward)
    print("Info", agent2_info)
    print(env.render())
    i += 1

print("Game over")
# Summarize the game
print(f"Agent 1 reward: {agent1_reward}")
print(f"Agent 2 reward: {agent2_reward}")
print(f"Agent 1 done: {agent1_done}")
print(f"Agent 2 done: {agent2_done}")
print(f"Agent 1 info: {agent1_info}")
print(f"Agent 2 info: {agent2_info}")


--- Step: 000 ---
All possible actions [  77   85  643  645  661  669 1245 1253 1829 1837 2413 2421 2997 3005
 3563 3565 3581 3589 4165 4173]
Selected action: 4173
Reward:  0
Info {}
All possible actions [  77   85  643  645  661  669 1245 1253 1829 1837 2413 2421 2997 3005
 3563 3565 3581 3589 4165 4173]
Selected action: 1829
Reward:  0
Info {}
r n b q k b n r
p p p . p p p p
. . . p . . . .
. . . . . . . .
. . . . . . . P
. . . . . . . .
P P P P P P P .
R N B Q K B N R
--- Step: 001 ---
All possible actions [  77   85  643  645  661  669 1245 1253 1829 1837 2413 2421 2997 3005
 3563 3565 3581 3589 4092 4100 4311]
Selected action: 2997
Reward:  0
Info {}
All possible actions [  77   85  643  645  647  661  669 1175 1183 1191 1199 1207 1245 1253
 1756 1902 2338 2413 2421 2997 3005 3563 3565 3581 3589 4165 4173]
Selected action: 2421
Reward:  0
Info {}
r n b q k b n r
p p p . . p p p
. . . p . . . .
. . . . p . . .
. . . . . . . P
. . . . . P . .
P P P P P . P .
R N B Q K B N R
--- Step