In [2]:
import numpy as np
import pygame
import random
import torch
import torch.nn as nn

from cathedral_rl import cathedral_v0
from cathedral_rl.game.manual_policy import ManualPolicy

## Commandes 


Liste des touches possibles et leur effet :

- Espace (K_SPACE) : Parcourt la liste des pièces disponibles en passant de la plus grande à la plus petite.
- E (K_e) : Fait tourner la pièce dans le sens horaire (rotation à -90° par incrément, en tenant compte du plateau inversé).
- Q (K_q) : Fait tourner la pièce dans le sens anti-horaire (rotation à +90° par incrément).
- Flèche droite (K_RIGHT) : Déplace la pièce vers la droite, en vérifiant que le déplacement est légal.
- Flèche gauche (K_LEFT) : Déplace la pièce vers la gauche, en vérifiant que le déplacement est légal.
- Flèche haut (K_UP) : Déplace la pièce vers le haut (attention : en pygame, la coordonnée y augmente vers le bas), en vérifiant que le déplacement est légal.
- Flèche bas (K_DOWN) : Déplace la pièce vers le bas, en vérifiant que le déplacement est légal.


## Chose starting player

In [3]:
starting_player = "human" # human or AI

## Choise AI model

In [None]:
ai_model = "PPO" # DQN or PPO or random

In [4]:
if starting_player == "AI":
    controlled_agent = "player_0"
    ai_agent = "player_1"
else:
    controlled_agent = "player_1"
    ai_agent = "player_0"

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [6]:
env = cathedral_v0.env(
    board_size=8,
    render_mode="human",
    per_move_rewards=True,
    final_reward_score_difference=True,
).unwrapped

env.reset()

## Load PPO policy

In [7]:
from PPO.models.ppo_cnn import PPOCNN

### Model 

In [8]:
model_ppo = "cathedral_ppo_self_play_adversarial_final.pth"

### Hyperparameters

In [9]:
K_epochs = 40               # update policy for K epochs
eps_clip = 0.2              # clip parameter for PPO
gamma = 0.95                # discount factor
gae_lambda = 0.95

lr_actor = 0.005       # learning rate for actor network
lr_critic = 0.002

In [10]:
n_actions = env.action_space(ai_agent).n
obs_shape = env.observe(ai_agent)["observation"].shape 

checkpoint_ppo = torch.load(f"models/{model_ppo}.pth", weights_only=False)
ppo_agent = PPOCNN(
    obs_shape=obs_shape,
    action_dim=n_actions,
    lr_actor=lr_actor, 
    lr_critic=lr_critic,
    gamma=gamma,
    K_epochs=K_epochs,
    eps_clip=eps_clip
)
if 'model_state_dict' in checkpoint_ppo:
    ppo_agent.policy.load_state_dict(checkpoint_ppo['model_state_dict'])
else:
    # Direct state dict (from .save() method)
    ppo_agent.policy.load_state_dict(checkpoint_ppo)

FileNotFoundError: [Errno 2] No such file or directory: 'models/cathedral_ppo_self_play_adversarial_final.pth.pth'

## Get correct action method

In [None]:
def ai_action(observation, legal_moves):
    if ai_model == "random":
        return np.random.choice(legal_moves)
    if ai_model == "PPO":
        return ppo_agent.select_action_evaluation(observation["observation"], observation["action_mask"])

## Play against AI

In [None]:
env.reset()
env.render()


iter = 1

# Agent_id can be 0 or 1 : indicates starting player
if starting_player == "AI":
    human_agent_id = 1
else:
    human_agent_id = 0

manual_policy = ManualPolicy(env, agent_id=human_agent_id) # Policy controlled by player

while env.agents:
    observation, reward, termination, truncation, info = env.last()
    mask = observation["action_mask"]
    legal_moves = [i for i, valid in enumerate(observation["action_mask"]) if valid]
    agent = env.agent_selection
    state = observation["observation"]

    print(
        f"\nTurn: {iter} | ({agent}) "
        f"Legal pieces : {list(env.legal_pieces[agent])}, "
        f"Legal moves total: {np.count_nonzero(mask)}, "
        f"Remaining pieces: {env.board.unplaced_pieces[agent]}"
    )

    if agent == manual_policy.agent:                # Human action
        action = manual_policy(observation, agent)
    else:                                           # AI action
        state = observation["observation"]
        action = ai_action(observation, legal_moves)

    env.step(action)

    print(
        f"Turn: {iter} | "
        f"Action: {action}, "
        f"Piece: {env.board.action_to_piece_map(action)[0]}, "
        f"Position: {env.board.action_to_pos_rotation_mapp(agent, action)[0]}, "
    )
    print(
        f"Turn: {iter} | Reward: {env.rewards[agent]}, "
        f"Cumulative reward: {env._cumulative_rewards[agent]}, "
    )
    if env.turns["player_0"] == env.turns["player_1"]:
        print()
        for agent in env.agents:
            print(
                f"SCORE ({agent}): {env.score[agent]['total']:0.2f}, "
                f"Squares/turn: {env.score[agent]['squares_per_turn']:0.2f}, "
                f"Remaining pieces difference: {env.score[agent]['remaining_pieces']}, "
                f"Territory difference: {env.score[agent]['territory']}"
            )

    iter += 1

print("Terminated") if termination else print("Truncated")
print("\nWINNER: ", env.winner)
for agent in env.possible_agents:
    print(f"\n{agent} Final reward: {env.rewards[agent]}")
    print(f"{agent} Cumulative reward: {env._cumulative_rewards[agent]}")
    print(
        f"{agent} Final remaining pieces: {[p.name for p in env.final_pieces[agent]]}"
    )
    print(
        f"{agent} Score: {env.score[agent]['total']:0.2f}, "
        f"Squares/turn: {env.score[agent]['squares_per_turn']:0.2f}, "
        f"Remaining pieces difference: {env.score[agent]['remaining_pieces']}, "
        f"Territory difference: {env.score[agent]['territory']}"
    )
pygame.quit()


Turn: 1 | (player_1) Legal pieces : [14], Legal moves total: 120, Remaining pieces: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]


2025-03-12 17:25:58.682 python[42405:6182076] +[IMKClient subclass]: chose IMKClient_Modern
2025-03-12 17:25:58.682 python[42405:6182076] +[IMKInputSession subclass]: chose IMKInputSession_Modern


Turn: 1 | Action: 1678, Piece: 14, Position: (3, 3), 
Turn: 1 | Reward: 0, Cumulative reward: 0, 

SCORE (player_1): 0.00, Squares/turn: 0.00, Remaining pieces difference: 0, Territory difference: 0
SCORE (player_0): 0.00, Squares/turn: 0.00, Remaining pieces difference: 0, Territory difference: 0

Turn: 2 | (player_0) Legal pieces : [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], Legal moves total: 1139, Remaining pieces: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
Turn: 2 | Action: 1489, Piece: 13, Position: (1, 1), 
Turn: 2 | Reward: 3, Cumulative reward: 3, 

Turn: 3 | (player_1) Legal pieces : [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], Legal moves total: 924, Remaining pieces: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
Turn: 3 | Action: 1612, Piece: 13, Position: (6, 1), 
Turn: 3 | Reward: 3, Cumulative reward: 3, 

SCORE (player_1): 5.00, Squares/turn: 5.00, Remaining pieces difference: 0, Territory difference: 0
SCORE (player_0): 5.00, Squares/turn: 5.00, Remaining p

: 