In [1]:
from game import Move, Player, Game
from quixo import Quixo
from symmetry import SymmetryGenerator
from main import RandomPlayer, MyPlayer
from reinforcement_learning import MonteCarloPlayer
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


### Check Symmetry Functionalities

In [2]:
# Example usage:
board = np.ones((5, 5), dtype=np.uint8) * -1
board[2, 1] = 0
board[1, 1] = 1
symmetry_generator = SymmetryGenerator()
all_symmetries = symmetry_generator.board_get_symmetries(board)

# Print all symmetries as full boards
print("All Symmetries:")
for label, state in all_symmetries:
    print(f"Transformation: {label}")
    print(state)

# Find the lexicographically minimum state and its transformation label
print("\n\nLexicographically minimum state:")
transf_performed, base_state = symmetry_generator.get_base_state(board)
print(f"Transformation: {transf_performed}")
print(base_state)

# Action
from_pos = (3, 1)
slide = Move.LEFT

# Get the action in the lexicographically minimum state
print("\n\nAction in lexicographically minimum state:")
base_from_pos, base_slide = symmetry_generator.get_base_action(from_pos, slide, transf_performed)
print(f"From position: {base_from_pos}")
print(f"Slide: {base_slide}")

# Retrieve the original action
print("\n\nOriginal action:")
original_from_pos, original_slide = symmetry_generator.get_original_action(base_from_pos, base_slide, transf_performed)
print(f"From position: {original_from_pos}")
print(f"Slide: {original_slide}")

All Symmetries:
Transformation: identity
[[-1 -1 -1 -1 -1]
 [-1  1 -1 -1 -1]
 [-1  0 -1 -1 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]]
Transformation: rotate_90
[[-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]
 [-1  1  0 -1 -1]
 [-1 -1 -1 -1 -1]]
Transformation: rotate_180
[[-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1  0 -1]
 [-1 -1 -1  1 -1]
 [-1 -1 -1 -1 -1]]
Transformation: rotate_270
[[-1 -1 -1 -1 -1]
 [-1 -1  0  1 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]]
Transformation: reflect_horizontal
[[-1 -1 -1 -1 -1]
 [-1 -1 -1  1 -1]
 [-1 -1 -1  0 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]]
Transformation: reflect_vertical
[[-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]
 [-1  0 -1 -1 -1]
 [-1  1 -1 -1 -1]
 [-1 -1 -1 -1 -1]]
Transformation: reflect_diagonal
[[-1 -1 -1 -1 -1]
 [-1  1  0 -1 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]]
Transformation: reflect_antidiagonal
[[-1 -1 -1 -1 -1]
 [-1 -1  0  1 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]]


Lexicograph

### Example match
In this example, we'll show the phases of a game of Quixo between two random players. We're using our extended class `Quixo(Game)` which allows us to have more control over the visualization of each game state.

This is done in order to verify that the extensions that we have applied are working as expected.

In [3]:
g = Quixo()
player1 = RandomPlayer()
player2 = RandomPlayer()
winner = g.play(player1, player2, verbose=True, debug=True)
print(f"Winner: Player {winner}")


Current player: ❌
*****************
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ 

Number of possible actions: 44
Possible actions:  [((0, 0), <Move.BOTTOM: 1>), ((0, 4), <Move.TOP: 0>), ((0, 0), <Move.RIGHT: 3>), ((4, 0), <Move.LEFT: 2>), ((1, 0), <Move.BOTTOM: 1>), ((1, 0), <Move.LEFT: 2>), ((1, 0), <Move.RIGHT: 3>), ((1, 4), <Move.TOP: 0>), ((1, 4), <Move.LEFT: 2>), ((1, 4), <Move.RIGHT: 3>), ((0, 1), <Move.RIGHT: 3>), ((0, 1), <Move.TOP: 0>), ((0, 1), <Move.BOTTOM: 1>), ((4, 1), <Move.LEFT: 2>), ((4, 1), <Move.TOP: 0>), ((4, 1), <Move.BOTTOM: 1>), ((2, 0), <Move.BOTTOM: 1>), ((2, 0), <Move.LEFT: 2>), ((2, 0), <Move.RIGHT: 3>), ((2, 4), <Move.TOP: 0>), ((2, 4), <Move.LEFT: 2>), ((2, 4), <Move.RIGHT: 3>), ((0, 2), <Move.RIGHT: 3>), ((0, 2), <Move.TOP: 0>), ((0, 2), <Move.BOTTOM: 1>), ((4, 2), <Move.LEFT: 2>), ((4, 2), <Move.TOP: 0>), ((4, 2), <Move.BOTTOM: 1>), ((3, 0), <Move.BOTTOM: 1>), ((3, 0), <Move.LEFT: 2>), ((3, 0), <Move.RIGHT: 3>), ((3, 4), <Move.TOP: 0>), ((3, 4),

In [4]:
def collect_results(player1, player2, n_games=100):
    
    # 1. Initialize the results
    results = defaultdict(int)

    # 2. Play the games
    for _ in tqdm(range(n_games)):
        g = Quixo()
        winner = g.play(player1, player2, verbose=False, debug=False)
        results[winner] += 1
        
    return results

## Training process Monte Carlo Player

In [5]:
g = Quixo()
mc_player = MonteCarloPlayer()
opponent = RandomPlayer()

training_steps = [100, 250, 500]
training_results_move_first = []
training_results_move_second = []

previous_steps = 0

for steps in tqdm(training_steps):
    
    # 1. Train the RL-agent against the opponent for a given number of steps
    mc_player.train(steps - previous_steps)
    previous_steps = steps

    # 2. Play 100 games against the opponent making the first move
    results = collect_results(mc_player, opponent, n_games=100)
    training_results_move_first.append(results)

    # 3. Play 100 games against the opponent making the second move
    results = collect_results(opponent, mc_player, n_games=100)
    training_results_move_second.append(results)

# 4. Plot the results
def plot_results(training_results, move_first=True):
    plt.title(f"Monte Carlo Training Results ({'MC' if move_first else 'Random'} Player Moves First)")
    plt.plot(training_steps, [res[0] for res in training_results], label=('MC' if move_first else 'Random') + ' Player Wins', color='red')
    plt.plot(training_steps, [res[1] for res in training_results], label=('MC' if move_first else 'Random') + ' Player Losses', color='black')
    plt.legend()
    plt.xlabel("Training Steps")
    plt.ylabel("Number of Wins")
    plt.show()

plot_results(training_results_move_first, move_first=True)
plot_results(training_results_move_second, move_first=False)

  0%|          | 0/3 [00:00<?, ?it/s]

100%|██████████| 100/100 [00:02<00:00, 33.62it/s]
100%|██████████| 100/100 [00:01<00:00, 71.75it/s]
  0%|          | 0/3 [00:04<?, ?it/s]


TypeError: collect_results() got an unexpected keyword argument 'verbose'