# Imports

In [None]:
import numpy as np

from tqdm import tqdm

import matplotlib.pyplot as plt

In [None]:
from Nim.Nim import Nim
from Nim.NimLogic import NimLogic

from Agents.Minimax.MinimaxAgentV1 import MinimaxAgentV1
from Agents.Minimax.MinimaxAgentV2 import MinimaxAgentV2
from Agents.Minimax.MinimaxAgentV3 import MinimaxAgentV3

# Constants

In [None]:
# Fix NumPy's global RNG so every run draws the same games
np.random.seed(42)

# Number of games played per benchmark
EPISODES = 10_000

# Default board setup: how many piles, and the pile-size bound
PILE_COUNT, MAX_PILE = 2, 63

# Default search depth handed to the minimax agents
MAX_DEPTH = 1

# Agent Testing Function

In [None]:
def test_agent(_misereAgent, _normalAgent, _misere, _initial_piles):
    """Benchmark a pair of self-playing agents over a batch of Nim games.

    For game ``i`` a ``Nim`` instance is built from ``_initial_piles[i]`` and
    ``_misere[i]``; the misere agent plays both sides when ``_misere[i]`` is
    truthy, otherwise the normal agent does. After each game the winner is
    checked against ``NimLogic.is_p_position`` and the agent's node/move
    counters are accumulated.

    Args:
        _misereAgent: Agent used for games where the misere flag is set.
        _normalAgent: Agent used for all other games.
        _misere: Indexable sequence of per-game misere flags.
        _initial_piles: Indexable sequence of per-game starting piles.

    Returns:
        A list of four numbers, in this order:
        the mean of the per-game ``agent.mean_nodes`` values (unweighted),
        total explored nodes divided by total moves (weighted),
        average explored nodes per game, and average moves per game.
        All four are ``0.0`` when no game was played.

    Raises:
        AssertionError: If a game's winner differs from the value returned by
            ``NimLogic.is_p_position`` for its starting position.
    """
    # Play at most EPISODES games, but never index past the supplied data.
    episodes = min(EPISODES, len(_initial_piles), len(_misere))

    explored_nodes = 0
    moves_count = 0
    mean_nodes = 0

    for i in tqdm(range(episodes)):
        piles = _initial_piles[i]
        is_misere = _misere[i]

        # NOTE(review): `piles` is passed to Nim as-is; this assumes Nim does
        # not mutate its `initial_piles` argument in place - confirm.
        game = Nim(
            initial_piles=piles,
            misere=is_misere
        )

        agent = _misereAgent if is_misere else _normalAgent

        winner = game.play(
            player1=agent,
            player2=agent,
            verbose=False
        )

        # Explicit raise (instead of a bare `assert`) so the check survives
        # `python -O` and reports which game failed.
        expected = NimLogic.is_p_position(piles, is_misere)
        if not (winner == expected):
            raise AssertionError(
                f"game {i}: winner={winner!r} but expected {expected!r} "
                f"(piles={piles!r}, misere={is_misere!r})"
            )

        # NOTE(review): the counters below are read once per game, which
        # presumes the agent resets them for every game - confirm.
        agent.compute_mean_nodes()
        mean_nodes += agent.mean_nodes
        moves_count += agent.moves_count
        explored_nodes += agent.nodes_explored

    # Guard every division so an empty batch yields zeros, not a crash.
    weighted_mean_nodes = explored_nodes / moves_count if moves_count else 0.0
    unweighted_mean_nodes = mean_nodes / episodes if episodes else 0.0

    explored_nodes = explored_nodes / episodes if episodes else 0.0
    moves_count = moves_count / episodes if episodes else 0.0

    values = [unweighted_mean_nodes, weighted_mean_nodes, explored_nodes, moves_count]

    labels = [
        "unweighted average explored nodes per move:",
        "weighted average explored nodes per move:",
        "average explored nodes per game:",
        "average moves per game:"
    ]

    # Pad every label to the longest one so the numbers line up in a column.
    label_width = max(len(lbl) for lbl in labels)

    for lbl, val in zip(labels, values):
        print(f"{lbl:<{label_width}} {val:>10.2f}")

    return values

# Agent Initialization For First 2 Tests

In [None]:
# Misere-rule agents, one per minimax variant. V3 is instantiated twice,
# once for each value of its `reverse` flag.
misereAgent1, misereAgent2, misereAgent3a, misereAgent3b = (
    MinimaxAgentV1(misere=True, max_depth=MAX_DEPTH),
    MinimaxAgentV2(misere=True, max_depth=MAX_DEPTH),
    MinimaxAgentV3(misere=True, max_depth=MAX_DEPTH, reverse=True),
    MinimaxAgentV3(misere=True, max_depth=MAX_DEPTH, reverse=False),
)

# Normal-rule counterparts, built with the same depth and the same variants.
normalAgent1, normalAgent2, normalAgent3a, normalAgent3b = (
    MinimaxAgentV1(misere=False, max_depth=MAX_DEPTH),
    MinimaxAgentV2(misere=False, max_depth=MAX_DEPTH),
    MinimaxAgentV3(misere=False, max_depth=MAX_DEPTH, reverse=True),
    MinimaxAgentV3(misere=False, max_depth=MAX_DEPTH, reverse=False),
)

# Random Game All Agent Test - Max Pile

In [None]:
# One entry (the 4-value list returned by test_agent) per agent per max_pile,
# appended in the order V1, V2, V3a, V3b for each max_pile value.
MAX_PILE_RESULTS = []

for max_pile in [63, 127, 255]:
    # randint's upper bound is exclusive: use max_pile + 1 so that piles can
    # actually reach the max_pile value reported in the banner below.
    initial_piles = np.random.randint(1, max_pile + 1, size=(EPISODES, PILE_COUNT))
    misere = np.random.choice([True, False], size=EPISODES)

    print("---------------------------------------------------------------------------------")
    print(f"\tTesting with max_depth={MAX_DEPTH}, pile_count={PILE_COUNT}, max_pile={max_pile}")
    print("---------------------------------------------------------------------------------")

    # Every agent pair is benchmarked on the same set of games.
    for misere_agent, normal_agent in [
        (misereAgent1, normalAgent1),
        (misereAgent2, normalAgent2),
        (misereAgent3a, normalAgent3a),
        (misereAgent3b, normalAgent3b),
    ]:
        MAX_PILE_RESULTS.append(test_agent(misere_agent, normal_agent, misere, initial_piles))

# Random Game All Agent Test - Pile Count


In [None]:
# One entry (the 4-value list returned by test_agent) per agent per pile_count,
# appended in the order V1, V2, V3a, V3b for each pile_count value.
PILE_COUNT_RESULTS = []

for pile_count in [2, 4, 8]:
    # randint's upper bound is exclusive: use MAX_PILE + 1 so that piles can
    # actually reach the max_pile value reported in the banner below.
    initial_piles = np.random.randint(1, MAX_PILE + 1, size=(EPISODES, pile_count))
    misere = np.random.choice([True, False], size=EPISODES)

    print("---------------------------------------------------------------------------------")
    print(f"\tTesting with max_depth={MAX_DEPTH}, pile_count={pile_count}, max_pile={MAX_PILE}")
    print("---------------------------------------------------------------------------------")

    # Every agent pair is benchmarked on the same set of games.
    for misere_agent, normal_agent in [
        (misereAgent1, normalAgent1),
        (misereAgent2, normalAgent2),
        (misereAgent3a, normalAgent3a),
        (misereAgent3b, normalAgent3b),
    ]:
        PILE_COUNT_RESULTS.append(test_agent(misere_agent, normal_agent, misere, initial_piles))

# Random Game All Agent Test - Max Depth


In [None]:
# One entry (the 4-value list returned by test_agent) per agent per max_depth,
# appended in the order V1, V2, V3a, V3b for each max_depth value.
MAX_DEPTH_RESULTS = []

# The games are drawn once so that every depth is measured on identical input.
# randint's upper bound is exclusive: use MAX_PILE + 1 so that piles can
# actually reach the max_pile value reported in the banner below.
INITIAL_PILES = np.random.randint(1, MAX_PILE + 1, size=(EPISODES, PILE_COUNT))
MISERE = np.random.choice([True, False], size=EPISODES)

for max_depth in [1, 2, 3]:
    # Fresh agents are required here because the depth is a constructor argument.
    _misereAgent1 = MinimaxAgentV1(misere=True, max_depth=max_depth)
    _misereAgent2 = MinimaxAgentV2(misere=True, max_depth=max_depth)
    _misereAgent3a = MinimaxAgentV3(misere=True, max_depth=max_depth, reverse=True)
    _misereAgent3b = MinimaxAgentV3(misere=True, max_depth=max_depth, reverse=False)

    _normalAgent1 = MinimaxAgentV1(misere=False, max_depth=max_depth)
    _normalAgent2 = MinimaxAgentV2(misere=False, max_depth=max_depth)
    _normalAgent3a = MinimaxAgentV3(misere=False, max_depth=max_depth, reverse=True)
    _normalAgent3b = MinimaxAgentV3(misere=False, max_depth=max_depth, reverse=False)

    print("---------------------------------------------------------------------------------")
    print(f"\tTesting with max_depth={max_depth}, pile_count={PILE_COUNT}, max_pile={MAX_PILE}")
    print("---------------------------------------------------------------------------------")

    # Every agent pair is benchmarked on the same set of games.
    for misere_agent, normal_agent in [
        (_misereAgent1, _normalAgent1),
        (_misereAgent2, _normalAgent2),
        (_misereAgent3a, _normalAgent3a),
        (_misereAgent3b, _normalAgent3b),
    ]:
        MAX_DEPTH_RESULTS.append(test_agent(misere_agent, normal_agent, MISERE, INITIAL_PILES))

# Plotting results

In [None]:
# Each flat result list holds 3 swept values x 4 agents, 4 metrics per entry.
# Reshape so that axis 0 is the swept value, axis 1 the agent, axis 2 the metric.
_RESULTS_SHAPE = (3, 4, 4)

MAX_PILE_RESULTS = np.reshape(np.array(MAX_PILE_RESULTS), _RESULTS_SHAPE)
PILE_COUNT_RESULTS = np.reshape(np.array(PILE_COUNT_RESULTS), _RESULTS_SHAPE)
MAX_DEPTH_RESULTS = np.reshape(np.array(MAX_DEPTH_RESULTS), _RESULTS_SHAPE)

# Per-agent line colour and legend label, in the order the agents were tested.
colors = ['#4B0082', '#4169E1', '#9370DB', '#8A2BE2']
models = ['Minimax V1', 'Minimax V2', 'Minimax V3a', 'Minimax V3b']

# Y-axis labels, in the order of the values returned by test_agent.
metrics = [
    'Unweighted Avg Nodes/Move',
    'Weighted Avg Nodes/Move',
    'Avg Nodes/Game',
    'Avg Moves/Game',
]

# X-axis tick values for each sweep.
x_values = dict(
    max_pile=[63, 127, 255],
    pile_count=[2, 4, 8],
    max_depth=[1, 2, 3],
)

def plot_results(results, x_label, x_vals):
    """Draw a 2x2 grid of line charts, one panel per metric.

    Args:
        results: Array indexed as ``[x_value, model, metric]``.
        x_label: Text placed on the x-axis of every panel.
        x_vals: X coordinates, one per entry along the first axis of ``results``.

    Each panel shows one line per model, coloured and labelled from the
    module-level ``colors`` and ``models`` lists. Only the first panel
    carries a legend.
    """
    _, grid = plt.subplots(2, 2, figsize=(15, 12))
    panels = grid.ravel()

    for metric_no, (panel, metric_name) in enumerate(zip(panels, metrics)):
        for model_no, (shade, model_name) in enumerate(zip(colors, models)):
            series = results[:, model_no, metric_no]
            panel.plot(x_vals, series, color=shade, marker='o', label=model_name)

        panel.set_xlabel(x_label)
        panel.set_ylabel(metric_name)
        panel.grid(True, alpha=0.3)

        # A single legend is enough: the colour coding is shared by all panels.
        if metric_no == 0:
            panel.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Render one figure per sweep: (results array, x-axis label, key into x_values).
for sweep_results, axis_label, sweep_key in (
    (MAX_PILE_RESULTS, 'Max Pile Size', 'max_pile'),
    (PILE_COUNT_RESULTS, 'Pile Count', 'pile_count'),
    (MAX_DEPTH_RESULTS, 'Max Depth', 'max_depth'),
):
    plot_results(sweep_results, axis_label, x_values[sweep_key])