# Imports

In [1]:
import os
import numpy as np

from tqdm import tqdm

import matplotlib.pyplot as plt

In [2]:
from Nim.Nim import Nim
from Nim.NimLogic import NimLogic

from Agents.Minimax.MinimaxAgentV1 import MinimaxAgentV1
from Agents.Minimax.MinimaxAgentV2 import MinimaxAgentV2
from Agents.Minimax.MinimaxAgentV3 import MinimaxAgentV3

# Constants

In [3]:
# Seed NumPy's legacy global RNG so the randomly drawn games are repeatable
np.random.seed(42)

# Number of games played per agent in each experiment
EPISODES = 10_000

# Default game setup: number of piles and the pile-size bound used for random draws
PILE_COUNT = 4
MAX_PILE = 127

# Default search depth passed to the minimax agents
MAX_DEPTH = 2

# Agent Testing Function

In [4]:
def test_agent(_misereAgent, _normalAgent, _misere, _initial_piles):
    """Play EPISODES self-play games and collect per-game search statistics.

    For episode ``i`` a ``Nim`` game is built from ``_initial_piles[i]`` with
    ``misere=_misere[i]``; ``_misereAgent`` is used when ``_misere[i]`` is
    truthy, otherwise ``_normalAgent``. The chosen agent plays both sides.
    After each game the value returned by ``game.play`` is asserted to equal
    ``NimLogic.is_p_position(_initial_piles[i], _misere[i])``.

    A four-line summary (unweighted / weighted nodes per move, nodes per
    game, moves per game) is printed once all episodes are done.

    Args:
        _misereAgent: agent used for episodes where ``_misere[i]`` is truthy.
        _normalAgent: agent used for all other episodes.
        _misere: indexable of per-episode rule flags; must provide at least
            EPISODES entries.
        _initial_piles: indexable of per-episode starting piles; must provide
            at least EPISODES entries.

    Returns:
        Tuple ``(explored_nodes, moves_count, mean_nodes)`` of three float
        arrays of length EPISODES, holding for each game the agent's
        ``nodes_explored``, ``moves_count`` and ``mean_nodes`` attributes.

    Raises:
        AssertionError: if a game's result differs from the
            ``NimLogic.is_p_position`` value for its starting position.
    """
    # np.empty is the documented way to allocate an uninitialised buffer;
    # every slot is overwritten in the loop below before being read.
    explored_nodes = np.empty(EPISODES)
    moves_count = np.empty(EPISODES)
    mean_nodes = np.empty(EPISODES)

    for i in tqdm(range(EPISODES)):
        game = Nim(
            initial_piles=_initial_piles[i],
            misere=_misere[i]
        )

        agent = _misereAgent if _misere[i] else _normalAgent

        # The same agent object plays both sides, so its counters cover
        # the moves of both players.
        winner = game.play(
            player1=agent,
            player2=agent,
            verbose=False
        )

        # NOTE(review): presumably `winner` is encoded so that it equals the
        # P-position flag under optimal play - confirm against Nim.play.
        expected = NimLogic.is_p_position(_initial_piles[i], _misere[i])
        assert winner == expected, (
            f"episode {i}: winner={winner!r}, expected={expected!r}, "
            f"piles={_initial_piles[i]!r}, misere={_misere[i]!r}"
        )

        agent.compute_mean_nodes()

        # NOTE(review): the agent is reused across episodes; this assumes its
        # counters are reset for every game - otherwise the values recorded
        # here are cumulative. Confirm against the agent / Nim.play code.
        mean_nodes[i] = agent.mean_nodes
        moves_count[i] = agent.moves_count
        explored_nodes[i] = agent.nodes_explored

    # Weighted: total nodes over total moves (long games count more).
    # Unweighted: plain mean of the per-game averages.
    weighted_mean_nodes = explored_nodes.sum() / moves_count.sum()
    unweighted_mean_nodes = mean_nodes.mean()

    avg_explored_nodes = explored_nodes.mean()
    avg_moves_count = moves_count.mean()

    values = [unweighted_mean_nodes, weighted_mean_nodes, avg_explored_nodes, avg_moves_count]

    labels = [
        "unweighted average explored nodes per move:",
        "weighted average explored nodes per move:",
        "average explored nodes per game:",
        "average moves per game:"
    ]

    # Pad every label to the longest one so the numbers line up in a column.
    label_width = max(len(lbl) for lbl in labels)

    for lbl, val in zip(labels, values):
        print(f"{lbl:<{label_width}} {val:>10.2f}")

    return explored_nodes, moves_count, mean_nodes

# Agent Initialization for the First Two Tests (Max Pile and Pile Count)

In [5]:
# One agent per minimax version for the misere rule, all at the default depth
misereAgent1, misereAgent2, misereAgent3 = (
    agent_cls(misere=True, max_depth=MAX_DEPTH)
    for agent_cls in (MinimaxAgentV1, MinimaxAgentV2, MinimaxAgentV3)
)

# ...and the matching agents for the normal rule
normalAgent1, normalAgent2, normalAgent3 = (
    agent_cls(misere=False, max_depth=MAX_DEPTH)
    for agent_cls in (MinimaxAgentV1, MinimaxAgentV2, MinimaxAgentV3)
)

# Directory the experiment cells write their .npz result files into
os.makedirs("savedData/Minimax", exist_ok=True)

# Random Game All Agent Test - Max Pile

In [None]:
# Sweep the maximum pile size; pile count and search depth stay at their defaults.
for max_pile in [63, 127, 255]:
    # np.random.randint excludes `high`, so +1 is required for pile sizes to
    # actually reach the advertised max_pile.
    initial_piles = np.random.randint(1, max_pile + 1, size=(EPISODES, PILE_COUNT))
    misere = np.random.choice([True, False], size=EPISODES)

    print("------------------------------------------------------------")
    print(f"\tTesting with max_depth={MAX_DEPTH}, pile_count={PILE_COUNT}, max_pile={max_pile}")
    print("------------------------------------------------------------")

    # Save each result as soon as it is available so a failure in a later
    # (long-running) agent does not discard the finished runs.
    test_v1 = test_agent(misereAgent1, normalAgent1, misere, initial_piles)
    np.savez(f"savedData/Minimax/minimax-v1-{max_pile}-MAX-PILE.npz", test_v1)

    test_v2 = test_agent(misereAgent2, normalAgent2, misere, initial_piles)
    np.savez(f"savedData/Minimax/minimax-v2-{max_pile}-MAX-PILE.npz", test_v2)

    test_v3 = test_agent(misereAgent3, normalAgent3, misere, initial_piles)
    np.savez(f"savedData/Minimax/minimax-v3-{max_pile}-MAX-PILE.npz", test_v3)

------------------------------------------------------------
	Testing with max_depth=2, pile_count=4, max_pile=63
------------------------------------------------------------


  1%|          | 86/10000 [00:02<04:18, 38.41it/s]

# Random Game All Agent Test - Pile Count


In [None]:
# Sweep the number of piles; pile-size bound and search depth stay at their defaults.
for pile_count in [2, 4, 8]:
    # np.random.randint excludes `high`, so +1 is required for pile sizes to
    # actually reach MAX_PILE.
    initial_piles = np.random.randint(1, MAX_PILE + 1, size=(EPISODES, pile_count))
    misere = np.random.choice([True, False], size=EPISODES)

    print("------------------------------------------------------------")
    print(f"\tTesting with max_depth={MAX_DEPTH}, pile_count={pile_count}, max_pile={MAX_PILE}")
    print("------------------------------------------------------------")

    # Save each result as soon as it is available so a failure in a later
    # (long-running) agent does not discard the finished runs.
    test_v1 = test_agent(misereAgent1, normalAgent1, misere, initial_piles)
    np.savez(f"savedData/Minimax/minimax-v1-{pile_count}-PILE-COUNT.npz", test_v1)

    test_v2 = test_agent(misereAgent2, normalAgent2, misere, initial_piles)
    np.savez(f"savedData/Minimax/minimax-v2-{pile_count}-PILE-COUNT.npz", test_v2)

    test_v3 = test_agent(misereAgent3, normalAgent3, misere, initial_piles)
    np.savez(f"savedData/Minimax/minimax-v3-{pile_count}-PILE-COUNT.npz", test_v3)

# Random Game All Agent Test - Max Depth


In [None]:
# The same set of games is replayed at every depth so the depths are comparable.
# np.random.randint excludes `high`, so +1 is required for pile sizes to
# actually reach MAX_PILE.
INITIAL_PILES = np.random.randint(1, MAX_PILE + 1, size=(EPISODES, PILE_COUNT))
MISERE = np.random.choice([True, False], size=EPISODES)

# Sweep the search depth; fresh agents are built for each depth.
for max_depth in [1, 2, 3]:
    _misereAgent1 = MinimaxAgentV1(misere=True, max_depth=max_depth)
    _misereAgent2 = MinimaxAgentV2(misere=True, max_depth=max_depth)
    _misereAgent3 = MinimaxAgentV3(misere=True, max_depth=max_depth)

    _normalAgent1 = MinimaxAgentV1(misere=False, max_depth=max_depth)
    _normalAgent2 = MinimaxAgentV2(misere=False, max_depth=max_depth)
    _normalAgent3 = MinimaxAgentV3(misere=False, max_depth=max_depth)

    print("------------------------------------------------------------")
    print(f"\tTesting with max_depth={max_depth}, pile_count={PILE_COUNT}, max_pile={MAX_PILE}")
    print("------------------------------------------------------------")

    # Save each result as soon as it is available so a failure in a later
    # (long-running) agent does not discard the finished runs.
    test_v1 = test_agent(_misereAgent1, _normalAgent1, MISERE, INITIAL_PILES)
    np.savez(f"savedData/Minimax/minimax-v1-{max_depth}-MAX-DEPTH.npz", test_v1)

    test_v2 = test_agent(_misereAgent2, _normalAgent2, MISERE, INITIAL_PILES)
    np.savez(f"savedData/Minimax/minimax-v2-{max_depth}-MAX-DEPTH.npz", test_v2)

    test_v3 = test_agent(_misereAgent3, _normalAgent3, MISERE, INITIAL_PILES)
    np.savez(f"savedData/Minimax/minimax-v3-{max_depth}-MAX-DEPTH.npz", test_v3)

# Plotting results

In [None]:
def plot_results(test_type, values, data_dir="savedData/Minimax"):
    """Plot the four summary metrics of one experiment for all minimax versions.

    For every model in (v1, v2, v3) and every entry of ``values`` the file
    ``<data_dir>/minimax-<model>-<value>-<test_type>.npz`` is read. Its
    ``arr_0`` entry is unpacked into ``explored_nodes, moves_count,
    mean_nodes`` and reduced to four metrics, which are drawn on a 2x2 grid
    with one line per model.

    Args:
        test_type: experiment tag used in the file names, e.g. ``'MAX-PILE'``;
            also used (with dashes replaced by spaces) as the x-axis label.
        values: the swept parameter values; used both to build the file
            names and as x coordinates.
        data_dir: directory holding the ``.npz`` files. The default matches
            the location the experiment cells save to.

    Raises:
        FileNotFoundError: if one of the expected result files is missing.
    """
    colors = ['#4B0082', '#4169E1', '#9370DB']
    models = ['Minimax V1', 'Minimax V2', 'Minimax V3']
    model_keys = ['v1', 'v2', 'v3']
    metrics = ['Unweighted Avg Nodes/Move', 'Weighted Avg Nodes/Move',
               'Avg Nodes/Game', 'Avg Moves/Game']

    # Read every result file exactly once, before any figure is created, so a
    # missing file fails fast and does not leave an empty figure behind.
    summaries = {}
    for key in model_keys:
        rows = []

        for val in values:
            path = os.path.join(data_dir, f"minimax-{key}-{val}-{test_type}.npz")

            # The context manager closes the underlying archive; the array
            # itself is fully read on item access.
            with np.load(path) as data:
                explored_nodes, moves_count, mean_nodes = data['arr_0']

            # Order matches `metrics` above.
            rows.append((
                mean_nodes.mean(),
                explored_nodes.sum() / moves_count.sum(),
                explored_nodes.mean(),
                moves_count.mean(),
            ))

        summaries[key] = rows

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    axes = axes.ravel()

    for metric_idx, ax in enumerate(axes):
        for model_idx, key in enumerate(model_keys):
            metric_data = [row[metric_idx] for row in summaries[key]]

            ax.plot(values, metric_data, color=colors[model_idx],
                    marker='o', label=models[model_idx])

        ax.set_xlabel(test_type.replace('-', ' '))
        ax.set_ylabel(metrics[metric_idx])
        ax.grid(True, alpha=0.3)
        # A single legend on the first panel is enough; colours are shared.
        if metric_idx == 0:
            ax.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# One figure per experiment; `values` must list the swept settings used when the results were saved
plot_results(test_type='MAX-PILE', values=[63, 127, 255])
plot_results(test_type='PILE-COUNT', values=[2, 4, 8])
plot_results(test_type='MAX-DEPTH', values=[1, 2, 3])