# Imports

In [38]:
import numpy as np

from tqdm import tqdm

In [39]:
from nim.Nim import Nim

from agents.Minimax.MinimaxAgentV1 import MinimaxAgentV1
from agents.Minimax.MinimaxAgentV2 import MinimaxAgentV2

from agents.QLearning.QLearningAgentV1 import QLearningAgentV1
from agents.QLearning.QLearningAgentV2 import QLearningAgentV2

Constants

In [60]:
# Seed NumPy's global generator first: INITIAL_PILES below (and every later
# np.random call in the notebook) depends on this stream.
np.random.seed(42)

# --- Test parameters ---
EPISODES = 10_000  # number of self-play games in the batch test

# --- Game parameters ---
MISERE = False  # rules used by the single-game demo
MAX_PILE = 127
PILE_COUNT = 4
MAX_PILES = [MAX_PILE for _ in range(PILE_COUNT)]
# NOTE: randint's `high` bound is exclusive, so the sampled piles lie in
# [1, MAX_PILE - 1]; MAX_PILE itself is never drawn.
INITIAL_PILES = np.random.randint(low=1, high=MAX_PILE, size=PILE_COUNT)

# --- Minimax parameters ---
MAX_DEPTH = 3

# --- QLearning parameters ---
ALPHA = 0.5
EPSILLON = 0.1  # spelling kept as-is so any existing reference keeps working
GAMMA = 0.9
DECAY_RATE = 0.9999

ParametrizedAgent class (helper)

In [61]:
class ParametrizedAgent:
    """Factory that builds an agent from only the keyword arguments it accepts.

    The wrapper remembers an agent class together with the names of the
    constructor keywords that class should receive. Calling the wrapper with
    an arbitrary set of keyword arguments forwards just the remembered ones
    and silently discards the rest.
    """

    def __init__(self, agent_class, *param_names):
        """Store the agent class and the keyword names to forward to it."""
        self.agent_class, self.param_names = agent_class, param_names

    def __call__(self, **kwargs):
        """Instantiate ``agent_class`` with the subset of ``kwargs`` it declared.

        Keywords not listed in ``param_names`` are dropped; declared keywords
        that are absent from ``kwargs`` are simply not passed.
        """
        accepted = {}
        for name, value in kwargs.items():
            if name in self.param_names:
                accepted[name] = value
        return self.agent_class(**accepted)

Agent selection

In [62]:
# Choose exactly one agent factory. The strings after the class are the
# constructor keywords that ParametrizedAgent forwards to that class.
AGENT = ParametrizedAgent(
    MinimaxAgentV2,
    "misere",
    "max_depth",
)

# Alternatives (swap with the assignment above to use one of them):
# AGENT = ParametrizedAgent(MinimaxAgentV1, "misere", "max_depth")
# AGENT = ParametrizedAgent(QLearningAgentV1, "misere", "max_piles", "alpha", "epsilon", "gamma", "decay_rate")
# AGENT = ParametrizedAgent(QLearningAgentV2, "misere", "max_piles", "alpha", "epsilon", "gamma", "decay_rate")

Assert functions (helpers)

In [63]:
def hard_assert(_misere, _initial_piles, _winner):
    """Assert that ``_winner`` is the game-theoretic winner of a Nim game.

    The check assumes both players play optimally, so it is only meaningful
    for agents that are expected to play perfectly.

    Args:
        _misere: True for misere rules (taking the last object loses),
            False for normal rules (taking the last object wins).
        _initial_piles: Sequence of non-negative integer pile sizes at the
            start of the game.
        _winner: 0 if the player who moved first won, 1 if the second
            player won.

    Raises:
        AssertionError: If ``_winner`` is not the winner predicted by theory.
    """
    piles = np.asarray(_initial_piles, dtype=int)

    # Misere corner case: when no pile holds more than one object the game is
    # a forced sequence of single takes, so the first player wins exactly
    # when the number of objects is even.
    if _misere and np.all(piles <= 1):
        assert _winner == np.sum(piles) % 2, "Misere Nim - Corner Case"
        return

    # Every other position (normal play, or misere play with at least one
    # pile larger than 1) is won by the first player iff the nim-sum is
    # non-zero; a zero nim-sum therefore means the second player (1) wins.
    nim_sum = np.bitwise_xor.reduce(piles)
    assert _winner == int(nim_sum == 0), f"{'Misere' if _misere else 'Normal'} Nim - All Cases"

def soft_assert(_misere, _initial_piles, _winner, _wins):
    """Record the outcome of one game in the ``_wins`` tally (never raises).

    The game is filed under a key of the form ``"<G>_<S>_P<k>"`` where
    ``<G>`` is ``M`` (misere) or ``N`` (normal), ``<S>`` is ``=0`` or ``>0``
    depending on the nim-sum of the starting piles, and ``<k>`` is
    ``_winner + 1``.

    Args:
        _misere: Truthy when the game was played under misere rules.
        _initial_piles: Sequence of integer pile sizes at the start.
        _winner: Zero-based index of the winning player.
        _wins: Dict of counters, updated in place.
    """
    nim_sum = np.bitwise_xor.reduce(np.array(_initial_piles))

    if _misere:
        game_type = "M"
    else:
        game_type = "N"

    start_sum = ">0" if nim_sum != 0 else "=0"

    bucket = "_".join((game_type, start_sum, "P" + str(_winner + 1)))
    _wins[bucket] = _wins.get(bucket, 0) + 1

# Agent Setup

In [64]:
# Build one agent per rule set. Every tunable from the constants cell is
# passed here; ParametrizedAgent forwards only the keywords that the selected
# AGENT declared, so the Q-learning hyperparameters are ignored for Minimax
# agents and reach the Q-learning agents when one of those is selected.
misereAgent = AGENT(
    misere=True,
    max_piles=MAX_PILES,
    max_depth=MAX_DEPTH,
    alpha=ALPHA,
    epsilon=EPSILLON,
    gamma=GAMMA,
    decay_rate=DECAY_RATE,
)
normalAgent = AGENT(
    misere=False,
    max_piles=MAX_PILES,
    max_depth=MAX_DEPTH,
    alpha=ALPHA,
    epsilon=EPSILLON,
    gamma=GAMMA,
    decay_rate=DECAY_RATE,
)

# One game demo

In [65]:
# Single verbose self-play game from the seeded starting position.
game = Nim(initial_piles=INITIAL_PILES, misere=MISERE)

# Both seats are taken by the agent built for the configured rule set.
# NOTE(review): the batch-test cell unpacks two values from game.play();
# confirm what this call returns before relying on `winner` here.
if MISERE:
    winner = game.play(player1=misereAgent, player2=misereAgent)
else:
    winner = game.play(player1=normalAgent, player2=normalAgent)

Normal game
Piles: [103  52  93  15]
Player 1 (MinimaxV2) takes 1 from pile 3
Agent 1 (MinimaxV2) NPM: 18096.00
Agent 2 (MinimaxV2) NPM: 18096.00
Piles: [103  52  93  14]
Player 2 (MinimaxV2) takes 103 from pile 0
Agent 1 (MinimaxV2) NPM: 18088.00
Agent 2 (MinimaxV2) NPM: 18088.00
Piles: [ 0 52 93 14]
Player 1 (MinimaxV2) takes 35 from pile 2
Agent 1 (MinimaxV2) NPM: 12509.67
Agent 2 (MinimaxV2) NPM: 12509.67
Piles: [ 0 52 58 14]
Player 2 (MinimaxV2) takes 58 from pile 2
Agent 1 (MinimaxV2) NPM: 9901.00
Agent 2 (MinimaxV2) NPM: 9901.00
Piles: [ 0 52  0 14]
Player 1 (MinimaxV2) takes 38 from pile 1
Agent 1 (MinimaxV2) NPM: 7928.80
Agent 2 (MinimaxV2) NPM: 7928.80
Piles: [ 0 14  0 14]
Player 2 (MinimaxV2) takes 14 from pile 1
Agent 1 (MinimaxV2) NPM: 6620.50
Agent 2 (MinimaxV2) NPM: 6620.50
Piles: [ 0  0  0 14]
Player 1 (MinimaxV2) takes 13 from pile 3
Agent 1 (MinimaxV2) NPM: 5675.00
Agent 2 (MinimaxV2) NPM: 5675.00
Piles: [0 0 0 1]
Player 2 (MinimaxV2) takes 1 from pile 3
Player 2 (Min

## 10k random game test (Normal and Misere, chosen at random per game)

In [66]:
# Outcome counters keyed by "<rules>_<starting nim-sum>_<winner>" (see soft_assert).
wins = {}

# Running sum of the second value returned by game.play(); turned into a
# per-episode mean after the loop.
# NOTE(review): presumably a node-count statistic - confirm against Nim.play.
mean_nodes = 0

for _ in tqdm(range(EPISODES)):
    # Draw the rule set and the starting position for this episode.
    misere = np.random.choice([True, False])
    initial_piles = list(np.random.randint(1, MAX_PILE, size=PILE_COUNT))

    game = Nim(
        initial_piles=initial_piles,
        misere=misere
    )

    # Pick the agent that matches THIS episode's rules. The lowercase,
    # per-episode `misere` must be used here: the global MISERE constant only
    # configures the single-game demo and would put the normal-play agent
    # into every randomly drawn misere game.
    episode_agent = misereAgent if misere else normalAgent

    winner, mn = game.play(
        player1=episode_agent,
        player2=episode_agent,
        verbose=False
    )

    mean_nodes += mn

    # hard_assert(misere, initial_piles, winner)
    soft_assert(misere, initial_piles, winner, wins)

for k, v in wins.items():
    print(f"{k}: {v}")

# Skip the report when nothing was accumulated.
if mean_nodes:
    mean_nodes /= EPISODES
    print(f"\nMean nodes: {mean_nodes:.2f}")

  3%|▎         | 291/10000 [00:11<06:15, 25.85it/s]


KeyboardInterrupt: 