# Imports

In [1]:
import numpy as np

from tqdm import tqdm

In [2]:
from nim.Nim import Nim

from agents.Minimax.MinimaxAgentV1 import MinimaxAgentV1
from agents.Minimax.MinimaxAgentV2 import MinimaxAgentV2

from agents.QLearning.QLearningAgentV1 import QLearningAgentV1
from agents.QLearning.QLearningAgentV2 import QLearningAgentV2

ParametrizedAgent class (helper)

In [3]:
class ParametrizedAgent:
    """Factory wrapper that builds an agent from a shared set of keyword arguments.

    The wrapper stores an agent class together with the keyword names that
    class should receive. Calling the wrapper with any keyword arguments
    forwards only the registered ones and drops the rest, so one call site
    can serve agent classes with different constructor signatures.
    """

    def __init__(self, agent_class, *param_names):
        """Store the class to instantiate and the keyword names to forward.

        Args:
            agent_class: Callable invoked to build the agent.
            *param_names: Names of the keyword arguments passed through to
                ``agent_class``.
        """
        self.agent_class = agent_class
        self.param_names = param_names

    def __call__(self, **kwargs):
        """Instantiate the agent with the registered subset of ``kwargs``.

        Args:
            **kwargs: Candidate keyword arguments; names not listed in
                ``param_names`` are silently ignored.

        Returns:
            The result of calling ``agent_class`` with the filtered keywords.
        """
        accepted = {}
        for name, value in kwargs.items():
            if name in self.param_names:
                accepted[name] = value
        return self.agent_class(**accepted)

Agent selection

In [4]:
# Exactly one AGENT definition should be active; the others are kept as
# ready-to-use alternatives. The strings after the class are the constructor
# keyword names that ParametrizedAgent will forward to that class.

# AGENT = ParametrizedAgent(MinimaxAgentV1, "misere", "max_depth")
# AGENT = ParametrizedAgent(MinimaxAgentV2, "misere", "max_depth")

# AGENT = ParametrizedAgent(QLearningAgentV1, "misere", "max_piles", "alpha", "epsilon", "gamma", "decay_rate")
AGENT = ParametrizedAgent(
    QLearningAgentV2,
    "misere",
    "max_piles",
    "alpha",
    "epsilon",
    "gamma",
    "decay_rate",
)

Constants

In [5]:
# Game related
MISERE = False  # game mode of the single-game demo (True = misere, False = normal)
MAX_PILE = 127  # upper bound used when sizing piles
PILE_COUNT = 8  # number of piles in randomly generated games
MAX_PILES = [MAX_PILE] * PILE_COUNT  # one bound per pile; passed to agents as `max_piles`
INITIAL_PILES = [21, 20, 19, 18, 56]  # starting position of the single-game demo

# Minimax Agents
MAX_DEPTH = 1

# QLearning Agents
ALPHA = 0.5
EPSILON = 0.1
EPSILLON = EPSILON  # backward-compatible alias for the original (misspelled) name
GAMMA = 0.9
DECAY_RATE = 0.9999

Assert functions (helpers)

In [6]:
def hard_assert(_misere, _initial_piles, _winner):
    """Assert that ``_winner`` is the game-theoretic winner of a Nim game.

    The check assumes optimal play by both sides and that ``_winner`` is the
    zero-based index of the winning player, with 0 being the player who moves
    first (NOTE(review): this matches the ``P{_winner + 1}`` labels used by
    ``soft_assert``; confirm against ``Nim.play``).

    Rules applied:
      * Misere game where every pile holds at most one object: the first
        player wins exactly when the number of objects is even.
      * Every other game (normal play, or misere with some pile > 1): the
        first player wins exactly when the nim-sum (XOR of all pile sizes)
        is non-zero.

    Args:
        _misere: True for misere play (taking the last object loses).
        _initial_piles: Pile sizes at the start of the game.
        _winner: Index of the player who actually won (0 or 1).

    Raises:
        AssertionError: If ``_winner`` differs from the expected winner.
    """
    # Explicit integer dtype keeps the XOR reduction valid for an empty list.
    piles = np.array(_initial_piles, dtype=int)

    if _misere and np.all(piles <= 1):
        # Only single-object piles: players alternate removing one pile, so
        # parity of the object count decides who is forced to take the last.
        expected = int(np.sum(piles) % 2)
        assert _winner == expected, "Misere Nim - Corner Case"
        return

    # Outside the corner case the nim-sum criterion is the same for both
    # game modes: a zero nim-sum is a loss for the player about to move.
    nim_sum = np.bitwise_xor.reduce(piles)
    expected = int(nim_sum == 0)
    assert _winner == expected, f"{'Misere' if _misere else 'Normal'} Nim - All Cases"

def soft_assert(_misere, _initial_piles, _winner, _wins):
    """Tally a finished game in ``_wins`` instead of asserting on its outcome.

    Each game is counted under a key of the form ``"<mode>_<start>_<winner>"``:
      * ``<mode>`` is ``M`` for misere and ``N`` for normal play,
      * ``<start>`` is ``=0`` when the nim-sum of the starting piles is zero
        and ``>0`` otherwise,
      * ``<winner>`` is ``P`` followed by ``_winner + 1``.

    Args:
        _misere: True for a misere game.
        _initial_piles: Pile sizes at the start of the game.
        _winner: Zero-based index of the winning player.
        _wins: Mapping from key to count; updated in place.
    """
    zero_start = np.bitwise_xor.reduce(np.array(_initial_piles)) == 0

    mode_tag = "M" if _misere else "N"
    sum_tag = "=0" if zero_start else ">0"
    winner_tag = "P" + str(_winner + 1)

    bucket = f"{mode_tag}_{sum_tag}_{winner_tag}"
    _wins.setdefault(bucket, 0)
    _wins[bucket] += 1

# Agent Setup

In [None]:
# Build one agent per game mode. Every tunable constant is passed here;
# ParametrizedAgent forwards only the keyword names registered for the
# selected agent class, so parameters that class does not take are dropped.
# Previously the Q-learning hyperparameters were defined but never passed,
# leaving the agents on whatever defaults their constructors provide.
misereAgent = AGENT(
    misere=True,
    max_piles=MAX_PILES,
    max_depth=MAX_DEPTH,
    alpha=ALPHA,
    epsilon=EPSILLON,
    gamma=GAMMA,
    decay_rate=DECAY_RATE,
)
normalAgent = AGENT(
    misere=False,
    max_piles=MAX_PILES,
    max_depth=MAX_DEPTH,
    alpha=ALPHA,
    epsilon=EPSILLON,
    gamma=GAMMA,
    decay_rate=DECAY_RATE,
)

  5%|▌         | 2617/50000 [00:07<02:20, 337.85it/s]

# One game demo

In [None]:
# Single self-play game from the fixed demo position: the agent matching the
# configured game mode is seated on both sides.
game = Nim(initial_piles=INITIAL_PILES, misere=MISERE)

if MISERE:
    winner = game.play(player1=misereAgent, player2=misereAgent)
else:
    winner = game.play(player1=normalAgent, player2=normalAgent)

## 10k random game test (Normal and Misere, chosen at random per game)

In [None]:
# Outcome tally, keyed as "<mode>_<starting nim-sum>_<winner>" by soft_assert.
wins = {}

for _ in tqdm(range(10000)):
    # Draw the game mode and starting position; convert to native Python
    # values so the game and agents never receive NumPy scalars.
    misere = bool(np.random.choice([True, False]))
    # randint's upper bound is exclusive, hence +1 so a pile can reach MAX_PILE.
    initial_piles = np.random.randint(1, MAX_PILE + 1, size=PILE_COUNT).tolist()

    game = Nim(
        initial_piles=initial_piles,
        misere=misere
    )

    # Select the agent by THIS game's mode (the loop variable), not by the
    # global MISERE constant, which would always pick the normal-play agent.
    agent = misereAgent if misere else normalAgent

    winner = game.play(
        player1=agent,
        player2=agent,
        verbose=False
    )

    # hard_assert(misere, initial_piles, winner)
    soft_assert(misere, initial_piles, winner, wins)

for k, v in wins.items():
    print(f"{k}: {v}")