# Imports

In [9]:
import numpy as np

from tqdm import tqdm

In [10]:
from nim.Nim import Nim

from agents.Minimax.MinimaxAgentV1 import MinimaxAgentV1
from agents.Minimax.MinimaxAgentV2 import MinimaxAgentV2

from agents.QLearning.QLearningAgentV1 import QLearningAgentV1
from agents.QLearning.QLearningAgentV2 import QLearningAgentV2

ParametrizedAgent class (helper)

In [11]:
class ParametrizedAgent:
    """Factory wrapper that builds an agent from a superset of keyword arguments.

    The wrapper remembers an agent class together with the names of the
    keyword arguments that class should receive. Calling the wrapper with any
    keyword arguments forwards only the remembered ones to the class, so the
    same call site can serve agent classes with different constructors.
    """

    def __init__(self, agent_class, *param_names):
        """Store the class to instantiate and the keyword names to forward.

        Args:
            agent_class: Callable invoked with the filtered keyword arguments.
            *param_names: Names of the keyword arguments to pass through.
        """
        self.param_names = param_names
        self.agent_class = agent_class

    def __call__(self, **kwargs):
        """Instantiate `agent_class` using only the whitelisted keywords.

        Keywords not listed in `param_names` are dropped silently; listed
        names that were not supplied are simply not passed.

        Returns:
            Whatever `agent_class(**filtered_kwargs)` returns.
        """
        accepted = {}
        for name, value in kwargs.items():
            if name in self.param_names:
                accepted[name] = value
        return self.agent_class(**accepted)

# Agent under test. Exactly one assignment should be active; the alternatives
# are kept commented out so the notebook can be switched between agent types.
#
# AGENT = ParametrizedAgent(MinimaxAgentV1, "misere", "max_depth")
# AGENT = ParametrizedAgent(MinimaxAgentV2, "misere", "max_depth")
# AGENT = ParametrizedAgent(QLearningAgentV1, "misere", "max_piles", "alpha", "epsilon", "gamma", "decay_rate")
AGENT = ParametrizedAgent(
    QLearningAgentV2,
    "misere",
    "max_piles",
    "alpha",
    "epsilon",
    "gamma",
    "decay_rate",
)

Constants

In [12]:
# Game related
INITIAL_PILES = [21, 20, 19, 18, 56]  # starting piles for the single-game demo
MISERE = False  # rule set used by the single-game demo

# Minimax Agents
MAX_DEPTH = 1

# QLearning Agents
# NOTE(review): the meaning of each hyperparameter is defined by the
# QLearningAgent classes, which are not part of this notebook - confirm there.
ALPHA = 0.5
EPSILON = 0.1
EPSILLON = EPSILON  # misspelled original name, kept as an alias for existing references
GAMMA = 0.9
DECAY_RATE = 0.9999
MAX_PILES = [255] * 8

Assert functions (helpers)

In [13]:
def hard_assert(_misere, _initial_piles, _winner):
    """Assert that `_winner` is the game-theoretic winner of the start position.

    The check is only meaningful when both players play perfectly; any
    sub-optimal move can legitimately hand the game to the other side.

    Args:
        _misere: True for misere rules (taking the last object loses),
            False for normal rules (taking the last object wins).
        _initial_piles: Pile sizes at the start of the game.
        _winner: 0-based index of the winning player (0 = player who moved
            first, 1 = second player), the convention the corner-case check
            below relies on.

    Raises:
        AssertionError: If `_winner` differs from the theoretical winner.
    """
    # Explicit integer dtype so an empty pile list still supports bitwise ops.
    piles = np.array(_initial_piles, dtype=np.int64)

    if _misere and np.all(piles <= 1):
        # Misere endgame: every move removes exactly one 1-pile and whoever
        # takes the last one loses, so the first player wins exactly when the
        # number of 1-piles is even.
        assert _winner == np.sum(piles) % 2, "Misere Nim - Corner Case"
    else:
        # Normal play, and misere play with at least one pile larger than 1:
        # the player to move wins iff the nim-sum is non-zero, so the second
        # player (index 1) wins exactly when the nim-sum is zero.
        nim_sum = np.bitwise_xor.reduce(piles)
        assert _winner == int(nim_sum == 0), f"{'Misere' if _misere else 'Normal'} Nim - All Cases"

def soft_assert(_misere, _initial_piles, _winner, _wins):
    """Record one game result in the `_wins` tally instead of asserting on it.

    The tally key has the form "<rules>_<nim-sum>_<winner>":
    rules is "M" (misere) or "N" (normal), nim-sum is "=0" or ">0" for the
    starting position, and winner is "P1"/"P2" derived from the 0-based
    `_winner` index.

    Args:
        _misere: Whether the game was played under misere rules.
        _initial_piles: Pile sizes at the start of the game.
        _winner: 0-based index of the player who won.
        _wins: Dict mapping tally keys to counts; updated in place.
    """
    starting_nim_sum = np.bitwise_xor.reduce(np.array(_initial_piles))

    if _misere:
        rules_tag = "M"
    else:
        rules_tag = "N"

    if starting_nim_sum == 0:
        sum_tag = "=0"
    else:
        sum_tag = ">0"

    key = f"{rules_tag}_{sum_tag}_P{_winner + 1}"
    _wins[key] = _wins.get(key, 0) + 1

# Agent Setup

In [14]:
# Forward every tunable from the Constants cell. AGENT (a ParametrizedAgent)
# keeps only the keywords declared for the selected agent class, so passing the
# full set is safe for both Minimax and Q-learning agents. Previously the
# Q-learning hyperparameters were defined but never handed to the agent.
_agent_kwargs = dict(
    max_piles=MAX_PILES,
    max_depth=MAX_DEPTH,
    alpha=ALPHA,
    epsilon=EPSILLON,
    gamma=GAMMA,
    decay_rate=DECAY_RATE,
)

misereAgent = AGENT(misere=True, **_agent_kwargs)
normalAgent = AGENT(misere=False, **_agent_kwargs)

Q-values loaded from savedAgents/qlearningV2-255-255-255-255-255-255-255-255-True.json
Q-table dimensions: 1657433
Q-values loaded from savedAgents/qlearningV2-255-255-255-255-255-255-255-255-False.json
Q-table dimensions: 1657793


# One game demo

In [15]:
# Single verbose game on the configured starting position.
game = Nim(initial_piles=INITIAL_PILES, misere=MISERE)

# Self-play: both seats are taken by the agent matching the configured rules.
_demo_agent = misereAgent if MISERE else normalAgent

winner = game.play(player1=_demo_agent, player2=_demo_agent)

Normal game
Piles: [21, 20, 19, 18, 56]
Player 1 (Q-LearningV2 agent) takes 13 from pile 0
Piles: [8, 20, 19, 18, 56]
Player 2 (Q-LearningV2 agent) takes 49 from pile 4
Piles: [8, 20, 19, 18, 7]
Player 1 (Q-LearningV2 agent) takes 3 from pile 3
Piles: [8, 20, 19, 15, 7]
Player 2 (Q-LearningV2 agent) takes 7 from pile 0
Piles: [1, 20, 19, 15, 7]
Player 1 (Q-LearningV2 agent) takes 10 from pile 1
Piles: [1, 10, 19, 15, 7]
Player 2 (Q-LearningV2 agent) takes 10 from pile 2
Piles: [1, 10, 9, 15, 7]
Player 1 (Q-LearningV2 agent) takes 4 from pile 1
Piles: [1, 6, 9, 15, 7]
Player 2 (Q-LearningV2 agent) takes 5 from pile 1
Piles: [1, 1, 9, 15, 7]
Player 1 (Q-LearningV2 agent) takes 1 from pile 1
Piles: [1, 0, 9, 15, 7]
Player 2 (Q-LearningV2 agent) takes 6 from pile 4
Piles: [1, 0, 9, 15, 1]
Player 1 (Q-LearningV2 agent) takes 1 from pile 0
Piles: [0, 0, 9, 15, 1]
Player 2 (Q-LearningV2 agent) takes 4 from pile 3
Piles: [0, 0, 9, 11, 1]
Player 1 (Q-LearningV2 agent) takes 7 from pile 2
Piles:

## 10k random game test (Normal and Misere)

In [16]:
# Tally of outcomes keyed by "<rules>_<nim-sum>_<winner>" (filled by soft_assert).
wins = {}

for _ in tqdm(range(10000)):
    # Each game independently picks its rule set and 8 random piles of 1..49.
    misere = np.random.choice([True, False])
    initial_piles = list(np.random.randint(1, 50, size=8))

    game = Nim(
        initial_piles=initial_piles,
        misere=misere
    )

    # Select the agent for THIS game's rules. The previous code keyed on the
    # global MISERE constant, so every game used the same agent regardless of
    # the rule set actually being played.
    agent = misereAgent if misere else normalAgent

    winner = game.play(
        player1=agent,
        player2=agent,
        verbose=False
    )

    # Disabled: the strict check presumably only holds when both agents play
    # perfectly - re-enable once the agents are expected to be optimal.
    # hard_assert(misere, initial_piles, winner)
    soft_assert(misere, initial_piles, winner, wins)

for k, v in wins.items():
    print(f"{k}: {v}")

100%|██████████| 10000/10000 [00:05<00:00, 1834.86it/s]

N_>0_P2: 2405
M_>0_P1: 2465
M_>0_P2: 2548
N_>0_P1: 2432
M_=0_P1: 41
N_=0_P2: 38
M_=0_P2: 35
N_=0_P1: 36



