# Imports

In [1]:
import numpy as np

from tqdm import tqdm

In [2]:
from nim.Nim import Nim

from agents.Minimax.MinimaxAgentV1 import MinimaxAgentV1
from agents.Minimax.MinimaxAgentV2 import MinimaxAgentV2

from agents.QLearning.QLearningAgentV1 import QLearningAgentV1

# ParametrizedAgent class (helper)

In [3]:
class ParametrizedAgent:
    """Factory wrapper that instantiates an agent class with a filtered set of keyword arguments.

    The wrapper remembers an agent class together with the keyword names that
    should be forwarded to it. Calling the wrapper with arbitrary keyword
    arguments builds a new agent, passing along only the remembered names and
    silently dropping every other keyword.

    Attributes:
        agent_class: The callable used to construct agents.
        param_names: Tuple of keyword names forwarded to ``agent_class``.
    """

    def __init__(self, agent_class, *param_names):
        self.agent_class, self.param_names = agent_class, param_names

    def __call__(self, **kwargs):
        """Build a new agent from the subset of ``kwargs`` listed in ``param_names``."""
        forwarded = {}
        for name in kwargs:
            # Keywords not registered at construction time are ignored on purpose,
            # so one call site can serve agent classes with different signatures.
            if name in self.param_names:
                forwarded[name] = kwargs[name]
        return self.agent_class(**forwarded)

# Agent under test: keep exactly one AGENT assignment active.
# The names after the class are the keyword arguments forwarded to it.
# AGENT = ParametrizedAgent(MinimaxAgentV1, "misere", "max_depth")
# AGENT = ParametrizedAgent(MinimaxAgentV2, "misere", "max_depth")
AGENT = ParametrizedAgent(
    QLearningAgentV1,
    "misere",
    "initial_piles",
    "alpha",
    "epsilon",
    "gamma",
    "decay_rate",
)

# Constants

In [4]:
# Minimax agents: value passed as the `max_depth` keyword when agents are built.
MAX_DEPTH = 1

# Q-learning agents: hyperparameters named after the keywords registered for
# QLearningAgentV1 ("alpha", "epsilon", "gamma", "decay_rate").
# NOTE(review): their exact meaning is defined by QLearningAgentV1, which is
# not shown in this notebook — confirm against that class.
ALPHA = 0.5
EPSILON = 0.1
EPSILLON = EPSILON  # original (misspelled) name, kept as an alias for existing references
GAMMA = 0.9
DECAY_RATE = 0.9999

# Assert function (helper)

In [5]:
def hard_assert(_misere, _initial_piles, _winner):
    """Assert that a finished game's winner matches the value expected from its start position.

    Two situations are checked:

    * Misère game in which every pile holds at most one object: the expected
      winner value is the parity of the total number of objects.
    * Normal game: the expected winner value is 1 when the nim-sum (bitwise
      XOR of all pile sizes) is non-zero, and 0 otherwise.

    Misère games with at least one pile larger than 1 are not checked.

    Args:
        _misere: Truthy for a misère game, falsy for a normal game.
        _initial_piles: Sequence of integer pile sizes at the start of the game.
        _winner: Winner value to validate (compared against 0 or 1).

    Raises:
        AssertionError: If ``_winner`` differs from the expected value.
    """
    # NOTE(review): for a start position made only of single-object piles with
    # an odd count, both checked branches expect winner == 1, although the
    # forced outcome of such a position differs between misère and normal
    # play. Confirm the winner convention returned by Nim.play before relying
    # on the normal-play check.
    piles = np.array(_initial_piles)

    if _misere:
        if np.all(piles <= 1):
            assert _winner == np.sum(piles) % 2, "Misere Nim - Corner Case"
        return

    # Only normal play reaches this point. The failure message must not depend
    # on notebook globals: it is evaluated only when the check fails, and an
    # undefined global would turn the failure into a NameError.
    nim_sum = np.bitwise_xor.reduce(piles)
    assert _winner == int(nim_sum != 0), "Normal Nim - All Cases"

# Game Setup

In [12]:
# Rules and start position shared by both agents and the game.
misere = True
initial_piles = [1, 0, 3, 1]

# Offer every tunable from the Constants cell to the factory. ParametrizedAgent
# forwards only the keywords registered for the selected agent class, so the
# same call works for the Minimax and the Q-learning agents, and editing a
# constant actually affects the agents that accept it.
agent_kwargs = dict(
    misere=misere,
    initial_piles=initial_piles,
    max_depth=MAX_DEPTH,
    alpha=ALPHA,
    epsilon=EPSILLON,
    gamma=GAMMA,
    decay_rate=DECAY_RATE,
)

player1 = AGENT(**agent_kwargs)
player2 = AGENT(**agent_kwargs)

game = Nim(
    initial_piles=initial_piles,
    misere=misere
)

Q-values loaded from savedAgents/qlearning-1-0-3-1-True.json
Q-values loaded from savedAgents/qlearning-1-0-3-1-True.json


## One game test (verbose)

In [13]:
# Play a single game with verbose logging, then validate its winner.
winner = game.play(player1=player1, player2=player2, verbose=True)
hard_assert(misere, initial_piles, winner)

Misere game
Piles: [1, 0, 3, 1]
Player 1 (QLearningAgent agent) takes 1 from pile 2
Piles: [1, 0, 2, 1]
Player 2 (MinimaxV2 agent) takes 1 from pile 2
Piles: [1, 0, 1, 1]
Player 1 (QLearningAgent agent) takes 1 from pile 3
Piles: [1, 0, 1, 0]
Player 2 (MinimaxV2 agent) takes 1 from pile 0
Piles: [0, 0, 1, 0]
Player 1 (QLearningAgent agent) takes 1 from pile 2
Player 2 (MinimaxV2 agent) wins!


## 10k game test

In [14]:
# Replay the configured matchup 10,000 times, validating every result and
# tallying wins by the winner value returned from game.play.
wins = [0, 0]

for _ in range(10000):
    winner = game.play(
        player1=player1,
        player2=player2,
        verbose=False
    )

    hard_assert(misere, initial_piles, winner)
    wins[winner] += 1

# Display the tally as the cell result; previously it was computed but never shown.
wins

## 10k random game test (random piles, Misère or normal play)

In [10]:
# Play 10,000 games, each with a freshly drawn rule set and start position,
# validating every result and tallying wins by the returned winner value.
# NOTE(review): agents are rebuilt on every iteration. The recorded output
# shows a Q-learning agent starting a 50000-episode training run on the first
# iteration, so this loop may be impractically slow for that agent type.
wins = [0, 0]

for _ in tqdm(range(10000)):
    # Convert NumPy scalars to native Python types so agents and the game
    # receive a plain bool and a plain list of ints.
    misere = bool(np.random.choice([True, False]))
    initial_piles = np.random.randint(1, 255, size=8).tolist()

    # ParametrizedAgent forwards only the keywords registered for the selected
    # agent class, so all tunables from the Constants cell can be offered.
    agent_kwargs = dict(
        misere=misere,
        initial_piles=initial_piles,
        max_depth=MAX_DEPTH,
        alpha=ALPHA,
        epsilon=EPSILLON,
        gamma=GAMMA,
        decay_rate=DECAY_RATE,
    )

    player1 = AGENT(**agent_kwargs)
    player2 = AGENT(**agent_kwargs)

    game = Nim(
        initial_piles=initial_piles,
        misere=misere
    )

    winner = game.play(
        player1=player1,
        player2=player2,
        verbose=False
    )

    hard_assert(misere, initial_piles, winner)
    wins[winner] += 1

# Display the tally as the cell result.
wins

  0%|          | 0/10000 [00:00<?, ?it/s]

Training Q-Learning agent for 50000 episodes...
Episode 1000/50000 - Epsilon: 0.090483
Episode 2000/50000 - Epsilon: 0.081872
Episode 3000/50000 - Epsilon: 0.074081
Episode 4000/50000 - Epsilon: 0.067031
Episode 5000/50000 - Epsilon: 0.060652


  0%|          | 0/10000 [00:27<?, ?it/s]


KeyboardInterrupt: 