In [None]:
import os
from tqdm import tqdm

import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

In [None]:
from Nim.Nim import Nim
from Nim.NimLogic import NimLogic

from Agents.QLearningAgent import QLearningAgent

In [None]:
# Board dimensions shared by agent construction and test-position sampling.
PILE_COUNT = 4
MAX_PILE = 7

# Number of self-play validation games requested from run_tests.
EPISODES = 1_000_000

# Seed NumPy's global RNG so the sampled positions and rule variants repeat across runs.
np.random.seed(42)

In [None]:
def test_agent(_misereAgent, _normalAgent, _misere, _initial_piles):
    """Self-play one game per sampled position and assert every outcome.

    For each episode a ``Nim`` game is built from the sampled piles and rule
    variant, the agent matching that variant plays both sides, and the value
    returned by ``game.play`` is compared with
    ``NimLogic.is_p_position(piles, misere)`` for the starting position.

    The number of games is taken from the inputs themselves rather than from
    the global ``EPISODES`` constant, so the function works for any batch size
    the caller generates.

    Args:
        _misereAgent: Agent used for episodes whose misere flag is truthy.
        _normalAgent: Agent used for all other episodes.
        _misere: Per-episode misere flags (one per game).
        _initial_piles: Per-episode starting piles (one entry per game).

    Raises:
        ValueError: If ``_misere`` and ``_initial_piles`` differ in length.
        AssertionError: If a game's winner does not equal the value returned
            by ``NimLogic.is_p_position`` for its starting position.
    """
    episode_count = len(_initial_piles)
    if len(_misere) != episode_count:
        raise ValueError(
            f"Got {episode_count} initial positions but {len(_misere)} misere flags"
        )

    # `total` is passed explicitly because zip() has no length for tqdm to read.
    for piles, misere in tqdm(zip(_initial_piles, _misere), total=episode_count):
        game = Nim(
            initial_piles=piles,
            misere=misere
        )

        # The same agent plays both seats; which one depends on the rule variant.
        agent = _misereAgent if misere else _normalAgent

        winner = game.play(
            player1=agent,
            player2=agent,
            verbose=False
        )

        # Agent validation.
        # NOTE(review): this relies on the value returned by game.play comparing
        # equal to is_p_position's result -- confirm the encoding against Nim.play.
        assert winner == NimLogic.is_p_position(piles, misere), "Bad agent!"

In [None]:
def run_tests(misere_agents, normal_agents, pile_count, max_pile, episodes):
    """Sample random starting positions and rule variants, then validate each agent pair.

    One shared batch of positions and misere flags is drawn from NumPy's
    global RNG and reused for every agent key, so all variants are checked
    against the same games.

    Args:
        misere_agents: Mapping from agent key to the agent used for misere games.
        normal_agents: Mapping with the same keys, holding the agents used
            for non-misere games.
        pile_count: Number of piles in every sampled position.
        max_pile: Upper bound handed to ``np.random.randint`` for pile sizes.
        episodes: Number of positions (and misere flags) to sample.

    Raises:
        KeyError: If a key of ``misere_agents`` is missing from ``normal_agents``.
    """
    separator = "-" * 60
    print(separator)

    # NOTE(review): np.random.randint excludes its upper bound, so pile sizes
    # are drawn from 1 .. max_pile - 1 and a pile of exactly `max_pile` is never
    # tested -- confirm whether `max_pile` is meant to be inclusive.
    initial_piles = np.random.randint(1, max_pile, size=(episodes, pile_count))

    print(f"Configuration: pile_count: {pile_count}, max_pile: {max_pile}")
    print(separator)

    # Drawn after the positions; the order of RNG calls determines the sampled values.
    misere_modes = np.random.choice([True, False], size=episodes)

    for agent_key, misere_agent in misere_agents.items():
        normal_agent = normal_agents[agent_key]
        test_agent(misere_agent, normal_agent, misere_modes, initial_piles)

In [None]:
# Constructor options that differ between agent variants, keyed by the short
# label used in both dictionaries below: 'n' sets neither flag, 'c' sets
# canonical=True, 'r' sets reduced=True.
# NOTE(review): num_episodes is presumably the training budget -- confirm in QLearningAgent.
_AGENT_OPTIONS = {
    'n': dict(num_episodes=100000),
    'c': dict(num_episodes=10000, canonical=True),
    'r': dict(num_episodes=1000, reduced=True),
}

# All misere agents are constructed before any normal agent, in 'n', 'c', 'r' order.
misereAgents = {
    label: QLearningAgent(misere=True, pile_count=PILE_COUNT, max_pile=MAX_PILE, **options)
    for label, options in _AGENT_OPTIONS.items()
}

normalAgents = {
    label: QLearningAgent(misere=False, pile_count=PILE_COUNT, max_pile=MAX_PILE, **options)
    for label, options in _AGENT_OPTIONS.items()
}

In [None]:
# Validate every agent variant on one shared batch of random positions.
run_tests(
    misere_agents=misereAgents,
    normal_agents=normalAgents,
    pile_count=PILE_COUNT,
    max_pile=MAX_PILE,
    episodes=EPISODES,
)