In [1]:
from tqdm import tqdm

import numpy as np

In [2]:
from nim.Nim import Nim
from nim.NimLogic import NimLogic

from agents.AlgorithmicAgent import AlgorithmicAgent

from agents.QLearningAgent import QLearningAgent

In [3]:
# Board dimensions shared by every agent and every evaluation run below.
PILE_COUNT = 4
MAX_PILE = 7

# Number of games played per agent pair in each evaluation run.
EPISODES = 1_000_000

# Seed NumPy's global RNG, which run_tests draws the random games from.
np.random.seed(42)

In [11]:
def test_agent(_misereAgent, _normalAgent, _misere, _initial_piles, _perfect=False):
    for i in tqdm(range(EPISODES)):
        game = Nim(
            initial_piles=_initial_piles[i],
            misere=_misere[i]
        )

        agent1 = _misereAgent if _misere[i] else _normalAgent
        agent2 = agent1 if _perfect else AlgorithmicAgent(misere=_misere[i])

        winner = game.play(
            player1=agent1,
            player2=agent2,
            verbose=False
        )

        """ AGENT VALIDATION """
        assert winner == NimLogic.is_p_position(_initial_piles[i], _misere[i]), "Bad agent!"

In [12]:
def run_tests(misere_agents, normal_agents, pile_count, max_pile, episodes, perfect=False):
    """Generate random Nim games and run `test_agent` on every agent pair.

    One shared set of random starting positions and rule sets is drawn from
    NumPy's global RNG, then each misère agent is paired with the normal-play
    agent stored under the same key and both are validated on those games.

    Args:
        misere_agents: Mapping of key -> agent used for misère games.
        normal_agents: Mapping of key -> agent used for normal-play games.
            Must contain every key present in `misere_agents`.
        pile_count: Number of piles in each generated game.
        max_pile: Largest pile size that may be generated (inclusive).
        episodes: Number of games to generate.
        perfect: Forwarded to `test_agent` as its `_perfect` argument.

    Raises:
        KeyError: If a key of `misere_agents` is missing from `normal_agents`.
    """
    print("-" * 60)
    print(f"Configuration: pile_count: {pile_count}, max_pile: {max_pile}")
    print("-" * 60)

    # randint excludes its upper bound, so pass max_pile + 1; otherwise piles
    # of exactly max_pile stones would never be part of the test set.
    initial_piles = np.random.randint(1, max_pile + 1, size=(episodes, pile_count))
    misere_modes = np.random.choice([False, True], size=episodes)

    for agent_key, misere_agent in misere_agents.items():
        normal_agent = normal_agents[agent_key]

        print(f"Testing {misere_agent}")

        test_agent(misere_agent, normal_agent, misere_modes, initial_piles, perfect)

In [9]:
# Two parallel agent tables that share their keys so run_tests can pair the
# misère and normal-play agent of each variant:
#   'n' - default agent, 100000 episodes
#   'c' - canonical=True, 10000 episodes
#   'r' - reduced=True, 10000 episodes
misereAgents = {
    key: QLearningAgent(misere=True, pile_count=PILE_COUNT, max_pile=MAX_PILE, **options)
    for key, options in (
        ('n', {'num_episodes': 100000}),
        ('c', {'num_episodes': 10000, 'canonical': True}),
        ('r', {'num_episodes': 10000, 'reduced': True}),
    )
}

normalAgents = {
    key: QLearningAgent(misere=False, pile_count=PILE_COUNT, max_pile=MAX_PILE, **options)
    for key, options in (
        ('n', {'num_episodes': 100000}),
        ('c', {'num_episodes': 10000, 'canonical': True}),
        ('r', {'num_episodes': 10000, 'reduced': True}),
    )
}

Loaded agent from ../savedAgents/QLearning/qlearning-4-7-misere-100000.json
QLearning Agent ready. Q-table size: 53374
Loaded agent from ../savedAgents/QLearning/qlearning-4-7-misere-canonical-10000.json
Canonical QLearning Agent ready. Q-table size: 4420
Loaded agent from ../savedAgents/QLearning/qlearning-4-7-misere-reduced-10000.json
Reduced QLearning Agent ready. Q-table size: 4438
Loaded agent from ../savedAgents/QLearning/qlearning-4-7-normal-100000.json
QLearning Agent ready. Q-table size: 53505
Loaded agent from ../savedAgents/QLearning/qlearning-4-7-normal-canonical-10000.json
Canonical QLearning Agent ready. Q-table size: 4446
Loaded agent from ../savedAgents/QLearning/qlearning-4-7-normal-reduced-10000.json
Reduced QLearning Agent ready. Q-table size: 4446


In [10]:
# Evaluate every agent pair; `perfect` keeps its default of False, so
# player 2 in each game is an AlgorithmicAgent.
run_tests(
    misereAgents,
    normalAgents,
    pile_count=PILE_COUNT,
    max_pile=MAX_PILE,
    episodes=EPISODES,
)

------------------------------------------------------------
Configuration: pile_count: 4, max_pile: 7
------------------------------------------------------------
Testing QLearning Agent


100%|██████████| 1000000/1000000 [00:57<00:00, 17386.49it/s]


Testing Canonical QLearning Agent


100%|██████████| 1000000/1000000 [00:46<00:00, 21442.68it/s]


Testing Reduced QLearning Agent


100%|██████████| 1000000/1000000 [00:49<00:00, 20240.55it/s]


In [13]:
# Repeat the evaluation with perfect=True, which makes each agent play
# both sides of every game.
run_tests(
    misereAgents,
    normalAgents,
    pile_count=PILE_COUNT,
    max_pile=MAX_PILE,
    episodes=EPISODES,
    perfect=True,
)

------------------------------------------------------------
Configuration: pile_count: 4, max_pile: 7
------------------------------------------------------------
Testing QLearning Agent


100%|██████████| 1000000/1000000 [00:55<00:00, 17900.01it/s]


Testing Canonical QLearning Agent


100%|██████████| 1000000/1000000 [00:46<00:00, 21673.58it/s]


Testing Reduced QLearning Agent


100%|██████████| 1000000/1000000 [00:49<00:00, 20406.88it/s]
