In [7]:
from tqdm import tqdm

import numpy as np

In [8]:
from nim import NimGame
from nim import NimLogic

from agents import AlgorithmicAgent

from agents import QLearningAgent

In [9]:
# Seed NumPy's legacy global RNG so the positions and rule sets drawn later
# through np.random.* are repeatable on a fresh kernel.
np.random.seed(42)

# Board shape shared by agent construction and the test runs.
PILE_COUNT = 4
MAX_PILE = 7

# Number of games played per agent in each test run.
EPISODES = 1_000_000

In [10]:
def test_agent(_misereAgent, _normalAgent, _misere, _initial_piles, _perfect=False):
    """Play one game per sampled position and assert every outcome.

    For game ``i`` the agent under test is passed to ``NimGame.play`` as
    ``player1``: ``_misereAgent`` when ``_misere[i]`` is true, otherwise
    ``_normalAgent``. The opponent (``player2``) is the very same agent object
    when ``_perfect`` is true, else a new ``AlgorithmicAgent`` built for that
    game's rule set.

    Args:
        _misereAgent: Agent used for games where ``_misere[i]`` is true.
        _normalAgent: Agent used for games where ``_misere[i]`` is false.
        _misere: Indexable sequence of per-game rule flags.
        _initial_piles: Indexable sequence of starting positions; its length
            determines how many games are played.
        _perfect: If true, the agent under test plays both sides.

    Raises:
        AssertionError: With message "Bad agent!" on the first game whose
            winner differs from ``NimLogic.is_p_position`` for that start.
        IndexError: If ``_misere`` is shorter than ``_initial_piles``.
    """
    # Size the loop from the data actually supplied rather than the global
    # EPISODES, so a caller that sampled a different number of games neither
    # overruns the arrays nor silently tests only a prefix of them.
    game_count = len(_initial_piles)

    for i in tqdm(range(game_count)):
        piles = _initial_piles[i]
        misere = _misere[i]

        game = NimGame(
            initial_piles=piles,
            misere=misere
        )

        agent1 = _misereAgent if misere else _normalAgent
        agent2 = agent1 if _perfect else AlgorithmicAgent(misere=misere)

        winner = game.play(
            player1=agent1,
            player2=agent2,
            verbose=False
        )

        # Agent validation: the result must agree with the theoretical
        # classification of the starting position.
        # NOTE(review): presumably `winner` is a 0-based player index, so a
        # P-position (True == 1) means player2 won -- confirm in NimGame.play.
        assert winner == NimLogic.is_p_position(piles, misere), "Bad agent!"

In [11]:
def run_tests(misere_agents, normal_agents, pile_count, max_pile, episodes, perfect=False):
    """Sample random games once and run every agent pair through them.

    Prints a configuration banner, draws ``episodes`` starting positions and
    rule flags from NumPy's global RNG, then hands each misère/normal agent
    pair (matched by dictionary key) to ``test_agent`` with the same samples.

    Args:
        misere_agents: Mapping of variant key to the misère-rule agent.
        normal_agents: Mapping of variant key to the normal-rule agent; must
            contain every key present in ``misere_agents``.
        pile_count: Number of piles in each sampled position.
        max_pile: Largest pile size that may be sampled (inclusive).
        episodes: Number of positions / rule flags to sample.
        perfect: Forwarded to ``test_agent`` as its ``_perfect`` argument.

    Raises:
        KeyError: If a key of ``misere_agents`` is missing from
            ``normal_agents``.
    """
    banner = "-" * 60
    print(banner)
    print(f"Configuration: pile_count: {pile_count}, max_pile: {max_pile}")
    print(banner)

    # np.random.randint excludes its upper bound; without the +1 no pile of
    # size `max_pile` is ever generated, leaving the largest positions the
    # agents were built for completely untested.
    initial_piles = np.random.randint(1, max_pile + 1, size=(episodes, pile_count))
    misere_modes = np.random.choice([False, True], size=episodes)

    for agent_key, misere_agent in misere_agents.items():
        normal_agent = normal_agents[agent_key]

        print(f"Testing {misere_agent}")

        test_agent(misere_agent, normal_agent, misere_modes, initial_piles, perfect)

In [12]:
def _make_agents(misere):
    """Construct the three Q-learning variants for one rule set.

    Keys: 'n' plain agent, 'c' ``canonical=True``, 'r' ``reduced=True``.
    The agents are created in that order; judging by the cell output, each
    constructor call trains the agent and saves it to disk.
    """
    shared = dict(misere=misere, pile_count=PILE_COUNT, max_pile=MAX_PILE)
    return {
        'n': QLearningAgent(num_episodes=100000, **shared),
        'c': QLearningAgent(num_episodes=10000, canonical=True, **shared),
        'r': QLearningAgent(num_episodes=10000, reduced=True, **shared),
    }


# Misère agents are built first, then the normal-play ones.
misereAgents = _make_agents(misere=True)

normalAgents = _make_agents(misere=False)

100%|██████████| 100000/100000 [00:08<00:00, 12398.47it/s]


Saved agent to ../savedAgents/QLearning/qlearning-4-7-misere-100000.json
QLearning Agent ready. Q-table size: 53374


100%|██████████| 10000/10000 [00:00<00:00, 10958.69it/s]


Saved agent to ../savedAgents/QLearning/qlearning-4-7-misere-canonical-10000.json
Canonical QLearning Agent ready. Q-table size: 4420


100%|██████████| 10000/10000 [00:00<00:00, 10978.99it/s]


Saved agent to ../savedAgents/QLearning/qlearning-4-7-misere-reduced-10000.json
Reduced QLearning Agent ready. Q-table size: 4435


100%|██████████| 100000/100000 [00:08<00:00, 12324.43it/s]


Saved agent to ../savedAgents/QLearning/qlearning-4-7-normal-100000.json
QLearning Agent ready. Q-table size: 53402


100%|██████████| 10000/10000 [00:00<00:00, 10855.62it/s]


Saved agent to ../savedAgents/QLearning/qlearning-4-7-normal-canonical-10000.json
Canonical QLearning Agent ready. Q-table size: 4450


100%|██████████| 10000/10000 [00:00<00:00, 10875.70it/s]

Saved agent to ../savedAgents/QLearning/qlearning-4-7-normal-reduced-10000.json
Reduced QLearning Agent ready. Q-table size: 4442





In [13]:
# Default run (perfect=False): each agent faces a fresh AlgorithmicAgent.
run_tests(
    misere_agents=misereAgents,
    normal_agents=normalAgents,
    pile_count=PILE_COUNT,
    max_pile=MAX_PILE,
    episodes=EPISODES,
)

------------------------------------------------------------
Configuration: pile_count: 4, max_pile: 7
------------------------------------------------------------
Testing QLearning Agent


100%|██████████| 1000000/1000000 [00:45<00:00, 21984.58it/s]


Testing Canonical QLearning Agent


100%|██████████| 1000000/1000000 [00:37<00:00, 27001.46it/s]


Testing Reduced QLearning Agent


100%|██████████| 1000000/1000000 [00:36<00:00, 27062.93it/s]


In [14]:
# Self-play run (perfect=True): each agent is used for both players.
run_tests(
    misere_agents=misereAgents,
    normal_agents=normalAgents,
    pile_count=PILE_COUNT,
    max_pile=MAX_PILE,
    episodes=EPISODES,
    perfect=True,
)

------------------------------------------------------------
Configuration: pile_count: 4, max_pile: 7
------------------------------------------------------------
Testing QLearning Agent


100%|██████████| 1000000/1000000 [00:58<00:00, 17236.36it/s]


Testing Canonical QLearning Agent


100%|██████████| 1000000/1000000 [00:47<00:00, 20844.04it/s]


Testing Reduced QLearning Agent


100%|██████████| 1000000/1000000 [00:46<00:00, 21358.94it/s]
