# Imports

In [None]:
import os
import time
import numpy as np

from tqdm import tqdm

import matplotlib.pyplot as plt

In [None]:
from Nim.Nim import Nim
from Nim.NimLogic import NimLogic

from Agents.QLearning.QLearningAgentV1 import QLearningAgentV1

# Constants

In [None]:
# Seed NumPy's global RNG so every run samples the same games
np.random.seed(42)

# Number of evaluation games (size of the sampled start-position and rule arrays)
EPISODES = 10_000

# Game dimensions, passed to the agents as pile_count / max_pile
PILE_COUNT = 2
MAX_PILE = 4
MAX_PILES = [MAX_PILE for _ in range(PILE_COUNT)]

# Hyperparameters forwarded unchanged to both QLearningAgentV1 instances
ALPHA = 0.5
GAMMA = 0.95
EPSILON = 0.3
DECAY_RATE = 0.9999
NUM_EPISODES = 10_000_000

In [None]:
# Build the misère agent first and the normal-play agent second; the two
# share every hyperparameter and differ only in the `misere` flag.
misereAgent1, normalAgent1 = (
    QLearningAgentV1(
        misere=rule_is_misere,
        pile_count=PILE_COUNT,
        max_pile=MAX_PILE,
        alpha=ALPHA,
        gamma=GAMMA,
        epsilon=EPSILON,
        decay_rate=DECAY_RATE,
        num_episodes=NUM_EPISODES,
    )
    for rule_is_misere in (True, False)
)

In [None]:
# One row of starting pile sizes per evaluation game. np.random.randint
# excludes its upper bound, so pass MAX_PILE + 1 to make piles of exactly
# MAX_PILE reachable (the agents are built with max_pile=MAX_PILE).
initial_piles = np.random.randint(1, MAX_PILE + 1, size=(EPISODES, PILE_COUNT))
# Rule variant for each game: True selects misère, False selects normal play.
misere = np.random.choice([True, False], EPISODES)

In [None]:
# Running tally updated by test_agent; it is never reset inside the function,
# so repeated calls keep accumulating into the same total.
count = 0


def test_agent(_misereAgent, _normalAgent, _misere, _initial_piles):
    """Play one self-play game per start position and tally the matches.

    For each episode the agent matching that episode's rule variant plays
    both sides of a fresh ``Nim`` game. The module-level ``count`` is
    incremented whenever the value returned by ``game.play`` equals
    ``NimLogic.is_p_position`` for the starting position.

    Args:
        _misereAgent: Agent used for episodes flagged as misère.
        _normalAgent: Agent used for all other episodes.
        _misere: Per-episode flags; truthy selects the misère agent and rules.
        _initial_piles: Per-episode starting piles, indexed like ``_misere``.
    """
    global count

    # Size the loop from the arguments rather than the global EPISODES so the
    # function still works when handed fewer (or more) games than that.
    episode_total = min(len(_misere), len(_initial_piles))

    for episode in tqdm(range(episode_total)):
        start_piles = _initial_piles[episode]
        is_misere = _misere[episode]

        game = Nim(
            initial_piles=start_piles,
            misere=is_misere
        )

        # The same agent controls both seats (self-play).
        agent = _misereAgent if is_misere else _normalAgent

        winner = game.play(
            player1=agent,
            player2=agent,
            verbose=False
        )

        # NOTE(review): assumes game.play's return value and is_p_position's
        # result use the same encoding, so equality is meaningful — confirm.
        if winner == NimLogic.is_p_position(start_piles, is_misere):
            count += 1

In [None]:
# Evaluate both agents on the sampled games, then report how many games
# ended with a winner equal to NimLogic.is_p_position of the start position.
test_agent(
    _misereAgent=misereAgent1,
    _normalAgent=normalAgent1,
    _misere=misere,
    _initial_piles=initial_piles,
)
print(count)
