In [None]:
# Let the games begin

from TicTacToe import TicTacToe
from Agent import RandomAgent, HumanAgent, LearningAgent

# Hyper-parameters and settings; this dictionary is handed to the learning agent.
params = {
    # Q-table: value every entry starts from
    'Q_initial_value': 0.0,
    # learning rate: starting value and lower bound
    'alpha_start': 0.1,
    'alpha_min': 0.1,
    # discount factor
    'gamma': 0.9,
    # exploration rate: starting value and lower bound
    'epsilon_start': 0.5,
    'epsilon_min': 0.25,
    # pause in seconds used when the game is displayed
    'waiting_time': 1.0,
    # number of training episodes
    'nr_of_episodes': 500000,
    # rewards keyed by result: 'W' = win, 'L' = lose, 'D' = draw
    'rewards': {'W': 1.0, 'L': -1.0, 'D': 0.0},
}

# Episode count comes from the configuration dictionary so the value is
# written down in one place only (params['nr_of_episodes'] is 500000).
nr_of_episodes = params['nr_of_episodes']

# Tally of results: one counter per key that game.play() is expected to return.
outcomes = dict.fromkeys(('X', 'O', 'D'), 0)

# The learning agent plays 'O'; its opponent is a random agent playing 'X'.
agent_o = LearningAgent('O', params)
agent_x = RandomAgent('X')
game = TicTacToe(agent_x, agent_o, display=False)

# Play all episodes and count each result.
for _ in range(nr_of_episodes):
    outcome = game.play()
    outcomes[outcome] += 1

# Report the results as fractions of all episodes played.
print(
    "Outcomes:\n"
    f"X wins: {outcomes['X']/nr_of_episodes}, "
    f"O wins: {outcomes['O']/nr_of_episodes}, "
    f"draws: {outcomes['D']/nr_of_episodes}"
)

Outcomes with players choosing action based on Q-values:
X wins: 0.138, O wins: 0.6906, draws: 0.1714

In [None]:
# import dill

# with open('SymmetricQ.pkl', 'wb') as f:
#     dill.dump(Q.get(), f)

In [None]:
# Reference fractions for X wins, O wins and draws, all over the denominator 138.
# NOTE(review): 91 / 44 / 3 out of 138 look like counts of distinct terminal
# positions up to symmetry rather than win rates of random play -- confirm
# which one is intended before comparing against the training results.
random_outcomes = {
    mark: count / 138
    for mark, count in (('X', 91), ('O', 44), ('D', 3))
}
print(random_outcomes)

In [None]:
import numpy as np

from playGame import get_empty_positions

def displayQ(Q, board):
    """Print the board as a 3x3 grid with Q-values shown in the open cells.

    Every position returned by ``get_empty_positions(board)`` is replaced by
    ``Q.get(board, position)`` formatted with five decimals; all other cells
    keep the entry they have in ``board``.

    Args:
        Q: object exposing ``get(board, action)``.
        board: sequence of nine cell entries (it is reshaped to 3x3 for printing).
    """
    cells = list(board)
    for position in get_empty_positions(board):
        # Look-ups always use the untouched `board`, never the annotated copy.
        cells[position] = f"{Q.get(board, position):.5f}"

    print(f"{np.array(cells).reshape(3,3)}")

# Replay the recorded (board, action) pairs and show the Q-values for each one.
# NOTE(review): 'history' is not among the keys set where `params` is defined;
# presumably it is added at runtime while games are played -- confirm.
# NOTE(review): `Q` must already be bound when this cell runs; in the cells
# visible here it is only assigned further down -- confirm the run order.
history = params['history']
for board, action in history:
    # Raw Q-value of the move that was taken, then the grid for the whole board.
    print(f"q value for action {action} is {Q.get(board, action)}")
    displayQ(Q, board)

In [None]:
from SymmetricMatrix import TotallySymmetricMatrix, QTotallySymmetricMatrix

# Small demo: a board of nine blank cells, one stored Q entry, then the grid.
board = (' ',) * 9

# Note: this rebinds the notebook-level name `Q` to a fresh matrix whose
# default value is 0.0; the cells below read this object.
Q = QTotallySymmetricMatrix(default_value=0.0)

# Store 1.0 for action 0 on the blank board and display the result.
Q.set(board, 0, 1.0)
displayQ(Q, board)

In [None]:
# Print the Q entries for actions 0 and 2 on the board returned by
# initialize_board().
# NOTE(review): presumably a check that symmetric actions share one stored
# value (action 0 was set on the blank board above) -- confirm.
# NOTE(review): `initialize_board` is not imported in any cell shown here;
# it has to be in scope for this cell to run on a fresh kernel.
print(Q.get(initialize_board(), 0))
print(Q.get(initialize_board(), 2))

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Everything stored in Q. Called without arguments, Q.get() is used below as a
# flat mapping: only its .values() are read.
# NOTE(review): presumably one entry per stored state-action pair (an earlier
# nested state -> action layout is no longer used) -- confirm.
q_matrix = Q.get()

# What Q holds for the board returned by initialize_board().
print(Q.get(initialize_board()))

# Flatten the stored Q-values into a list for the summary below.
all_q_values = list(q_matrix.values())
print(f"Total number of elements in Q: {len(all_q_values)}")

# Summary statistics over all stored Q-values.
mean_q, median_q, std_q = (
    np.mean(all_q_values),
    np.median(all_q_values),
    np.std(all_q_values),
)
min_q, max_q = np.min(all_q_values), np.max(all_q_values)

print(
    "Q-value Statistics:\n"
    f"Mean: {mean_q}\n"
    f"Median: {median_q}\n"
    f"Standard Deviation: {std_q}\n"
    f"Minimum: {min_q}\n"
    f"Maximum: {max_q}"
)

# Distribution of the stored Q-values.
plt.figure(figsize=(10, 6))
plt.hist(all_q_values, bins=20, edgecolor='black', alpha=0.7)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.xlabel("Q-value")
plt.ylabel("Frequency")
plt.title("Histogram of Q-values")
plt.show()

Number of canonical boards: 1520
Number of canonical state-action pairs: 4808

In [None]:
# Visit counters, read as a flat mapping (only .values() is used).
# NOTE(review): `Visits` is not defined in any cell shown here; it has to be
# in scope for this cell to run -- confirm where it comes from.
visits = Visits.get()
all_v = list(visits.values())

print("Statistics of visited state-action pairs:")
print(f"Number of state-action pairs visited: {len(all_v)}")

# Summary statistics over all visit counters.
mean_v, median_v, std_v = (
    np.mean(all_v),
    np.median(all_v),
    np.std(all_v),
)
min_v, max_v = np.min(all_v), np.max(all_v)

print(
    f"Mean: {mean_v}\n"
    f"Median: {median_v}\n"
    f"Standard Deviation: {std_v}\n"
    f"Minimum: {min_v}\n"
    f"Maximum: {max_v}"
)

# Distribution of the visit counters.
plt.figure(figsize=(10, 6))
plt.hist(all_v, bins=20, edgecolor='black', alpha=0.7)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.xlabel("Visits")
plt.ylabel("States")
plt.title("Histogram of visited states")
plt.show()

In [None]:
# Stored rewards, read as a flat mapping (only .values() is used).
# NOTE(review): `Rewards` is not defined in any cell shown here; it has to be
# in scope for this cell to run -- confirm where it comes from.
rewards = Rewards.get()

# All stored rewards, then the subsets matching each hard-coded reward value.
all_r = list(rewards.values())
all_rX = [r for r in all_r if r == 1.0]
all_rO = [r for r in all_r if r == -1.0]
# NOTE(review): the draw reward configured in params['rewards'] is 0.0, not
# 0.5 -- confirm that 0.5 is still the value stored in `Rewards` for draws.
all_rD = [r for r in all_r if r == 0.5]

print(
    "Statistics of rewards\n"
    f"Number of state-action pairs with rewards: {len(all_r)}\n"
    f"Number of state-action pairs with rewards for X: {len(all_rX)}\n"
    f"Number of state-action pairs with rewards for O: {len(all_rO)}\n"
    f"Number of state-action pairs with rewards for D: {len(all_rD)}"
)