In [6]:
# Let the games begin

from Matrix import Matrix, QMatrix
from SymmetricMatrix import SymmetricMatrix, QSymmetricMatrix
from playGame import play_game

# Hyper-parameters and bookkeeping values handed to play_game
params = {
    # Value every Q entry starts with
    'Q_initial_value': 1.0,

    # Learning rate: starting value and lower bound
    'alpha_start': 0.1,
    'alpha_min': 0.1,

    # Discount factor
    'gamma': 0.9,

    # Exploration rate: starting value and lower bound
    'epsilon_start': 0.5,
    'epsilon_min': 0.25,

    # Pause (seconds) used when a game is displayed
    'waiting_time': 1.0,

    # Running count of episodes played during training
    'episode': 0,

    # Result tally: 'X' = X wins, 'O' = O wins, 'D' = draws
    'outcomes': {'X': 0, 'O': 0, 'D': 0},

    # Number of training episodes (reassigned further down, before the training loop)
    'nr_of_episodes': 500_000,

    # Per-player epsilon used outside training; controls how randomized the computer plays
    'eps': {'X': -1.0, 'O': -1.0},
}

# Set up the three tables that play_game receives as one tuple.
#
# Non-symmetric variant, kept for reference:
# # Q = QMatrix(file='Q.pkl')
# Q = QMatrix(default_value=params['Q_initial_value'])
# Visits = Matrix(default_value=0)
# Rewards = Matrix(default_value=0)

# Q-table loaded from a previously saved file.
# NOTE(review): the file appears to be a dill/pickle dump (see the commented-out
# save cell) - only load files from a trusted source.
Q = QSymmetricMatrix(file='SymmetricQ.pkl')
# Fresh start instead of loading:
# Q = QSymmetricMatrix(default_value=params['Q_initial_value'])

# Visit and reward tables both start from their default value
Visits, Rewards = SymmetricMatrix(default_value=0), SymmetricMatrix(default_value=0.0)

matrices = Q, Visits, Rewards

# Training run: play a fixed number of games with training switched on
nr_of_episodes = 5000
params.update(nr_of_episodes=nr_of_episodes, episode=0)

# No board output and no human player while training
flags = dict(training=True, display=False, interactive=False)

for _ in range(nr_of_episodes):
    play_game(matrices, params, flags=flags)
    params['episode'] += 1  # advance the episode counter after every game

# Report each outcome as a fraction of the episodes played
print("Outcomes:")
print(
    "X wins: {}, O wins: {}, draws: {}".format(
        *(params['outcomes'][key] / nr_of_episodes for key in ('X', 'O', 'D'))
    )
)

Outcomes:
X wins: 0.4542, O wins: 0.41, draws: 0.1358


In [7]:
# Evaluation run: same number of games, but with training switched off
nr_of_episodes = 5000
params.update(
    nr_of_episodes=nr_of_episodes,
    # Per-player epsilon used outside training.
    # NOTE(review): presumably a negative value disables random moves - confirm in play_game.
    eps={'X': -0.1, 'O': -0.1},
    # Start the tally from zero so the training results are not counted again
    outcomes={'X': 0, 'O': 0, 'D': 0},
)

flags = dict(training=False, display=False, interactive=False)

for _ in range(nr_of_episodes):
    play_game(matrices, params, flags=flags)

# Report each outcome as a fraction of the games played
print("Outcomes with players choosing actions based on Q-values:")
print(
    "X wins: {}, O wins: {}, draws: {}".format(
        *(params['outcomes'][key] / nr_of_episodes for key in ('X', 'O', 'D'))
    )
)

Outcomes with players choosing actions based on Q-values:
X wins: 0.1418, O wins: 0.6998, draws: 0.1584


Outcomes with players choosing action based on Q-values:
X wins: 0.138, O wins: 0.6906, draws: 0.1714

In [8]:
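# Optional: save the current Q-table to the file that the setup cell loads.
# Uncomment to overwrite 'SymmetricQ.pkl'.
# import dill

# with open('SymmetricQ.pkl', 'wb') as f:
#     dill.dump(Q.get(), f)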

In [9]:
# Play one game without training and with the board displayed
flags = dict(training=False, display=True, interactive=False)
play_game(matrices, params, flags=flags)



 O  |  X  |  X 
----+-----+----
 O  |     |  O 
----+-----+----
 O  |  X  |  X 


Player O wins!



In [10]:
# Flags for an interactive, displayed game without training
flags = dict(training=False, display=True, interactive=True)
# Uncomment to start the interactive game:
# play_game(matrices, params, flags=flags)

In [None]:
import matplotlib.pyplot as plt
import numpy as np  # required for the summary statistics below

# Summary statistics and a histogram of all values stored in the Q-table.
# Q.get() is treated as a nested mapping: state -> {action -> Q-value}.
q_matrix = Q.get()
print(f"Total unique states encountered: {len(q_matrix)}")

# Flatten the nested dictionary into one list of Q-values
all_q_values = [q for actions in q_matrix.values() for q in actions.values()]
print(f"Total number of elements in Q: {len(all_q_values)}")

mean_q = np.mean(all_q_values)
median_q = np.median(all_q_values)
std_q = np.std(all_q_values)
min_q = np.min(all_q_values)
max_q = np.max(all_q_values)

print("Q-value Statistics:")
print(f"Mean: {mean_q}")
print(f"Median: {median_q}")
print(f"Standard Deviation: {std_q}")
print(f"Minimum: {min_q}")
print(f"Maximum: {max_q}")

# Distribution of the Q-values
plt.figure(figsize=(10, 6))
plt.hist(all_q_values, bins=20, edgecolor='black', alpha=0.7)
plt.title("Histogram of Q-values")
plt.xlabel("Q-value")
plt.ylabel("Frequency")
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

# with open('Q_optimal.pkl', 'wb') as f:
#     dill.dump(Q, f)

Number of canonical boards: 1520
Number of canonical state-action pairs: 4808

In [None]:
import numpy as np  # required for the summary statistics below

# Summary statistics and a histogram of the visit counts.
# Visits.get() is treated as a nested mapping: state -> {action -> visit count},
# so every collected value belongs to one state-action pair.
visits = Visits.get()
all_v = [v for actions in visits.values() for v in actions.values()]

print("Statistics of visited state-action pairs:")
print(f"Number of state-action pairs visited: {len(all_v)}")

mean_v = np.mean(all_v)
median_v = np.median(all_v)
std_v = np.std(all_v)
min_v = np.min(all_v)
max_v = np.max(all_v)

print(f"Mean: {mean_v}")
print(f"Median: {median_v}")
print(f"Standard Deviation: {std_v}")
print(f"Minimum: {min_v}")
print(f"Maximum: {max_v}")

# The bars count state-action pairs (not states), matching the printout above
plt.figure(figsize=(10, 6))
plt.hist(all_v, bins=20, edgecolor='black', alpha=0.7)
plt.title("Histogram of visits per state-action pair")
plt.xlabel("Visits")
plt.ylabel("State-action pairs")
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
# Count the stored rewards, in total and per reward value.
# Rewards.get() is treated as a nested mapping whose inner values are the rewards.
rewards = Rewards.get()

# Flatten once, then filter the flat list for each reward value of interest
all_r = [reward for per_state in rewards.values() for reward in per_state.values()]
all_rX = [reward for reward in all_r if reward == 1.0]    # reported as "for X"
all_rO = [reward for reward in all_r if reward == -1.0]   # reported as "for O"
all_rD = [reward for reward in all_r if reward == 0.5]    # reported as "for D"

print("Statistics of rewards")
print(f"Number of state-action pairs with rewards: {len(all_r)}")
print(f"Number of state-action pairs with rewards for X: {len(all_rX)}")
print(f"Number of state-action pairs with rewards for O: {len(all_rO)}")
print(f"Number of state-action pairs with rewards for D: {len(all_rD)}")