In [None]:
# Let the games begin

import copy

from TicTacToe import TicTacToe
from Agent import RandomAgent, HumanAgent
from QAgent import QLearningAgent, QPlayingAgent
from DeepQAgent import DeepQLearningAgent, DeepQPlayingAgent

# Base hyperparameter set. It is deep-copied once per player further down,
# so the per-player entries never leak back into this dict.
params = dict(
    # --- training schedule / environment ---
    nr_of_episodes=500000,  # number of episodes for training (overridden below)
    epsilon_start=0.15,     # initial exploration rate
    epsilon_min=0.005,      # minimum exploration rate
    alpha_start=0.1,        # initial learning rate
    alpha_min=0.1,          # minimum learning rate
    gamma=0.9,              # discount factor
    switching=False,        # switch between X and O
    debug=False,            # print debug messages
    width=3,                # width = height of the board
    evaluation=True,        # save data for evaluation

    # --- parameters for QAgent ---
    lazy_evaluation=True,   # use lazy evaluation
    Q_initial_value=0.0,    # initial Q value

    # --- parameters for DeepQAgent ---
    batch_size=32,                  # batch size for deep learning
    double_q_learning=True,         # flag to switch on double Q-learning
    # target_update_frequency=500,  # alternative target network update frequency
    target_update_frequency=50,     # target network update frequency
)

# Episode count actually used for this run; it replaces the default stored
# in the dict above. Uncomment the alternative for a longer run.
# nr_of_episodes = 1500
nr_of_episodes = 250
params.update(nr_of_episodes=nr_of_episodes)

# Independent per-player configurations: each one is a deep copy of the base
# set, extended with the player's mark and the file name of its Q_optimal pickle.
paramsX = copy.deepcopy(params)
paramsX.update(player='X', Q_optimal='TotallySymmetricQ_optimalX.pkl')

paramsO = copy.deepcopy(params)
paramsO.update(player='O', Q_optimal='TotallySymmetricQ_optimalO.pkl')

# Result tally for the training run, keyed by the value game.play() returns
# (presumably 'X'/'O' for a win and 'D' for a draw - confirm against TicTacToe).
outcomes = dict.fromkeys(('X', 'O', 'D'), 0)

# Single-agent setup: a deep-Q learner configured as X plays a random O opponent.
# The commented lines switch to a two-learner setup instead.
learning_agent1 = DeepQLearningAgent(paramsX)
# learning_agent2 = DeepQLearningAgent(paramsO)
random_agent1 = RandomAgent(player='O', switching=False)

game = TicTacToe(learning_agent1, random_agent1, display=False)
# game = TicTacToe(learning_agent1, learning_agent2, display=False)

# Play the configured number of episodes and count each result.
for episode in range(nr_of_episodes):
    outcome = game.play()
    outcomes[outcome] = outcomes[outcome] + 1

# Report the results as fractions of all episodes played.
print("Outcomes during learning:")
print(f"X wins: {outcomes['X']/nr_of_episodes}, O wins: {outcomes['O']/nr_of_episodes}, draws: {outcomes['D']/nr_of_episodes}")

In [None]:
# Hand the Q-model of the trained agent to a DeepQPlayingAgent that plays X,
# and pit it against a fresh random O opponent.
Qmodel = learning_agent1.Qmodel
playing_agent1 = DeepQPlayingAgent(Qmodel, player='X', switching=False)
random_agent1 = RandomAgent(player='O', switching=False)
game = TicTacToe(playing_agent1, random_agent1, display=False)

# Number of games for this run; note that this rebinds the notebook-wide
# nr_of_episodes used by the training cell.
nr_of_episodes = 250

# Fresh tally so the training results are not mixed into these numbers.
outcomes = dict.fromkeys(('X', 'O', 'D'), 0)
for episode in range(nr_of_episodes):
    outcome = game.play()
    outcomes[outcome] = outcomes[outcome] + 1

# Report the results as fractions of all games played.
print("Outcomes during playing:")
print(f"X wins: {outcomes['X']/nr_of_episodes}, O wins: {outcomes['O']/nr_of_episodes}, draws: {outcomes['D']/nr_of_episodes}")

In [None]:
from Evaluation import plot_graphs

# Pull the three series recorded by the first learning agent and plot them.
evaluation_data = learning_agent1.evaluation_data
lossX, avg_action_valueX, rewards = (
    evaluation_data[series_key]
    for series_key in ('loss', 'avg_action_value', 'rewards')
)

# Print the series lengths before plotting.
print(f"Number of losses: {len(lossX)}")
print(f"Number of action values: {len(avg_action_valueX)}")
print(f"Number of rewards: {len(rewards)}")

plot_graphs(lossX, avg_action_valueX, rewards)

In [None]:
from Evaluation import plot_graphs

# Plot the recorded series of the second learning agent.
#
# `learning_agent2` only exists when the two-learner setup in the training
# cell is enabled; its definition is commented out by default. Without this
# guard the cell raises NameError on a fresh kernel and breaks
# "Restart Kernel -> Run All", so skip the plots when the agent is absent.
if 'learning_agent2' in globals():
    evaluation_data = learning_agent2.evaluation_data
    # These names are deliberately unchanged: they rebind the variables set by
    # the plotting cell for learning_agent1, exactly as the original cell did.
    lossX = evaluation_data['loss']
    avg_action_valueX = evaluation_data['avg_action_value']
    rewards = evaluation_data['rewards']
    print(f"Number of losses: {len(lossX)}")
    print(f"Number of action values: {len(avg_action_valueX)}")
    print(f"Number of rewards: {len(rewards)}")
    plot_graphs(lossX, avg_action_valueX, rewards)
else:
    print("learning_agent2 is not defined - skipping its evaluation plots.")