In [None]:
# Let the games begin

import copy

from TicTacToe import TicTacToe
from Agent import RandomAgent, HumanAgent
from DeepQAgent import DeepQLearningAgent, DeepQPlayingAgent

# ---------------------------------------------------------------------------
# Run-level settings. These are defined first so the hyperparameter dict
# below can be built directly with the values actually used for this run.
# ---------------------------------------------------------------------------
width = 3  # board width; the board is square, so this is also passed as height
win_length = 3  # forwarded to TicTacToe as win_length
nr_of_episodes = 5000  # number of training games played in this cell

# Hyperparameters shared by both players. Each agent receives its own deep
# copy (see below), so per-player tweaks never leak into the other agent.
params = {
    # --- general ---
    'nr_of_episodes' : nr_of_episodes,  # number of episodes for training
    'width' : width,  # width of the board, width = height

    # --- exploration / learning schedule ---
    'epsilon_start' : 0.15,  # initial exploration rate
    'epsilon_min' : 0.005,  # minimum exploration rate
    'alpha_start' : 0.1,  # initial learning rate
    'alpha_min' : 0.1,  # minimum learning rate (equal to alpha_start)
    'gamma' : 0.9,  # discount factor

    # --- game flow ---
    'switching' : False,  # switch between X and O
    'debug' : False,  # print debug messages

    # --- QAgent ---
    'lazy_evaluation' : True,  # use lazy evaluation
    'Q_initial_value' : 0.0,  # initial Q value
    'terminal_q_updates' : False,  # terminal vs. immediate Q updates

    # --- DeepQAgent ---
    'batch_size' : 32,  # batch size for deep learning
    'target_update_frequency' : 10,  # target network update frequency (250 was tried earlier)
    'evaluation' : True,  # save data for evaluation
    'double_q_learning' : False,  # flag to switch on double Q-learning
    'device' : 'cpu',  # device to use: 'cpu', 'mps' or 'cuda'
}

# Independent per-player configurations: a deep copy of the shared settings
# with the player's mark appended as the last key.
paramsX = {**copy.deepcopy(params), 'player' : 'X'}
paramsO = {**copy.deepcopy(params), 'player' : 'O'}

# Tally of game results, keyed by the value returned from game.play().
outcomes = dict.fromkeys(('X', 'O', 'D'), 0)

# X is the deep-Q learner; O plays random moves.
# (Self-play alternative: learning_agent2 = DeepQLearningAgent(paramsO).)
learning_agent1 = DeepQLearningAgent(paramsX)
random_agent2 = RandomAgent(player='O', switching=False)

# (Self-play alternative: TicTacToe(learning_agent1, learning_agent2, display=False).)
game = TicTacToe(
    learning_agent1,
    random_agent2,
    display=False,
    width=width,
    height=width,
    win_length=win_length,
)

# Training loop: play the configured number of games and count each result.
for episode in range(nr_of_episodes):
    outcome = game.play()
    outcomes[outcome] = outcomes[outcome] + 1

# Report each result as a fraction of all training games.
print("Outcomes during learning:")
print(f"X wins: {outcomes['X']/nr_of_episodes}, O wins: {outcomes['O']/nr_of_episodes}, draws: {outcomes['D']/nr_of_episodes}")

In [None]:
# Evaluate the trained network: a playing agent built from the learner's
# q_network takes X against a fresh random opponent playing O.
# NOTE: this cell rebinds nr_of_episodes, outcomes, random_agent2 and game
# from the training cell above.
nr_of_episodes = 1000  # number of evaluation games
outcomes = dict.fromkeys(('X', 'O', 'D'), 0)  # fresh tally for this run

q_network = learning_agent1.q_network
playing_agent1 = DeepQPlayingAgent(q_network, player='X', switching=False)
random_agent2 = RandomAgent(player='O', switching=False)

game = TicTacToe(
    playing_agent1,
    random_agent2,
    display=False,
    width=width,
    height=width,
    win_length=win_length,
)

# Play the evaluation games and count each result.
for episode in range(nr_of_episodes):
    outcome = game.play()
    outcomes[outcome] = outcomes[outcome] + 1

# Report each result as a fraction of all evaluation games.
print("Outcomes during playing:")
print(f"X wins: {outcomes['X']/nr_of_episodes}, O wins: {outcomes['O']/nr_of_episodes}, draws: {outcomes['D']/nr_of_episodes}")

Using device: cpu
Outcomes during playing:
X wins: 0.542, O wins: 0.217, draws: 0.241

In [None]:
from Evaluation import plot_evaluation_data
# Plot the evaluation data for the X learner. learning_agent1 was created
# with 'evaluation': True ("save data for evaluation"), so presumably this
# shows what it recorded during training -- TODO confirm against Evaluation.py.
plot_evaluation_data(learning_agent1)
# Self-play variant (only valid when learning_agent2 has been created):
# plot_evaluation_data(learning_agent2)