In [1]:
import json
from tqdm import tqdm
from time import time

In [2]:
def evaluate_agent(game_instance, agent, opponent, num_games=1000):
    """Play `num_games` complete games and tally how each one ended.

    `agent` moves whenever `game_instance.player_X_turns` is truthy and
    `opponent` moves otherwise. As a side effect `agent.in_training` is set
    to False before the first game.

    Args:
        game_instance: Game object exposing `is_gameover()`, `player_X_turns`,
            `X_wins`, `O_wins` and `play_again()`.
        agent: Player with a `take_turn()` method; moves on X's turns.
        opponent: Player with a `take_turn()` method; moves on the other turns.
        num_games: Number of games to play.

    Returns:
        dict with the keys 'X', 'O' and 'tie' mapping to the number of games
        that ended with `X_wins` set, with `O_wins` set, or with neither.
    """
    agent.in_training = False
    tally = dict.fromkeys(('X', 'O', 'tie'), 0)
    for _ in range(num_games):
        # Hand the move to whichever side is due until the game is over.
        while True:
            if game_instance.is_gameover():
                break
            mover = agent if game_instance.player_X_turns else opponent
            mover.take_turn()
        # X is checked first; O is only consulted when X did not win.
        if game_instance.X_wins:
            outcome = 'X'
        elif game_instance.O_wins:
            outcome = 'O'
        else:
            outcome = 'tie'
        tally[outcome] += 1
        # Called after every game, including the last one.
        game_instance.play_again()
    return tally

### Tic-Tac-Toe Experiments

In [None]:
from TicTacToeGame import TicTacToeGame
from TicTacToeRandomSolver import TicTacToeRandomSolver
from TicTacToeMinimaxSolver import TicTacToeMinimaxSolver
from TicTacToeDefaultSolver import TicTacToeDefaultSolver
from TicTacToeQLearningSolver import TicTacToeQLearningSolver

In [3]:
def train_agent(agent, episodes):
    """Put `agent` into training mode, then run its own training routine.

    Args:
        agent: Object with an `in_training` attribute and a `train()` method.
        episodes: Passed through unchanged as the only argument to `agent.train`.

    The flag is left set to True afterwards; nothing here switches it back.
    """
    setattr(agent, 'in_training', True)
    agent.train(episodes)

In [6]:
# Experiment: Tic-Tac-Toe, minimax (moves on X's turns) vs. the default solver.
# Create a game instance
game_instance = TicTacToeGame()

# Minimax agent plays against default opponent
agent = TicTacToeMinimaxSolver(game_instance)
opponent = TicTacToeDefaultSolver(game_instance)

# Ten independent rounds of 100 games each; one score dict is kept per round.
scores_arr = []
for i in range(10):
    scores = evaluate_agent(game_instance, agent, opponent, num_games=100)
    scores_arr.append(scores)
    print('Minimax vs Default')
    print('X wins:', scores['X'])
    print('O wins:', scores['O'])
    print('Ties:', scores['tie'])

# Persist the per-round results as a JSON list (overwrites any earlier file).
# The 'experiments' directory must already exist; open() does not create it.
with open('experiments/ttt-mini-vs-default.txt', 'w') as f:
    json.dump(scores_arr, f)

  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [02:09<00:00,  1.30s/it]


Minimax vs Default
X wins: 95
O wins: 0
Ties: 5


100%|██████████| 100/100 [02:08<00:00,  1.29s/it]


Minimax vs Default
X wins: 93
O wins: 0
Ties: 7


100%|██████████| 100/100 [02:08<00:00,  1.29s/it]


Minimax vs Default
X wins: 91
O wins: 0
Ties: 9


100%|██████████| 100/100 [02:07<00:00,  1.28s/it]


Minimax vs Default
X wins: 94
O wins: 0
Ties: 6


100%|██████████| 100/100 [01:58<00:00,  1.19s/it]


Minimax vs Default
X wins: 87
O wins: 0
Ties: 13


100%|██████████| 100/100 [01:48<00:00,  1.08s/it]


Minimax vs Default
X wins: 93
O wins: 0
Ties: 7


100%|██████████| 100/100 [01:58<00:00,  1.18s/it]


Minimax vs Default
X wins: 89
O wins: 0
Ties: 11


100%|██████████| 100/100 [01:47<00:00,  1.07s/it]


Minimax vs Default
X wins: 90
O wins: 0
Ties: 10


100%|██████████| 100/100 [01:50<00:00,  1.10s/it]


Minimax vs Default
X wins: 84
O wins: 0
Ties: 16


100%|██████████| 100/100 [01:49<00:00,  1.09s/it]

Minimax vs Default
X wins: 90
O wins: 0
Ties: 10





In [6]:
# Experiment: Tic-Tac-Toe, Q-learning agent vs. the default solver, evaluated
# before training and again after each training batch.
# Create a game instance
game_instance = TicTacToeGame()

# Q-learning agent plays against default opponent
agent = TicTacToeQLearningSolver(game_instance)
opponent = TicTacToeDefaultSolver(game_instance)
# opponent = TicTacToeRandomSolver(game_instance)

# Baseline: evaluate the agent before any training (1000 games, the default).
untrained_scores = evaluate_agent(game_instance, agent, opponent)
print('Q-learning vs Default - Untrained: X wins: {}, O wins: {}, tie: {}'.format(untrained_scores['X'], untrained_scores['O'], untrained_scores['tie']))
# Index 0 holds the untrained baseline; index k+1 holds the result after batch k.
q_learning_training = [untrained_scores]
batch_train_episodes = 5000
for i in range(10):
    # train_agent switches in_training on; evaluate_agent switches it off again.
    train_agent(agent, batch_train_episodes)
    scores = evaluate_agent(game_instance, agent, opponent)
    q_learning_training.append(scores)
    print('Q-learning vs Default - Batch {}: X wins: {}, O wins: {}, tie: {}'.format(i, scores['X'], scores['O'], scores['tie']))

  4%|▎         | 35/1000 [00:00<00:02, 346.26it/s]

100%|██████████| 1000/1000 [00:02<00:00, 492.29it/s]


Q-learning vs Default - Untrained: X wins: 771, O wins: 188, tie: 41


Training Q-learning agent...: 100%|██████████| 5000/5000 [00:12<00:00, 408.97it/s]
100%|██████████| 1000/1000 [00:01<00:00, 629.48it/s]


Q-learning vs Random - Batch 0: X wins: 909, O wins: 59, tie: 32


Training Q-learning agent...: 100%|██████████| 5000/5000 [00:12<00:00, 412.20it/s]
100%|██████████| 1000/1000 [00:01<00:00, 606.79it/s]


Q-learning vs Random - Batch 1: X wins: 913, O wins: 22, tie: 65


Training Q-learning agent...: 100%|██████████| 5000/5000 [00:12<00:00, 414.50it/s]
100%|██████████| 1000/1000 [00:01<00:00, 594.54it/s]


Q-learning vs Random - Batch 2: X wins: 930, O wins: 23, tie: 47


Training Q-learning agent...: 100%|██████████| 5000/5000 [00:12<00:00, 398.02it/s]
100%|██████████| 1000/1000 [00:02<00:00, 498.48it/s]


Q-learning vs Random - Batch 3: X wins: 908, O wins: 0, tie: 92


Training Q-learning agent...: 100%|██████████| 5000/5000 [00:11<00:00, 436.34it/s]
100%|██████████| 1000/1000 [00:01<00:00, 567.28it/s]


Q-learning vs Random - Batch 4: X wins: 900, O wins: 22, tie: 78


Training Q-learning agent...: 100%|██████████| 5000/5000 [00:11<00:00, 436.61it/s]
100%|██████████| 1000/1000 [00:01<00:00, 558.82it/s]


Q-learning vs Random - Batch 5: X wins: 903, O wins: 0, tie: 97


Training Q-learning agent...: 100%|██████████| 5000/5000 [00:12<00:00, 407.94it/s]
100%|██████████| 1000/1000 [00:01<00:00, 563.46it/s]


Q-learning vs Random - Batch 6: X wins: 950, O wins: 0, tie: 50


Training Q-learning agent...: 100%|██████████| 5000/5000 [00:12<00:00, 400.78it/s]
100%|██████████| 1000/1000 [00:03<00:00, 301.63it/s]


Q-learning vs Random - Batch 7: X wins: 955, O wins: 0, tie: 45


Training Q-learning agent...: 100%|██████████| 5000/5000 [00:15<00:00, 325.25it/s]
100%|██████████| 1000/1000 [00:01<00:00, 536.27it/s]


Q-learning vs Random - Batch 8: X wins: 971, O wins: 0, tie: 29


Training Q-learning agent...: 100%|██████████| 5000/5000 [00:13<00:00, 360.96it/s]
100%|██████████| 1000/1000 [00:02<00:00, 428.28it/s]

Q-learning vs Random - Batch 9: X wins: 979, O wins: 0, tie: 21





In [7]:
# Save the Q-learning evaluation history as a JSON list.
# Relies on `q_learning_training` built by an earlier cell; overwrites the file.
with open('experiments/ttt-qlearning-vs-default.txt', 'w') as f:
    json.dump(q_learning_training, f)

In [8]:
# Re-uses `game_instance` and `agent` from an earlier cell and swaps only the
# opponent for a minimax player.
# NOTE(review): the label below assumes `agent` is still the Q-learning agent
# created in the cell above - confirm when running cells out of order.
opponent = TicTacToeMinimaxSolver(game_instance)
scores_arr = []
for i in range(10):
    scores = evaluate_agent(game_instance, agent, opponent, num_games=100)
    scores_arr.append(scores)
    # Label corrected: the opponent here is the minimax solver, not the default
    # solver, so the copy-pasted 'Minimax vs Default' heading was wrong.
    print('Q-learning vs Minimax')
    print('X wins:', scores['X'])
    print('O wins:', scores['O'])
    print('Ties:', scores['tie'])

100%|██████████| 100/100 [00:16<00:00,  5.98it/s]


Minimax vs Default
X wins: 0
O wins: 0
Ties: 100


100%|██████████| 100/100 [00:16<00:00,  6.24it/s]


Minimax vs Default
X wins: 0
O wins: 0
Ties: 100


100%|██████████| 100/100 [00:15<00:00,  6.26it/s]


Minimax vs Default
X wins: 0
O wins: 0
Ties: 100


100%|██████████| 100/100 [00:15<00:00,  6.35it/s]


Minimax vs Default
X wins: 0
O wins: 0
Ties: 100


100%|██████████| 100/100 [00:22<00:00,  4.45it/s]


Minimax vs Default
X wins: 0
O wins: 0
Ties: 100


100%|██████████| 100/100 [00:19<00:00,  5.10it/s]


Minimax vs Default
X wins: 0
O wins: 0
Ties: 100


100%|██████████| 100/100 [00:19<00:00,  5.13it/s]


Minimax vs Default
X wins: 0
O wins: 0
Ties: 100


100%|██████████| 100/100 [00:18<00:00,  5.55it/s]


Minimax vs Default
X wins: 0
O wins: 0
Ties: 100


100%|██████████| 100/100 [00:18<00:00,  5.54it/s]


Minimax vs Default
X wins: 0
O wins: 0
Ties: 100


100%|██████████| 100/100 [00:16<00:00,  6.18it/s]

Minimax vs Default
X wins: 0
O wins: 0
Ties: 100





In [3]:
# Experiment: Tic-Tac-Toe, Q-learning agent (moves on X's turns) vs. minimax,
# evaluated after an initial training run and after each short extra batch.
# Create a game instance
game_instance = TicTacToeGame()

# Q-learning agent plays against minimax opponent
q_learning_agent = TicTacToeQLearningSolver(game_instance)
minimax_agent = TicTacToeMinimaxSolver(game_instance)
# opponent = TicTacToeRandomSolver(game_instance)

# Train through train_agent rather than calling .train() directly:
# evaluate_agent sets in_training to False, so without re-enabling it every
# batch after the first evaluation would run with the flag off.
# NOTE(review): whether train() manages the flag itself is not visible here;
# going through train_agent is correct either way.
train_agent(q_learning_agent, 6000)
# Index 0 holds the result after the initial 6000 episodes.
q_learning_training = [evaluate_agent(game_instance, q_learning_agent, minimax_agent, num_games=100)]
batch_train_episodes = 100
for i in range(10):
    train_agent(q_learning_agent, batch_train_episodes)
    scores = evaluate_agent(game_instance, q_learning_agent, minimax_agent, num_games=100)
    q_learning_training.append(scores)
    print('Q-learning vs Minimax - Batch {}: X wins: {}, O wins: {}, tie: {}'.format(i, scores['X'], scores['O'], scores['tie']))

# Persist the full history as a JSON list (overwrites any earlier file).
with open('experiments/ttt-q-learning-vs-minimax.txt', 'w') as f:
    json.dump(q_learning_training, f)

Training Q-learning agent...: 100%|██████████| 6000/6000 [00:17<00:00, 334.46it/s]
Training Q-learning agent...: 100%|██████████| 100/100 [00:00<00:00, 358.78it/s]


Q-learning vs Minimax - Batch 0: X wins: 0, O wins: 0, tie: 100


Training Q-learning agent...: 100%|██████████| 100/100 [00:00<00:00, 253.86it/s]


Q-learning vs Minimax - Batch 1: X wins: 0, O wins: 100, tie: 0


Training Q-learning agent...: 100%|██████████| 100/100 [00:00<00:00, 324.84it/s]


Q-learning vs Minimax - Batch 2: X wins: 0, O wins: 0, tie: 100


Training Q-learning agent...: 100%|██████████| 100/100 [00:00<00:00, 214.39it/s]


Q-learning vs Minimax - Batch 3: X wins: 0, O wins: 100, tie: 0


Training Q-learning agent...: 100%|██████████| 100/100 [00:00<00:00, 136.57it/s]


Q-learning vs Minimax - Batch 4: X wins: 0, O wins: 0, tie: 100


Training Q-learning agent...: 100%|██████████| 100/100 [00:00<00:00, 257.69it/s]


Q-learning vs Minimax - Batch 5: X wins: 0, O wins: 0, tie: 100


Training Q-learning agent...: 100%|██████████| 100/100 [00:00<00:00, 276.41it/s]


Q-learning vs Minimax - Batch 6: X wins: 0, O wins: 0, tie: 100


Training Q-learning agent...: 100%|██████████| 100/100 [00:00<00:00, 244.40it/s]


Q-learning vs Minimax - Batch 7: X wins: 0, O wins: 100, tie: 0


Training Q-learning agent...: 100%|██████████| 100/100 [00:00<00:00, 208.71it/s]


Q-learning vs Minimax - Batch 8: X wins: 0, O wins: 100, tie: 0


Training Q-learning agent...: 100%|██████████| 100/100 [00:00<00:00, 156.83it/s]


Q-learning vs Minimax - Batch 9: X wins: 0, O wins: 100, tie: 0


### Connect 4 Experiments

In [3]:
from Connect4Game import Connect4Game
from Connect4DefaultSolver import Connect4DefaultSolver
from Connect4MinimaxSolver import Connect4MinimaxSolver
from Connect4QLearningSolver import Connect4QLearningSolver

In [6]:
# Experiment: Connect 4, minimax (use_pruning=True) vs. the default solver,
# repeated for search depths 3 through 7.
for depth in range(3, 8):
    # Fresh game and players for every depth so no state carries over.
    game_instance = Connect4Game()
    minimax_agent = Connect4MinimaxSolver(game_instance, max_depth=depth, use_pruning=True)
    default_opponent = Connect4DefaultSolver(game_instance)
    scores = evaluate_agent(game_instance, minimax_agent, default_opponent, num_games=1000)
    # The printed heading does not include the depth; results appear in depth order.
    print('Minimax vs Default')
    print('X wins:', scores['X'])
    print('O wins:', scores['O'])
    print('Ties:', scores['tie'])
    # One result file per depth, e.g. experiments/c4-minimax-vs-default-3.txt.
    with open('experiments/c4-minimax-vs-default-{}.txt'.format(depth), 'w') as f:
        json.dump(scores, f)

Minimax vs Default
X wins: 9
O wins: 991
Ties: 0
Minimax vs Default
X wins: 1
O wins: 998
Ties: 1
Minimax vs Default
X wins: 2
O wins: 998
Ties: 0
Minimax vs Default
X wins: 1
O wins: 999
Ties: 0
Minimax vs Default
X wins: 0
O wins: 1000
Ties: 0


In [10]:
# Write whatever `scores` currently holds to a single JSON file.
# NOTE(review): `scores` is a single result dict left over from an earlier
# cell - presumably the last depth of the sweep above; confirm before relying
# on this file.
with open('experiments/connect4-minimax-vs-default.txt', 'w') as f:
    json.dump(scores, f)

In [5]:
# Experiment: Connect 4, timed minimax vs. default for search depths 4 through 8.
for depth in range(4, 9):
    # Create a game instance
    game_instance = Connect4Game()

    # Minimax agent (max_depth=depth, use_pruning=True) plays against default opponent
    agent = Connect4MinimaxSolver(game_instance, max_depth=depth, use_pruning=True)
    opponent = Connect4DefaultSolver(game_instance)

    # Ten rounds of 100 games; each score dict also records its wall-clock time.
    scores_arr = []
    for i in range(10):
        start_time = time()
        scores = evaluate_agent(game_instance, agent, opponent, num_games=100)
        end_time = time()
        scores['execution_time'] = end_time - start_time
        scores_arr.append(scores)
        print('Minimax vs Default')
        print('X wins:', scores['X'])
        print('O wins:', scores['O'])
        print('Ties:', scores['tie'])

    # Append after every depth so partial results survive an interrupted run.
    # Each depth goes on its own line: back-to-back json.dump calls with no
    # separator would yield `[...][...]`, which json.load cannot parse. Read
    # the file back line by line with json.loads.
    with open('experiments/c4-minimax-vs-default.txt', 'a') as f:
        json.dump(scores_arr, f)
        f.write('\n')

Minimax vs Default
X wins: 55
O wins: 43
Ties: 2
Minimax vs Default
X wins: 62
O wins: 37
Ties: 1


KeyboardInterrupt: 

In [4]:
# Grid search over the Q-learning hyper-parameters: every combination is
# trained from scratch and then evaluated against the random solver.
learning_rate_arr = [0.1, 0.3, 0.5, 0.7, 0.9]
discount_factor_arr = [0.1, 0.3, 0.5, 0.7, 0.9]
exploration_rate_arr = [0.1, 0.3, 0.5, 0.7, 0.9]
decay_rate_arr = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2]
scores_arr = []

total_iterations = len(learning_rate_arr) * len(discount_factor_arr) * len(exploration_rate_arr) * len(decay_rate_arr)

# The context manager closes the progress bar even if the sweep is interrupted.
with tqdm(total=total_iterations, desc='Total Progress') as pbar:
    for learning_rate in learning_rate_arr:
        for discount_factor in discount_factor_arr:
            for exploration_rate in exploration_rate_arr:
                for decay_rate in decay_rate_arr:
                    # Fresh game, agent and opponent for every combination.
                    game_instance = TicTacToeGame()
                    agent = TicTacToeQLearningSolver(game_instance, learning_rate, discount_factor, exploration_rate, decay_rate)
                    opponent = TicTacToeRandomSolver(game_instance)
                    # Pass an episode count, as every other training call in this
                    # notebook does (e.g. train(episodes=6000)); this cell used to
                    # pass range(1000).
                    # NOTE(review): confirm against the current train() signature.
                    train_agent(agent, 1000)
                    scores = evaluate_agent(game_instance, agent, opponent)
                    # One flat row per combination: four hyper-parameters, then the tallies.
                    scores_arr.append((learning_rate, discount_factor, exploration_rate, decay_rate, scores['X'], scores['O'], scores['tie']))
                    pbar.update(1)

Total Progress: 100%|██████████| 625/625 [32:57<00:00,  3.16s/it]   


In [5]:
# Tabulate the sweep results, best X win count first, and persist them.
import pandas as pd

# Build the frame and sort it in one chained expression.
df = (
    pd.DataFrame(
        scores_arr,
        columns=['learning_rate', 'discount_factor', 'exploration_rate', 'decay_rate', 'X_wins', 'O_wins', 'ties'],
    )
    .sort_values(by='X_wins', ascending=False)
)

# Write the sorted table without the index column.
df.to_csv('q_learning_results.csv', index=False)

# Show the top rows as the cell output.
df.head()

Unnamed: 0,learning_rate,discount_factor,exploration_rate,decay_rate,X_wins,O_wins,ties
10,0.1,0.1,0.5,1e-06,687,246,67
81,0.1,0.7,0.3,1e-05,686,245,69
50,0.1,0.5,0.1,1e-06,685,244,71
300,0.5,0.5,0.1,1e-06,682,240,78
387,0.7,0.1,0.5,0.0001,680,268,52


In [3]:
# Single evaluation run: Tic-Tac-Toe minimax vs. the default solver over the
# default 1000 games (no num_games override).
# Create a game instance
game_instance = TicTacToeGame()

# Minimax agent plays against default opponent
agent = TicTacToeMinimaxSolver(game_instance)
opponent = TicTacToeDefaultSolver(game_instance)
scores = evaluate_agent(game_instance, agent, opponent)
print('Minimax vs Default')
print('X wins:', scores['X'])
print('O wins:', scores['O'])
print('Ties:', scores['tie'])

KeyboardInterrupt: 

In [10]:
# Q-learning vs. random opponent: evaluate untrained, then after each of ten
# training batches.
# NOTE(review): `q_learning_instance` and `random_opponent` are not defined in
# any cell visible here; they must come from a cell that runs before this one.
train_episodes = 10000  # episodes per training batch (loop-invariant)

untrained_scores = evaluate_agent(game_instance, q_learning_instance, random_opponent)
# Index 0 holds the untrained baseline; index k+1 holds the result after batch k.
scores_history = [untrained_scores]
for i in range(10):
    # train_agent's signature is (agent, episodes); the former
    # `episodes_range=` keyword raised TypeError against it. Pass the batch
    # size, as the other training calls in this notebook do.
    train_agent(q_learning_instance, train_episodes)
    scores = evaluate_agent(game_instance, q_learning_instance, random_opponent)
    scores_history.append(scores)
    print('Q-learning vs Random - Game {}: X wins: {}, O wins: {}, tie: {}'.format(i, scores['X'], scores['O'], scores['tie']))

AttributeError: 'TicTacToeQLearningSolver' object has no attribute 'make_move'

In [7]:
# Display the evaluation history collected so far (bare expression = cell output).
scores_history

[{'X': 1000, 'O': 0, 'tie': 0}]