# Tic Tac Toe Game (Noughts and Crosses)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from gamelearner import *

## Game dynamics

In [2]:
# Start a fresh game; its repr lists the moves made so far (none yet).
game = TicTacToeGame()
game

TicTacToeGame(moves=[])

In [3]:
# Integer identifiers of the two players.
game.roles

[1, 2]

In [4]:
# Symbols used when the board is printed (role 1 is drawn as 'X', role 2 as 'O').
game.marks

['X', 'O']

In [5]:
# The board is a 3x3 int8 array; before any move every cell is 0.
game.state

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]], dtype=int8)

In [6]:
# Role whose move is next; role 1 opens the game.
game.turn

1

In [7]:
# Play four opening moves. Each move is (role, (row, col)) and the two roles
# alternate, starting with role 1.
opening_moves = [
    (1, (0, 2)),
    (2, (0, 1)),
    (1, (1, 1)),
    (2, (2, 2)),
]
for move in opening_moves:
    game.make_move(move)

In [8]:
# Print the board: '_' is an empty cell, 'X' and 'O' are the players' marks.
game.show_state()

_ O X
_ X _
_ _ O


In [9]:
# Ask the game whether it has finished; after these four moves it has not.
game.check_if_game_over()

False

In [10]:
# Print the numbered move history; each entry is (role, (row, col)).
game.show_moves()

1 (1, (0, 2))
2 (2, (0, 1))
3 (1, (1, 1))
4 (2, (2, 2))


In [11]:
# Four moves have been played, so it is role 1's turn again.
game.turn

1

In [12]:
# Empty cells that can still be played, as (row, col) tuples.
game.available_positions()

[(0, 0), (1, 0), (1, 2), (2, 0), (2, 1)]

In [13]:
# Moving out of turn is rejected with a ValueError; catch it so the notebook
# shows the message instead of a traceback.
try:
    game.make_move((2, (2,0)))
except ValueError as err:
    print(err)

It is not player 2's turn.


In [14]:
# The rejected move left the board unchanged.
game.show_state()

_ O X
_ X _
_ _ O


In [15]:
# The game is still in progress.
game.game_over

False

In [16]:
# No winner yet. print() is needed here because a bare None result is not
# displayed by the notebook.
print(game.winner)

None


In [17]:
# Undo the most recent move (role 2's mark at (2, 2)).
game.reverse_move()

In [18]:
# Confirm that the bottom-right 'O' has been removed.
game.show_state()

_ O X
_ X _
_ _ _


In [19]:
# Still no winner after the undo.
print(game.winner)

None


In [20]:
# Play on: role 2 takes (1, 2), then role 1 takes (2, 0) and completes the
# diagonal (0, 2) - (1, 1) - (2, 0).
game.make_move((2, (1,2)))
game.make_move((1, (2,0)))
game.show_state()

_ O X
_ X O
X _ _


In [21]:
# The game is now finished and role 1 is the winner.
game.game_over, game.winner

(True, 1)

In [22]:
# A move after the game has been won fails an assertion. Note that this is an
# AssertionError, not the ValueError raised for an out-of-turn move.
try:
    game.make_move((2, (2,1)))
except AssertionError as err:
    print(err)

Player 1 has already won


### Playing with Human Players

In [23]:
# Two human players; they enter their moves at an input prompt.
players = [HumanPlayer("Jill"), HumanPlayer("Jack")]
players

[HumanPlayer('Jill')), HumanPlayer('Jack'))]

In [24]:
# Replace the finished game with a new, empty one.
game = TicTacToeGame()
game

TicTacToeGame(moves=[])

In [25]:
# Show the board, then prompt Jill for a move as role 1.
# The move is typed as "row,col", e.g. 0,0.
game.show_state()
players[0].make_move(game, role=1)

_ _ _
_ _ _
_ _ _
Jill's turn (row, col): 0,0


In [26]:
# Show the board, then prompt Jack for a move as role 2.
game.show_state()
players[1].make_move(game, role=2)

X _ _
_ _ _
_ _ _
Jack's turn (row, col): 1,1


In [27]:
# Board after one move from each human player.
game.show_state()

X _ _
_ O _
_ _ _


## Using the game controller

In [28]:
# A new game and two ExpertPlayer instances to play it automatically.
game = TicTacToeGame()
players = [ExpertPlayer("EXP1"), ExpertPlayer("EXP2")]

In [29]:
# The controller ties a game to its list of players.
ctrl = GameController(game, players)
ctrl

GameController(TicTacToeGame(moves=[]), [ExpertPlayer('EXP1'), ExpertPlayer('EXP2')])

In [30]:
# Mapping of player -> role chosen by the controller.
# NOTE(review): roles do not follow list order here (EXP2 received role 1);
# confirm in GameController whether the assignment is randomised.
ctrl.player_roles

{ExpertPlayer('EXP1'): 2, ExpertPlayer('EXP2'): 1}

In [31]:
# Play the game to completion, printing the board before every move.
# The two experts end in a draw here.
ctrl.play()

_ _ _
_ _ _
_ _ _
EXP2's turn (row, col): (1, (0, 0))
X _ _
_ _ _
_ _ _
EXP1's turn (row, col): (2, (1, 1))
X _ _
_ O _
_ _ _
EXP2's turn (row, col): (1, (2, 2))
X _ _
_ O _
_ _ X
EXP1's turn (row, col): (2, (0, 1))
X O _
_ O _
_ _ X
EXP2's turn (row, col): (1, (2, 1))
X O _
_ O _
_ X X
EXP1's turn (row, col): (2, (2, 0))
X O _
_ O _
O X X
EXP2's turn (row, col): (1, (0, 2))
X O X
_ O _
O X X
EXP1's turn (row, col): (2, (1, 2))
X O X
_ O O
O X X
EXP2's turn (row, col): (1, (1, 0))
X O X
X O O
O X X
Game over!
Draw


## Competition between computer players

In [32]:
# Pit a freshly created TDLearner against an ExpertPlayer.
game = TicTacToeGame()
players = [TDLearner('TD1'), ExpertPlayer('EXPERT')]
ctrl = GameController(game, players)

In [33]:
# Play a single game; the newly created TD learner loses this one to the expert.
ctrl.play()

_ _ _
_ _ _
_ _ _
EXPERT's turn (row, col): (1, (2, 2))
_ _ _
_ _ _
_ _ X
TD1's turn (row, col): (0, 2)
_ _ O
_ _ _
_ _ X
EXPERT's turn (row, col): (1, (1, 1))
_ _ O
_ X _
_ _ X
TD1's turn (row, col): (0, 1)
_ O O
_ X _
_ _ X
EXPERT's turn (row, col): (1, (0, 0))
X O O
_ X _
_ _ X
Game over!
EXPERT won


In [34]:
# Report each player's running tally of games played, won and lost.
for player in players:
    print("\nPlayer %s" % str(player.name))
    tally = (player.games_played, player.games_won, player.games_lost)
    print("  played: %d, wins: %d, losses: %d" % tally)


Player TD1
  played: 1, wins: 0, losses: 1

Player EXPERT
  played: 1, wins: 1, losses: 0


## Train a TD Learner

In [35]:
# Keep a named reference to the learner so the trained instance can be reused
# later in the notebook.
td_learner = TDLearner('TD1')
computer_players = [td_learner, ExpertPlayer('EXPERT')]

In [36]:
# Train by self-play between the listed players. With the default settings one
# call plays 1000 games (see the progress log and the result totals).
train_computer_players(computer_players)


Training 2 computer players...
0 games completed
100 games completed
200 games completed
300 games completed
400 games completed
500 games completed
600 games completed
700 games completed
800 games completed
900 games completed

Results:
TD1: won 0, lost 578
EXPERT: won 578, lost 0
Draws: 422


In [37]:
# A second batch of 1000 games with the same player objects; TD1's losses in
# this batch drop from 578 to 214.
train_computer_players(computer_players)


Training 2 computer players...
0 games completed
100 games completed
200 games completed
300 games completed
400 games completed
500 games completed
600 games completed
700 games completed
800 games completed
900 games completed

Results:
TD1: won 0, lost 214
EXPERT: won 214, lost 0
Draws: 786


## Now play against the computer!

In [38]:
# Human versus the TD learner trained in the previous section.
game = TicTacToeGame()
players = [HumanPlayer("You"), td_learner]
ctrl = GameController(game, players)

In [39]:
# Play round after round until the user answers 'q' at the prompt. The board
# is reset before every round, so the same game and controller are reused.
keep_playing = True
while keep_playing:
    game.reset()
    ctrl.announce_game()
    ctrl.play()
    text = input("Press enter to play again or q to quit: ")
    keep_playing = text.strip().lower() != 'q'

# Final tallies. The counters live on the player objects and are cumulative,
# so the TD learner's figures also include its earlier training games.
print("Results")
for player in players:
    print("\nPlayer %s" % str(player.name))
    tally = (player.games_played, player.games_won, player.games_lost)
    print("  played: %d, wins: %d, losses: %d" % tally)


Game of Tic Tac Toe with 2 players ['You', 'TD1']
_ _ _
_ _ _
_ _ _
TD1's turn (row, col): (0, 2)
_ _ X
_ _ _
_ _ _
You's turn (row, col): 0,0
O _ X
_ _ _
_ _ _
TD1's turn (row, col): (0, 1)
O X X
_ _ _
_ _ _
You's turn (row, col): 2,2
O X X
_ _ _
_ _ O
TD1's turn (row, col): (1, 2)
O X X
_ _ X
_ _ O
You's turn (row, col): 1,1
You you won!
O X X
_ O X
_ _ O
Game over!
You won
Press enter to play again or q to quit: q
Results

Player You
  played: 1, wins: 1, losses: 0

Player TD1
  played: 2001, wins: 0, losses: 793


## TD Parameter Optimization

In [40]:
# Accumulator for the hyper-parameter search: one empty list per column, to be
# filled with one entry per (learning_rate, off_policy_rate) combination.
results = {
    column: []
    for column in (
        'learning_rate',
        'off_policy_rate',
        'Wins (1000)',
        'Losses (1000)',
        'Wins (2000)',
        'Losses (2000)',
    )
}

In [41]:
# Hyper-parameter grid for the TD learner: every learning rate is combined with
# every off-policy rate (3 x 2 = 6 training runs).
learning_rates = [0.5, 0.25, 0.1]
off_policy_rates = [0.0, 0.1]

In [42]:
# Grid search over every (learning_rate, off_policy_rate) pair. Each pair gets
# a brand-new TDLearner that plays two successive training batches against a
# fresh ExpertPlayer. The learner's win/loss counters are read after each batch
# and are not reset in between, so the "(2000)" figures include the first batch.
#
# Note: this cell appends to `results` and rebinds `td_learner` and
# `computer_players`, so re-running it without first re-creating `results`
# adds duplicate rows.
param_grid = [(lr, opr) for lr in learning_rates for opr in off_policy_rates]
for lr, opr in param_grid:
    print("\nlr: %s, opr: %s" % (str(lr), str(opr)))
    print("Training against expert...")
    td_learner = TDLearner('TD', learning_rate=lr, off_policy_rate=opr)
    computer_players = [td_learner, ExpertPlayer('EXPERT')]

    # First training batch: record the parameters and the tallies so far.
    train_computer_players(computer_players, show=False)
    after_first_batch = {
        'learning_rate': lr,
        'off_policy_rate': opr,
        'Wins (1000)': td_learner.games_won,
        'Losses (1000)': td_learner.games_lost,
    }
    for column, value in after_first_batch.items():
        results[column].append(value)

    # Second training batch with the same players: record the running totals.
    train_computer_players(computer_players, show=False)
    after_second_batch = {
        'Wins (2000)': td_learner.games_won,
        'Losses (2000)': td_learner.games_lost,
    }
    for column, value in after_second_batch.items():
        results[column].append(value)


lr: 0.5, opr: 0.0
Training against expert...

lr: 0.5, opr: 0.1
Training against expert...

lr: 0.25, opr: 0.0
Training against expert...

lr: 0.25, opr: 0.1
Training against expert...

lr: 0.1, opr: 0.0
Training against expert...

lr: 0.1, opr: 0.1
Training against expert...


In [43]:
# Raw results: each list holds one entry per hyper-parameter combination, in
# the order the combinations were trained.
results

{'learning_rate': [0.5, 0.5, 0.25, 0.25, 0.1, 0.1],
 'off_policy_rate': [0.0, 0.1, 0.0, 0.1, 0.0, 0.1],
 'Wins (1000)': [0, 0, 0, 0, 0, 0],
 'Losses (1000)': [487, 606, 499, 603, 483, 586],
 'Wins (2000)': [0, 0, 0, 0, 0, 0],
 'Losses (2000)': [557, 813, 562, 837, 570, 813]}

In [44]:
# Tabulate the search: one row per (learning_rate, off_policy_rate) combination.
df = pd.DataFrame(results)
df

Unnamed: 0,learning_rate,off_policy_rate,Wins (1000),Losses (1000),Wins (2000),Losses (2000)
0,0.5,0.0,0,487,0,557
1,0.5,0.1,0,606,0,813
2,0.25,0.0,0,499,0,562
3,0.25,0.1,0,603,0,837
4,0.1,0.0,0,483,0,570
5,0.1,0.1,0,586,0,813


In [45]:
# Row labels of `df` with the most wins and the fewest losses at each checkpoint,
# stored as [most wins, fewest losses].
# NOTE(review): in this run every "Wins" entry is 0, and idxmax() returns the
# first label when values tie, so the first element of each list is 0 and
# carries no information. Only the idxmin() of the losses distinguishes the
# settings.
good_results = {
    'After 1000 iterations:': [df['Wins (1000)'].idxmax(), df['Losses (1000)'].idxmin()],
    'After 2000 iterations:': [df['Wins (2000)'].idxmax(), df['Losses (2000)'].idxmin()]
}
good_results

{'After 1000 iterations:': [0, 4], 'After 2000 iterations:': [0, 0]}