In [1]:
import numpy as np
import random

from ttt import TicTacToe

from AgentHuman import AgentHuman
from AgentRandom import AgentRandom
from AgentBlockWin import AgentBlockWin
from AgentExploreQ import AgentExploreQ

### Random vs random

In [2]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentRandom()
aix = AgentRandom()

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o: {}%".format(100*o_win/total))
print("Wins, x: {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o: 60%
Wins, x: 26%
Draws: 12%


### Random vs sem-deterministic

In [3]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentRandom()
aix = AgentBlockWin()

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (random): {}%".format(100*o_win/total))
print("Wins, x (semi-det): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (random): 17%
Wins, x (semi-det): 60%
Draws: 22%


In [4]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentBlockWin()
aix = AgentRandom()

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (semi-det): {}%".format(100*o_win/total))
print("Wins, x (random): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (semi-det): 85%
Wins, x (random): 4%
Draws: 10%


### Train RL with itself

In [5]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentExploreQ(policy_file="policy_rl_o", train_prefix="rl")
aix = AgentExploreQ(policy_file="policy_rl_x", train_prefix="rl")
tictactoe.train(aio=aio, aix=aix, turns=n)

File not available. Starting from the beginning.
File not available. Starting from the beginning.


#### RL vs random

In [6]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentRandom()
aix = AgentExploreQ(policy_file="policy_rl_x", train_prefix=None, exp_rate=0.0)

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (random): {}%".format(100*o_win/total))
print("Wins, x (RL): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (random): 28%
Wins, x (RL): 56%
Draws: 15%


In [7]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aix = AgentRandom()
aio = AgentExploreQ(policy_file="policy_rl_o", train_prefix=None, exp_rate=0.0)

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (RL): {}%".format(100*o_win/total))
print("Wins, x (random): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (RL): 76%
Wins, x (random): 15%
Draws: 8%


### Train against semi-deterministic

In [8]:
n = 3000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentExploreQ(policy_file="policy_semidem_o", train_prefix='semidem')
aix = AgentBlockWin()
tictactoe.train(aio=aio, aix=aix, turns=n)

File not available. Starting from the beginning.


In [9]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentExploreQ(policy_file="policy_semidem_o", exp_rate=0.0)
aix = AgentRandom()

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (RL): {}%".format(100*o_win/total))
print("Wins, x (random): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (RL): 83%
Wins, x (random): 5%
Draws: 11%


In [10]:
n = 3000

tictactoe = TicTacToe(board_size=3, win_len=3)
aix = AgentExploreQ(policy_file="policy_semidem_x", train_prefix='semidem')
aio = AgentRandom()
tictactoe.train(aio=aio, aix=aix, turns=n)

File not available. Starting from the beginning.


In [11]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aix = AgentExploreQ(policy_file="policy_semidem_x", exp_rate=0.0)
aio = AgentRandom()

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (random): {}%".format(100*o_win/total))
print("Wins, x (RL): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (random): 12%
Wins, x (RL): 70%
Draws: 17%


### Train against RL for longer

In [12]:
n = 3000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentExploreQ(policy_file="policy_rl_med_o", train_prefix="rl_med")
aix = AgentExploreQ(policy_file="policy_rl_med_x", train_prefix="rl_med")
tictactoe.train(aio=aio, aix=aix, turns=n)

File not available. Starting from the beginning.
File not available. Starting from the beginning.


In [13]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentRandom()
aix = AgentExploreQ(policy_file="policy_rl_med_x", train_prefix=None, exp_rate=0.0)

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (random): {}%".format(100*o_win/total))
print("Wins, x (RL): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (random): 21%
Wins, x (RL): 61%
Draws: 16%


In [14]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aix = AgentRandom()
aio = AgentExploreQ(policy_file="policy_rl_med_o", train_prefix=None, exp_rate=0.0)

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (RL): {}%".format(100*o_win/total))
print("Wins, x (random): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (RL): 93%
Wins, x (random): 1%
Draws: 4%


In [15]:
n = 10000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentExploreQ(policy_file="policy_rl_max_o", train_prefix="rl_max")
aix = AgentExploreQ(policy_file="policy_rl_max_x", train_prefix="rl_max")
tictactoe.train(aio=aio, aix=aix, turns=n)

File not available. Starting from the beginning.
File not available. Starting from the beginning.


In [16]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aix = AgentRandom()
aio = AgentExploreQ(policy_file="policy_rl_max_o", train_prefix=None, exp_rate=0.0)

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (RL): {}%".format(100*o_win/total))
print("Wins, x (random): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (RL): 98%
Wins, x (random): 0%
Draws: 1%


In [17]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentRandom()
aix = AgentExploreQ(policy_file="policy_rl_max_x", train_prefix=None, exp_rate=0.0)

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (random): {}%".format(100*o_win/total))
print("Wins, x (RL): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (random): 6%
Wins, x (RL): 80%
Draws: 13%
