In [1]:
import numpy as np
import random

from ttt import TicTacToe

from AgentHuman import AgentHuman
from AgentRandom import AgentRandom
from AgentBlockWin import AgentBlockWin
from AgentExploreQ import AgentExploreQ

### Random vs random

In [2]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentRandom()
aix = AgentRandom()

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o: {}%".format(100*o_win/total))
print("Wins, x: {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o: 58%
Wins, x: 28%
Draws: 12%


### Random vs sem-deterministic

In [3]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentRandom()
aix = AgentBlockWin()

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (random): {}%".format(100*o_win/total))
print("Wins, x (semi-det): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (random): 15%
Wins, x (semi-det): 60%
Draws: 24%


In [4]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentBlockWin()
aix = AgentRandom()

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (semi-det): {}%".format(100*o_win/total))
print("Wins, x (random): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (semi-det): 85%
Wins, x (random): 4%
Draws: 9%


### Train RL with random

In [5]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentExploreQ(policy_file="policy_random_o", train_prefix="random")
aix = AgentRandom()
tictactoe.train(aio=aio, aix=aix, turns=n)

File not available. Starting from the beginning.


In [6]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aix = AgentRandom()
aio = AgentExploreQ(policy_file="policy_random_o", train_prefix=None, exp_rate=0.0)

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (random): {}%".format(100*o_win/total))
print("Wins, x (RL): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (random): 87%
Wins, x (RL): 5%
Draws: 6%


In [7]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aix = AgentExploreQ(policy_file="policy_random_x", train_prefix="random")
aio = AgentRandom()
tictactoe.train(aio=aio, aix=aix, turns=n)

File not available. Starting from the beginning.


In [8]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aix = AgentRandom()
aio = AgentExploreQ(policy_file="policy_random_x", train_prefix=None, exp_rate=0.0)

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (RL): {}%".format(100*o_win/total))
print("Wins, x (random): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (RL): 77%
Wins, x (random): 18%
Draws: 4%


### Train RL with itself

In [9]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentExploreQ(policy_file="policy_rl_o", train_prefix="rl")
aix = AgentExploreQ(policy_file="policy_rl_x", train_prefix="rl")
tictactoe.train(aio=aio, aix=aix, turns=n)

File not available. Starting from the beginning.
File not available. Starting from the beginning.


#### RL vs random

In [10]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentRandom()
aix = AgentExploreQ(policy_file="policy_rl_x", train_prefix=None, exp_rate=0.0)

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (random): {}%".format(100*o_win/total))
print("Wins, x (RL): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (random): 37%
Wins, x (RL): 44%
Draws: 18%


In [11]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aix = AgentRandom()
aio = AgentExploreQ(policy_file="policy_rl_o", train_prefix=None, exp_rate=0.0)

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (RL): {}%".format(100*o_win/total))
print("Wins, x (random): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (RL): 81%
Wins, x (random): 10%
Draws: 7%


### Train against semi-deterministic

In [12]:
n = 3000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentExploreQ(policy_file="policy_semidem_o", train_prefix='semidem')
aix = AgentBlockWin()
tictactoe.train(aio=aio, aix=aix, turns=n)

File not available. Starting from the beginning.


In [13]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentExploreQ(policy_file="policy_semidem_o", exp_rate=0.0)
aix = AgentRandom()

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (RL): {}%".format(100*o_win/total))
print("Wins, x (random): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (RL): 88%
Wins, x (random): 4%
Draws: 7%


In [14]:
n = 3000

tictactoe = TicTacToe(board_size=3, win_len=3)
aix = AgentExploreQ(policy_file="policy_semidem_x", train_prefix='semidem')
aio = AgentRandom()
tictactoe.train(aio=aio, aix=aix, turns=n)

File not available. Starting from the beginning.


In [15]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aix = AgentExploreQ(policy_file="policy_semidem_x", exp_rate=0.0)
aio = AgentRandom()

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (random): {}%".format(100*o_win/total))
print("Wins, x (RL): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (random): 19%
Wins, x (RL): 62%
Draws: 18%


### Train against RL for longer

In [16]:
n = 3000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentExploreQ(policy_file="policy_rl_med_o", train_prefix="rl_med")
aix = AgentExploreQ(policy_file="policy_rl_med_x", train_prefix="rl_med")
tictactoe.train(aio=aio, aix=aix, turns=n)

File not available. Starting from the beginning.
File not available. Starting from the beginning.


In [17]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentRandom()
aix = AgentExploreQ(policy_file="policy_rl_med_x", train_prefix=None, exp_rate=0.0)

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (random): {}%".format(100*o_win/total))
print("Wins, x (RL): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (random): 21%
Wins, x (RL): 65%
Draws: 13%


In [18]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aix = AgentRandom()
aio = AgentExploreQ(policy_file="policy_rl_med_o", train_prefix=None, exp_rate=0.0)

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (RL): {}%".format(100*o_win/total))
print("Wins, x (random): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (RL): 89%
Wins, x (random): 4%
Draws: 6%


In [19]:
n = 10000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentExploreQ(policy_file="policy_rl_max_o", train_prefix="rl_max")
aix = AgentExploreQ(policy_file="policy_rl_max_x", train_prefix="rl_max")
tictactoe.train(aio=aio, aix=aix, turns=n)

File not available. Starting from the beginning.
File not available. Starting from the beginning.


In [20]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aix = AgentRandom()
aio = AgentExploreQ(policy_file="policy_rl_max_o", train_prefix=None, exp_rate=0.0)

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (RL): {}%".format(100*o_win/total))
print("Wins, x (random): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (RL): 94%
Wins, x (random): 0%
Draws: 4%


In [21]:
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
aio = AgentRandom()
aix = AgentExploreQ(policy_file="policy_rl_max_x", train_prefix=None, exp_rate=0.0)

o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        print('Error!')

total = o_win + x_win + draws
print("Wins, o (random): {}%".format(100*o_win/total))
print("Wins, x (RL): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (random): 8%
Wins, x (RL): 74%
Draws: 17%
