In [1]:
import numpy as np
import random

from ttt import TicTacToe, TRand, tAtIt, RLttt

### Random vs random

In [2]:
# Benchmark: a TRand player on each side, n games played with gui_on=False.
n = 1000

tictactoe = TicTacToe(win_len=3, board_size=3)
aio = TRand()
aix = TRand()

# Outcome counters for the n games.
o_win = x_win = draws = 0

for game_no in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    # Result codes as tallied here: -2 is a draw, -1 a win for o, 1 a win for x.
    if w == -2:
        draws += 1
    elif w == -1:
        o_win += 1
    elif w == 1:
        x_win += 1
    else:
        # Any other return value is reported and left out of the totals.
        print('Error!')

# Percentages are relative to the games that produced a recognised result.
total = o_win + x_win + draws
for template, count in (("Wins, o: {}%", o_win),
                        ("Wins, x: {}%", x_win),
                        ("Draws: {}%", draws)):
    print(template.format(100*count/total))

Wins, o: 57%
Wins, x: 29%
Draws: 12%


### Random vs semi-deterministic

In [3]:
# Benchmark: TRand plays 'o' against the semi-deterministic tAtIt player on 'x'.
n = 1000

tictactoe = TicTacToe(win_len=3, board_size=3)
aio = TRand()
aix = tAtIt()

# Outcome counters for the n games.
o_win = x_win = draws = 0

for game_no in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    # Result codes as tallied here: -2 is a draw, -1 a win for o, 1 a win for x.
    if w == -2:
        draws += 1
    elif w == -1:
        o_win += 1
    elif w == 1:
        x_win += 1
    else:
        # Any other return value is reported and left out of the totals.
        print('Error!')

# Percentages are relative to the games that produced a recognised result.
total = o_win + x_win + draws
for template, count in (("Wins, o (random): {}%", o_win),
                        ("Wins, x (semi-det): {}%", x_win),
                        ("Draws: {}%", draws)):
    print(template.format(100*count/total))

Wins, o (random): 14%
Wins, x (semi-det): 62%
Draws: 22%


In [4]:
# Benchmark: the semi-deterministic tAtIt player on 'o' against TRand on 'x'.
n = 1000

tictactoe = TicTacToe(win_len=3, board_size=3)
aio = tAtIt()
aix = TRand()

# Outcome counters for the n games.
o_win = x_win = draws = 0

for game_no in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    # Result codes as tallied here: -2 is a draw, -1 a win for o, 1 a win for x.
    if w == -2:
        draws += 1
    elif w == -1:
        o_win += 1
    elif w == 1:
        x_win += 1
    else:
        # Any other return value is reported and left out of the totals.
        print('Error!')

# Percentages are relative to the games that produced a recognised result.
total = o_win + x_win + draws
for template, count in (("Wins, o (semi-det): {}%", o_win),
                        ("Wins, x (random): {}%", x_win),
                        ("Draws: {}%", draws)):
    print(template.format(100*count/total))

Wins, o (semi-det): 86%
Wins, x (random): 3%
Draws: 9%


### Train RL against itself

In [5]:
# Self-play training: one RLttt agent per side, both using the "rl" train prefix,
# trained for n turns.
n = 1000

tictactoe = TicTacToe(win_len=3, board_size=3)
aio = RLttt(
    policy_file="policy_rl_o",
    train_prefix="rl",
)
aix = RLttt(
    policy_file="policy_rl_x",
    train_prefix="rl",
)
tictactoe.train(turns=n, aio=aio, aix=aix)

File not available. Starting from the beginning.
File not available. Starting from the beginning.


#### RL vs random

In [6]:
# Benchmark: TRand on 'o' against the RLttt agent loaded from "policy_rl_x" on 'x'.
n = 1000

tictactoe = TicTacToe(win_len=3, board_size=3)
aio = TRand()
# exp_rate=0.0 presumably disables exploration for evaluation -- confirm in RLttt.
aix = RLttt(
    policy_file="policy_rl_x",
    train_prefix=None,
    exp_rate=0.0,
)

# Outcome counters for the n games.
o_win = x_win = draws = 0

for game_no in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    # Result codes as tallied here: -2 is a draw, -1 a win for o, 1 a win for x.
    if w == -2:
        draws += 1
    elif w == -1:
        o_win += 1
    elif w == 1:
        x_win += 1
    else:
        # Any other return value is reported and left out of the totals.
        print('Error!')

# Percentages are relative to the games that produced a recognised result.
total = o_win + x_win + draws
for template, count in (("Wins, o (random): {}%", o_win),
                        ("Wins, x (RL): {}%", x_win),
                        ("Draws: {}%", draws)):
    print(template.format(100*count/total))

Wins, o (random): 37%
Wins, x (RL): 48%
Draws: 13%


In [7]:
# Benchmark: the RLttt agent loaded from "policy_rl_o" on 'o' against TRand on 'x'.
n = 1000

tictactoe = TicTacToe(win_len=3, board_size=3)
aix = TRand()
# exp_rate=0.0 presumably disables exploration for evaluation -- confirm in RLttt.
aio = RLttt(
    policy_file="policy_rl_o",
    train_prefix=None,
    exp_rate=0.0,
)

# Outcome counters for the n games.
o_win = x_win = draws = 0

for game_no in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    # Result codes as tallied here: -2 is a draw, -1 a win for o, 1 a win for x.
    if w == -2:
        draws += 1
    elif w == -1:
        o_win += 1
    elif w == 1:
        x_win += 1
    else:
        # Any other return value is reported and left out of the totals.
        print('Error!')

# Percentages are relative to the games that produced a recognised result.
total = o_win + x_win + draws
for template, count in (("Wins, o (RL): {}%", o_win),
                        ("Wins, x (random): {}%", x_win),
                        ("Draws: {}%", draws)):
    print(template.format(100*count/total))

Wins, o (RL): 81%
Wins, x (random): 9%
Draws: 9%


### Train against semi-deterministic

In [8]:
# Training: an RLttt agent on 'o' (train prefix 'semidem') is trained for n turns
# against the semi-deterministic tAtIt player on 'x'.
n = 3000

tictactoe = TicTacToe(win_len=3, board_size=3)
aio = RLttt(
    policy_file="policy_semidem_o",
    train_prefix='semidem',
)
aix = tAtIt()
tictactoe.train(turns=n, aio=aio, aix=aix)

File not available. Starting from the beginning.


In [9]:
# Benchmark: the RLttt agent loaded from "policy_semidem_o" on 'o' against TRand on 'x'.
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
# train_prefix=None is passed explicitly, as in the other evaluation cells of this
# notebook, so this benchmark does not depend on RLttt's default for that argument
# (the default is not visible here -- confirm in ttt.RLttt).
# exp_rate=0.0 presumably disables exploration for evaluation -- confirm in RLttt.
aio = RLttt(policy_file="policy_semidem_o", train_prefix=None, exp_rate=0.0)
aix = TRand()

# Outcome counters for the n games.
o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    # Result codes as tallied here: 1 is a win for x, -1 a win for o, -2 a draw.
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        # Any other return value is reported and left out of the totals.
        print('Error!')

# Percentages are relative to the games that produced a recognised result.
total = o_win + x_win + draws
print("Wins, o (RL): {}%".format(100*o_win/total))
print("Wins, x (random): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (RL): 70%
Wins, x (random): 15%
Draws: 14%


In [10]:
# Training: an RLttt agent on 'x' (train prefix 'semidem') is trained for n turns
# against the semi-deterministic tAtIt player on 'o'.
n = 3000

tictactoe = TicTacToe(win_len=3, board_size=3)
aix = RLttt(
    policy_file="policy_semidem_x",
    train_prefix='semidem',
)
aio = tAtIt()
tictactoe.train(turns=n, aio=aio, aix=aix)

File not available. Starting from the beginning.


In [11]:
# Benchmark: TRand on 'o' against the RLttt agent loaded from "policy_semidem_x" on 'x'.
n = 1000

tictactoe = TicTacToe(board_size=3, win_len=3)
# train_prefix=None is passed explicitly, as in the other evaluation cells of this
# notebook, so this benchmark does not depend on RLttt's default for that argument
# (the default is not visible here -- confirm in ttt.RLttt).
# exp_rate=0.0 presumably disables exploration for evaluation -- confirm in RLttt.
aix = RLttt(policy_file="policy_semidem_x", train_prefix=None, exp_rate=0.0)
aio = TRand()

# Outcome counters for the n games.
o_win = 0
x_win = 0
draws = 0

for i in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    # Result codes as tallied here: 1 is a win for x, -1 a win for o, -2 a draw.
    if w == 1:
        x_win += 1
    elif w == -1:
        o_win += 1
    elif w == -2:
        draws += 1
    else:
        # Any other return value is reported and left out of the totals.
        print('Error!')

# Percentages are relative to the games that produced a recognised result.
total = o_win + x_win + draws
print("Wins, o (random): {}%".format(100*o_win/total))
print("Wins, x (RL): {}%".format(100*x_win/total))
print("Draws: {}%".format(100*draws/total))

Wins, o (random): 20%
Wins, x (RL): 48%
Draws: 31%


### Train against RL for longer

In [12]:
# Self-play training: one RLttt agent per side, both using the "rl_med" train prefix,
# trained for n turns.
n = 3000

tictactoe = TicTacToe(win_len=3, board_size=3)
aio = RLttt(
    policy_file="policy_rl_med_o",
    train_prefix="rl_med",
)
aix = RLttt(
    policy_file="policy_rl_med_x",
    train_prefix="rl_med",
)
tictactoe.train(turns=n, aio=aio, aix=aix)

File not available. Starting from the beginning.
File not available. Starting from the beginning.


In [13]:
# Benchmark: TRand on 'o' against the RLttt agent loaded from "policy_rl_med_x" on 'x'.
n = 1000

tictactoe = TicTacToe(win_len=3, board_size=3)
aio = TRand()
# exp_rate=0.0 presumably disables exploration for evaluation -- confirm in RLttt.
aix = RLttt(
    policy_file="policy_rl_med_x",
    train_prefix=None,
    exp_rate=0.0,
)

# Outcome counters for the n games.
o_win = x_win = draws = 0

for game_no in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    # Result codes as tallied here: -2 is a draw, -1 a win for o, 1 a win for x.
    if w == -2:
        draws += 1
    elif w == -1:
        o_win += 1
    elif w == 1:
        x_win += 1
    else:
        # Any other return value is reported and left out of the totals.
        print('Error!')

# Percentages are relative to the games that produced a recognised result.
total = o_win + x_win + draws
for template, count in (("Wins, o (random): {}%", o_win),
                        ("Wins, x (RL): {}%", x_win),
                        ("Draws: {}%", draws)):
    print(template.format(100*count/total))

Wins, o (random): 22%
Wins, x (RL): 60%
Draws: 16%


In [14]:
# Benchmark: the RLttt agent loaded from "policy_rl_med_o" on 'o' against TRand on 'x'.
n = 1000

tictactoe = TicTacToe(win_len=3, board_size=3)
aix = TRand()
# exp_rate=0.0 presumably disables exploration for evaluation -- confirm in RLttt.
aio = RLttt(
    policy_file="policy_rl_med_o",
    train_prefix=None,
    exp_rate=0.0,
)

# Outcome counters for the n games.
o_win = x_win = draws = 0

for game_no in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    # Result codes as tallied here: -2 is a draw, -1 a win for o, 1 a win for x.
    if w == -2:
        draws += 1
    elif w == -1:
        o_win += 1
    elif w == 1:
        x_win += 1
    else:
        # Any other return value is reported and left out of the totals.
        print('Error!')

# Percentages are relative to the games that produced a recognised result.
total = o_win + x_win + draws
for template, count in (("Wins, o (RL): {}%", o_win),
                        ("Wins, x (random): {}%", x_win),
                        ("Draws: {}%", draws)):
    print(template.format(100*count/total))

Wins, o (RL): 89%
Wins, x (random): 5%
Draws: 5%


In [15]:
# Self-play training: one RLttt agent per side, both using the "rl_max" train prefix,
# trained for n turns.
n = 10000

tictactoe = TicTacToe(win_len=3, board_size=3)
aio = RLttt(
    policy_file="policy_rl_max_o",
    train_prefix="rl_max",
)
aix = RLttt(
    policy_file="policy_rl_max_x",
    train_prefix="rl_max",
)
tictactoe.train(turns=n, aio=aio, aix=aix)

File not available. Starting from the beginning.
File not available. Starting from the beginning.


In [16]:
# Benchmark: the RLttt agent loaded from "policy_rl_max_o" on 'o' against TRand on 'x'.
n = 1000

tictactoe = TicTacToe(win_len=3, board_size=3)
aix = TRand()
# exp_rate=0.0 presumably disables exploration for evaluation -- confirm in RLttt.
aio = RLttt(
    policy_file="policy_rl_max_o",
    train_prefix=None,
    exp_rate=0.0,
)

# Outcome counters for the n games.
o_win = x_win = draws = 0

for game_no in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    # Result codes as tallied here: -2 is a draw, -1 a win for o, 1 a win for x.
    if w == -2:
        draws += 1
    elif w == -1:
        o_win += 1
    elif w == 1:
        x_win += 1
    else:
        # Any other return value is reported and left out of the totals.
        print('Error!')

# Percentages are relative to the games that produced a recognised result.
total = o_win + x_win + draws
for template, count in (("Wins, o (RL): {}%", o_win),
                        ("Wins, x (random): {}%", x_win),
                        ("Draws: {}%", draws)):
    print(template.format(100*count/total))

Wins, o (RL): 98%
Wins, x (random): 0%
Draws: 1%


In [17]:
# Benchmark: TRand on 'o' against the RLttt agent loaded from "policy_rl_max_x" on 'x'.
n = 1000

tictactoe = TicTacToe(win_len=3, board_size=3)
aio = TRand()
# exp_rate=0.0 presumably disables exploration for evaluation -- confirm in RLttt.
aix = RLttt(
    policy_file="policy_rl_max_x",
    train_prefix=None,
    exp_rate=0.0,
)

# Outcome counters for the n games.
o_win = x_win = draws = 0

for game_no in range(n):
    w = tictactoe.play(aio=aio, aix=aix, gui_on=False)
    # Result codes as tallied here: -2 is a draw, -1 a win for o, 1 a win for x.
    if w == -2:
        draws += 1
    elif w == -1:
        o_win += 1
    elif w == 1:
        x_win += 1
    else:
        # Any other return value is reported and left out of the totals.
        print('Error!')

# Percentages are relative to the games that produced a recognised result.
total = o_win + x_win + draws
for template, count in (("Wins, o (random): {}%", o_win),
                        ("Wins, x (RL): {}%", x_win),
                        ("Draws: {}%", draws)):
    print(template.format(100*count/total))

Wins, o (random): 5%
Wins, x (RL): 80%
Draws: 14%
