In [82]:
from src.CNN import CNN
import torch, gym

In [83]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

from warnings import filterwarnings
filterwarnings("ignore")

In [88]:
def play_game(players, max_moves):
    go_env = gym.make('gym_go:go-v0', size=5, komi=0, reward_method='heuristic')
    go_env.reset()

    go_env.step(go_env.uniform_random_action())
    go_env.step(go_env.uniform_random_action())

    for _ in range(max_moves):
        # Player 1's turn
        if go_env.done: break
        if players[0]: 
            moves = players[0].forward(go_env.state()).detach().cpu().numpy() * go_env.valid_moves()
            go_env.step(moves.argmax())
        else: go_env.step(go_env.uniform_random_action())

        # Player 2's turn
        if go_env.done: break
        if players[1]: 
            moves = players[1].forward(go_env.state()).detach().cpu().numpy() * go_env.valid_moves()
            go_env.step(moves.argmax())
        else: go_env.step(go_env.uniform_random_action())
    # go_env.render("terminal")

    return go_env.reward()

def get_winrate(players, max_moves=300, n_games=1000):
    wins = [0.]*2
    for i in range(n_games):
        print(f"Playing game {i}", end="\r")
        reward = play_game(players, max_moves)
        if reward > 0: wins[0] += 1.
        elif reward < 0: wins[1] += 1.
    wins[0] = (wins[0]/n_games) * 100
    wins[1] = (wins[1]/n_games) * 100
    return wins


In [85]:
models = []
for i in range(5):
    model = CNN()
    model.to(device)
    model.load_state_dict(torch.load(f"models/{i}-times.pth"))
    models.append(model)

In [86]:
for i in range(len(models)):
    black = get_winrate([models[i], None], n_games=100)
    white = get_winrate([None, models[i]], n_games=100)
    print(f"Model {i}: black = {black[0]:3.4f}%, white = {white[1]:3.4f}%")

	0 1 2 3 4 e 0
0	╔═╤═╤═╤═╗
1	╟─┼─┼─┼─╢
2	╟─┼─┼─┼─╢
3	╟─┼─┼─┼─╢
4	╚═╧═╧═╧═╝
	Turn: BLACK, Game State (ONGOING|PASSED|END): END
	Black Area: 0, White Area: 0

	0 1 2 3 4 e 1
0	●═●═○═○═○
1	●─○─○─┼─○
2	●─●─●─●─●
3	●─●─●─○─○
4	●═╧═○═○═○
	Turn: BLACK, Game State (ONGOING|PASSED|END): END
	Black Area: 11, White Area: 12

	0 1 2 3 4 e 2
0	○═○═○═○═○
1	╟─○─○─○─●
2	○─○─○─○─╢
3	○─○─○─○─○
4	○═╧═○═○═○
	Turn: WHITE, Game State (ONGOING|PASSED|END): END
	Black Area: 23, White Area: 1

	0 1 2 3 4 e 3
0	○═○═○═○═○
1	╟─○─○─○─○
2	○─○─○─○─╢
3	○─○─○─○─○
4	●═╧═○═○═○
	Turn: WHITE, Game State (ONGOING|PASSED|END): END
	Black Area: 23, White Area: 1

	0 1 2 3 4 e 4
0	○═○═○═○═●
1	╟─○─●─●─●
2	○─○─●─●─●
3	○─┼─●─○─○
4	●═●═●═○═╝
	Turn: WHITE, Game State (ONGOING|PASSED|END): END
	Black Area: 13, White Area: 11

	0 1 2 3 4 e 5
0	○═○═○═○═○
1	╟─○─○─○─○
2	○─○─○─○─╢
3	○─●─○─○─○
4	○═╧═○═○═○
	Turn: WHITE, Game State (ONGOING|PASSED|END): END
	Black Area: 23, White Area: 1

	0 1 2 3 4 e 6
0	○═○═○═○═○
1	╟─○─○─○─○
2	○─○─○─○─○


KeyboardInterrupt: 

In [92]:
import random

for i in range(len(models)):
    black = get_winrate([models[i], models[4]], n_games=100)
    white = get_winrate([models[i], models[4]], n_games=100)
    print(f"Model {i}: black = {black[0]:3.4f}%, white = {white[1]:3.4f}%")

Model 0: black = 37.0000%, white = 56.0000%
Model 1: black = 34.0000%, white = 60.0000%
Model 2: black = 38.0000%, white = 59.0000%
Model 3: black = 49.0000%, white = 53.0000%
Playing game 57

KeyboardInterrupt: 

In [None]:
wins = get_winrate([models[4], None], n_games=10_000)
print("Model as black:")
print(f"  Black won {wins[0]:3.4f}%")
print(f"  White won {wins[1]:3.4f}%")

wins = get_winrate([None, models[4]], n_games=10_000)
print("Model as white:")
print(f"  Black won {wins[0]:3.4f}%")
print(f"  White won {wins[1]:3.4f}%")

Playing game 1902

KeyboardInterrupt: 

In [95]:
weak_model = CNN()
weak_model.to(device)
weak_model.load_state_dict(torch.load(f"100-games/0-times.pth"))

black_wins = 0
white_wins = 0
for _ in range(1000):
    reward = play_game([weak_model, None], 300)
    if reward > 0: black_wins += 1
    reward = play_game([None, weak_model], 300)
    if reward < 0: white_wins += 1
print(f"Black winrate: {black_wins/1000}")
print(f"White winrate: {white_wins/1000}")

Black winrate: 0.0
White winrate: 0.0
