In [5]:
import torch, gym, time
import numpy as np
import matplotlib.pyplot as plt
from joblib import Parallel, delayed

from src.CNN import CNN

In [6]:
# Run on the first CUDA GPU when one is visible to PyTorch, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [7]:
def _choose_action(player, go_env):
    """Return the next action for ``player`` on ``go_env``.

    A ``None`` player moves uniformly at random. Otherwise the output of
    ``player.forward`` for the current state is treated as one score per
    action and the index of the best-scoring valid action is returned.
    """
    if player is None:
        return go_env.uniform_random_action()

    scores = player.forward(go_env.state()).detach().cpu().numpy()
    # Mask invalid actions with -inf instead of multiplying by the mask:
    # multiplication leaves invalid actions at 0, which wins the argmax
    # whenever every valid action has a score <= 0.
    # Assumes valid_moves() is a 0/1 mask aligned with the scores, as the
    # original element-wise multiplication did.
    masked_scores = np.where(go_env.valid_moves() > 0, scores, -np.inf)
    return masked_scores.argmax()


def play_game(black_player=None, white_player=None, max_moves=300):
    """Play one 5x5 Go game and return the environment's final reward.

    Args:
        black_player: Model that moves first; its ``forward(state)`` output is
            used as per-action scores. ``None`` means uniformly random moves.
        white_player: Model that moves second, same convention as
            ``black_player``.
        max_moves: Maximum number of rounds. Each round is one black move
            followed by one white move, so up to ``2 * max_moves`` moves are
            played (not counting the random opening described below).

    Returns:
        ``go_env.reward()`` at the point the game ended or the round limit was
        reached. ``get_win_percent`` counts a positive value as a black win and
        a negative value as a white win.
    """
    go_env = gym.make('gym_go:go-v0', size=5, komi=0, reward_method='heuristic')
    go_env.reset()

    # When both sides are models, start with one random move each. Both models
    # pick moves by argmax, so this is presumably here to keep repeated games
    # between the same pair from being identical.
    if black_player is not None and white_player is not None:
        for _ in range(2):
            go_env.step(go_env.uniform_random_action())

    for _ in range(max_moves):
        # Black moves first in every round; stop as soon as the game is over.
        for player in (black_player, white_player):
            if go_env.done:
                return go_env.reward()
            go_env.step(_choose_action(player, go_env))
    return go_env.reward()

In [8]:
def get_win_percent(black_player=None, white_player=None, n_games=1000, max_moves=300):
    """Play ``n_games`` games sequentially and report outcome percentages.

    Args:
        black_player: Passed through to ``play_game`` as the black player.
        white_player: Passed through to ``play_game`` as the white player.
        n_games: Number of games to play; must be positive.
        max_moves: Passed through to ``play_game``.

    Returns:
        A ``(black, white, draws)`` tuple of percentages in the range 0-100.
        A game counts for black when ``play_game`` returns a positive value,
        for white when it returns a negative value, and as a draw otherwise.

    Raises:
        ValueError: If ``n_games`` is not positive (the percentages would
            otherwise divide by zero or come out negative).
    """
    if n_games <= 0:
        raise ValueError(f"n_games must be a positive integer, got {n_games!r}")

    black, white, draws = 0, 0, 0
    for _ in range(n_games):
        res = play_game(black_player, white_player, max_moves)
        if res > 0:
            black += 1
        elif res < 0:
            white += 1
        else:
            draws += 1
    return (black/n_games)*100, (white/n_games)*100, (draws/n_games)*100

In [9]:
# Load the saved CNN checkpoints that will play against each other.
# NOTE(review): range(3, 5) loads only checkpoints 3 and 4 - confirm this is
# the intended set of models.
models = []
for i in range(3, 5):
    model = CNN()
    model.to(device)
    # map_location remaps the stored tensors onto `device`, so checkpoints
    # saved from a GPU still load when this notebook falls back to the CPU.
    model.load_state_dict(
        torch.load(f"src/models/1000-games/{i}-times.pth", map_location=device)
    )
    # The models are only used for inference here; eval() puts any
    # dropout / batch-norm layers into evaluation mode.
    model.eval()
    models.append(model)

In [21]:
# Round-robin tournament: every loaded model plays black against every loaded
# model (itself included) as white. results[b][w] holds the
# (black %, white %, draw %) tuple for that pairing; unfilled cells stay 0.
#
# The grid is never smaller than the original 5x5, so cells that index it
# keep working, but it grows when more than five models are loaded (a fixed
# 5x5 grid raised IndexError in that case).
grid_size = max(5, len(models))
results = np.zeros((grid_size, grid_size), dtype=object)

for black_index, black_player in enumerate(models):
    print(f"Model {black_index}:")
    for white_index, white_player in enumerate(models):
        # perf_counter is a monotonic clock meant for measuring durations.
        start = time.perf_counter()
        (black, white, draw) = get_win_percent(black_player, white_player, 167)
        stop = time.perf_counter()
        results[black_index][white_index] = black, white, draw
        print(f"  {white_index}: b={black:2.2f}%, w={white:2.2f}%, d={draw:2.2f}%, took {stop-start:4.2f} seconds")

Model 0:
  0: b=39.10%, w=59.70%, d=1.20%, took 33.70 seconds
  1: b=39.50%, w=58.30%, d=2.20%, took 45.28 seconds
  2: b=36.40%, w=62.10%, d=1.50%, took 36.92 seconds
  3: b=39.70%, w=58.90%, d=1.40%, took 29.38 seconds
  4: b=39.20%, w=59.00%, d=1.80%, took 30.93 seconds
Model 1:
  0: b=40.20%, w=58.20%, d=1.60%, took 30.08 seconds
  1: b=40.70%, w=58.30%, d=1.00%, took 31.88 seconds
  2: b=38.70%, w=60.20%, d=1.10%, took 33.43 seconds
  3: b=38.60%, w=60.70%, d=0.70%, took 30.33 seconds
  4: b=38.60%, w=59.90%, d=1.50%, took 31.57 seconds
Model 2:
  0: b=41.10%, w=57.20%, d=1.70%, took 32.67 seconds
  1: b=42.60%, w=55.90%, d=1.50%, took 32.73 seconds
  2: b=39.60%, w=58.80%, d=1.60%, took 31.60 seconds
  3: b=41.30%, w=57.20%, d=1.50%, took 31.09 seconds
  4: b=40.40%, w=57.70%, d=1.90%, took 32.66 seconds
Model 3:
  0: b=41.80%, w=57.00%, d=1.20%, took 30.90 seconds
  1: b=39.10%, w=58.80%, d=2.10%, took 35.45 seconds
  2: b=38.60%, w=59.70%, d=1.70%, took 31.77 seconds


In [1]:
# Show the (black %, white %, draw %) entry for model 0 as black against
# model 3 as white. `results` only exists after the tournament cell has been
# run in the same kernel; on a fresh kernel this raises NameError.
print(results[0, 3])

NameError: name 'results' is not defined