In [5]:
import torch, gym, time
import numpy as np
import matplotlib.pyplot as plt
from joblib import Parallel, delayed

from src.CNN import CNN

In [6]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [7]:
def _choose_move(player, go_env):
    """Return the legal action that `player` scores highest in the current position."""
    # Inference only: no autograd graph is needed to pick a move.
    with torch.no_grad():
        scores = player.forward(go_env.state()).detach().cpu().numpy()

    # Mask illegal actions with -inf rather than multiplying by the 0/1 mask:
    # multiplication turns illegal entries into 0, which beats every legal
    # entry whenever the legal scores are all negative, so an illegal move
    # would be selected.
    # NOTE(review): assumes valid_moves() is a 0/1 vector aligned with the
    # last axis of the model output -- confirm against the CNN definition.
    legal = np.asarray(go_env.valid_moves()) > 0
    masked_scores = np.where(legal, scores, -np.inf)
    return int(masked_scores.argmax())


def play_game(black_player=None, white_player=None, max_moves=300):
    """Play one game on a fresh 5x5 'gym_go:go-v0' environment and return its reward.

    Args:
        black_player: Object exposing ``forward(state)`` that returns a tensor
            of per-action scores, or None to let black play uniformly random
            moves.
        white_player: Same as ``black_player`` but for white.
        max_moves: Maximum number of rounds (one black move followed by one
            white move) played before the game is cut off.

    Returns:
        Whatever ``go_env.reward()`` reports once the game has ended or the
        round limit is reached. (``get_win_percent`` in this notebook counts a
        positive value as a black win and a negative value as a white win.)
    """
    go_env = gym.make('gym_go:go-v0', size=5, komi=0, reward_method='heuristic')
    try:
        go_env.reset()

        # When both sides are models, start from two random moves --
        # presumably so repeated games between the same argmax players do not
        # all follow the same line.
        if black_player is not None and white_player is not None:
            go_env.step(go_env.uniform_random_action())
            go_env.step(go_env.uniform_random_action())

        # Black moves on even plies, white on odd plies.
        turn_order = (black_player, white_player)
        for ply in range(2 * max_moves):
            if go_env.done:
                break
            player = turn_order[ply % 2]
            if player is None:
                action = go_env.uniform_random_action()
            else:
                action = _choose_move(player, go_env)
            go_env.step(action)

        return go_env.reward()
    finally:
        # Release the environment even if a player or the env raised mid-game.
        go_env.close()

In [8]:
def get_win_percent(black_player=None, white_player=None, n_games=1000, max_moves=300):
    """Play ``n_games`` games and report how often each outcome occurred.

    Args:
        black_player: Forwarded to ``play_game`` as the black player.
        white_player: Forwarded to ``play_game`` as the white player.
        n_games: Number of games to play; must be at least 1.
        max_moves: Forwarded to ``play_game`` as the per-game round limit.

    Returns:
        Tuple ``(black_pct, white_pct, draw_pct)`` of percentages in [0, 100].
        A game counts for black when ``play_game`` returns a positive value,
        for white when it returns a negative value, and as a draw otherwise.

    Raises:
        ValueError: If ``n_games`` is less than 1 (previously 0 surfaced as a
            bare ZeroDivisionError and negative values returned ``-0.0``).
    """
    if n_games < 1:
        raise ValueError(f"n_games must be a positive integer, got {n_games!r}")

    black_wins = white_wins = draws = 0
    for _ in range(n_games):
        outcome = play_game(black_player, white_player, max_moves)
        if outcome > 0:
            black_wins += 1
        elif outcome < 0:
            white_wins += 1
        else:
            draws += 1

    return tuple((count / n_games) * 100 for count in (black_wins, white_wins, draws))

In [9]:
# Load the saved checkpoints that will play against each other.
models = []
for i in range(3, 5):
    model = CNN()
    model.to(device)
    # map_location lets checkpoints saved on a GPU load on a CPU-only machine
    # (and vice versa) instead of failing during deserialization.
    state_dict = torch.load(f"src/models/1000-games/{i}-times.pth", map_location=device)
    model.load_state_dict(state_dict)
    # The models are only used for evaluation here, so switch any
    # dropout / batch-norm layers to inference behaviour.
    model.eval()
    models.append(model)

In [10]:
# Round-robin evaluation: every loaded model plays black against every loaded
# model (itself included) as white.
GAMES_PER_MATCHUP = 167

# Never smaller than the original 5x5 grid, so cells that index fixed positions
# keep seeing the same shape; grows when more than five models are loaded,
# which previously raised IndexError part-way through a long run.
grid_size = max(5, len(models))
# dtype=tuple makes this an object array: each filled cell holds a
# (black %, white %, draw %) tuple, unfilled cells stay 0.
results = np.zeros((grid_size, grid_size), dtype=tuple)

for black_index, black_player in enumerate(models):
    print(f"Model {black_index}:")
    for white_index, white_player in enumerate(models):
        # perf_counter is monotonic, so the elapsed time cannot be skewed by
        # system clock adjustments.
        start = time.perf_counter()
        (black, white, draw) = get_win_percent(black_player, white_player, GAMES_PER_MATCHUP)
        stop = time.perf_counter()
        results[black_index][white_index] = black, white, draw
        print(f"  {white_index}: b={black:2.2f}%, w={white:2.2f}%, d={draw:2.2f}%, took {stop-start:4.2f} seconds")

Model 0:


  f"It seems a Box observation space is an image but the `dtype` is not `np.uint8`, actual type: {observation_space.dtype}. "
  "It seems a Box observation space is an image but the upper and lower bounds are not in [0, 255]. "
  "Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future."
  "Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future."
  "Future gym versions will require that `Env.reset` can be passed a `seed` instead of using `Env.seed` for resetting the environment random number generator."
  "Future gym versions will require that `Env.reset` can be passed `return_info` to return information from the environment resetting."
  "Future gym versions will require that `Env.reset` can be passed `options` to allow th

  0: b=36.53%, w=62.87%, d=0.60%, took 25.98 seconds
  1: b=38.92%, w=59.28%, d=1.80%, took 20.66 seconds
  2: b=32.93%, w=65.87%, d=1.20%, took 19.97 seconds
  3: b=44.31%, w=54.49%, d=1.20%, took 21.01 seconds
  4: b=37.13%, w=62.87%, d=0.00%, took 20.42 seconds
Model 1:
  0: b=41.92%, w=58.08%, d=0.00%, took 20.19 seconds


In [1]:
# Entry stored for model 0 playing black against model 3 playing white.
# `results` only exists after the tournament cell has run in this kernel;
# on a fresh kernel this line raises NameError.
print(results[0, 3])

NameError: name 'results' is not defined