In [1]:
import torch, gym
from joblib import Parallel, delayed
import pandas as pd

from src.CNN import CNN

In [2]:
from warnings import filterwarnings

# Suppress every warning for the rest of the session (keeps cell output short,
# but also hides deprecation notices from the libraries in use).
filterwarnings("ignore")

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
def _select_action(player, go_env):
    """Pick the next action for ``player`` on ``go_env``.

    Args:
        player: Object whose ``forward(state)`` returns per-action scores
            (something supporting ``.detach().cpu().numpy()``), or ``None``
            to play a uniformly random action.
        go_env: The Go environment being played.

    Returns:
        The action to pass to ``go_env.step``.
    """
    if player is None:
        return go_env.uniform_random_action()

    # Local import: numpy is not imported at the top of the notebook.
    import numpy as np

    scores = player.forward(go_env.state()).detach().cpu().numpy()
    # Assumes valid_moves() is a 0/1 mask aligned with the model output, as the
    # elementwise use of it here requires -- TODO confirm against gym_go.
    legal = go_env.valid_moves()
    # Mask illegal moves with -inf instead of multiplying by the mask.
    # Multiplication turns illegal moves into 0, which wins the argmax whenever
    # every legal move has a negative score, so an illegal move would be played.
    return np.where(legal > 0, scores, -np.inf).argmax()


def play_game(black_player=None, white_player=None, max_moves=300):
    """Play one 5x5 Go game and return the environment's final reward.

    Args:
        black_player: Model moving first, or ``None`` for a random player.
        white_player: Model moving second, or ``None`` for a random player.
        max_moves: Maximum number of black/white move pairs to play.

    Returns:
        ``go_env.reward()`` once the game is done or ``max_moves`` rounds
        have been played. ``get_win_percent`` treats a positive value as a
        black win and a negative value as a white win.
    """
    go_env = gym.make('gym_go:go-v0', size=5, komi=0, reward_method='heuristic')
    go_env.reset()

    # Both models choose moves by argmax, so model-vs-model games start with
    # one random move per side (presumably to make repeated games differ).
    if black_player is not None and white_player is not None:
        go_env.step(go_env.uniform_random_action())
        go_env.step(go_env.uniform_random_action())

    for _ in range(max_moves):
        # Black's turn
        if go_env.done:
            break
        go_env.step(_select_action(black_player, go_env))

        # White's turn
        if go_env.done:
            break
        go_env.step(_select_action(white_player, go_env))
    return go_env.reward()

In [4]:
def get_win_percent(black_player=None, white_player=None, n_games=1000, max_moves=300, n_jobs=6):
    """Play ``n_games`` games in parallel and summarise the outcomes.

    Args:
        black_player: Model playing black, or ``None`` for a random player.
        white_player: Model playing white, or ``None`` for a random player.
        n_games: Number of games to play.
        max_moves: Per-game move-pair limit forwarded to ``play_game``.
        n_jobs: Number of joblib workers (defaults to the previous fixed 6).

    Returns:
        Tuple ``(black_win_pct, white_win_pct, draw_pct)`` in percent.
        Returns ``(0.0, 0.0, 0.0)`` when ``n_games`` is not positive.
    """
    # Nothing to play: avoid dividing by zero below.
    if n_games <= 0:
        return 0.0, 0.0, 0.0

    results = Parallel(n_jobs=n_jobs)(
        delayed(play_game)(black_player, white_player, max_moves) for _ in range(n_games)
    )

    black, white, draws = 0, 0, 0
    for res in results:
        # Positive reward counts as a black win, negative as a white win.
        if res > 0:
            black += 1
        elif res < 0:
            white += 1
        else:
            draws += 1
    return (black/n_games)*100, (white/n_games)*100, (draws/n_games)*100

In [5]:
# Load the five saved checkpoints (0-times.pth ... 4-times.pth) for evaluation.
models = []
for i in range(5):
    model = CNN()
    model.to(device)
    # map_location lets checkpoints saved on a GPU load on a CPU-only machine,
    # matching the CPU fallback used when `device` is chosen.
    model.load_state_dict(torch.load(f"src/models/1000-games/{i}-times.pth", map_location=device))
    # These models are only used for inference here, so switch off
    # training-time behaviour (dropout, batch-norm updates) if the CNN has any.
    model.eval()
    models.append(model)

In [6]:
# All-vs-all tournament: one entry per (black, white) pairing, ordered with the
# black model as the outer loop and the white model as the inner loop.
results = Parallel(n_jobs=6)(
    delayed(get_win_percent)(black_model, white_model)
    for black_model in models
    for white_model in models
)

In [None]:
# Baselines against a random opponent: each model as black, then each as white.
brandom = Parallel(n_jobs=3)(
    delayed(get_win_percent)(black_player=model) for model in models
)
wrandom = Parallel(n_jobs=3)(
    delayed(get_win_percent)(white_player=model) for model in models
)

In [7]:
# One row per pairing: [black win %, white win %, draw %].
array = [[res[0], res[1], res[2]] for res in results]

# Persist the table so the tournament does not have to be re-run.
df = pd.DataFrame(array)
df.to_excel("winrates.xlsx")

[(39.1, 58.9, 2.0), (38.800000000000004, 59.199999999999996, 2.0), (39.900000000000006, 58.699999999999996, 1.4000000000000001), (41.0, 56.699999999999996, 2.3), (38.1, 59.4, 2.5), (39.0, 59.4, 1.6), (36.0, 62.8, 1.2), (39.6, 58.199999999999996, 2.1999999999999997), (40.8, 57.099999999999994, 2.1), (37.4, 61.0, 1.6), (40.9, 56.99999999999999, 2.1), (37.7, 60.9, 1.4000000000000001), (41.199999999999996, 56.39999999999999, 2.4), (39.1, 59.099999999999994, 1.7999999999999998), (39.2, 59.199999999999996, 1.6), (37.7, 60.199999999999996, 2.1), (40.9, 56.89999999999999, 2.1999999999999997), (40.699999999999996, 57.4, 1.9), (38.3, 59.4, 2.3), (40.8, 56.89999999999999, 2.3), (40.400000000000006, 57.699999999999996, 1.9), (41.099999999999994, 57.4, 1.5), (44.2, 53.300000000000004, 2.5), (39.5, 57.99999999999999, 2.5), (36.8, 60.9, 2.3)]


In [1]:
# (black win %, white win %, draw %) triples copied from the printed output of
# the all-vs-all run, so this cell can be used without re-playing the games.
# Entries are in groups of five: index = black_model * 5 + white_model.
prev_results = [(39.1, 58.9, 2.0), 
                (38.800000000000004, 59.199999999999996, 2.0), 
                (39.900000000000006, 58.699999999999996, 1.4000000000000001), 
                (41.0, 56.699999999999996, 2.3), 
                (38.1, 59.4, 2.5), 
                (39.0, 59.4, 1.6), 
                (36.0, 62.8, 1.2), 
                (39.6, 58.199999999999996, 2.1999999999999997), 
                (40.8, 57.099999999999994, 2.1), 
                (37.4, 61.0, 1.6), 
                (40.9, 56.99999999999999, 2.1), 
                (37.7, 60.9, 1.4000000000000001), 
                (41.199999999999996, 56.39999999999999, 2.4), 
                (39.1, 59.099999999999994, 1.7999999999999998), 
                (39.2, 59.199999999999996, 1.6), 
                (37.7, 60.199999999999996, 2.1), 
                (40.9, 56.89999999999999, 2.1999999999999997), 
                (40.699999999999996, 57.4, 1.9), 
                (38.3, 59.4, 2.3), 
                (40.8, 56.89999999999999, 2.3), 
                (40.400000000000006, 57.699999999999996, 1.9), 
                (41.099999999999994, 57.4, 1.5), 
                (44.2, 53.300000000000004, 2.5), 
                (39.5, 57.99999999999999, 2.5), 
                (36.8, 60.9, 2.3)]

# Report the tournament as one group per black model, one line per white model.
for black_idx in range(5):
    print(f"Model {black_idx} playing as black vs:")
    row_start = black_idx * 5
    for white_idx, (black_pct, white_pct, draw_pct) in enumerate(prev_results[row_start:row_start + 5]):
        print(f"    Model {white_idx}: Black winrate: {round(black_pct, 2)}, White winrate: {round(white_pct, 2)}, Drawrate: {round(draw_pct, 2)}")

Model 0 playing as black vs:
    Model 0: Black winrate: 39.1, White winrate: 58.9, Drawrate: 2.0
    Model 1: Black winrate: 38.8, White winrate: 59.2, Drawrate: 2.0
    Model 2: Black winrate: 39.9, White winrate: 58.7, Drawrate: 1.4
    Model 3: Black winrate: 41.0, White winrate: 56.7, Drawrate: 2.3
    Model 4: Black winrate: 38.1, White winrate: 59.4, Drawrate: 2.5
Model 1 playing as black vs:
    Model 0: Black winrate: 39.0, White winrate: 59.4, Drawrate: 1.6
    Model 1: Black winrate: 36.0, White winrate: 62.8, Drawrate: 1.2
    Model 2: Black winrate: 39.6, White winrate: 58.2, Drawrate: 2.2
    Model 3: Black winrate: 40.8, White winrate: 57.1, Drawrate: 2.1
    Model 4: Black winrate: 37.4, White winrate: 61.0, Drawrate: 1.6
Model 2 playing as black vs:
    Model 0: Black winrate: 40.9, White winrate: 57.0, Drawrate: 2.1
    Model 1: Black winrate: 37.7, White winrate: 60.9, Drawrate: 1.4
    Model 2: Black winrate: 41.2, White winrate: 56.4, Drawrate: 2.4
    Model 3: Bla