# Testing the different models produced in this project

In [2]:
from joblib import Parallel, delayed
import gym
import torch

from src.CNN import CNN
from src.MCTS import Monte_Carlo_Tree_Search

from gym_go.gogame import random_action, turn
from copy import deepcopy
import gym

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
BOARD_SIZE = 5
from warnings import filterwarnings
filterwarnings("ignore")

### Testing win rates for MCTS vs Random

In [3]:
def mcts_black_random_white(mcts : Monte_Carlo_Tree_Search, go_env: gym.Env):
    go_env.reset()
    done = go_env.done
    turn_nr = 0
    while not done:
        node = mcts.get_move_from_env(go_env)
        _, _, done, _ = go_env.step(node.action)
        turn_nr += 1

        if done:
            continue

        action = random_action(go_env.state())
        _, _, done, _ = go_env.step(action)

        if turn_nr > 300:
            break
    
    return go_env

def random_black_mcts_white(mcts : Monte_Carlo_Tree_Search, go_env: gym.Env):
    go_env.reset()
    done = go_env.done
    turn_nr = 0
    while not done:
        action = random_action(go_env.state())
        _, _, done, _ = go_env.step(action)

        if done:
            continue

        node = mcts.get_move_from_env(go_env)
        _, _, done, _ = go_env.step(node.action)
        turn_nr += 1
        
        if turn_nr > 300:
            break
    
    return go_env

In [4]:
def play_black_game():
    mcts_test = Monte_Carlo_Tree_Search(BOARD_SIZE, None)
    env = mcts_black_random_white(mcts_test, deepcopy(mcts_test.env))
    if env.reward() > 0:
        return 1
    return 0

def play_white_game():
    mcts_test = Monte_Carlo_Tree_Search(BOARD_SIZE, None)
    env = random_black_mcts_white(mcts_test, deepcopy(mcts_test.env))
    if env.reward() < 0:
        return 1
    return 0

games = 100

mcts_black_wins = Parallel(n_jobs=4)(delayed(play_black_game)() for _ in range(games))
print("Win rate as black:", ((sum(mcts_black_wins) / games) * 100), "%")
mcts_white_wins = Parallel(n_jobs=4)(delayed(play_white_game)() for _ in range(games))
print("Win rate as white:", ((sum(mcts_white_wins) / games) * 100), "%")

Win rate as black: 0.0 %
Win rate as white: 90.0 %


# Testing Convolutional Neural Network vs random

In [4]:
players = []
for i in range(5):
    player = CNN()
    player.to(device)
    player.load_state_dict(torch.load(f"src/models/1000-games/{i}-times.pth"))
    players.append(player)
print(players)

[CNN(
  (conv1): Conv2d(6, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=128, out_features=26, bias=True)
), CNN(
  (conv1): Conv2d(6, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=128, out_features=26, bias=True)
), CNN(
  (conv1): Conv2d(6, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=128, out_features=26, bias=True)
), CNN(
  (conv1): Conv2d(6, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1

In [5]:
def play_game_no_render(model : CNN, go_env: gym.Env):
    go_env.reset()
    done = go_env.done
    turn_nr = 0
    while not done:
        action = random_action(go_env.state())
        _, _, done, _ = go_env.step(action)

        if done: break
        weights = model.forward(go_env.state()).detach().cpu().numpy()
        weights = weights*go_env.valid_moves()
        _, _, done, _ = go_env.step(weights.argmax())

        turn_nr += 1
        if turn_nr > 300: break
    return go_env

model_wins = 0

for i in range(1000):
    env = play_game_no_render(players[4], gym.make('gym_go:go-v0', size=BOARD_SIZE, komi=0, reward_method='heuristic'))
    if not env.done:
        print("Game stopped after 300 turns: \nIt was resigned and is a draw.")
        continue

    if env.reward() > 0: model_wins += 1
    if env.reward() == 0: print("It's a draw!")

1

KeyboardInterrupt: 

In [None]:
print(f"Model won {(model_wins/1000)*100}%")

Model won 16.400000000000002%


# Testing model training levels vs each other

In [None]:
# #!/usr/bin/env python
# # coding: utf-8

# # ignore deprecation warnings ('safe' as long as we don't update packages)
# from joblib import Parallel, delayed
# import torch
# import torch.nn as nn
# import torch.optim as optim
# import torch.nn.functional as F

# from src.CNN import CNN
# from src.MCTS import Monte_Carlo_Tree_Search

# BOARD_SIZE = 5

# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# def generate_game(x, y, model):
#     mcts = Monte_Carlo_Tree_Search(BOARD_SIZE, model)
#     # mcts.run_game()
#     mcts.run(1000)
#     a, b = mcts.get_tree_data()
#     x.append(a)
#     y.append(b)

# def generate_games(x, y, n_games, model):
#     print(f"Generating {n_games} games")
#     Parallel(n_jobs=10)(delayed(generate_game)(x, y, model) for _ in range(1, n_games))

# def train_model(model, criterion, optimizer, x, y):
#     print("Training model")
#     for i in range(len(x)):
#         inputs, labels = torch.tensor(x[i], device=device), torch.tensor(y[i], device=device)
#         labels = F.softmax(labels)
#         optimizer.zero_grad()
#         outputs = model(inputs)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()

# model = CNN()   
# model.to(device=device)
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# for i in range(5):
#     x, y = [], []
#     if i == 0: generate_games(x, y, 10, None)
#     else: generate_games(x, y, 10, model)
#     train_model(model, criterion, optimizer, x, y)
#     torch.save(model.state_dict(), f"models/{i}-times.pth")

In [None]:
    # go_env.reset()
    # done = go_env.done
    # players = [model1, model2]
    # while not done:
    #     weights = model1.forward(go_env.state()).detach().cpu().numpy()
    #     weights = weights*go_env.valid_moves()
    #     _, _, done, _ = go_env.step(weights.argmax())
    #     if done: continue

    #     weights = model2.forward(go_env.state()).detach().cpu().numpy()
    #     weights = weights*go_env.valid_moves()
    #     _, _, done, _ = go_env.step(weights.argmax())
    #     turn_nr += 1
    #     if turn_nr > 300: break
hundred_games = CNN()
hundred_games.to(device)
hundred_games.load_state_dict(torch.load(f"100-games/0-times.pth"))

def play_model_vs_model_no_render(model1 : CNN, model2 : CNN, max_moves=300):
        go_env = gym.make('gym_go:go-v0', size=BOARD_SIZE, komi=0, reward_method='heuristic')
        go_env.reset()
        
        players = [model1, model2]
        for _ in range(max_moves):
            if go_env.done: break
            for _ in players:
                if go_env.done: break
                # action = random_action(go_env.state())
                # _, _, done, _ = go_env.step(action)
                
                weights = hundred_games.forward(go_env.state()).detach().cpu().numpy()
                weights = weights*go_env.valid_moves()
                go_env.step(weights.argmax())
        return go_env.reward()

In [None]:
bwd = [0]*3
for i in range(1000):
    reward = play_model_vs_model_no_render(players[4], players[0])
    if reward > 0: bwd[0] += 1
    if reward < 0: bwd[1] += 1
    else: bwd[2] += 1
print(f"Black wins: {bwd[0]}\nWhite wins: {bwd[1]}\nDraws: {bwd[2]}")

Black wins: 0
White wins: 1000
Draws: 0


In [None]:
def play_tournament(wins, draws, games_played, b_w):
    # print("Starting a tournament")
    # start1 = time.time()
    for player1_index in range(len(players)):
        for player2_index in range(len(players)):
            if player1_index == player2_index: continue
            games_played[player1_index] += 1
            games_played[player2_index] += 1
            p1, p2 = players[player1_index], players[player2_index]
            # print(f"  Starting a game between {player1_index} and {player2_index}", end="")
            # start = time.time()
            go_env = play_model_vs_model_no_render(p1, p2, gym.make('gym_go:go-v0', size=BOARD_SIZE, komi=0, reward_method='heuristic'))
            # stop = time.time()
            if go_env.reward() > 0:
                wins[player1_index] += 1
                b_w[0] += 1
            elif go_env.reward() < 0:
                wins[player2_index] += 1
                b_w[1] += 1
            else:
                draws[player1_index] += 1
                draws[player2_index] += 1
            # print(f", reward: {go_env.reward()}, time: {stop-start} sec")
    # stop1 = time.time()
    # print(f"Tournament took {stop1-start1}")
b_w = [0]*2
wins, draws, games_played = [0]*len(players), [0]*len(players), [0]*len(players)
# Parallel(n_jobs=10)(delayed(play_tournament)(wins, draws, games_played, b_w) for _ in range(1000))
for game_number in range(1000):
    print("Tournament: ", game_number, end="\r")
    play_tournament(wins, draws, games_played, b_w)

Tournament:  0

TypeError: 'OrderEnforcing' object cannot be interpreted as an integer

In [None]:
print(b_w)
print(wins)
print(games_played)
print(draws)

[0, 2000]
[400, 400, 400, 400, 400]
[800, 800, 800, 800, 800]
[0, 0, 0, 0, 0]


In [None]:
mcts = Monte_Carlo_Tree_Search(BOARD_SIZE, None)
mcts.run(1000)

In [None]:
print(mcts.root)