In [3]:
from joblib import Parallel, delayed
import gym
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import matplotlib.pyplot as plt
from random import choice
import sys

from src.CNN import CNN
from src.MCTS import Monte_Carlo_Tree_Search

from gym_go.gogame import random_action
from copy import deepcopy
import gym

In [5]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
BOARD_SIZE = 5
print(device)

cuda:0


In [6]:
def mcts_black_random_white(mcts : Monte_Carlo_Tree_Search, go_env: gym.Env):
    go_env.reset()
    done = go_env.done
    turn_nr = 0
    while not done:
        node = mcts.get_move_from_env(go_env)
        _, _, done, _ = go_env.step(node.action)
        turn_nr += 1

        if done:
            continue

        action = random_action(go_env.state())
        _, _, done, _ = go_env.step(action)

        if turn_nr > 300:
            break
    
    return go_env

def random_black_mcts_white(mcts : Monte_Carlo_Tree_Search, go_env: gym.Env):
    go_env.reset()
    done = go_env.done
    turn_nr = 0
    while not done:
        action = random_action(go_env.state())
        _, _, done, _ = go_env.step(action)

        if done:
            continue

        node = mcts.get_move_from_env(go_env)
        _, _, done, _ = go_env.step(node.action)
        turn_nr += 1
        
        if turn_nr > 300:
            break
    
    return go_env

In [9]:
from warnings import filterwarnings
filterwarnings("ignore")

mcts_white_wins, mcts_black_wins, games = 0, 0, 10
for i in range(games):
    mcts_test = Monte_Carlo_Tree_Search(BOARD_SIZE, None)
    env = mcts_black_random_white(mcts_test, deepcopy(mcts_test.env))
    print("Black game", i, "finished", end="\r")
    if env.reward() > 0:
        mcts_black_wins += 1
    env = random_black_mcts_white(mcts_test, deepcopy(mcts_test.env))
    print("White game", i, "finished", end="\r")
    if env.reward() < 0:
        mcts_white_wins += 1

print("Win rate as black:", (mcts_black_wins / games))
print("Win rate as white:", (mcts_white_wins / games))

Win rate as black: 0.6
Win rate as white: 0.1


# Convolutional Neural Network

In [14]:
# def train_model(model, lr, momentum):
#     criterion = nn.CrossEntropyLoss()
#     optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
#     model.to(device)
    
#     for epoch in range(10):
#         mcts = Monte_Carlo_Tree_Search(BOARD_SIZE, model) # new tree
#         mcts.run_game() # run a single game to completion
#         x, y = mcts.get_tree_data() # get data
#         running_loss = .0 
#         for i in range(len(x)):
#             inputs, labels = torch.tensor(x[i], device=device), torch.tensor(y[i], device=device)
#             labels = F.softmax(labels)
#             optimizer.zero_grad()
#             outputs = model(inputs)
#             loss = criterion(outputs, labels)
#             loss.backward()
#             optimizer.step()
#             running_loss += loss.item()

#             if i % 10 == 9:
#                 print(f'[{epoch:3d}, {i + 1:3d}] loss: {running_loss / 10:.5f}')
#                 running_loss = 0.0

# def train(n_games=1000):

#     model = CNN()
#     criterion = nn.CrossEntropyLoss()
#     optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

#     all_x, all_y = [], []

#     # Simulate N games
#     for _ in range(n_games):
#         mcts = Monte_Carlo_Tree_Search(BOARD_SIZE, model)
#         mcts.run(10_000)
#         x, y = mcts.get_tree_data()

#         for i in range(len(x)):
#             inputs, labels = torch.tensor(x[i], device=device), torch.tensor(y[i], device=device)
#             labels = F.softmax(labels)
#             optimizer.zero_grad()
#             outputs = model(inputs)
#             loss = criterion(outputs, labels)
#             loss.backward()
#             optimizer.step()
            
#             all_x.append(x[i])
#             all_y.append(y[i])
        
#         # After training, check accuracy against previous data
#         acc_values.append(model.avg_accuracy(all_x, all_y).item())
#         loss_values.append(game_loss/len(x))

#     print(f'''Finished running {n_games} games
#     Time..............: {total_time}
#     Avg. time per game: {total_time/n_games}''')
#     return model, loss_values, acc_values


In [15]:
# model, loss_values, acc_values = train(1000)

# MCTS and CNN combined

In [16]:
# def generate_game(x, y, model):
#     mcts = Monte_Carlo_Tree_Search(BOARD_SIZE, model)
#     # mcts.run_game()
#     mcts.run(1000)
#     a, b = mcts.get_tree_data()
#     x.append(a)
#     y.append(b)

# def generate_games(x, y, n_games, model):
#     print(f"Generating {n_games} games")
#     Parallel(n_jobs=10)(delayed(generate_game)(x, y, model) for _ in range(1, n_games))

# def train_model(model, criterion, optimizer, x, y):
#     print("Training model")
#     for i in range(len(x)):
#         inputs, labels = torch.tensor(x[i], device=device), torch.tensor(y[i], device=device)
#         labels = F.softmax(labels)
#         optimizer.zero_grad()
#         outputs = model(inputs)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()

In [17]:
# x, y = [], []
# generate_games(x, y, 1000, None)

# model = CNN()
# model.to(device=device)
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# train_model(model, criterion, optimizer, x, y)

# print("Exporting model")
# torch.save(model.state_dict(), f"models/only-mcts.pth")

In [18]:
# # model = CNN()
# # model.load_state_dict(torch.load("models/only-mcts.pth"))
# # model.eval()
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# for i in range(2, 6):
#     # x, y = [], []
#     generate_games(x, y, 1000, model)
#     train_model(model, criterion, optimizer, x, y)
#     torch.save(model.state_dict(), f"models/{i}-times.pth")

In [37]:
players = []
for i in range(5):
    player = CNN()
    player.to(device)
    player.load_state_dict(torch.load(f"models/{i}-times.pth"))
    players.append(player)
print(players)

RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.

In [39]:
def play_game_no_render(model : CNN, go_env: gym.Env):
    go_env.reset()
    done = go_env.done
    turn_nr = 0
    while not done:
        action = random_action(go_env.state())
        _, _, done, _ = go_env.step(action)

        if done: break
        print(turn(go_env.state()), end="\r")
        weights = model.forward(go_env.state()).detach().cpu().numpy()
        weights = weights*go_env.valid_moves()
        _, _, done, _ = go_env.step(weights.argmax())

        turn_nr += 1
        if turn_nr > 300: break
    return go_env

model_wins = 0

for i in range(1000):
    env = play_game_no_render(players[4], gym.make('gym_go:go-v0', size=BOARD_SIZE, komi=0, reward_method='heuristic'))
    if not env.done:
        print("Game stopped after 300 turns: \nIt was resigned and is a draw.")
        continue

    if env.reward() > 0: model_wins += 1
    if env.reward() == 0: print("It's a draw!")

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


In [42]:
print(f"Model won {(model_wins/1000)*100}%")

Model won 16.400000000000002%


In [22]:
# #!/usr/bin/env python
# # coding: utf-8

# # ignore deprecation warnings ('safe' as long as we don't update packages)
# from joblib import Parallel, delayed
# import torch
# import torch.nn as nn
# import torch.optim as optim
# import torch.nn.functional as F

# from src.CNN import CNN
# from src.MCTS import Monte_Carlo_Tree_Search

# BOARD_SIZE = 5

# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# def generate_game(x, y, model):
#     mcts = Monte_Carlo_Tree_Search(BOARD_SIZE, model)
#     # mcts.run_game()
#     mcts.run(1000)
#     a, b = mcts.get_tree_data()
#     x.append(a)
#     y.append(b)

# def generate_games(x, y, n_games, model):
#     print(f"Generating {n_games} games")
#     Parallel(n_jobs=10)(delayed(generate_game)(x, y, model) for _ in range(1, n_games))

# def train_model(model, criterion, optimizer, x, y):
#     print("Training model")
#     for i in range(len(x)):
#         inputs, labels = torch.tensor(x[i], device=device), torch.tensor(y[i], device=device)
#         labels = F.softmax(labels)
#         optimizer.zero_grad()
#         outputs = model(inputs)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()

# model = CNN()   
# model.to(device=device)
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# for i in range(5):
#     x, y = [], []
#     if i == 0: generate_games(x, y, 10, None)
#     else: generate_games(x, y, 10, model)
#     train_model(model, criterion, optimizer, x, y)
#     torch.save(model.state_dict(), f"models/{i}-times.pth")

In [31]:
    # go_env.reset()
    # done = go_env.done
    # players = [model1, model2]
    # while not done:
    #     weights = model1.forward(go_env.state()).detach().cpu().numpy()
    #     weights = weights*go_env.valid_moves()
    #     _, _, done, _ = go_env.step(weights.argmax())
    #     if done: continue

    #     weights = model2.forward(go_env.state()).detach().cpu().numpy()
    #     weights = weights*go_env.valid_moves()
    #     _, _, done, _ = go_env.step(weights.argmax())
    #     turn_nr += 1
    #     if turn_nr > 300: break
hundred_games = CNN()
hundred_games.to(device)
hundred_games.load_state_dict(torch.load(f"100-games/0-times.pth"))

def play_model_vs_model_no_render(model1 : CNN, model2 : CNN, max_moves=300):
        go_env = gym.make('gym_go:go-v0', size=BOARD_SIZE, komi=0, reward_method='heuristic')
        go_env.reset()
        
        players = [model1, model2]
        for _ in range(max_moves):
            if go_env.done: break
            for _ in players:
                if go_env.done: break
                # action = random_action(go_env.state())
                # _, _, done, _ = go_env.step(action)
                
                weights = hundred_games.forward(go_env.state()).detach().cpu().numpy()
                weights = weights*go_env.valid_moves()
                go_env.step(weights.argmax())
        return go_env.reward()

In [32]:
bwd = [0]*3
for i in range(1000):
    reward = play_model_vs_model_no_render(players[4], players[0])
    if reward > 0: bwd[0] += 1
    if reward < 0: bwd[1] += 1
    else: bwd[2] += 1
print(f"Black wins: {bwd[0]}\nWhite wins: {bwd[1]}\nDraws: {bwd[2]}")

Black wins: 0
White wins: 1000
Draws: 0


In [25]:
def play_tournament(wins, draws, games_played, b_w):
    # print("Starting a tournament")
    # start1 = time.time()
    for player1_index in range(len(players)):
        for player2_index in range(len(players)):
            if player1_index == player2_index: continue
            games_played[player1_index] += 1
            games_played[player2_index] += 1
            p1, p2 = players[player1_index], players[player2_index]
            # print(f"  Starting a game between {player1_index} and {player2_index}", end="")
            # start = time.time()
            go_env = play_model_vs_model_no_render(p1, p2, gym.make('gym_go:go-v0', size=BOARD_SIZE, komi=0, reward_method='heuristic'))
            # stop = time.time()
            if go_env.reward() > 0:
                wins[player1_index] += 1
                b_w[0] += 1
            elif go_env.reward() < 0:
                wins[player2_index] += 1
                b_w[1] += 1
            else:
                draws[player1_index] += 1
                draws[player2_index] += 1
            # print(f", reward: {go_env.reward()}, time: {stop-start} sec")
    # stop1 = time.time()
    # print(f"Tournament took {stop1-start1}")
b_w = [0]*2
wins, draws, games_played = [0]*len(players), [0]*len(players), [0]*len(players)
# Parallel(n_jobs=10)(delayed(play_tournament)(wins, draws, games_played, b_w) for _ in range(1000))
for game_number in range(1000):
    print("Tournament: ", game_number, end="\r")
    play_tournament(wins, draws, games_played, b_w)

Tournament:  0

TypeError: 'OrderEnforcing' object cannot be interpreted as an integer

In [None]:
print(b_w)
print(wins)
print(games_played)
print(draws)

[0, 2000]
[400, 400, 400, 400, 400]
[800, 800, 800, 800, 800]
[0, 0, 0, 0, 0]


In [None]:
mcts = Monte_Carlo_Tree_Search(BOARD_SIZE, None)
mcts.run(1000)

In [None]:
print(mcts.root)