### TODO
- Factor out classes into .py-files to make notebook more concise
- Find package versions where we don't need to filter warnings (older packages)
- Set up pseudocode for the tournament:
    - players, saving models, etc.

In [2]:
import os
from warnings import filterwarnings

# ignore deprecation warnings ('safe' as long as we don't update packages)
# NOTE: the filter is installed before the third-party imports below on purpose.
filterwarnings("ignore")

from joblib import Parallel, delayed

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import matplotlib.pyplot as plt

from src.CNN import CNN
from src.MCTS import Monte_Carlo_Tree_Search

In [3]:
# Board size handed to every Monte_Carlo_Tree_Search created in this notebook.
BOARD_SIZE = 5

# Tensors and the model are placed on the first CUDA device when one is
# available; otherwise everything stays on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [4]:
# def play_game_no_render(mcts : Monte_Carlo_Tree_Search, go_env: gym.Env):
#     go_env.reset()
#     done = go_env.done
#     turn_nr = 0
#     while not done:
#         action = random_action(go_env.state())
#         _, _, done, _ = go_env.step(action)

#         if done:
#             continue

#         node = mcts.get_move_from_env(go_env)
#         _, _, done, _ = go_env.step(node.action)
#         turn_nr += 1
#         if turn_nr > 300:
#             break
    
#     return go_env

In [5]:
# mcts_test = Monte_Carlo_Tree_Search(BOARD_SIZE, None)
# _ = mcts_test.run_game()

In [6]:
# for i in range(10):
#     env = play_game_no_render(mcts_test, copy.deepcopy(mcts_test.env))
#     if not env.done:
#         print("Game stopped after 300 turns: \nIt was resigned and is a draw.")
#         continue
        
#     if env.reward() < 0:
#         print("White won!")
#     if env.reward() > 0:
#         print("Black won!")
#     if env.reward() == 0:
#         print("It's a draw!")

# Convolutional Neural Network

In [7]:
# def train_model(model, lr, momentum):
#     criterion = nn.CrossEntropyLoss()
#     optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
#     model.to(device)
    
#     for epoch in range(10):
#         mcts = Monte_Carlo_Tree_Search(BOARD_SIZE, model) # new tree
#         mcts.run_game() # run a single game to completion
#         x, y = mcts.get_tree_data() # get data
#         running_loss = .0 
#         for i in range(len(x)):
#             inputs, labels = torch.tensor(x[i], device=device), torch.tensor(y[i], device=device)
#             labels = F.softmax(labels)
#             optimizer.zero_grad()
#             outputs = model(inputs)
#             loss = criterion(outputs, labels)
#             loss.backward()
#             optimizer.step()
#             running_loss += loss.item()

#             if i % 10 == 9:
#                 print(f'[{epoch:3d}, {i + 1:3d}] loss: {running_loss / 10:.5f}')
#                 running_loss = 0.0

# def train(n_games=1000):

#     model = CNN()
#     criterion = nn.CrossEntropyLoss()
#     optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

#     all_x, all_y = [], []

#     # Simulate N games
#     for _ in range(n_games):
#         mcts = Monte_Carlo_Tree_Search(BOARD_SIZE, model)
#         mcts.run(10_000)
#         x, y = mcts.get_tree_data()

#         for i in range(len(x)):
#             inputs, labels = torch.tensor(x[i], device=device), torch.tensor(y[i], device=device)
#             labels = F.softmax(labels)
#             optimizer.zero_grad()
#             outputs = model(inputs)
#             loss = criterion(outputs, labels)
#             loss.backward()
#             optimizer.step()
            
#             all_x.append(x[i])
#             all_y.append(y[i])
        
#         # After training, check accuracy against previous data
#         acc_values.append(model.avg_accuracy(all_x, all_y).item())
#         loss_values.append(game_loss/len(x))

#     print(f'''Finished running {n_games} games
#     Time..............: {total_time}
#     Avg. time per game: {total_time/n_games}''')
#     return model, loss_values, acc_values


In [8]:
# model, loss_values, acc_values = train(1000)

# MCTS and CNN combined

In [9]:
def generate_game(x, y, model):
    """Play one game with a fresh search tree and record its data.

    A new ``Monte_Carlo_Tree_Search`` is built for ``BOARD_SIZE`` and ``model``,
    ``run_game()`` is called on it, and the pair returned by
    ``get_tree_data()`` is appended to the caller's lists.

    Args:
        x: List that receives the first element of the tree data.
        y: List that receives the second element of the tree data.
        model: Passed straight through to ``Monte_Carlo_Tree_Search``.
    """
    tree = Monte_Carlo_Tree_Search(BOARD_SIZE, model)
    tree.run_game()
    # Earlier experiments used tree.run(10_000) here instead of run_game().
    game_inputs, game_labels = tree.get_tree_data()
    x.append(game_inputs)
    y.append(game_labels)

def _generate_game_data(model):
    """Play one game inside a worker and return its data to the caller.

    ``generate_game`` reports its result by appending to the lists it is
    given. Fresh lists are used here and returned, because a return value is
    what joblib hands back from a worker to the parent.
    """
    game_x, game_y = [], []
    generate_game(game_x, game_y, model)
    return game_x, game_y


def generate_games(x, y, n_games, model, n_jobs=10):
    """Generate ``n_games`` games in parallel and add their data to ``x``/``y``.

    Args:
        x: List extended in place with each game's input data.
        y: List extended in place with each game's label data.
        n_games: Number of games to play.
        model: Forwarded to ``generate_game`` for every game.
        n_jobs: Number of joblib workers (defaults to the previous fixed 10).
    """
    print(f"Generating {n_games} games")
    # Two fixes compared to the previous version:
    #  * range(n_games) instead of range(1, n_games), which played one game
    #    fewer than announced;
    #  * results are *returned* from the workers and merged here. Appending
    #    to x/y inside the workers only changed the workers' own copies when
    #    joblib runs them as separate processes, so the caller's lists stayed
    #    empty.
    results = Parallel(n_jobs=n_jobs)(
        delayed(_generate_game_data)(model) for _ in range(n_games)
    )
    for game_x, game_y in results:
        x.extend(game_x)
        y.extend(game_y)

def train_model(model, criterion, optimizer, x, y):
    """Run one pass over the generated data, taking one optimizer step per entry.

    For every index ``i`` the pair ``x[i]`` / ``y[i]`` is turned into tensors
    on the notebook-level ``device``, the labels are softmax-normalised, and a
    forward pass, backward pass and optimizer step are performed.

    Args:
        model: Network being trained; invoked as ``model(inputs)``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` wrap each update.
        x: Sequence of inputs, each convertible with ``torch.tensor``.
        y: Sequence of raw label values, indexed in parallel with ``x``.
    """
    print("Training model")
    if len(x) == 0:
        # Make an empty data set visible instead of silently doing nothing.
        print("No training data supplied; model left unchanged")
        return

    for i in range(len(x)):
        inputs, labels = torch.tensor(x[i], device=device), torch.tensor(y[i], device=device)
        # Name the softmax axis explicitly rather than relying on the
        # deprecated implicit-dim behaviour of F.softmax. The axis follows
        # nn.CrossEntropyLoss's layout for probability targets (axis 1 for
        # batched data, axis 0 for a single unbatched vector).
        # NOTE(review): assumes the labels use that layout - confirm against
        # what get_tree_data() produces.
        class_dim = 1 if labels.dim() > 1 else 0
        labels = F.softmax(labels, dim=class_dim)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

In [10]:
# Round 1: generate games with no model given to the tree search (None),
# then fit a freshly created CNN to that data and export it.
# NOTE(review): presumably model=None means plain MCTS, as the checkpoint
# name "only-mcts" suggests - confirm in src/MCTS.

# Create the checkpoint folder first: torch.save does not create missing
# directories, and failing here is cheaper than failing after the games.
os.makedirs("models", exist_ok=True)

x, y = [], []
generate_games(x, y, 1000, None)

model = CNN()
model.to(device=device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
train_model(model, criterion, optimizer, x, y)

print("Exporting model")
torch.save(model.state_dict(), "models/only-mcts.pth")

In [11]:
# Optional resume path (disabled): reload the round-1 checkpoint instead of
# reusing the in-memory `model` from the previous cell.
# NOTE(review): if this is re-enabled, the model likely also needs
# `.to(device)` (train_model puts its tensors on `device`) and `.train()`
# rather than `.eval()` before further training - confirm.
# model = CNN()
# model.load_state_dict(torch.load("models/only-mcts.pth"))
# model.eval()

# torch.save does not create missing directories, so make sure the
# checkpoint folder exists before the first round starts.
os.makedirs("models", exist_ok=True)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Rounds 2-5: generate games with the current model, train on only that
# round's data, and save a checkpoint named after the round number.
for i in range(2, 6):
    x, y = [], []
    generate_games(x, y, 1000, model)
    train_model(model, criterion, optimizer, x, y)
    torch.save(model.state_dict(), f"models/{i}-times.pth")

Generating 1000 games
Training model
Generating 1000 games
Training model
Generating 1000 games
Training model
Generating 1000 games
Training model
