In [1]:
import json
import os
import sys
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Make the Group27 MCTS package (located under the current working directory)
# importable, then pull in the policy network class defined there.
path = str(Path.cwd().resolve() / 'agents' / 'Group27' / 'mcts')
if path not in sys.path:
    sys.path.append(path)

from PolicyModel import PolicyModel

# Data

In [3]:
# Load the chump-v-chump dataset from ../data (relative to the working
# directory) and report how many entries it contains.
path = str(Path().resolve())
dataFile = os.path.join(path, '..', 'data', 'chump-v-chump.json')
with open(dataFile, 'rb') as handle:
    rawData = json.load(handle)
print(len(rawData))

10632


In [4]:
# Peek at the first dataset entry only: print its board-string key and its
# 'moves' grid, and keep that grid around as `tempMoves` for later demo cells.
for boardKey, record in rawData.items():
    print(boardKey)
    tempMoves = record['moves']
    print(tempMoves)
    break

0 0 0 0 0 0 0 0 0 0 0 
 0 0 0 0 0 0 0 0 0 0 0 
  0 0 0 0 0 0 0 0 0 0 0 
   0 0 0 0 0 0 0 0 0 0 0 
    0 0 0 0 0 0 0 0 0 0 0 
     0 0 0 0 0 0 0 0 0 0 0 
      0 0 0 0 0 0 0 0 0 0 0 
       0 0 0 0 0 0 0 0 0 0 0 
        0 0 0 0 0 0 0 0 0 0 0 
         0 0 0 0 0 0 0 0 0 0 0 
          0 0 0 0 0 0 0 0 0 0 0 

[[0, 1, 0, 1, 1, 2, 0, 3, 1, 0, 1], [2, 0, 3, 1, 1, 0, 1, 0, 0, 1, 1], [2, 0, 0, 0, 0, 1, 1, 0, 0, 0, 2], [1, 0, 0, 1, 1, 0, 3, 3, 0, 0, 2], [1, 1, 0, 1, 0, 3, 0, 1, 1, 1, 0], [1, 0, 2, 0, 1, 1, 1, 2, 1, 0, 1], [0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0], [1, 0, 3, 1, 3, 0, 0, 1, 1, 0, 0], [0, 0, 1, 1, 1, 0, 1, 0, 2, 0, 2], [1, 2, 1, 1, 0, 1, 1, 2, 2, 0, 0], [0, 1, 0, 1, 0, 1, 1, 1, 3, 1, 1]]


In [5]:
# Hand-written sample position used as a fixed example input by the demo calls
# in later cells. One board row per line, cells separated by spaces, each cell
# being 'R', 'B' or '0' (the format tensorfyBoard parses).
tempBoard = "R 0 0 0 R B 0 0 B R 0 \n R 0 B 0 0 B R B 0 R B \n  B 0 R 0 R R B 0 B R 0 \n   0 0 0 0 0 B 0 R 0 B 0 \n    B R 0 0 B R B R R R R \n     R B B B 0 0 R B 0 0 R \n      B R 0 R 0 R R B R B 0 \n       B R R B R B B B 0 R R \n        B R 0 0 B 0 0 0 R B 0 \n         0 B R B B B R B B 0 R \n          B 0 0 0 0 B R B R R R \n"

## State Tensors

In [6]:
def tensorfyBoard(boardString):
    """Convert a textual board into three stacked one-hot planes.

    Args:
        boardString: Board text with one row per line and whitespace-separated
            cells, where 'R' marks a red stone, 'B' a blue stone and '0' an
            empty cell. Leading/trailing whitespace on rows is ignored.

    Returns:
        An int32 tensor of shape (3, rows, cols) whose planes are, in order:
        red stones, blue stones, empty cells.
    """
    # Map the colour letters onto integer codes so every cell parses as an int.
    digits = boardString.replace('R', '1').replace('B', '2')

    grid = []
    for line in digits.strip().split('\n'):
        grid.append([int(token) for token in line.strip().split()])

    cells = torch.tensor(grid, dtype=torch.int)

    # Plane order matters to callers: index 2 is the empty-cell mask.
    planes = [(cells == code).int() for code in (1, 2, 0)]
    return torch.stack(planes)

# Smoke-test the encoder on the sample position; as the last expression of the
# cell, the resulting tensor is displayed as the cell output.
tensorfyBoard(tempBoard)

tensor([[[1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
         [1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
         [0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0],
         [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
         [0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1],
         [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1],
         [0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0],
         [0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1],
         [0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
         [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1],
         [0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1]],

        [[0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0],
         [0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1],
         [1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0],
         [0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0],
         [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
         [0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0],
         [1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0],
         [1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0],
         [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
         [0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0],
         [1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0]],

        [[0, 1, 1, 1, 0, 0, 

## Move Frequencies

In [7]:
def encodeMoves(moves2D, boardTensor):
    """Turn a grid of move counts into a flat target distribution.

    Args:
        moves2D: 2D array-like of per-cell move counts, laid out like the board.
        boardTensor: Stacked board planes as produced by tensorfyBoard; plane
            index 2 is the empty-cell mask (1 where the cell is empty).

    Returns:
        A 1D float32 tensor with one entry per cell. Every empty cell gets one
        extra pseudo-count (smoothing) and the result is normalised to sum
        to 1. If the total is zero (no recorded moves and no empty cells) an
        all-zero tensor is returned instead of NaNs, so the sample contributes
        nothing to a KL-divergence loss rather than corrupting it.
    """
    # Work in float32 from the start so the target dtype never depends on the
    # dtype of the incoming counts.
    counts = torch.as_tensor(np.asarray(moves2D), dtype=torch.float32).flatten()

    # smoothing: +1 on every empty cell (out-of-place, so inputs are untouched)
    counts = counts + boardTensor[2].flatten()

    # normalise, guarding the degenerate 0/0 case that would produce NaN
    total = counts.sum()
    if total.item() == 0:
        return torch.zeros_like(counts)
    return counts / total

# Demo only: tempMoves was taken from the first dataset entry, while tempBoard
# is a separate hand-written position, so this pairing just exercises the
# function rather than showing a real training target.
encodeMoves(tempMoves, tensorfyBoard(tempBoard))

tensor([0.0000, 0.0139, 0.0069, 0.0139, 0.0069, 0.0139, 0.0069, 0.0278, 0.0069,
        0.0000, 0.0139, 0.0139, 0.0069, 0.0208, 0.0139, 0.0139, 0.0000, 0.0069,
        0.0000, 0.0069, 0.0069, 0.0069, 0.0139, 0.0069, 0.0000, 0.0069, 0.0000,
        0.0069, 0.0069, 0.0069, 0.0000, 0.0000, 0.0208, 0.0139, 0.0069, 0.0069,
        0.0139, 0.0139, 0.0000, 0.0278, 0.0208, 0.0069, 0.0000, 0.0208, 0.0069,
        0.0069, 0.0069, 0.0139, 0.0000, 0.0208, 0.0000, 0.0069, 0.0069, 0.0069,
        0.0000, 0.0069, 0.0000, 0.0139, 0.0000, 0.0139, 0.0139, 0.0069, 0.0139,
        0.0139, 0.0069, 0.0069, 0.0000, 0.0069, 0.0139, 0.0000, 0.0139, 0.0000,
        0.0000, 0.0069, 0.0069, 0.0000, 0.0069, 0.0069, 0.0000, 0.0208, 0.0069,
        0.0208, 0.0000, 0.0000, 0.0069, 0.0139, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0139, 0.0139, 0.0069, 0.0069, 0.0139, 0.0069, 0.0139, 0.0000, 0.0208,
        0.0139, 0.0139, 0.0069, 0.0069, 0.0000, 0.0069, 0.0069, 0.0139, 0.0139,
        0.0069, 0.0000, 0.0000, 0.0139, 

## Building the Dataset Tensors

In [8]:
def processDataSet(dataSet):
    """Encode every dataset entry into batched board and move tensors.

    Args:
        dataSet: Mapping from board string to a record whose 'moves' entry is
            the grid of move counts for that board.

    Returns:
        A tuple (boards, moves): the per-entry board tensors and the per-entry
        move targets, each stacked along a new leading dimension in the
        mapping's iteration order.
    """
    encoded = []
    for boardString, data in dataSet.items():
        planes = tensorfyBoard(boardString)
        # The move encoding needs the board planes for its smoothing step.
        encoded.append((planes, encodeMoves(data['moves'], planes)))

    boards = torch.stack([planes for planes, _ in encoded])
    moves = torch.stack([target for _, target in encoded])
    return boards, moves

# Encode the whole dataset once and print both tensor shapes as a sanity check.
boards, moves = processDataSet(rawData)
print(boards.shape, moves.shape)

torch.Size([10632, 3, 11, 11]) torch.Size([10632, 121])


# Data Augmentation

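The Hex board has symmetries we can use to augment our data. Although the individual cells are hexagons, the board itself is a rhombus, so 60-degree rotations do not map it onto itself. The usable symmetries are the 180-degree rotation (which maps each player's pair of edges onto itself) and the reflections across the two diagonals, which exchange the players' edges and therefore require swapping the two colours (and the side to move) at the same time. For each board state, with its move-frequency target transformed in the same way, this gives up to 3 additional board states, i.e. up to 4 times more data to train on.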

In [9]:
# TODO: implement symmetry-based augmentation of `boards` and `moves`

# Training

In [10]:
# Newly constructed policy network for an 11x11 board (PolicyModel comes from
# the local mcts package imported near the top of the notebook).
model = PolicyModel(boardSize=11)

In [11]:
def trainModel(model, boards, moves, learningRate=1e-3, epochs=10):
    """Fit the model to target move distributions with full-batch Adam.

    Each epoch is a single optimisation step over the entire data set: the
    model output is treated as logits, turned into log-probabilities, and
    compared against `moves` with a batch-mean KL divergence.

    Args:
        model: Module mapping `boards` to per-move logits along dim 1.
        boards: Input batch passed to the model as-is.
        moves: Target probabilities, one row per board.
        learningRate: Adam learning rate.
        epochs: Number of full-batch optimisation steps.

    Returns:
        The same model instance, updated in place and left in training mode.
    """
    model.train()
    optimiser = torch.optim.Adam(model.parameters(), lr=learningRate)

    for epochNumber in range(1, epochs + 1):
        optimiser.zero_grad()

        # forward pass: logits -> log-probabilities (kl_div expects log-space input)
        logProbs = model(boards).log_softmax(dim=1)
        loss = torch.nn.functional.kl_div(logProbs, moves, reduction='batchmean')

        # backward pass and parameter update
        loss.backward()
        optimiser.step()

        print(f'Epoch {epochNumber}: {loss.item()}')

    return model

# Train with the default hyperparameters. The board planes are integer tensors,
# so they are cast to float before being fed to the model. trainModel returns
# the model, whose repr is therefore shown as the cell output.
trainModel(model, boards.float(), moves)

Epoch 1: 0.772494375705719
Epoch 2: 0.7722831964492798
Epoch 3: 0.7722120881080627
Epoch 4: 0.7721202373504639
Epoch 5: 0.7719911336898804
Epoch 6: 0.771724283695221
Epoch 7: 0.7710041999816895
Epoch 8: 0.7701238989830017
Epoch 9: 0.7694451808929443
Epoch 10: 0.7690417170524597


PolicyModel(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (convp): Conv2d(64, 2, kernel_size=(1, 1), stride=(1, 1))
  (fcp): Linear(in_features=242, out_features=121, bias=True)
)

# Results

In [12]:
def infer(model, boardString):
    """Predict a move distribution for a single textual board.

    Args:
        model: Module mapping a batch of board planes to per-move logits.
        boardString: Board text in the format accepted by tensorfyBoard.

    Returns:
        The softmax over dim 1 of the model output for a batch of size one.

    Note:
        The model is switched to eval mode and is left in that mode.
    """
    model.eval()

    # Add a leading batch dimension and cast the integer planes to float.
    batch = tensorfyBoard(boardString).float().unsqueeze(0)

    with torch.no_grad():
        return torch.softmax(model(batch), dim=1)

# Query the trained model on the sample position; the predicted move
# distribution is displayed as the cell output.
infer(model, tempBoard)

tensor([[0.0078, 0.0090, 0.0069, 0.0078, 0.0096, 0.0084, 0.0092, 0.0083, 0.0093,
         0.0093, 0.0097, 0.0090, 0.0095, 0.0071, 0.0091, 0.0083, 0.0077, 0.0092,
         0.0070, 0.0082, 0.0092, 0.0086, 0.0095, 0.0079, 0.0071, 0.0090, 0.0089,
         0.0083, 0.0082, 0.0081, 0.0088, 0.0077, 0.0079, 0.0091, 0.0094, 0.0092,
         0.0086, 0.0075, 0.0080, 0.0077, 0.0092, 0.0101, 0.0073, 0.0087, 0.0074,
         0.0082, 0.0067, 0.0089, 0.0073, 0.0093, 0.0093, 0.0087, 0.0079, 0.0083,
         0.0091, 0.0073, 0.0072, 0.0081, 0.0087, 0.0080, 0.0089, 0.0072, 0.0077,
         0.0075, 0.0084, 0.0079, 0.0085, 0.0084, 0.0076, 0.0076, 0.0099, 0.0095,
         0.0074, 0.0077, 0.0088, 0.0104, 0.0099, 0.0073, 0.0091, 0.0079, 0.0082,
         0.0077, 0.0072, 0.0088, 0.0086, 0.0080, 0.0085, 0.0086, 0.0091, 0.0077,
         0.0070, 0.0077, 0.0068, 0.0075, 0.0096, 0.0087, 0.0064, 0.0089, 0.0076,
         0.0094, 0.0087, 0.0089, 0.0084, 0.0094, 0.0075, 0.0083, 0.0074, 0.0088,
         0.0069, 0.0077, 0.0

In [13]:
# Persist only the learned weights (state_dict). The target directory is
# created first because torch.save does not create missing parent directories,
# which would make this cell fail on a fresh checkout.
os.makedirs('./models', exist_ok=True)
torch.save(model.state_dict(), './models/test.pth')