In [1]:
import json
import os
import sys
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
path = str(Path().resolve() / 'agents/Group27/mcts')
if (path not in sys.path):
    sys.path.append(path)

from PolicyModel import PolicyModel

# Data

In [3]:
def loadData(dataFile):
    path = str(Path().resolve())
    with open(os.path.join(path, '..', 'data', f'{dataFile}.json'), 'rb') as f:
        rawData = json.load(f)
    print(len(rawData))
    return rawData

rawExpertData = loadData('expert-v-expert')
rawSelfPlayData = loadData('chump-v-chump')

5998
105501


In [4]:
for item in rawSelfPlayData.items():
    print(item[0])
    tempMoves = item[1]['moves']
    print(tempMoves)
    break

0 0 0 0 0 0 0 0 0 0 0 
 0 0 0 0 0 0 0 0 0 0 0 
  0 0 0 0 0 0 0 0 0 0 0 
   0 0 0 0 0 0 0 0 0 0 0 
    0 0 0 0 0 0 0 0 0 0 0 
     0 0 0 0 0 0 0 0 0 0 0 
      0 0 0 0 0 0 0 0 0 0 0 
       0 0 0 0 0 0 0 0 0 0 0 
        0 0 0 0 0 0 0 0 0 0 0 
         0 0 0 0 0 0 0 0 0 0 0 
          0 0 0 0 0 0 0 0 0 0 0 

[[6, 8, 9, 8, 7, 8, 8, 5, 6, 7, 12], [6, 5, 7, 9, 8, 3, 13, 10, 10, 8, 10], [9, 8, 11, 5, 11, 12, 10, 8, 11, 8, 10], [5, 9, 6, 7, 7, 13, 4, 7, 8, 10, 9], [8, 6, 7, 6, 10, 6, 6, 13, 12, 8, 6], [4, 10, 10, 6, 12, 6, 11, 7, 4, 8, 9], [5, 8, 7, 5, 5, 14, 9, 13, 9, 19, 6], [15, 6, 7, 9, 10, 12, 11, 7, 6, 7, 8], [10, 7, 6, 9, 2, 12, 4, 12, 10, 6, 5], [7, 5, 10, 10, 9, 8, 9, 6, 7, 9, 12], [12, 11, 11, 7, 2, 6, 8, 10, 7, 7, 10]]


In [5]:
tempBoard = "R 0 0 0 R B 0 0 B R 0 \n R 0 B 0 0 B R B 0 R B \n  B 0 R 0 R R B 0 B R 0 \n   0 0 0 0 0 B 0 R 0 B 0 \n    B R 0 0 B R B R R R R \n     R B B B 0 0 R B 0 0 R \n      B R 0 R 0 R R B R B 0 \n       B R R B R B B B 0 R R \n        B R 0 0 B 0 0 0 R B 0 \n         0 B R B B B R B B 0 R \n          B 0 0 0 0 B R B R R R \n"

## State Tensors

In [6]:
def tensorfyBoard(boardString):
    boardString = boardString.replace('R', '1').replace('B', '2')
    boardRows = boardString.strip().split('\n')
    board = [list(map(int, row.strip().split())) for row in boardRows]

    board = torch.tensor(board, dtype=torch.int)

    rStones = (board == 1).int()
    bStones = (board == 2).int()
    nStones = (board == 0).int()

    return torch.stack([rStones, bStones, nStones])

tensorfyBoard(tempBoard)

tensor([[[1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
         [1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
         [0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0],
         [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
         [0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1],
         [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1],
         [0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0],
         [0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1],
         [0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
         [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1],
         [0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1]],

        [[0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0],
         [0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1],
         [1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0],
         [0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0],
         [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
         [0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0],
         [1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0],
         [1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0],
         [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
         [0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0],
         [1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0]],

        [[0, 1, 1, 1, 0, 0, 

## Move Frequencies

In [7]:
def encodeMoves(moves2D, boardTensor):
    moves = torch.tensor(np.array(moves2D).flatten())

    # smoothing
    moves += boardTensor[2].flatten()

    # normalise
    moves = moves / moves.sum()
    return moves

encodeMoves(tempMoves, tensorfyBoard(tempBoard))

tensor([0.0057, 0.0086, 0.0096, 0.0086, 0.0067, 0.0077, 0.0086, 0.0057, 0.0057,
        0.0067, 0.0125, 0.0057, 0.0057, 0.0067, 0.0096, 0.0086, 0.0029, 0.0125,
        0.0096, 0.0105, 0.0077, 0.0096, 0.0086, 0.0086, 0.0105, 0.0057, 0.0105,
        0.0115, 0.0096, 0.0086, 0.0105, 0.0077, 0.0105, 0.0057, 0.0096, 0.0067,
        0.0077, 0.0077, 0.0125, 0.0048, 0.0067, 0.0086, 0.0096, 0.0096, 0.0077,
        0.0057, 0.0077, 0.0067, 0.0096, 0.0057, 0.0057, 0.0125, 0.0115, 0.0077,
        0.0057, 0.0038, 0.0096, 0.0096, 0.0057, 0.0125, 0.0067, 0.0105, 0.0067,
        0.0048, 0.0086, 0.0086, 0.0048, 0.0077, 0.0077, 0.0048, 0.0057, 0.0134,
        0.0086, 0.0125, 0.0086, 0.0182, 0.0067, 0.0144, 0.0057, 0.0067, 0.0086,
        0.0096, 0.0115, 0.0105, 0.0067, 0.0067, 0.0067, 0.0077, 0.0096, 0.0067,
        0.0067, 0.0096, 0.0019, 0.0125, 0.0048, 0.0125, 0.0096, 0.0057, 0.0057,
        0.0077, 0.0048, 0.0096, 0.0096, 0.0086, 0.0077, 0.0086, 0.0057, 0.0067,
        0.0096, 0.0115, 0.0115, 0.0115, 

## ...

In [8]:
def processDataSet(dataSet):
    boardTensors = []
    moveTensors = []

    for (boardString, data) in dataSet.items():
        boardTensor = tensorfyBoard(boardString)
        moveTensor = encodeMoves(data['moves'], boardTensor)

        boardTensors.append(boardTensor)
        moveTensors.append(moveTensor)

    boards = torch.stack(boardTensors)
    moves = torch.stack(moveTensors)

    return boards, moves

expertBoards, expertMoves = processDataSet(rawSelfPlayData)
print(expertBoards.shape, expertMoves.shape)
selfPlayBoards, selfPlayMoves = processDataSet(rawSelfPlayData)
print(selfPlayBoards.shape, selfPlayMoves.shape)

torch.Size([105501, 3, 11, 11]) torch.Size([105501, 121])
torch.Size([105501, 3, 11, 11]) torch.Size([105501, 121])


# Data Augmentation

The Hex board is symmetric. We can use this to augment our data. For each board state, we can generate 6 more board states by rotating the board by 60 degrees each time. This will give us 7 times more data to train on.

In [9]:
# TODO

# Training

In [10]:
model = PolicyModel(boardSize=11)

In [11]:
def trainModel(model, boards, moves, learningRate=1e-3, epochs=10):

    optimiser = optim.Adam(model.parameters(), lr=learningRate)
    criterion = torch.nn.KLDivLoss(reduction='batchmean')

    model.train()

    for epoch in range(epochs):
        optimiser.zero_grad()

        # forward
        logits = model(boards)
        logProbs = torch.nn.functional.log_softmax(logits, dim=1)

        # loss
        loss = criterion(logProbs, moves)

        # backpropagation
        loss.backward()
        optimiser.step()

        print(f'Epoch {epoch+1}: {loss.item()}')

    return model

# train on expert moves
print('EXPERT')
trainModel(model, expertBoards.float(), expertMoves, learningRate=1e-2)
# train on self-play moves
print('SELF-PLAY')
trainModel(model, selfPlayBoards.float(), selfPlayMoves, learningRate=1e-3)

Epoch 1: 0.7713713645935059
Epoch 2: 0.7710919976234436
Epoch 3: 0.7878260016441345
Epoch 4: 0.7708359956741333
Epoch 5: 0.7708039283752441
Epoch 6: 0.7707875967025757
Epoch 7: 0.7707788348197937
Epoch 8: 0.7707720994949341
Epoch 9: 0.770765483379364
Epoch 10: 0.7707569599151611
Epoch 1: 0.7707464098930359
Epoch 2: 0.7707305550575256
Epoch 3: 0.7707158327102661
Epoch 4: 0.7707018256187439
Epoch 5: 0.7706892490386963
Epoch 6: 0.770677387714386
Epoch 7: 0.7706663608551025
Epoch 8: 0.7706562280654907
Epoch 9: 0.7706465721130371
Epoch 10: 0.7706379890441895


PolicyModel(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (convp): Conv2d(64, 2, kernel_size=(1, 1), stride=(1, 1))
  (fcp): Linear(in_features=242, out_features=121, bias=True)
)

# Results

In [12]:
def infer(model, boardString):
    model.eval()
    with torch.no_grad():
        boardTensor = tensorfyBoard(boardString)
        logits = model(boardTensor.float().unsqueeze(0))
        probs = torch.nn.functional.softmax(logits, dim=1)

    return probs

infer(model, tempBoard)

tensor([[0.0081, 0.0084, 0.0084, 0.0086, 0.0082, 0.0085, 0.0079, 0.0085, 0.0078,
         0.0082, 0.0082, 0.0083, 0.0083, 0.0082, 0.0085, 0.0083, 0.0081, 0.0083,
         0.0085, 0.0085, 0.0082, 0.0080, 0.0079, 0.0083, 0.0082, 0.0084, 0.0081,
         0.0087, 0.0080, 0.0080, 0.0087, 0.0082, 0.0082, 0.0082, 0.0084, 0.0083,
         0.0084, 0.0079, 0.0080, 0.0086, 0.0080, 0.0080, 0.0082, 0.0086, 0.0083,
         0.0083, 0.0083, 0.0084, 0.0087, 0.0081, 0.0082, 0.0084, 0.0083, 0.0081,
         0.0083, 0.0084, 0.0079, 0.0081, 0.0086, 0.0079, 0.0084, 0.0083, 0.0084,
         0.0085, 0.0076, 0.0080, 0.0082, 0.0086, 0.0080, 0.0083, 0.0082, 0.0081,
         0.0085, 0.0084, 0.0081, 0.0081, 0.0084, 0.0083, 0.0082, 0.0083, 0.0085,
         0.0082, 0.0086, 0.0084, 0.0085, 0.0086, 0.0080, 0.0078, 0.0087, 0.0078,
         0.0084, 0.0079, 0.0084, 0.0081, 0.0084, 0.0085, 0.0082, 0.0086, 0.0081,
         0.0084, 0.0083, 0.0082, 0.0084, 0.0080, 0.0088, 0.0080, 0.0083, 0.0081,
         0.0082, 0.0086, 0.0

In [13]:
modelName = input()
if (modelName == ''):
    modelName = 'unnamed'
torch.save(model.state_dict(), f'./models/{modelName}_policy.pth')