### Building a Neural Net

In [661]:
import numpy as np

def ReLU(x):
    return x * (x > 0)

def dReLU(x):
    return 1. * (x > 0)

def sigmoid(x):
    return 1/(np.exp(-x)+1)    

def dsigmoid(x):
    return sigmoid(x)*(np.ones(x.shape[0])-sigmoid(x))
    
class NeuralNet(): 
    def __init__(self, layers=[], epoch=35, eta=0.08, func=sigmoid, weight_range=[-1,1]):
        self.layers = layers
        self.epoch = epoch
        self.eta = eta
        self.weight_range = weight_range
        self.func = func
        self.dfunc = dsigmoid if func == sigmoid else dReLU
        self.matrices = self.init_mat(layers)
        
    
    def init_mat(self,layers):
        mat_list = []
        for i in range(len(layers)-1):
            mat_list.append(np.random.uniform(self.weight_range[0],self.weight_range[1], (layers[i + 1], layers[i] + 1)))
        return mat_list

    def save_model(self, file_path):
        np.savez(file_path, *self.matrices)

    def load_model(self, file_path):
        data = np.load(file_path)
        self.matrices = [data[f'arr_{i}'] for i in range(len(data.files))]

    def forward(self, x):
        activations = [x]
        for M in self.matrices:
            x = np.append(x, 1) 
            x = self.func(np.dot(M, x))
            activations.append(x)
        return activations

    def backward_propagate(self, activations, y):
        errors = []
        deltas = []

        error = y - activations[-1]
        errors.append(error)

        delta = 2*error*self.dfunc(activations[-1])
        deltas.append(delta)

        for i in range(len(self.matrices)-1, 0, -1):
            error = np.dot(self.matrices[i].T, delta)[:-1]
            delta = 2*error*self.dfunc(activations[i])
            errors.append(error)
            deltas.append(delta)

        errors.reverse()
        deltas.reverse()

        return errors, deltas

    def update_weights(self, activations, deltas):
        for i in range(len(self.matrices)):
            delta = deltas[i].reshape(-1, 1)
            activation = np.append(activations[i], 1).reshape(1, -1)
            self.matrices[i] += self.eta * np.dot(delta, activation)

    def train(self, X, y,verbose=False, batch_size=0):
        for ep in range(self.epoch):
            if verbose:
                tenperc = self.epoch//10
                if ep%tenperc == 0:
                    print("Train percent:",ep/self.epoch*100)

            indices = [i for i in range(len(X))]
            np.random.shuffle(indices)
            if batch_size > 0:
                indices = indices[:batch_size]
            for i in indices:
                x = X[i]
                target = y[i]
                activations = self.forward(x)
                errors, deltas = self.backward_propagate(activations, target)
                self.update_weights(activations, deltas)

    def classify(self, x):
        activations = self.forward(x)
        return activations[-1]
  

#### Training and testing the neural net on a 2 bit adder dataset

In [520]:
nn = NeuralNet([2,2,2], epoch=500, eta=0.4, func=sigmoid, weight_range=[-1,1])
trainX = [[0,0],[0,1],[1,0],[1,1]]
trainY = [[0,0],[0,1],[0,1],[1,0]]
nn.train(trainX, trainY)

In [521]:
print("pred:",np.round(nn.classify([0,0])), "expected:",[0,0])
print("pred:",np.round(nn.classify([1,0])), "expected:",[0,1])
print("pred:",np.round(nn.classify([0,1])), "expected:",[0,1])
print("pred:",np.round(nn.classify([1,1])), "expected:",[1,0])

pred: [0. 0.] expected: [0, 0]
pred: [0. 1.] expected: [0, 1]
pred: [0. 1.] expected: [0, 1]
pred: [1. 0.] expected: [1, 0]


#### Training and testing the neural net on policy table

Parsing the data

In [656]:
from hexapawn import policy_table, Hexapawn
import ast

trainX = []
trainY = []

for x in policy_table:
    listX = ast.literal_eval(x)
    if listX[0] in policy_table[x]:
        for a in policy_table[x][listX[0]]:
            trainX.append((1/2)*(np.array(listX)+1))
            trainY.append((1/2)*(np.array(Hexapawn(listX).result(a)[1:])+1))

print(trainX)
print(trainY)

[array([1. , 0.5, 0.5, 0. , 1. , 0. , 0.5, 0.5, 1. , 0.5]), array([1. , 0.5, 0.5, 0.5, 1. , 0. , 1. , 0.5, 0.5, 0.5]), array([1. , 0.5, 0.5, 0.5, 1. , 0. , 1. , 0.5, 0.5, 0.5]), array([1. , 0. , 0.5, 0.5, 1. , 1. , 0. , 0.5, 1. , 0.5]), array([1. , 0. , 0.5, 0.5, 1. , 1. , 0. , 0.5, 1. , 0.5]), array([1. , 0. , 0.5, 0.5, 1. , 1. , 0. , 0.5, 1. , 0.5]), array([0. , 0. , 0.5, 0. , 1. , 1. , 0.5, 0.5, 1. , 0.5]), array([1. , 0. , 0.5, 0. , 1. , 0. , 0.5, 0.5, 1. , 1. ]), array([0. , 0.5, 0.5, 0. , 0. , 0. , 1. , 0.5, 0.5, 0.5]), array([0. , 0.5, 0.5, 0. , 0. , 0. , 1. , 0.5, 0.5, 0.5]), array([1. , 0.5, 0.5, 0.5, 0. , 1. , 0. , 0.5, 0.5, 0.5]), array([0. , 0.5, 0.5, 0. , 0. , 1. , 0.5, 0.5, 0.5, 0.5]), array([0. , 0.5, 0.5, 0. , 0. , 1. , 0.5, 0.5, 0.5, 0.5]), array([1. , 0. , 0.5, 0.5, 0. , 1. , 0. , 0.5, 0.5, 1. ]), array([1. , 0. , 0.5, 0.5, 0. , 1. , 0. , 0.5, 0.5, 1. ]), array([0. , 0. , 0.5, 0.5, 0. , 0. , 1. , 0.5, 0.5, 0.5]), array([0. , 0. , 0.5, 0.5, 0. , 0. , 1. , 0.5, 0.5, 0.5

Training and testing

In [664]:
nn = NeuralNet([10, 25, 9], epoch=20000, eta=0.001, func=sigmoid, weight_range=[-0.01, 0.01])

nn.train(trainX, trainY, verbose=True, batch_size=32)

optimalMoves = 0
badMoves = 0
illegalMoves = 0

for x,y in zip(trainX, trainY):
    pred = 2*nn.classify(x)-1
    cutoff = 0.3
    for j in range(len(pred)):
        if pred[j]<=-cutoff:
            pred[j] = -1
        elif pred[j] >= cutoff:
            pred[j] = 1
        else:
            pred[j] = 0

    tmpOpt = optimalMoves
    tmpBad = badMoves

    state = Hexapawn(list(2*x-1))

    for a in policy_table[str(list((2*x-1).astype(int)))][int(2*x[0]-1)]:
        optimal = np.array(state.result(a)[1:])
        if np.array_equal(optimal, pred):
            optimalMoves += 1
    if tmpOpt == optimalMoves:
        for a in state.actions:
            bad = np.array(state.result(a)[1:])
            if np.array_equal(bad, pred):
                badMoves += 1
    if tmpOpt == optimalMoves and tmpBad == badMoves:
        illegalMoves += 1
    
    # print("X:", 2*x-1)
    # print("Y:", 2*y-1)
    # print("Predicted:",pred)
    # print(np.array_equal(y,pred))

print("Optimal moves predicted:",optimalMoves)
print("Bad moves predicted:",badMoves)
print("Illegal moves predicted:",illegalMoves)

Train percent: 0.0
Train percent: 10.0
Train percent: 20.0
Train percent: 30.0
Train percent: 40.0
Train percent: 50.0
Train percent: 60.0
Train percent: 70.0
Train percent: 80.0
Train percent: 90.0
Optimal moves predicted: 8
Bad moves predicted: 0
Illegal moves predicted: 87


In [665]:
from datetime import datetime

now = datetime.now()

timeNow = now.strftime("%Y-%m-%d_%H-%M-%S")
nn.save_model("model-"+timeNow)

I wasn't able to find a good model other than having to predict at most 9 optimal moves. I found that the neural network was the best with only one hidden layer. I couldn't configure the parameters correctly so when having more layers it didn't give me that many optimal moves.