In [1]:
#!/usr/bin/env python

from Coach import Coach
from games.tictactoe.TicTacToeGame import TicTacToeGame as Game
from games.tictactoe.mxnet.NNet import NNetWrapper as nn
from utils import *
import numpy
import GA
from random import shuffle
import math
import os

import json  # used below to read hyperparameters; not imported explicitly at the top of this cell

train_examples_file_name = "checkpoint.examples"

# Paths where SageMaker mounts inputs/outputs inside the training container.
prefix = '/opt/ml/'
channel_name = 'training'
model_path = os.path.join(prefix, 'model')
param_path = os.path.join(prefix, 'io/config/hyperparameters.json')
input_path = output_path = os.path.join(prefix + 'io/data', channel_name)

# Default training settings. Any key present in the hyperparameter file
# overrides the value given here.
default_args = {
    'numIters': 2,
    'numEps': 20,
    'tempThreshold': 15,
    'updateThreshold': 0.6,
    'maxlenOfQueue': 200000,
    'numMCTSSims': 25,
    'arenaCompare': 4,
    'cpuct': 1,

    'checkpoint': './temp/',
    'load_model': False,
    'load_folder_file': ('/dev/models/8x100x50','best.pth.tar'),
    'numItersForTrainExamplesHistory': 7,
}


def _parse_hyperparameter(raw, default):
    """Decode one hyperparameter value read from the JSON file.

    SageMaker serialises hyperparameter values as strings, so a string is
    decoded as a JSON scalar/list when possible ("25" -> 25, "0.6" -> 0.6,
    "true"/"False" -> bool). Strings that are not valid JSON (e.g. paths), and
    values whose default is itself a string, are returned unchanged. A list is
    converted to a tuple when the default for that key is a tuple.
    """
    if not isinstance(raw, str) or isinstance(default, str):
        value = raw
    else:
        text = raw.strip()
        if text.lower() in ('true', 'false'):
            value = text.lower() == 'true'
        else:
            try:
                value = json.loads(text)
            except ValueError:
                value = raw  # plain text, keep as-is
    if isinstance(default, tuple) and isinstance(value, list):
        value = tuple(value)
    return value


# Start from the defaults and overlay any hyperparameters the user passed with
# the training job. (Previously the loaded values were immediately replaced by
# the hard-coded dictionary, so they never took effect.)
merged_args = dict(default_args)
if os.path.isfile(param_path):
    with open(param_path, 'r') as tc:
        for key, raw in json.load(tc).items():
            merged_args[key] = _parse_hyperparameter(raw, default_args.get(key))

args = dotdict(merged_args)

In [2]:
# Genetic-algorithm sizes: population size, number of parents kept for mating,
# and number of generations.
sol_per_pop = 6
num_parents = 3
num_generations = 20

# Creating the initial population.
#     Generate population from scratch if there is no checkpoint,
#     or load population from file.
#
# Collect [directory, filename] pairs for every entry in the model directory,
# sorted by name. A missing model directory is treated the same as an empty
# one (no checkpoint) instead of raising from os.listdir.
if os.path.isdir(model_path):
    input_model_files = sorted([model_path, name] for name in os.listdir(model_path))
else:
    input_model_files = []
game = Game()

In [3]:
from games.tictactoe.mxnet.TicTacToeNNet import TicTacToeNNet as onnet
from mxnet import nd

# Settings for a stand-alone network instance used by the experiments below.
args2 = dotdict({
    'lr': 0.001,
    'dropout': 0.3,
    'epochs': 1,
    'batch_size': 64,
    'cuda': False,
    'num_channels': 512,
})
z = onnet(game, args2)

# Smoke test: run the network once on an all-zero 1x1x3x3 input and display
# whatever predict() returns.
dummy_input = nd.zeros((1, 1, 3, 3))
z.predict(dummy_input)

(
 [[0.28732005 0.88008714 1.0076691  0.7825768  0.31865382 1.0440212
   0.3939152  1.1920686  1.2976027  1.2683628 ]]
 <NDArray 1x10 @cpu(0)>, 
 [[-0.7818425]]
 <NDArray 1x1 @cpu(0)>)

In [4]:
# Build the population: fresh networks when no model files were listed,
# otherwise one network per listed file, restored from its checkpoint.
if not input_model_files:
    new_population = [nn(game) for _ in range(sol_per_pop)]
else:
    print("Checkpoint File found. Read it.")
    # The population size follows the number of files that were found.
    sol_per_pop = len(input_model_files)
    new_population = []
    for folder, filename in input_model_files:
        print(os.path.join(folder, filename))
        nnet = nn(game)
        nnet.load_checkpoint(folder, filename)
        new_population.append(nnet)

In [5]:
# Create the Coach used to generate training examples, and load the training
# example history when the examples file exists.
master = Coach(Game(), args)
examples_file_path = os.path.join(input_path, train_examples_file_name)
if os.path.isfile(examples_file_path):
    print(examples_file_path)
    master.loadTrainExamples(examples_file_path)

alpha_index = 0

# Start Genetic Algorithm: one ancestor index per member of the population.
ancestors = list(range(sol_per_pop))

In [6]:
from games.tictactoe.TicTacToeLogic import Board
# Probe a freshly constructed Board(3): the element at index [0][0][0],
# converted to a scalar, is compared with 0 (the cell displays the result).
Board(3)[0][0][0].asscalar()==0

True

In [7]:
# Inspect the initial board returned by the game: its contents and dtype.
board = game.getInitBoard()
print(board)
print(board.dtype)


[[[[0. 0. 0.]
   [0. 0. 0.]
   [0. 0. 0.]]]]
<NDArray 1x1x3x3 @cpu(0)>
<class 'numpy.float32'>


In [8]:
import numpy as np
from mxnet import nd

# Predict on the current board and mask the policy output with the valid-move
# vector for player 1.
Ps, v = z.predict(board)
valids = game.getValidMoves(board, 1)
Ps = Ps * valids

# Sum of the masked policy, computed once through NumPy and once through MXNet.
print(np.sum(Ps.asnumpy()))
print(nd.sum(Ps))

# Enumerate every action index the game exposes.
for a in range(game.getActionSize()):
    print(a)

7.2039146

[7.2039146]
<NDArray 1 @cpu(0)>
0
1
2
3
4
5
6
7
8
9


In [9]:
def flipud(x):
    """Return `x` with its elements reversed along axis 0 (via `nd.flip`).

    Note: for an array with a leading size-1 axis, such as the (1, 3, 3)
    arrays built elsewhere in this cell, axis 0 is that leading axis.
    """
    return nd.flip(data=x, axis=0)

def fliplr(x):
    """Return `x` with its elements reversed along axis 1 (via `nd.flip`).

    Note: for a 3-D array such as the (1, 3, 3) arrays built elsewhere in this
    cell, axis 1 is the first of the two trailing axes, not the last one.
    """
    return nd.flip(data=x, axis=1)

def rot90(x, k):
    """Rotate `x` by `k` quarter turns in the plane of its last two axes.

    Behaves like `numpy.rot90` applied to the trailing two axes, so it works
    both for plain 2-D grids and for arrays with leading axes such as the
    (1, 3, 3) boards used in this notebook.

    The previous version transposed with the identity permutation (0, 1, 2)
    and flipped axes 0/1, so for (1, 3, 3) inputs k=1 and k=3 returned the
    input unchanged and k=2 was a single flip rather than a 180-degree turn.

    Args:
        x: array with at least two dimensions.
        k: number of 90-degree rotations; any integer, reduced modulo 4.

    Returns:
        `x` itself when k is a multiple of 4, otherwise a rotated array.

    Raises:
        ValueError: if a rotation is requested on an array with fewer than
            two dimensions.
    """
    k = k % 4
    if k == 0:
        return x
    if x.ndim < 2:
        raise ValueError("rot90 needs an array with at least 2 dimensions")

    # Indices of the two trailing axes (rows, columns) of each plane.
    rows, cols = x.ndim - 2, x.ndim - 1
    if k == 2:
        # Half turn: reverse both trailing axes.
        return nd.flip(nd.flip(x, axis=cols), axis=rows)

    # Permutation that swaps only the two trailing axes.
    swap_last_two = tuple(range(rows)) + (cols, rows)
    if k == 1:
        # Quarter turn: transpose each plane, then reverse its rows.
        return nd.flip(nd.transpose(x, axes=swap_last_two), axis=rows)
    # k == 3: reverse the rows of each plane, then transpose.
    return nd.transpose(nd.flip(x, axis=rows), axes=swap_last_two)

# Sample policy vector with 10 entries, used below as the `pi` argument of
# getSymmetries (which expects 3**2 + 1 entries).
pi = nd.array([0.,0.125,0.16666667,0.08333334,0.08333334,0.125,0.08333334,0.16666667,0.16666667,0.])
# Disabled scratch code: rotates a random 3x3 array with rot90 and with
# np.rot90 and prints both results for comparison.
#pi_board = nd.random.uniform(shape=(3,3))
#pi_numpy = pi_board.asnumpy()
#k = 3
#print(pi_board)
#print("NDArray")
#rnd = rot90(pi_board,k)
#print(rnd.asnumpy())
#print(rnd.reshape((9)))
#print("Numpy")
#rnu = np.rot90(pi_numpy,k)
#print(rnu)
#print(rnu.ravel())

#nd.concat(rnd.reshape((9)), pi[-1], dim=0)

def getSymmetries(board, pi):
    """Build rotated and mirrored variants of a board and its policy vector.

    Args:
        board: board array; it is passed to rot90 / fliplr unchanged.
        pi: policy vector with 3**2 + 1 entries. The last entry (the "pass"
            slot) is kept as-is; the first nine are reshaped to (1, 3, 3) and
            transformed together with the board.

    Returns:
        A list of 8 `(board, pi)` tuples: for each rotation count 1..4, first
        the variant that is additionally passed through fliplr, then the
        rotation alone. Each `pi` is flattened back to 9 values with the
        untouched last entry appended.
    """
    assert len(pi) == 3**2 + 1  # 1 for pass
    policy_grid = nd.reshape(pi[:-1], (1, 3, 3))

    symmetries = []
    for quarter_turns in range(1, 5):
        for mirrored in (True, False):
            sym_board = rot90(board, quarter_turns)
            sym_policy = rot90(policy_grid, quarter_turns)
            if mirrored:
                sym_board, sym_policy = fliplr(sym_board), fliplr(sym_policy)
            flat_policy = nd.concat(sym_policy.reshape((9)), pi[-1], dim=0)
            symmetries.append((sym_board, flat_policy))
    return symmetries
    
# Try fliplr on a random (1, 3, 3) array (the result is discarded).
newB = nd.random.uniform(shape=(1,3,3))
fliplr(newB)

# Rebind `board` to an all-zero 1x1x3x3 array, generate the symmetries of its
# first element together with the sample `pi`, and display the dtype of the
# first returned policy vector.
board = nd.zeros((1,1,3,3))
sym = getSymmetries(board[0], pi)
sym[0][1].dtype

numpy.float32

In [10]:
# Print the board, then display it transposed with axes=(0,1,2,3). That
# permutation keeps every axis in place, so the layout is unchanged.
print(board)
nd.transpose(board,axes=(0,1,2,3))


[[[[0. 0. 0.]
   [0. 0. 0.]
   [0. 0. 0.]]]]
<NDArray 1x1x3x3 @cpu(0)>



[[[[0. 0. 0.]
   [0. 0. 0.]
   [0. 0. 0.]]]]
<NDArray 1x1x3x3 @cpu(0)>

In [None]:
# Pick the population member at alpha_index and let the Coach generate
# training examples with it. A later cell unzips `train_examples` into
# boards, target policies and target values.
alpha_padawan = new_population[alpha_index]
train_examples = master.generate(alpha_padawan)


[5. 4. 3. 3. 1. 3. 1. 1. 3. 0.]
<NDArray 10 @cpu(0)>

[5. 4. 3. 3. 1. 3. 1. 1. 3. 0.]
<NDArray 10 @cpu(0)>

[6. 5. 3. 0. 3. 4. 2. 1. 3. 0.]
<NDArray 10 @cpu(0)>

[6. 5. 3. 0. 3. 4. 2. 1. 3. 0.]
<NDArray 10 @cpu(0)>

[6. 5. 4. 0. 0. 4. 3. 1. 4. 0.]
<NDArray 10 @cpu(0)>

[6. 5. 4. 0. 0. 4. 3. 1. 4. 0.]
<NDArray 10 @cpu(0)>

[18.  2.  2.  0.  0.  2.  0.  1.  2.  0.]
<NDArray 10 @cpu(0)>

[18.  2.  2.  0.  0.  2.  0.  1.  2.  0.]
<NDArray 10 @cpu(0)>

[25.  1.  0.  0.  0.  0.  0.  0.  0.  0.]
<NDArray 10 @cpu(0)>

[25.  1.  0.  0.  0.  0.  0.  0.  0.  0.]
<NDArray 10 @cpu(0)>

[5. 4. 3. 3. 1. 3. 1. 1. 3. 0.]
<NDArray 10 @cpu(0)>

[5. 4. 3. 3. 1. 3. 1. 1. 3. 0.]
<NDArray 10 @cpu(0)>

[5. 4. 3. 3. 2. 3. 2. 0. 3. 0.]
<NDArray 10 @cpu(0)>

[5. 4. 3. 3. 2. 3. 2. 0. 3. 0.]
<NDArray 10 @cpu(0)>

[6. 5. 3. 0. 3. 4. 2. 0. 4. 0.]
<NDArray 10 @cpu(0)>

[6. 5. 3. 0. 3. 4. 2. 0. 4. 0.]
<NDArray 10 @cpu(0)>

[4. 4. 5. 0. 4. 4. 0. 0. 5. 0.]
<NDArray 10 @cpu(0)>

[4. 4. 5. 0. 4. 4. 0. 0. 5. 0.]
<NDArray 

In [None]:
from mxnet import nd, gpu, gluon, init, autograd

# Split the examples into three parallel sequences: boards, target policies
# and target values.
input_boards, target_pis, target_vs = zip(*train_examples)

# Wrap them in a Gluon dataset and a shuffling loader that yields batches of 64
# using 4 worker processes.
dataset_train = gluon.data.dataset.ArrayDataset(input_boards, target_pis, target_vs)
data_loader = gluon.data.DataLoader(
    dataset_train,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)

In [None]:
# Sanity-check a single mini-batch: run the network on it, print the dtypes
# and shapes of the network outputs next to those of the targets, and print
# the combined policy + value loss.
for input_board, target_pi, target_v in data_loader:
    print(input_board.shape)
    pi, v = z.predict(input_board)

    # Each label now matches the value it prints; previously "Output Vs"
    # showed target_pi.dtype and "Target Pis" showed pi.dtype.
    print("Input Board Dtype: ", input_board.dtype)
    print("Output Pis Dtype: ", pi.dtype)
    print("Output Vs Dtype: ", v.dtype)
    print("Target Pis Dtype: ", target_pi.dtype)
    print("Target Vs Dtype: ", target_v.dtype)

    # Target shape on the left, network output shape on the right.
    print(target_pi.shape, " = ", pi.shape)
    print(target_v.shape, " = ", v.shape)

    v_loss = z.v_loss(target_v, v)
    pi_loss = z.pi_loss(target_pi, pi)
    loss = pi_loss + v_loss
    print(loss)
    break  # only the first batch is inspected

In [None]:
# Call the network's policy loss on constant 3x3 inputs: all zeros as the first
# argument, all ones as the second. The batch loop passes (target, output).
z.pi_loss(nd.zeros((3,3)),nd.ones((3,3)))

In [None]:
# Hand the whole population and the generated examples to the Coach's train().
master.train(new_population, train_examples)

In [None]:
# Compute the fitness of the population with GA.cal_pop_fitness; the result is
# passed to GA.select_mating_pool below.
fitness = GA.cal_pop_fitness(new_population, args)

In [None]:
# Scratch arithmetic: 3 raised to the power 1/1, which evaluates to 3.0.
3**(1./1)

In [None]:
# Select `num_parents` parents from the population based on `fitness`.
# NOTE(review): the three return values are presumably the parent networks,
# their weights, and their indices in the population - confirm in GA.
nnet_parents, parents_weights, indices = GA.select_mating_pool(new_population, fitness, num_parents)

In [None]:
# Produce offspring from the parents' weights; the requested offspring size is
# the population size minus the number of parents.
# NOTE(review): if the population was loaded from fewer than `num_parents`
# files, this size is negative - confirm how GA.crossover handles that.
offspring_crossover = GA.crossover(parents_weights, offspring_size=sol_per_pop-num_parents)