In [None]:
#!/usr/bin/env python

from Coach import Coach
from games.tictactoe.TicTacToeGame import TicTacToeGame as Game
from games.tictactoe.mxnet.NNet import NNetWrapper as nn
# NOTE: the star import is deliberately kept ahead of the imports below so
# that they continue to take precedence over any same-named export of utils.
from utils import *
import numpy
import GA
from random import shuffle
import json
import math
import os

train_examples_file_name = "checkpoint.examples"

# These are the paths where SageMaker mounts its inputs and outputs inside the
# container.

prefix = '/opt/ml/'
channel_name='training'
model_path = os.path.join(prefix, 'model')
param_path = os.path.join(prefix, 'io/config/hyperparameters.json')
input_path = output_path = os.path.join(prefix + 'io/data', channel_name)


def _coerce_like(default, raw):
    """Convert a raw hyperparameter value to the type of its default.

    Hyperparameter files typically carry every value as a string, so a value
    such as "2" must become the int 2 before it is used arithmetically.

    Args:
        default: the built-in default for this key, or None when the key has
            no default.
        raw: the value read from the hyperparameter file.

    Returns:
        `raw` unchanged when it is not a string or when no typed default is
        known; otherwise `raw` parsed as a bool, int/float, or tuple/list to
        match `default`.

    Raises:
        ValueError: if a string cannot be parsed as the expected number or
            JSON sequence.
    """
    if not isinstance(raw, str):
        return raw
    # bool must be tested before int because bool is a subclass of int.
    if isinstance(default, bool):
        return raw.strip().lower() in ('true', '1', 'yes')
    if isinstance(default, int):
        # Allow fractional overrides of integer defaults (e.g. cpuct "1.5").
        try:
            return int(raw)
        except ValueError:
            return float(raw)
    if isinstance(default, float):
        return float(raw)
    if isinstance(default, (tuple, list)):
        return type(default)(json.loads(raw))
    return raw


# Built-in defaults; any key may be overridden by the training job below.
default_args = {
    'numIters': 2,
    'numEps': 20,
    'tempThreshold': 15,
    'updateThreshold': 0.6,
    'maxlenOfQueue': 200000,
    'numMCTSSims': 25,
    'arenaCompare': 4,
    'cpuct': 1,

    'checkpoint': './temp/',
    'load_model': False,
    'load_folder_file': ('/dev/models/8x100x50','best.pth.tar'),
    'numItersForTrainExamplesHistory': 7,
}

# Read in any hyperparameters that the user passed with the training job and
# layer them on top of the defaults.  (Previously the loaded values were
# overwritten by the defaults and therefore never took effect.)
merged_args = dict(default_args)
if os.path.isfile(param_path):
    with open(param_path, 'r') as tc:
        user_hyperparameters = json.load(tc)
    for param_name, raw_value in user_hyperparameters.items():
        merged_args[param_name] = _coerce_like(default_args.get(param_name), raw_value)

args = dotdict(merged_args)

In [None]:
# Genetic-algorithm settings.
sol_per_pop = 6
num_parents = 3
num_generations = 20

# Creating the initial population.
#     Generate population from scratch if there is no checkpoint,
#     or load population from file.
#
# Each entry is a [directory, file name] pair.  A missing model directory is
# treated the same as an empty one, so a fresh environment falls through to
# the "generate from scratch" path instead of raising FileNotFoundError.
if os.path.isdir(model_path):
    input_model_files = sorted(
        [model_path, model_file_name] for model_file_name in os.listdir(model_path)
    )
else:
    input_model_files = []
game = Game()

In [None]:
from mxnet import nd
from games.tictactoe.mxnet.TicTacToeNNet import TicTacToeNNet as onnet

# Settings handed to the stand-alone network instance built below.
args2 = dotdict(dict(
    lr=0.001,
    dropout=0.3,
    epochs=1,
    batch_size=64,
    cuda=False,
    num_channels=512,
))

# Build the network directly and run one prediction on an all-zero
# (1, 3, 3) input; the prediction is the cell's displayed output.
z = onnet(game, args2)
z.predict(nd.zeros((1, 3, 3)))

In [None]:
# Build the population: one wrapper per checkpoint file when any were found,
# otherwise `sol_per_pop` freshly constructed wrappers.
new_population = []
if input_model_files:
    print("Checkpoint File found. Read it.")
    # The population size follows the number of checkpoint files.
    sol_per_pop = len(input_model_files)
    for load_model_file in input_model_files:
        ckpt_dir, ckpt_name = load_model_file
        print(os.path.join(ckpt_dir, ckpt_name))
        nnet = nn(game)
        nnet.load_checkpoint(ckpt_dir, ckpt_name)
        new_population.append(nnet)
else:
    new_population.extend(nn(game) for _ in range(sol_per_pop))

In [None]:
"""
Creating Coach class used to generate Training Example
    Load Train Example History if exist
"""
master = Coach(Game(), args)

# Restore previously saved training examples when the file is present.
examples_file_path = os.path.join(input_path, train_examples_file_name)
if os.path.isfile(examples_file_path):
    print(examples_file_path)
    master.loadTrainExamples(examples_file_path)

# Start Genetic Algorithm
alpha_index = 0
ancestors = list(range(sol_per_pop))

In [None]:
# Quick probe of the board representation: index three levels into a fresh
# Board(3), convert the element to a Python scalar and compare it with 0.
# The resulting boolean is the cell's displayed output.
# NOTE(review): presumably 0 encodes an empty square — confirm against Board.
from games.tictactoe.TicTacToeLogic import Board
Board(3)[0][0][0].asscalar()==0

In [None]:
# Fetch the game's initial board and print it for inspection; `board` is
# reused by the prediction cell that follows.
board = game.getInitBoard()
print(board)

In [None]:
import numpy as np
from mxnet import nd

# Predict on the initial board, then multiply the policy by the valid-move
# vector for player 1.
Ps, v = z.predict(board)
valids = game.getValidMoves(board, 1)
Ps = Ps * valids

# Print the sum of the masked policy twice: once via numpy, once via NDArray.
print(np.sum(Ps.asnumpy()))
print(nd.sum(Ps))

# List every action index the game exposes.
for a in range(game.getActionSize()):
    print(a)

In [None]:
def flipud(x):
    """Return `x` reversed along axis 0 via `nd.flip`.

    NOTE(review): assumes `x` is an MXNet NDArray with at least one axis —
    confirm against callers.
    """
    reversed_rows = nd.flip(data=x, axis=0)
    return reversed_rows

def fliplr(x):
    """Return `x` reversed along axis 1 via `nd.flip`.

    NOTE(review): assumes `x` is an MXNet NDArray with at least two axes —
    confirm against callers.
    """
    reversed_cols = nd.flip(data=x, axis=1)
    return reversed_cols

def rot90(x, k):
    """Rotate `x` by `k` quarter turns using flips and transposes.

    Intended as an NDArray counterpart of `numpy.rot90`.

    Args:
        x: the array to rotate.
            NOTE(review): assumes a 2-D MXNet NDArray — confirm.
        k: number of quarter turns; only `k % 4` matters, so negative and
            large values are accepted.

    Returns:
        `x` itself (not a copy) when `k % 4 == 0`, otherwise a new rotated
        array.
    """
    quarter_turns = k % 4
    if quarter_turns == 0:
        # No rotation needed: hand back the very same object.
        return x
    if quarter_turns == 1:
        rotated = flipud(nd.transpose(x))
    elif quarter_turns == 2:
        rotated = flipud(fliplr(x))
    elif quarter_turns == 3:
        rotated = nd.transpose(flipud(x))
    return rotated

# Side-by-side check of the NDArray rot90 above against numpy's np.rot90.
# `np` is not imported in this cell; it comes from the `import numpy as np`
# executed in an earlier cell.

# A 10-element vector.
# NOTE(review): presumably a policy over 9 board squares plus one extra
# action — confirm.
pi = nd.array([0.,0.125,0.16666667,0.08333334,0.08333334,0.125,0.08333334,0.16666667,0.16666667,0.])
# Random 3x3 board (unseeded, so values differ between runs) and its numpy copy.
pi_board = nd.random.uniform(shape=(3,3))
pi_numpy = pi_board.asnumpy()
# 11 % 4 == 3, so rot90 takes its three-quarter-turn branch.
k = 11
print(pi_board)
print("NDArray")
rnd = rot90(pi_board,k)
print(rnd.asnumpy())
# Flattened to 9 elements.
print(rnd.reshape((9)))
print("Numpy")
rnu = np.rot90(pi_numpy,k)
print(rnu)
print(rnu.ravel())

# Append the last element of `pi` to the flattened rotated board; the result
# is the cell's displayed output.
nd.concat(rnd.reshape((9)), pi[-1], dim=0)

In [None]:
# Pick the population member at `alpha_index` and pass it to the Coach's
# `generate` method.
# NOTE(review): what `generate` returns is not visible here — presumably
# self-play training examples, judging by the variable name; confirm in Coach.
alpha_padawan = new_population[alpha_index]
train_examples = master.generate(alpha_padawan)