In [84]:
### IMPORTS

import math,sys
import numpy
import scipy,scipy.optimize,scipy.spatial,scipy.spatial.distance,scipy.stats
import json
import sqlite3
from collections import *
from enum import Enum
import random

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD

import theano,theano.tensor

In [85]:
NUM_TURNS=9
NUM_PLAYERS=2
SQUARES=9
SQUARE_VALUES=3
NUM_FEATURES = NUM_PLAYERS*SQUARES*SQUARE_VALUES
NUM_ACTIONS=SQUARES
CHANCE_TO_RANDOM_ACTION = 0.1
DISCOUNT_FACTOR = 0.9

In [None]:
class TrainingExample:
    def __init__(self,turn,features,action,playerIndex):
        self.turn = turn
        self.features = features
        self.action = action
        self.playerIndex = playerIndex
        
def EncodeState(currentPlayer, board):
    features = [0]*(NUM_FEATURES)
    playerShift = (currentPlayer)*SQUARES*SQUARE_VALUES
    for x in range(0,SQUARES):
        squareValue = board[x]+1;
        features[playerShift + (x*SQUARE_VALUES) + squareValue] = 1.0
    return features

def MakeMove(board, player, move):
    board[move] = player

def GetWinner(board):
    for player in range(1,3):
        if board[0]==player and board[1]==player and board[2]==player:
            return player
        if board[3]==player and board[4]==player and board[5]==player:
            return player
        if board[6]==player and board[7]==player and board[8]==player:
            return player
        if board[0]==player and board[3]==player and board[6]==player:
            return player
        if board[1]==player and board[4]==player and board[7]==player:
            return player
        if board[2]==player and board[5]==player and board[8]==player:
            return player
    return -1
    
def PlayMatch(models, X_input, Y_input, debug):
    trainingExamples = []
    board = [-1]*9
    for turn in range(0,9):
        currentPlayer = turn%2
        encodedState = EncodeState(currentPlayer, board)
        if models is None or models[currentPlayer] is None or numpy.random.ranf() < CHANCE_TO_RANDOM_ACTION:
            # Play in a random square
            move = -1
            while move == -1:
                move = numpy.random.randint(9)
                if board[move]>=0:
                    move = -1
        else:
            # Query the model
            model = models[currentPlayer]
            features = numpy.array([encodedState,])
            actionValues = model.predict(features, batch_size=1, verbose=0)[0]
            # Choose the action with the highest expected value
            if debug:
                print(currentPlayer,board,actionValues)
            move = -1000000000
            while move == -1000000000:
                move = numpy.argmax(actionValues)
                if board[move]>=0:
                    actionValues[move] = -1000000000
                    move = -1000000000
        if debug:
            print("player",currentPlayer,"makes move",move)
        trainingExamples.append(TrainingExample(turn, encodedState, move, currentPlayer))
        MakeMove(board,currentPlayer,move)
        winner = GetWinner(board)
        if winner > -1:
            break
    if debug:
        print('winner is',winner)
    playerValues = []
    for pIndex in range(0,NUM_PLAYERS):
        if winner == -1:
            value = 8.0
        elif winner == pIndex:
            value = 32.0
        else:
            value = 2.0
        playerValues.append(value)
    for t in trainingExamples:
        discountedValue = playerValues[t.playerIndex] * (DISCOUNT_FACTOR ** (turn - t.turn))
        X_input.append(t.features)
        labels = [0]*SQUARES
        labels[t.action] = discountedValue
        Y_input.append(labels)
    return winner

def PlayMatches(X_input, Y_input, model, numMatches):
    random.seed(1)
    numpy.random.seed(1)
    for x in range(0,numMatches):
        PlayMatch([model, model], X_input, Y_input, False)
    return X_input,Y_input

def TestModel(model):
    random.seed(1)
    numpy.random.seed(1)
    winCount=0
    loseCount=0
    tieCount=0
    for x in range(0,1000):
        winner = PlayMatch([model, None], [], [], False)
        if winner == -1:
            tieCount = tieCount+1
        elif winner == 0:
            winCount = winCount+1
        elif winner == 1:
            loseCount = loseCount+1
        else:
            print(winner)
    for x in range(0,1000):
        winner = PlayMatch([None, model], [], [], False)
        if winner == -1:
            tieCount = tieCount+1
        elif winner == 0:
            loseCount = loseCount+1
        elif winner == 1:
            winCount = winCount+1
        else:
            print(winner)
    return (winCount,loseCount,tieCount)

PlayMatch(None, [], [], True)


player 0 makes move 1
player 1 makes move 0
player 0 makes move 2
player 1 makes move 8
player 0 makes move 6
player 1 makes move 3
player 0 makes move 5
player 1 makes move 7
player 0 makes move 4
winner is -1


-1

In [None]:
model = Sequential()
model.add(Dense(256, input_dim=NUM_FEATURES, init='he_normal', activation='relu'))
#model.add(Dropout(0.1))
model.add(Dense(128, init='he_normal', activation='relu'))
#model.add(Dropout(0.1))
model.add(Dense(64, init='he_normal', activation='relu'))
#model.add(Dropout(0.1))
model.add(Dense(NUM_ACTIONS, init='he_normal', activation='linear'))

#def mean_squared_error(y_true, y_pred):
#    return theano.tensor.mean(theano.tensor.square(y_pred - y_true), axis=-1)
def non_zero_mean_squared_error(y_true, y_pred):
    total = theano.tensor.sum(theano.tensor.square(abs(theano.tensor.sgn(y_true))*(y_pred - y_true)), axis=-1)
    count = theano.tensor.sum(abs(theano.tensor.sgn(y_true)))
    return total/count

#sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss=non_zero_mean_squared_error, optimizer='sgd')

def DumpModel(model):
    print("LAYERS:",len(model.layers))
    for layer in model.layers:
        print("NUM WEIGHT ARRAYS",len(layer.get_weights()))
        print(layer.get_weights()[1])
        weights = layer.get_weights()[0] # list of numpy arrays
        for x in range(0,NUM_FEATURES):
            print(weights[x])

X_train = []
Y_train = []
for simulation in range(0,1000):
    simulationModel = None
    if simulation>0:
        simulationModel = model
    PlayMatches(X_train, Y_train, simulationModel, 200)
    print(len(X_train))
    X_train = X_train[-640000:]
    Y_train = Y_train[-640000:]
    history = model.fit(numpy.array(X_train), numpy.array(Y_train), nb_epoch=50, shuffle=True, batch_size=1024, verbose=0)
    model = history.model
    #DumpModel(model)
    #PlayMatch(model,[],[],True)
    print('Finished epoch',simulation)
    sys.stdout.flush()
    results = TestModel(model)
    print('MODEL TEST:',results)
    sys.stdout.flush()
#print(dir(history)).
#print(history.history)
#score = model.evaluate(X_test, y_test, batch_size=16)
#print(score)

#model = history.model



1735
Finished epoch 0
MODEL TEST: (294, 204, 1502)
3369
Finished epoch 1
MODEL TEST: (297, 224, 1479)
5141
Finished epoch 2
MODEL TEST: (452, 311, 1237)
6914
Finished epoch 3
MODEL TEST: (463, 322, 1215)
8694
Finished epoch 4
MODEL TEST: (441, 325, 1234)
10460
Finished epoch 5
MODEL TEST: (415, 294, 1291)
12216
Finished epoch 6
MODEL TEST: (399, 237, 1364)
13978
Finished epoch 7
MODEL TEST: (413, 250, 1337)
15400
Finished epoch 8
MODEL TEST: (387, 301, 1312)
17162
Finished epoch 9
MODEL TEST: (341, 304, 1355)
18820
Finished epoch 10
MODEL TEST: (294, 190, 1516)
20476
Finished epoch 11
MODEL TEST: (318, 318, 1364)
22121
Finished epoch 12
MODEL TEST: (335, 349, 1316)
23893
Finished epoch 13
MODEL TEST: (362, 388, 1250)
25669
Finished epoch 14
MODEL TEST: (357, 399, 1244)
27447
Finished epoch 15
MODEL TEST: (305, 321, 1374)
29196
Finished epoch 16
MODEL TEST: (324, 318, 1358)
