In [28]:
### IMPORTS

import math,sys
import numpy
import scipy,scipy.optimize,scipy.spatial,scipy.spatial.distance,scipy.stats
import json
import sqlite3
from collections import *
from enum import Enum
import random

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD

import theano,theano.tensor

In [29]:
NUM_TURNS=9
NUM_PLAYERS=2
SQUARES=9
SQUARE_VALUES=3
NUM_FEATURES = NUM_PLAYERS*SQUARES*SQUARE_VALUES
NUM_ACTIONS=SQUARES
CHANCE_TO_RANDOM_ACTION = 0.1
DISCOUNT_FACTOR = 0.9

In [None]:
class TrainingExample:
    def __init__(self,turn,features,action,playerIndex):
        self.turn = turn
        self.features = features
        self.action = action
        self.playerIndex = playerIndex
        
def EncodeState(currentPlayer, board):
    features = [0]*(NUM_FEATURES)
    playerShift = (currentPlayer)*SQUARES*SQUARE_VALUES
    for x in range(0,SQUARES):
        squareValue = board[x]+1;
        features[playerShift + (x*SQUARE_VALUES) + squareValue] = 1.0
    return features

def MakeMove(board, player, move):
    board[move] = player

def GetWinner(board):
    for player in range(1,3):
        if board[0]==player and board[1]==player and board[2]==player:
            return player
        if board[3]==player and board[4]==player and board[5]==player:
            return player
        if board[6]==player and board[7]==player and board[8]==player:
            return player
        if board[0]==player and board[3]==player and board[6]==player:
            return player
        if board[1]==player and board[4]==player and board[7]==player:
            return player
        if board[2]==player and board[5]==player and board[8]==player:
            return player
    return -1
    
def PlayMatch(models, X_input, Y_input, debug):
    trainingExamples = []
    board = [-1]*9
    for turn in range(0,9):
        currentPlayer = turn%2
        if models is None or models[currentPlayer] is None or numpy.random.ranf() < CHANCE_TO_RANDOM_ACTION:
            # Play in a random square
            move = -1
            while move == -1:
                move = numpy.random.randint(9)
                if board[move]>=0:
                    move = -1
        else:
            # Query the model
            model = models[currentPlayer]
            baseFeatures = EncodeState(currentPlayer, board)
            features = numpy.array([baseFeatures,])
            actionValues = model.predict(features, batch_size=1, verbose=0)[0]
            # Choose the action with the highest expected value
            if debug:
                print(currentPlayer,board,actionValues)
            move = -1
            while move == -1:
                move = numpy.argmax(actionValues)
                if board[move]>=0:
                    actionValues[move] = -1
                    move = -1
        if debug:
            print("player",currentPlayer,"makes move",move)
        trainingExamples.append(TrainingExample(turn, EncodeState(currentPlayer, board), move, currentPlayer))
        MakeMove(board,currentPlayer,move)
        winner = GetWinner(board)
        if winner > -1:
            break
    if debug:
        print('winner is',winner)
    playerValues = []
    for pIndex in range(0,NUM_PLAYERS):
        if winner == -1:
            value = 8.0
        elif winner == pIndex:
            value = 32.0
        else:
            value = 2.0
        playerValues.append(value)
    for t in trainingExamples:
        discountedValue = playerValues[t.playerIndex] * (DISCOUNT_FACTOR ** (turn - t.turn))
        X_input.append(t.features)
        labels = [0]*SQUARES
        labels[t.action] = discountedValue
        Y_input.append(labels)
    return winner

def PlayMatches(X_input, Y_input, model, numMatches):
    random.seed(1)
    numpy.random.seed(1)
    for x in range(0,numMatches):
        PlayMatch([model, model], X_input, Y_input, False)
    return X_input,Y_input

def TestModel(model):
    random.seed(1)
    numpy.random.seed(1)
    winCount=0
    loseCount=0
    tieCount=0
    for x in range(0,1000):
        winner = PlayMatch([model, None], [], [], False)
        if winner == -1:
            tieCount = tieCount+1
        elif winner == 0:
            winCount = winCount+1
        elif winner == 1:
            loseCount = loseCount+1
        else:
            print(winner)
    for x in range(0,1000):
        winner = PlayMatch([None, model], [], [], False)
        if winner == -1:
            tieCount = tieCount+1
        elif winner == 0:
            loseCount = loseCount+1
        elif winner == 1:
            winCount = winCount+1
        else:
            print(winner)
    return (winCount,loseCount,tieCount)

PlayMatch(None, [], [], True)


player 0 makes move 5
player 1 makes move 3
player 0 makes move 1
player 1 makes move 0
player 0 makes move 4
player 1 makes move 2
player 0 makes move 7
player 1 makes move 6
winner is 1


1

In [None]:
model = Sequential()
model.add(Dense(128, input_dim=NUM_FEATURES, init='he_normal', activation='relu'))
model.add(Dense(64, init='he_normal', activation='relu'))
model.add(Dense(32, init='he_normal', activation='relu'))
model.add(Dense(NUM_ACTIONS, init='he_normal', activation='linear'))

#def mean_squared_error(y_true, y_pred):
#    return theano.tensor.mean(theano.tensor.square(y_pred - y_true), axis=-1)
def non_zero_mean_squared_error(y_true, y_pred):
    total = theano.tensor.sum(theano.tensor.square(abs(theano.tensor.sgn(y_true))*(y_pred - y_true)), axis=-1)
    count = theano.tensor.sum(abs(theano.tensor.sgn(y_true)))
    return total/count

#sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss=non_zero_mean_squared_error, optimizer='sgd')

def DumpModel(model):
    print("LAYERS:",len(model.layers))
    for layer in model.layers:
        print("NUM WEIGHT ARRAYS",len(layer.get_weights()))
        print(layer.get_weights()[1])
        weights = layer.get_weights()[0] # list of numpy arrays
        for x in range(0,NUM_FEATURES):
            print(weights[x])

X_train = []
Y_train = []
for simulation in range(0,1000):
    simulationModel = None
    if simulation>0:
        simulationModel = model
    PlayMatches(X_train, Y_train, simulationModel, 200)
    print(len(X_train))
    X_train = X_train[-640000:]
    Y_train = Y_train[-640000:]
    history = model.fit(numpy.array(X_train), numpy.array(Y_train), nb_epoch=40, shuffle=True, batch_size=1024, verbose=0)
    model = history.model
    #DumpModel(model)
    #PlayMatch(model,[],[],True)
    results = TestModel(model)
    print('MODEL TEST:',results)
    sys.stdout.flush()
#print(dir(history)).
#print(history.history)
#score = model.evaluate(X_test, y_test, batch_size=16)
#print(score)

#model = history.model



1735
MODEL TEST: (265, 152, 1583)
3483
MODEL TEST: (269, 150, 1581)
5231
MODEL TEST: (270, 167, 1563)
6987
MODEL TEST: (262, 163, 1575)
8743
MODEL TEST: (289, 169, 1542)
10476
MODEL TEST: (302, 184, 1514)
12210
MODEL TEST: (290, 187, 1523)
13946
MODEL TEST: (294, 198, 1508)
15674
MODEL TEST: (302, 211, 1487)
17414
MODEL TEST: (347, 196, 1457)
19061
MODEL TEST: (318, 199, 1483)
20692
MODEL TEST: (329, 153, 1518)
22079
MODEL TEST: (335, 169, 1496)
23866
MODEL TEST: (370, 161, 1469)
25521
MODEL TEST: (327, 166, 1507)
27164
MODEL TEST: (334, 176, 1490)
28818
MODEL TEST: (373, 193, 1434)
30558
MODEL TEST: (382, 187, 1431)
31955
MODEL TEST: (353, 157, 1490)
33660
MODEL TEST: (344, 176, 1480)
35387
MODEL TEST: (339, 177, 1484)
37118
MODEL TEST: (351, 175, 1474)
38512
MODEL TEST: (347, 168, 1485)
40249
MODEL TEST: (351, 162, 1487)
41999
MODEL TEST: (364, 161, 1475)
43773
MODEL TEST: (343, 160, 1497)
45534
MODEL TEST: (348, 185, 1467)
47267
MODEL TEST: (344, 182, 1474)
48999
MODEL TEST: (340, 2