In [1]:
import numpy as np
import random

# Each move is evaluated as to how good it was given the current grid
class Game:
    """Board state and rules for a 3x3 noughts-and-crosses game.

    Cells of the grid hold ``-1`` for an X, ``1`` for an O and ``0`` for an
    empty square.  The grid is stored by reference, so moves made through
    :meth:`make_move` are visible to every holder of the same array.
    """

    # Encoding of the marks on the grid and the side length of the board.
    X_MARK = -1
    O_MARK = 1
    EMPTY = 0
    BOARD_SIZE = 3

    def __init__(self,grid):
        """Wrap ``grid`` (a 3x3 numpy array) without copying it."""
        # training_history and bin_state are not read by any method of this
        # class; they are kept because external code may access them.
        self.training_history = []
        self.grid = grid
        self.bin_state = 0b00000000000000000000000000

    def make_move(self, loc, mark):
        """Place ``mark`` at ``loc`` if that square is on the board and empty.

        Args:
            loc: ``(row, col)`` pair identifying the target square.
            mark: value to write, ``-1`` for X or ``1`` for O.

        Returns:
            ``True`` when the mark was written to ``self.grid``, ``False`` when
            the square is outside the board or already occupied (the grid is
            left untouched in that case).
        """
        row, col = int(loc[0]), int(loc[1])
        n_rows, n_cols = self.grid.shape
        # Reject out-of-range squares explicitly; negative indices would
        # otherwise silently wrap around to the other side of the board.
        if not (0 <= row < n_rows and 0 <= col < n_cols):
            return False
        if self.grid[row, col] != self.EMPTY:
            return False
        self.grid[row, col] = mark
        return True

    def check_win(self, grid):
        """Report the state of ``grid``.

        Args:
            grid: 3x3 array using the -1 / 0 / 1 cell encoding.

        Returns:
            ``"X WIN"`` or ``"O WIN"`` when that player owns a full row, column
            or diagonal, ``"DRAW"`` when nobody has a line and no square is
            empty, and ``None`` while the game is still in progress.
        """
        grid = np.asarray(grid)

        # Row indices of each player's marks, for the grid and its transpose
        # (the transpose turns column checks into row checks).
        x_lines = [np.where(grid == self.X_MARK), np.where(np.transpose(grid) == self.X_MARK)]
        o_lines = [np.where(grid == self.O_MARK), np.where(np.transpose(grid) == self.O_MARK)]

        # (row, col) coordinates of each player's marks, for the diagonals.
        x_cells = list(zip(x_lines[0][0], x_lines[0][1]))
        o_cells = list(zip(o_lines[0][0], o_lines[0][1]))

        # Priority order: rows/columns for X then O, then diagonals for X
        # then O, and only then the full-board test.
        if self._check_row_col(x_lines):
            return "X WIN"
        if self._check_row_col(o_lines):
            return "O WIN"
        if self._check_diagonals(x_cells):
            return "X WIN"
        if self._check_diagonals(o_cells):
            return "O WIN"
        if not (grid == self.EMPTY).any():
            return "DRAW"
        return None

    def _check_row_col(self, res):
        """Return True if any row (or column, via the transposed entry) is full.

        Args:
            res: sequence of ``np.where`` results; the first array of each
                entry lists the line index of every square a player occupies.
        """
        for occupied in res:
            line_index = np.asarray(occupied[0])
            for line in range(self.BOARD_SIZE):
                # A line is won when the player holds every square on it.
                if int((line_index == line).sum()) >= self.BOARD_SIZE:
                    return True
        return False

    def _check_diagonals(self, res):
        """Return True if the ``(row, col)`` pairs in ``res`` cover a diagonal."""
        occupied = {(int(row), int(col)) for row, col in res}
        last = self.BOARD_SIZE - 1
        main_diagonal = all((i, i) in occupied for i in range(self.BOARD_SIZE))
        anti_diagonal = all((i, last - i) in occupied for i in range(self.BOARD_SIZE))
        return main_diagonal or anti_diagonal

    def get_grid(self):
        """Return the grid array this game was created with."""
        return self.grid

In [2]:
import random
import copy
import numpy as np


class Agent:
    """Player that picks the next square to mark on the grid."""

    def __init__(self):
        pass

    def set_location(self,grid,method, model):
        """Choose the square to play next.

        Args:
            grid: current board; passed through to the selection helper.
            method: ``"random"`` to pick any empty square at random, or
                ``"neural"`` to let ``model`` rank the empty squares.
            model: object handed to the neural helper; ignored for
                ``"random"``.

        Returns:
            The position returned by the selected helper.

        Raises:
            ValueError: if ``method`` is not one of the supported strategies.
        """
        if method == "random":
            return _get_random_position(grid)
        if method == "neural":
            return _get_neural_position(grid, model)
        # Previously an unknown method fell through to an unbound local.
        raise ValueError(
            "unknown method %r; expected 'random' or 'neural'" % (method,)
        )

# Module-level private helpers
# Returns the next position for the X or O, chosen randomly from the available positions
def _get_random_position(grid):
    """Return one empty square of ``grid`` chosen at random.

    The candidates are the positions reported by
    ``_get_available_positions`` (squares holding 0); the pick is made with
    ``random.choice``.
    """
    return random.choice(_get_available_positions(grid))

# This is where the Neural network goes.
def _get_neural_position(grid, model, player=None, outcome_index=0):
    """Pick the empty square whose resulting board the model scores highest.

    Each candidate move is played on a private copy of the board and the copy
    is scored with ``model.predict(board, outcome_index)``.  (Previously the
    move was never applied to the copy, so every candidate received the same
    score and the first empty square was always returned.)

    Args:
        grid: current board as a numpy array using -1 (X), 0 (empty), 1 (O).
        model: object exposing ``predict(board, index)`` that returns a
            comparable score.
        player: mark to place on the trial boards (-1 or 1).  When ``None`` it
            is inferred from the board: the side with fewer marks moves next,
            and X (-1) moves on equal counts.
            # assumes X always opens the game, as in run_game -- TODO confirm
        outcome_index: index of the model output to maximise.  Defaults to 0,
            the value this function has always used.
            # NOTE(review): with labels -1/0/1 fed to to_categorical, index 0
            # looks like the "draw" class rather than a win -- confirm.

    Returns:
        The ``(row, col)`` position with the highest score; ties keep the
        earliest candidate.

    Raises:
        IndexError: if the board has no empty square.
    """
    candidates = _get_available_positions(grid)
    if not candidates:
        raise IndexError("no empty square left to play on")

    board = np.asarray(grid)
    if player is None:
        x_count = int((board == -1).sum())
        o_count = int((board == 1).sum())
        player = -1 if x_count <= o_count else 1

    best_move = candidates[0]
    # Start below any possible score so a candidate scoring exactly 0 can
    # still be compared fairly against later ones.
    best_value = float("-inf")
    for move in candidates:
        trial = board.copy()
        trial[move] = player
        value = model.predict(trial, outcome_index)
        if value > best_value:
            best_value = value
            best_move = move
    return best_move

def _get_available_positions(grid):
    a = np.where(grid == 0)
    return list(zip(a[0], a[1]))

In [3]:
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import to_categorical
import numpy as np


class TicTacToeModel:
    """Dense softmax classifier over flattened board states.

    The network is a Keras ``Sequential`` stack: one 64-unit and three
    128-unit ReLU layers followed by a softmax layer with
    ``numberOfOutputs`` units, compiled with categorical cross-entropy and
    RMSprop.
    """

    def __init__(self, numberOfInputs, numberOfOutputs, epochs, batchSize):
        """Build and compile the network.

        Args:
            numberOfInputs: length of a flattened board (9 for a 3x3 grid).
            numberOfOutputs: number of outcome classes.
            epochs: training epochs passed to ``fit``.
            batchSize: batch size passed to ``fit``.
        """
        self.epochs = epochs

        self.batchSize = batchSize
        self.numberOfInputs = numberOfInputs
        self.numberOfOutputs = numberOfOutputs
        self.model = Sequential()
        self.model.add(Dense(64, activation='relu', input_shape=(numberOfInputs, )))
        self.model.add(Dense(128, activation='relu'))
        self.model.add(Dense(128, activation='relu'))
        self.model.add(Dense(128, activation='relu'))
        self.model.add(Dense(numberOfOutputs, activation='softmax'))
        self.model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

    def train(self, dataset):
        """Fit the network on ``(label, board)`` pairs.

        The first 80% of the samples (in the given order, no shuffling) are
        used for training and the remainder for validation.

        Args:
            dataset: iterable of pairs where item 0 is an integer outcome
                label and item 1 is a board that reshapes to
                ``numberOfInputs`` values.

        Returns:
            The fitted Keras model (the same object as ``self.model``).

        Raises:
            ValueError: if fewer than two samples are supplied, because the
                80/20 split would leave the training part empty.
        """
        samples = list(dataset)
        if len(samples) < 2:
            raise ValueError("dataset must contain at least two (label, board) samples")

        boards = [sample[1] for sample in samples]
        labels = [sample[0] for sample in samples]

        X = np.array(boards).reshape((-1, self.numberOfInputs))
        # Map labels to class ids 0..numberOfOutputs-1 explicitly (e.g. -1
        # becomes the last class) so the encoding does not depend on how
        # to_categorical treats negative indices.
        class_ids = np.asarray(labels, dtype=int) % self.numberOfOutputs
        y = to_categorical(class_ids, num_classes=self.numberOfOutputs)

        # Chronological 80/20 train/validation split.
        boundary = int(0.8 * len(X))
        X_train, X_test = X[:boundary], X[boundary:]
        y_train, y_test = y[:boundary], y[boundary:]
        self.model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=self.epochs, batch_size=self.batchSize)
        return self.model

    def set_model(self, model):
        """Replace the underlying network, e.g. with one loaded from disk."""
        self.model = model

    def predict(self, data, index):
        """Return output ``index`` of the model's prediction for one board.

        ``data`` is flattened to a single row of ``numberOfInputs`` values
        before being passed to ``self.model.predict``.
        """
        flat = np.array(data).reshape(-1, self.numberOfInputs)
        return self.model.predict(flat)[0][index]



Using TensorFlow backend.


In [4]:
import copy
from Agent import Agent
from Game import Game
import numpy as np
from Human import Human
from model import TicTacToeModel
import tensorflow as tf

# Kept so that code importing this module-level name keeps working.
# run_game no longer appends to it: each call builds and returns its own
# list, so repeated calls (or re-running a cell) do not accumulate old games.
history = []

# Main game loop
def run_game(player1, player2, loaded_model, iterations, verbose=True):
    """Play ``iterations`` complete games and record every board state.

    X (mark -1) is driven by ``player1`` and always moves first; O (mark 1)
    is driven by ``player2``.  After each move the board is snapshotted; when
    a game ends the snapshots are padded with the final board up to nine
    entries and each one is stored together with the game's outcome.

    Args:
        player1: strategy name passed to ``Agent.set_location`` for X.
        player2: strategy name passed to ``Agent.set_location`` for O.
        loaded_model: model forwarded to ``Agent.set_location``.
        iterations: number of finished games to play.
        verbose: when true (the default) print the game number and result
            after every game.

    Returns:
        ``(history, X_wins, O_wins, Draws)`` where ``history`` is a list of
        ``(outcome, board)`` pairs for this call only; ``outcome`` is -1 for
        an X win, 1 for an O win and 0 for a draw, and ``board`` is a nested
        list copy of the grid.
    """
    snapshots_per_game = 9  # every game contributes exactly this many samples
    outcome_by_winner = {"X": -1, "O": 1}

    game_history = []
    tally = {-1: 0, 1: 0, 0: 0}

    # (mark, agent, strategy) for each side, in playing order.
    sides = ((-1, Agent(), player1), (1, Agent(), player2))

    # The same array is shared with Game; this relies on Game.make_move
    # writing the move into the array it was constructed with.
    grid = np.full((3, 3), 0)
    game = Game(grid)
    moves = []
    finished = 0

    while finished < iterations:
        for mark, agent, strategy in sides:
            # Ask again until the move lands on a blank square.
            while True:
                loc = agent.set_location(grid, strategy, loaded_model)
                if game.make_move(loc, mark):
                    break

            result = game.check_win(grid)
            last_state = grid.tolist()
            moves.append(last_state)
            if not result:
                continue

            if verbose:
                print("Game: "+str(finished))
                print(result)

            # "X WIN" / "O WIN" map to -1 / 1; anything else is a draw.
            outcome = outcome_by_winner.get(result[:1], 0)
            tally[outcome] += 1
            finished += 1

            # Pad short games with the final board so every game has the
            # same number of samples.
            moves.extend([last_state] * (snapshots_per_game - len(moves)))
            game_history.extend((outcome, copy.deepcopy(state)) for state in moves)

            # Start the next game on a fresh board, with X to move.
            grid = np.full((3, 3), 0)
            game = Game(grid)
            moves = []
            break

    return game_history, tally[-1], tally[1], tally[0]

In [6]:
from RunGame import run_game
from model import TicTacToeModel


# Baseline: both sides play random moves.
print("--- Summary ---")
history, x, o, d = run_game(player1="random", player2="random", loaded_model=None, iterations=100)
print("X Wins = "+str(x), "O Wins = "+str(o), "Draws = "+str(d), sep="\n")

# Fit the network on the boards recorded during the random games and save it.
ticTacToeModel = TicTacToeModel(9, 3, 100, 32)
model = ticTacToeModel.train(history)
model.save("OXO_model")
ticTacToeModel.set_model(model)

# Re-play with the trained network choosing X's moves against a random O.
_, x, o, d = run_game(player1="neural", player2="random", loaded_model=ticTacToeModel, iterations=100)
print("After Learning (Neural = X vs Random = O):")
print("X Wins = "+str(x), "O Wins = "+str(o), "Draws = "+str(d), sep="\n")

--- Summary ---
Game: 0
X WIN
Game: 1
X WIN
Game: 2
X WIN
Game: 3
O WIN
Game: 4
X WIN
Game: 5
X WIN
Game: 6
X WIN
Game: 7
O WIN
Game: 8
X WIN
Game: 9
X WIN
Game: 10
X WIN
Game: 11
X WIN
Game: 12
O WIN
Game: 13
DRAW
Game: 14
X WIN
Game: 15
O WIN
Game: 16
X WIN
Game: 17
X WIN
Game: 18
O WIN
Game: 19
X WIN
Game: 20
X WIN
Game: 21
X WIN
Game: 22
O WIN
Game: 23
X WIN
Game: 24
O WIN
Game: 25
O WIN
Game: 26
X WIN
Game: 27
X WIN
Game: 28
X WIN
Game: 29
O WIN
Game: 30
O WIN
Game: 31
X WIN
Game: 32
X WIN
Game: 33
O WIN
Game: 34
DRAW
Game: 35
X WIN
Game: 36
X WIN
Game: 37
O WIN
Game: 38
X WIN
Game: 39
X WIN
Game: 40
O WIN
Game: 41
X WIN
Game: 42
DRAW
Game: 43
O WIN
Game: 44
X WIN
Game: 45
X WIN
Game: 46
X WIN
Game: 47
O WIN
Game: 48
X WIN
Game: 49
O WIN
Game: 50
X WIN
Game: 51
O WIN
Game: 52
O WIN
Game: 53
X WIN
Game: 54
O WIN
Game: 55
DRAW
Game: 56
X WIN
Game: 57
X WIN
Game: 58
X WIN
Game: 59
DRAW
Game: 60
X WIN
Game: 61
O WIN
Game: 62
X WIN
Game: 63
DRAW
Game: 64
O WIN
Game: 65
X WIN
Game: 66
X

In [7]:
import tensorflow as tf
from RunGame import run_game
from model import TicTacToeModel

# Wrapper object; its freshly built network is replaced by the saved one below.
ticTacToeModel = TicTacToeModel(9, 3, 100, 32)

# Get the model from the saved file
loaded_model = tf.keras.models.load_model("OXO_model")

# Set the model to use in the game to be the one just loaded
ticTacToeModel.set_model(loaded_model)
# The first return value (the recorded history) is not needed here.
# Fixed: the comma after `_` was missing, which made this line a syntax error.
_, x, o, d = run_game(player1="neural", player2="random", loaded_model=ticTacToeModel, iterations=100)

print("After Learning (Neural = X vs Random = O):")
print("X Wins = "+str(x))
print("O Wins = "+str(o))
print("Draws = "+str(d))

Game: 0
X WIN
Game: 1
X WIN
Game: 2
X WIN
Game: 3
X WIN
Game: 4
X WIN
Game: 5
X WIN
Game: 6
X WIN
Game: 7
X WIN
Game: 8
X WIN
Game: 9
O WIN
Game: 10
O WIN
Game: 11
X WIN
Game: 12
X WIN
Game: 13
X WIN
Game: 14
X WIN
Game: 15
X WIN
Game: 16
X WIN
Game: 17
X WIN
Game: 18
X WIN
Game: 19
X WIN
Game: 20
X WIN
Game: 21
DRAW
Game: 22
X WIN
Game: 23
X WIN
Game: 24
X WIN
Game: 25
O WIN
Game: 26
X WIN
Game: 27
X WIN
Game: 28
O WIN
Game: 29
X WIN
Game: 30
X WIN
Game: 31
X WIN
Game: 32
X WIN
Game: 33
X WIN
Game: 34
X WIN
Game: 35
O WIN
Game: 36
X WIN
Game: 37
X WIN
Game: 38
X WIN
Game: 39
X WIN
Game: 40
O WIN
Game: 41
X WIN
Game: 42
X WIN
Game: 43
O WIN
Game: 44
X WIN
Game: 45
O WIN
Game: 46
X WIN
Game: 47
X WIN
Game: 48
X WIN
Game: 49
O WIN
Game: 50
X WIN
Game: 51
X WIN
Game: 52
X WIN
Game: 53
X WIN
Game: 54
O WIN
Game: 55
X WIN
Game: 56
O WIN
Game: 57
X WIN
Game: 58
X WIN
Game: 59
X WIN
Game: 60
X WIN
Game: 61
X WIN
Game: 62
X WIN
Game: 63
O WIN
Game: 64
O WIN
Game: 65
X WIN
Game: 66
X WIN
Game: 