# Python Learning

Using the Boost.Python library, I compiled my tic-tac-toe `Board` class as a library accessible from Python 3.7.

### To Do:
- Add a `winner` variable to the `TicTacToe` class, because repeatedly calling `whoWon()` is messy
- Develop DeepQAgent class


In [None]:
import sys
sys.path.append("../lib")

import numpy as np
import tensorflow as tf
import Board

from pathlib import Path
from tqdm import tqdm
import matplotlib.pyplot as plt

In [None]:
class TicTacToe():
    """Run tic-tac-toe games between two player objects on the C++ ``Board``.

    Players are specified by class name (a string such as ``'Human'``,
    ``'Rand'`` or ``'DeepQAgent'``).  The name is looked up in this module's
    globals and the class is instantiated with ``symbol``, ``name`` and
    ``exploration`` keyword arguments.  Player 1 always moves first.
    """

    def __init__(self, player1, player2, p1_symbol=1, p2_symbol=-1, p1_exporation=0.8, p2_exporation=0.8):
        """Create both players and a fresh board.

        Args:
            player1: Class name (str) of the first player.
            player2: Class name (str) of the second player.
            p1_symbol: Board symbol for player 1 (passed to ``Board`` as int).
            p2_symbol: Board symbol for player 2 (passed to ``Board`` as int).
            p1_exporation: Exploration value forwarded to player 1.
            p2_exporation: Exploration value forwarded to player 2.

        Raises:
            KeyError: If a player class name is not defined in this module.
        """
        # NOTE: the "exporation" spelling is part of the public signature and
        # attribute names, so it is kept for backward compatibility.
        self.p1_symbol = p1_symbol
        self.p1_type = player1
        self.p1_exporation = p1_exporation
        self.p1_name = self.p1_type + str(self.p1_symbol)
        self.player1 = self._makePlayer(self.p1_type, self.p1_symbol, self.p1_name, self.p1_exporation)

        self.p2_symbol = p2_symbol
        self.p2_type = player2
        self.p2_exporation = p2_exporation
        self.p2_name = self.p2_type + str(self.p2_symbol)
        self.player2 = self._makePlayer(self.p2_type, self.p2_symbol, self.p2_name, self.p2_exporation)

        # Sets turnPlayer, moveCounter and the C++ game object.
        self.reinit()

    @staticmethod
    def _makePlayer(typeName, symbol, name, exploration):
        """Instantiate the player class called ``typeName`` from module globals."""
        playerClass = globals()[typeName]
        return playerClass(symbol=symbol, name=name, exploration=exploration)

    def _advanceTurn(self):
        """Hand the turn to the other player and increment the move counter."""
        # moveCounter starts at 1, so it is odd exactly when player 1 just moved.
        self.turnPlayer = self.player2 if self.moveCounter % 2 == 1 else self.player1
        self.moveCounter += 1

    def reinit(self):
        """Reset the turn state and replace the board with a new, empty one."""
        # 1-based counter: the number of the move about to be played.
        self.moveCounter = 1
        self.turnPlayer = self.player1
        self.game = Board.Board(int(self.p1_symbol), int(self.p2_symbol))

    def printMoveMap(self):
        """Print the mapping from move index (0-8) to board square."""
        print("----------------")
        print("|  0 |  1 |  2 |")
        print("----------------")
        print("|  3 |  4 |  5 |")
        print("----------------")
        print("|  6 |  7 |  8 |")
        print("----------------")

    def printGame(self):
        """Print the current board as returned by ``Board.getBoard()``."""
        gameBoard = self.game.getBoard()
        print(gameBoard)

    def play_pvp(self):
        """Play one game to completion, printing the board before every move."""
        self.printMoveMap()

        while not self.game.isBoardFull() and self.game.whoWon() == 0:
            print("Turn: ", self.moveCounter, "Player: ", self.game.getTurnPlayer())
            self.printGame()

            move = self.turnPlayer.getMove(self.game)
            # A rejected move leaves the turn unchanged, so the same player is
            # asked again on the next iteration.
            if self.game.addMove(move):
                self._advanceTurn()

        # Query the winner once instead of calling whoWon() for every check.
        winner = self.game.whoWon()
        if winner != 0:
            winningPlayer = self.player1 if self.player1.symbol == winner else self.player2
            print("Player", winningPlayer.name, "(", winner, ")", "Won!")
        elif self.game.isBoardFull():
            print("Stalemate...")
        else:
            print("Crazy Error: Uncaught stopping condition!")

        print("Final Board:")
        self.printGame()

    def train(self, ngames, epochs=10):
        """Play ``ngames`` games using each player's ``getMove_train``.

        Args:
            ngames: Number of games to play.
            epochs: Forwarded to every ``getMove_train`` call.

        Returns:
            dict with integer counts under ``'p1won'``, ``'p2won'`` and
            ``'stalemate'``, and ``'numturns'``: an array of length ``ngames``
            holding the number of accepted moves in each game.
        """
        results = {'p1won': 0, 'p2won': 0, 'stalemate': 0, 'numturns': np.zeros((ngames,))}

        for game in tqdm(range(ngames)):
            while not self.game.isBoardFull() and self.game.whoWon() == 0:
                move = self.turnPlayer.getMove_train(self.game, epochs)
                if self.game.addMove(move):
                    self._advanceTurn()

            # The loop only exits on a win or a full board; no winner here
            # therefore means the board filled up.
            winner = self.game.whoWon()
            if winner == 0:
                results['stalemate'] += 1
            elif winner == self.player1.symbol:
                results['p1won'] += 1
            else:
                results['p2won'] += 1

            # moveCounter is 1-based, so the moves actually played is one less.
            results['numturns'][game] = self.moveCounter - 1
            self.reinit()

        # Only players that carry a model expose saveModel(); skip the rest
        # instead of failing after all games have been played.
        for player in (self.player1, self.player2):
            save = getattr(player, 'saveModel', None)
            if callable(save):
                save()

        return results

In [None]:
class Human():
    """Player whose moves are typed in at the console."""

    def __init__(self, symbol, exploration, name):
        """Store the player's board symbol, display name and exploration value."""
        self.symbol = symbol
        self.name = name
        # Accepted so all player classes share one constructor signature;
        # nothing in this class reads it.
        self.exploration = exploration

    def getMove_train(self, state, epochs):
        """Training-mode entry point; a human has nothing to train.

        ``epochs`` is accepted for interface parity with other players and
        is ignored.
        """
        return self.getMove(state)

    def saveModel(self):
        """No-op: a human player has no model to persist.

        Present so callers can invoke ``saveModel()`` on any player type.
        """

    def getMove(self, state):
        """Prompt until the user enters one of ``state.getValidMoves()``.

        Args:
            state: Game object exposing ``getValidMoves()``.

        Returns:
            The chosen move as an int.
        """
        validMoves = state.getValidMoves()
        while True:
            try:
                move = int(input('Choose move: '))
            except ValueError:
                # Non-numeric input used to crash the game; ask again instead.
                print('Invalid input, expected one of:', validMoves)
                continue
            if move in validMoves:
                return move

In [None]:
class Rand():
    """Player that picks uniformly at random among the currently valid moves."""

    def __init__(self, symbol, exploration, name):
        """Store the player's board symbol, display name and exploration value."""
        self.symbol = symbol
        # Accepted so all player classes share one constructor signature;
        # nothing in this class reads it.
        self.exploration = exploration
        self.name = name

    def getMove_train(self, state, epochs):
        """Training-mode entry point; identical to ``getMove``.

        ``epochs`` is accepted for interface parity with other players and
        is ignored.
        """
        return self.getMove(state)

    def saveModel(self):
        """No-op: a random player has no model to persist.

        Present so callers can invoke ``saveModel()`` on any player type.
        """

    def getMove(self, state):
        """Return one entry of ``state.getValidMoves()``, chosen at random.

        Raises:
            ValueError: From ``np.random.choice`` if there are no valid moves.
        """
        # Draw a scalar rather than a 1-element array: int() on an array with
        # ndim > 0 is deprecated in recent NumPy releases.
        return int(np.random.choice(state.getValidMoves()))

In [None]:
class DeepQAgent():
    """Player backed by a small Keras network that scores board positions.

    The network takes the board reshaped to ``(1, 3, 3, 1)`` and produces 18
    outputs.  ``getMove`` reads the outputs at the indices of the valid moves
    and plays the highest-scoring one.
    """

    def __init__(self, symbol, exploration, name):
        """Store identity fields and load (or build) the model.

        Args:
            symbol: This player's board symbol; compared with the winner value.
            exploration: Stored but not read anywhere in this class.
            name: Used as the model file stem (``<name>.h5``).
        """
        self.symbol = symbol
        self.name = name
        self.exploration = exploration

        # Board seen on the previous getMove_train call (starts empty).
        # NOTE(review): nothing in this class resets it between games, so the
        # first update of a new game uses the last board of the previous one.
        self.prevBoard = np.zeros((3,3))

        self.model = self.loadModel()

    def loadModel(self):
        """Load ``<name>.h5`` if it exists, otherwise build a fresh model."""
        modelPath = Path(self.name+'.h5')
        if modelPath.is_file():
            model = tf.keras.models.load_model(modelPath)
            print('Model loaded:', self.name+'.h5')
        else:
            model = self.buildModel()

        return model

    def buildModel(self):
        """Build and compile the network: two 3x3 convs, then three dense layers."""
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Conv2D(filters=8, kernel_size=(3,3), padding="same", activation='relu', input_shape=(3,3,1)))
        model.add(tf.keras.layers.Conv2D(filters=8, kernel_size=(3,3), padding="same", activation='relu'))
        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dense(18, activation='relu'))
        model.add(tf.keras.layers.Dense(18, activation='relu'))
        # NOTE(review): 18 outputs, while the move map only has squares 0-8;
        # presumably only the first nine are meaningful - confirm.
        model.add(tf.keras.layers.Dense(18, activation='linear'))
        model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

        return model

    def saveModel(self):
        """Write the model to ``<name>.h5`` in the current directory."""
        modelPath = Path(self.name+'.h5')
        self.model.save(modelPath)

    def getMove_train(self, state, epochs=10):
        """Fit the model on the previous board, then choose a move.

        The fit target moves the prediction for the previous board toward
        ``reward + prediction(current board)`` by a factor ``alpha``.

        Args:
            state: Game object exposing ``whoWon()``, ``isBoardFull()``,
                ``getBoard()`` and ``getValidMoves()``.
            epochs: Number of ``fit`` epochs for this single update.

        Returns:
            The move selected by ``getMove`` for ``state``.
        """
        # NOTE(review): TicTacToe.train only calls this while the game is
        # still running, so the terminal branches below (and any non-zero
        # reward) appear unreachable from that loop - confirm.
        winner = state.whoWon() # p1_symbol, p2_symbol, or 0
        if state.isBoardFull() and winner == 0: # (0 could mean not done too)
            winner = 'stalemate'

        prevInput = self.prevBoard.reshape(1,3,3,1)
        prevValPredicted = self.model.predict(prevInput)
        reward = self.calcReward(winner)

        if winner == 0: # still playing
            currValuePredicted = self.model.predict(state.getBoard().reshape(1,3,3,1))
        else:
            # Terminal position: no future value to bootstrap from.
            currValuePredicted = 0

        # alpha is the update step size: how far the old estimate moves toward
        # (reward + current estimate).  No separate discount factor is applied.
        alpha = 0.5
        target = np.array(prevValPredicted + alpha*(reward + currValuePredicted - prevValPredicted))

        # train
        self.model.fit(prevInput, target, epochs=epochs, verbose=0)

        # Keep a private copy so later board changes cannot alter the
        # remembered position.
        self.prevBoard = np.array(state.getBoard())

        return self.getMove(state)

    def calcReward(self, winner):
        """Map a game outcome to a reward.

        Args:
            winner: A player symbol, ``0`` (game not finished) or the string
                ``'stalemate'``.

        Returns:
            ``1`` for a win, ``0`` while unfinished, ``0.5`` for a stalemate
            and ``-1`` for a loss.
        """
        if winner == self.symbol: # you/we won
            return 1
        if winner == 0: # not done
            return 0
        # Compare by value: identity ("is") on a string is unreliable.
        if winner == 'stalemate': # over, nobody won
            return 0.5
        return -1 # lost

    def getMove(self, state):
        """Return the valid move whose model output is highest.

        Args:
            state: Game object exposing ``getValidMoves()`` and ``getBoard()``.

        Returns:
            The chosen move as an int; on ties, the first one in the order
            given by ``getValidMoves()``.

        Raises:
            ValueError: If there are no valid moves.
        """
        validMoves = np.asarray(state.getValidMoves(), dtype=int)
        if validMoves.size == 0:
            raise ValueError('No valid moves available')

        probBestMove = self.model.predict(state.getBoard().reshape(1,3,3,1))

        # Search only among the valid moves.  Matching the max value against
        # the full output could return an invalid index with a tied value,
        # which the board would reject over and over.
        bestPos = int(np.argmax(probBestMove[0, validMoves]))
        return int(validMoves[bestPos])

In [None]:
# Self-play training run: two DeepQAgent players, 1000 games, with 10 fit
# epochs requested for every training update.
game = TicTacToe(player1='DeepQAgent', player2='DeepQAgent')
game.train(ngames=1000, epochs=10)
# game.play_pvp()

In [None]:
# Interactive game: the human is player 1 (moves first) against a DeepQAgent.
game = TicTacToe(player1='Human', player2='DeepQAgent')
game.play_pvp()