# External Code Progress Bar

In [39]:
# Print iterations progress
def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█', printEnd = "\r"):
    """
    Call in a loop to create terminal progress bar
    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
        printEnd    - Optional  : end character (e.g. "\r", "\r\n") (Str)
    """
    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
    filledLength = int(length * iteration // total)
    bar = fill * filledLength + '-' * (length - filledLength)
    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end = printEnd)
    # Print New Line on Complete
    if iteration == total: 
        print()

# PlayerType Enum


In [40]:
from enum import Enum

class PlayerType(Enum):
    X = 'X',
    O = 'O'

# State Class

In [41]:
class State:
    def __init__(self, boardString):
        self.boardString = boardString
    
    def getActions(self):
        actions = []

        for i,c in enumerate(self.boardString):
            if (c == '_'):
                actions.append(i)
        
        return actions

    
    def do(self, action, type):
        nextState = self.boardString
        nextState = nextState[:action] + type.value[0] + nextState[action+1:]
        return State(nextState)
    
    def toString(self):
        return self.boardString


In [42]:
state = "XO__XO__XO"
stateObj = State(state)

print(PlayerType.X.value[0])

print(stateObj.getActions())
print(stateObj.do(stateObj.getActions()[0], PlayerType.X ).toString())
print(stateObj.toString())

X
[2, 3, 6, 7]
XOX_XO__XO
XO__XO__XO


# Leaner Class

In [43]:
import re
import numpy as np

class Learner:
    def __init__(self, type, epsilon=0.1, lr=0.1):
        self.type = type
        self.states = self._generateStates()
        self.valTable = self._generateValTable(self.states)
        self.stateToIndex = { v:k for k,v in enumerate(self.states) }
        self.epsilon = epsilon
        self.lr = lr
        self.debugging = False
    
    def getAction(self, state):

        """
        Returns:
            action: Action index to take
            isGreedy: If greedy action (true) was taken or if random (false)
        """

        possibleActions = state.getActions()
        maxVal = 0
        maxAction = None

        for a in possibleActions:
            nextState = state.do(a, self.type)
            val = self.getVal(nextState)
            if self.debugging:
                print("Action " + str(a) + " Value: "+ str(val))

            if val > maxVal:
                maxVal = val
                maxAction = a
        
        if np.random.uniform() < self.epsilon:
            return possibleActions[np.random.randint(0, len(possibleActions))], False
        else:
            
            return maxAction, True
    
    def update(self, previousState, reward, currentState, over):
        inital_index = self._getIndex(previousState)
        resulting_index = self._getIndex(currentState)
        if over:
            self.valTable[resulting_index] = reward
        self.valTable[inital_index] += self.lr * (self.valTable[resulting_index] - self.valTable[inital_index])

    def getVal(self, state):
        index = self._getIndex(state)
        return self.valTable[index]

    def isWinning(self,state):

        rows = [0,0,0]
        cols = [0,0,0]
        topLDia = 0
        topRDia = 0

        for i,c in enumerate(state):
            col = i % 3
            row = int(i / 3)

            if c == 'X':
                rows[row] += 1
                cols[col] += 1

                if row == col:
                    topLDia += 1
                
                if (row == 2 - col) and (col == 2 - row):
                    topRDia += 1
            
            if c == 'O':
                rows[row] -= 1
                cols[col] -= 1

                if row == col:
                    topLDia -= 1
                
                if (row == 2 - col) and (col == 2 - row):
                    topRDia -= 1
        
        if 3 in rows or 3 in cols or topLDia == 3 or topRDia == 3:
            return 'X'
        elif -3 in rows or -3 in cols or topLDia == -3 or topRDia == -3:
            return 'O'
        else: 
            return None
    
    def _getIndex(self, state):
        stateString = state.toString()
        index = self.stateToIndex[stateString]
        return index

    def _generateValTable(self,states): 
        return { index:(1 if self.isWinning(state) else 0.5) for index, state in enumerate(states) }

    
    def _generateStates(self):
        init = ['X', 'O', '_']
        states = ['X', 'O', '_']

        for x in range(8):

            tempStates = []

            for s in states:
                for ele in init:

                    tempStates.append(ele + s)
            
            states = tempStates

        return states

In [44]:
player1 = Learner(PlayerType.X)

ste = State('__O__OXOO')
print(player1.getVal(ste))
print(player1.getAction(ste))
print(player1.getVal(ste))
print(player1.isWinning(ste.toString()))

1
(0, True)
1
O


# Player

In [45]:
class Player: 

    def __init__(self, playerType):
        self.type = playerType
    
    def getAction(self, state):

        possibleActions = state.getActions()

        action = input("Possible actions: " + ','.join(str(i) for i in possibleActions))

        while int(action) not in possibleActions:
            action = input("Possible actions: " + ','.join(str(i) for i in possibleActions))
        
        return int(action)


# Tic Tac Toe

In [50]:
class Game:

    def __init__(self, playerX, playerO):
        self.board = State("_________")
        self.currentPlayer = playerX
        self.nextPlayer = playerO

    
    def simulate(self,turns):

        for i in range(turns):
            printProgressBar(i, turns)
            self.episode()
    
    def printBoard(self):

        for i,c in enumerate(self.board.toString()):
            if i % 3 == 0:
                print("\n " + c, end="")
            else:
                print(" | " + c, end="")
    
    def _reset(self):
        self.board = State("_________")


    def episode(self, show=False):

        while (True):

            if show:
                self.printBoard()
            
            action, greedy = self.currentPlayer.getAction(self.board)
            previousState = State(self.board.toString())
            winner, currentState, over = self._step(action)
            reward = self._reward(winner)
            if greedy:
                self.currentPlayer.update(previousState, reward, currentState, over)
                self.nextPlayer.update(previousState, -reward, currentState, over)
            
            self._switchPlayers()

            if show:
                print("\n")
        
            if (self._isOver()):
                self._reset()
                break

        if show:
            self.printBoard()
        
    def _reward(self, winner):
        if self.currentPlayer.type.value[0] == winner:
            return 1
        elif self.nextPlayer.type.value[0] == winner:
            return -1
        else:
            return 0

    def _switchPlayers(self): 
        tempPlayer = self.currentPlayer
        self.currentPlayer = self.nextPlayer
        self.nextPlayer = tempPlayer       

    
    def _isOver(self):
        return (self._hasWon() or self._isDraw(), self._hasWon())
    
    def _isDraw(self):
        return not '_' in self.board.toString()
    
    def _step(self, action):

        over, _ = self._isOver()
        if not over:
            self.board = self.board.do(action, self.currentPlayer.type)
            over, winner = self._isOver()
            return winner, self.board, over

        return _, self.board, over
    
    def _hasWon(self):
        rows = [0,0,0]
        cols = [0,0,0]
        topLDia = 0
        topRDia = 0

        for i,c in enumerate(self.board.toString()):
            col = i % 3
            row = int(i / 3)

            if c == 'X':
                rows[row] += 1
                cols[col] += 1

                if row == col:
                    topLDia += 1
                
                if (row == 2 - col) and (col == 2 - row):
                    topRDia += 1
            
            if c == 'O':
                rows[row] -= 1
                cols[col] -= 1

                if row == col:
                    topLDia -= 1
                
                if (row == 2 - col) and (col == 2 - row):
                    topRDia -= 1
        
        if 3 in rows or 3 in cols or topLDia == 3 or topRDia == 3:
            return 'X'
        elif -3 in rows or -3 in cols or topLDia == -3 or topRDia == -3:
            return 'O'
        else: 
            return None


In [49]:
playerX = Learner(PlayerType.X)
playerO = Learner(PlayerType.O)

tictactoe = Game(playerX=playerX, playerO=playerO)
tictactoe.simulate(1000000)

 |----------------------------------------------------------------------------------------------------| 0.0% 

TypeError: 'tuple' object is not callable

In [None]:
realPlayer = Player(PlayerType.X)
playerO.epsilon = 0
playerO.debugging = True


realGame = Game(playerX=realPlayer, playerO=playerO)
realGame.game(show=True)




 _ | _ | _
 _ | _ | _
 _ | _ | _


 _ | _ | _
 _ | _ | _
 _ | _ | XAction 0 Value: 0.5
Action 1 Value: 0.5
Action 2 Value: 0.5
Action 3 Value: 0.5
Action 4 Value: 0.5
Action 5 Value: 0.5
Action 6 Value: 0.5
Action 7 Value: 0.5



 O | _ | _
 _ | _ | _
 _ | _ | X


 O | X | _
 _ | _ | _
 _ | _ | XAction 2 Value: 0.5
Action 3 Value: 0.9999999999999999
Action 4 Value: 0.5
Action 5 Value: 0.5
Action 6 Value: 0.9999999999999999
Action 7 Value: 0.5



 O | X | _
 O | _ | _
 _ | _ | X


 O | X | _
 O | _ | _
 X | _ | XAction 2 Value: 0.5
Action 4 Value: 0.9999671438188182
Action 5 Value: 0.996608884635755
Action 7 Value: 0.5



 O | X | _
 O | O | _
 X | _ | X


 O | X | _
 O | O | X
 X | _ | XAction 2 Value: 0.5
Action 7 Value: 0.5



 O | X | O
 O | O | X
 X | _ | X


 O | X | O
 O | O | X
 X | X | X