## Train an agent to play tic-tac-toe

### Strategies
1. Play at random
2. Minimax
3. Imitation learning


In [1]:
import itertools
import random

In [2]:
# Whose move it is gets tracked as a single boolean flag:
# False means the computer moves next, True means the human does.
COMPUTER, HUMAN = False, True


class Entry:
    """Marks a single board cell can hold: unoccupied, an X, or an O."""

    Empty, X, O = '-', 'X', 'O'
    

class TicTacToe():
    """
    Console tic-tac-toe between a randomly playing computer and a human.

    COMPUTER always plays an 'O'
    HUMAN always plays a 'X'

    Attributes:
        state: list of 9 cells in row-major order, each an `Entry` mark.
        turn: whose move is next, COMPUTER (False) or HUMAN (True).
        game_ended: True once somebody has won or the board is full.
        winner: `Entry.Empty` while undecided, afterwards "HUMAN",
            "COMPUTER" or 'DRAW'.
    """

    # Index triples forming a winning line, checked in this order:
    # rows, then columns, then the two diagonals.
    _WIN_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        """
        Start with an empty board and pick the first player at random.

        turn = False -> computer's turn, True-> human turn
        """
        self.state = [Entry.Empty] * 9
        self.turn = random.choice([COMPUTER, HUMAN])
        self.game_ended = False
        self.winner = Entry.Empty

    def __str__(self) -> str:
        """Return the board (one row per line) followed by the game status."""
        rows = '\n'.join(str(self.state[k:k + 3]) for k in range(0, 9, 3))
        return (f'board state: \n{rows}\n' +
                f'player turn: {self.turn}\n' +
                f'game ended: {self.game_ended}\n' +
                f'winner: {self.winner}\n')

    def play(self) -> None:
        """
        Play a single turn for whoever `self.turn` says is next.

        The computer picks a free cell uniformly at random; the human is
        prompted on stdin until a free cell index is entered. Afterwards the
        board is evaluated, the turn is handed over and the game is printed.
        Does nothing except report the state if the game is already over.
        """
        print('play a turn')
        if self.game_ended:
            print('Game Over')
            print(self)
            return
        avail_positions = self._available_positions()
        if not avail_positions:
            # Only reachable when the board was filled without going through
            # evaluate() (e.g. `state` assigned from outside).
            self.game_ended = True
            self.winner = 'DRAW'
            print('board is full')
            return
        if self.turn == COMPUTER:
            print('COMPUTER to play')
            print(f'available positions: {avail_positions}')
            play_id = random.choice(avail_positions)
            print(play_id)
            self.state[play_id] = Entry.O
        else:  # HUMAN
            print('HUMAN to play')
            self.user_input_prompt()
            self.state[self._ask_human_move(avail_positions)] = Entry.X

        self.evaluate()
        self.turn = not self.turn
        print(self)

    def evaluate(self) -> None:
        """
        evaluate if there is a winner or a draw
        if game ended, update `game_ended` and `winner`

        The winner is derived from the mark that fills the winning line, so
        the result does not depend on the current value of `self.turn`.
        A full board without a winning line is recorded as 'DRAW' right away.
        """
        for a, b, c in self._WIN_LINES:
            line = [self.state[a], self.state[b], self.state[c]]
            mark = line[0]
            if mark != Entry.Empty and mark == line[1] and mark == line[2]:
                print(f'winning row: {line}')
                print('we have a winner')
                self.winner = "HUMAN" if mark == Entry.X else "COMPUTER"
                self.game_ended = True
                return
        if Entry.Empty not in self.state:
            # No line completed and nothing left to play: it is a draw.
            self.winner = 'DRAW'
            self.game_ended = True
            print('board is full')

    def user_input_prompt(self) -> None:
        """
        shows prompt human user to get position to play

        Prints every cell as `[index| mark]`, three cells per line.
        """
        pieces = []
        for i, x in enumerate(self.state):
            pieces.append(f'[{i}| {x}]')
            if (i + 1) % 3 == 0:
                pieces.append('\n')
        prompt = ''.join(pieces)

        print(f'board state: \n{prompt}\n')

    def _available_positions(self):
        """Return the indices of all cells that are still empty."""
        return [i for i, x in enumerate(self.state) if x == Entry.Empty]

    def _ask_human_move(self, avail_positions):
        """
        Prompt on stdin until the user names one of `avail_positions`.

        Anything `int()` cannot parse, and any number that is not a free
        cell, is rejected with a hint and the prompt is repeated.
        """
        while True:
            inp = input('where do you wanna play [0-8]?')
            try:
                # int() is the authority on what is a number; str.isdigit()
                # also accepts characters such as '²' that int() rejects.
                pos = int(inp)
            except ValueError:
                pos = None
            if pos in avail_positions:
                return pos
            print('invalid input')
            print(f'please enter a number from the list: {avail_positions}')
    


In [4]:
# Run one interactive game: show the fresh board, then keep taking turns
# until the game object reports that it has ended.
game = TicTacToe()
print(game)
while True:
    if game.game_ended:
        break
    game.play()

print('done')

board state: 
['-', '-', '-']
['-', '-', '-']
['-', '-', '-']
player turn: False
game ended: False
winner: -

play a turn
COMPUTER to play
available positions: [0, 1, 2, 3, 4, 5, 6, 7, 8]
0
board state: 
['O', '-', '-']
['-', '-', '-']
['-', '-', '-']
player turn: True
game ended: False
winner: -

play a turn
HUMAN to play
board state: 
[0| O][1| -][2| -]
[3| -][4| -][5| -]
[6| -][7| -][8| -]


where do you wanna play [0-9]?8
board state: 
['O', '-', '-']
['-', '-', '-']
['-', '-', 'X']
player turn: False
game ended: False
winner: -

play a turn
COMPUTER to play
available positions: [1, 2, 3, 4, 5, 6, 7]
3
board state: 
['O', '-', '-']
['O', '-', '-']
['-', '-', 'X']
player turn: True
game ended: False
winner: -

play a turn
HUMAN to play
board state: 
[0| O][1| -][2| -]
[3| O][4| -][5| -]
[6| -][7| -][8| X]


where do you wanna play [0-9]?6
board state: 
['O', '-', '-']
['O', '-', '-']
['X', '-', 'X']
player turn: False
game ended: False
winner: -

play a turn
COMPUTER to play
availab