In [1]:
class Agent():
    def __init__(self, piece='X', mover='human', playbook=None):
        self.piece = piece
        movers = {'human':self.human_move, 'random':self.random_move, 
                  'menace':self.menace_move}
        updaters = {'human':self.dummy, 'random':self.dummy, 'menace':self.update_menace}
        self.mover = movers[mover]
        self.update = updaters[mover]
        self.moveHistory = {}
        self.human = mover=='human'
        if not playbook:
            self.playbook = {}
        else:
            self.playbook = playbook
    
    def dummy(self, win):
        pass
    
    def human_move(self, positions):
        move_num = sum([1 if x !='-' else 0 for x in positions])
        move = input(" " + str(move_num + 1))
        if move == 'Q':
            self.quit()
        else:
            return move
    
    def menace_move(self, positions):
        from random import choice
        tup_positions = tuple(positions)
        if tup_positions not in self.playbook:
            move = self.random_move(positions)
            return move
        else:
            move = choice(self.playbook[tup_positions])
            return move
    
    def update_learner(self, win):
        if not win:   #it was a tie
            for position in self.moveHistory:
                if position in self.playbook:
                    self.playbook[position].extend([self.moveHistory[position]])
                else:
                    temp = [self.moveHistory[position]]
                    not_filled = [i for i,x in enumerate(position) if x=='-']
                    temp.extend(not_filled[1:])
                    self.playbook[position] = temp
                    
        elif win[1] == self.piece: #it was a win, reinforce all the moves
            for position in self.moveHistory:
                if position in self.playbook:
                    self.playbook[position].extend(3*[self.moveHistory[position]])
                else:
                    temp = 2*[self.moveHistory[position]]
                    not_filled = [i for i,x in enumerate(position) if x=='-']
                    temp.extend(not_filled[1:])
                    self.playbook[position] = temp
                    
        elif win[1] != self.piece: #it was a loss, prune once
            for position in self.moveHistory:
                if position in self.playbook:
                    if self.moveHistory[position] in self.playbook[position]:
                        self.playbook[position].remove(self.moveHistory[position])
                        if not self.playbook[position]:
                            self.playbook.pop(position)
 
    def random_move(self, positions):
        from random import choice
        not_filled = [i for i,x in enumerate(positions) if x == '-']
        return choice(not_filled[1:])
    
    def quit(self):
        import sys
        print(self.piece, " quits!")
        sys.exit()


class Board():
    def __init__(self, turtle=True, size=200):
        self.positions = ['-']*10
        self.turtle = turtle
        if self.turtle:

            self.size = size
            self.squares = {1:[-size, size], 2:[0,size], 3:[size,size], 
                       4:[-size, 0], 5:[0,0], 6:[size,0],
                       7:[-size,-size], 8:[0,-size], 9:[size, -size]}
            
    def turtle_display(self):
        import turtle
        import turtle_board
        turtle.reset()
        turtle.speed(0)
        turtle_board.board(size=200)
        
        for i in range(1,10):
            if self.positions[i] == 'X':
                turtle_board.x(self.squares[i], self.size)
            elif self.positions[i] == 'O':
                turtle_board.o(self.squares[i], self.size/2)
    
    def display(self, humanAgent = True, turtle=True):
        from os import system
        from IPython.display import clear_output

        system('clear')
        clear_output()
        for i in range(3):
            print('\t'.join(self.positions[1+i*3:1+i*3+3]))
        if self.turtle:
            self.turtle_display()

    def update(self, move, piece):
        # check for clashes
        try:
            move = int(move)
            assert move >= 1 and move <= 9
        except:
            print("That was probably not a valid move, try again")
            return False
        if self.positions[move] != '-':
            print("That space is filled, you will have to submit another")
            return False
        else:
            self.positions[move] = piece
            return True
        
    def is_win(self):
        winners = ((1,2,3), (4,5,6), (7,8,9), (1,4,7), (2,5,8), (3,6,9), (1,5,9), (3,5,7))
        for w in winners:
            test3 = [self.positions[i] for i in w]
            if test3[1] != '-' and test3.count(test3[1]) == 3:
                return (True, test3[1])
        return False
       
        
class Game():
    def __init__(self, board, x, o, watcher=False):
        self.board = board
        self.playerX = x
        self.playerO = o
        self.human = x.human or o.human
        self.playerX.moveHistory = {}
        self.playerO.moveHistory = {}
        self.watcher = watcher    
        
    def play(self):
        import time
        turn = 1
        if self.watcher:
            time.sleep(1.5)
            self.board.display()
            
        while turn < 10:    
            if turn % 2 == 1:
                move = self.playerX.mover(self.board.positions)
                if self.board.update(move, self.playerX.piece):
                    move = int(move)
                    temp = self.board.positions[:]
                    temp[move] = '-'
                    self.playerX.moveHistory[tuple(temp)] = move
                else:
                    continue
            else:
                move = self.playerO.mover(self.board.positions)
                if self.board.update(move, self.playerO.piece):
                    move = int(move)
                    temp = self.board.positions[:]
                    temp[move] = '-'
                    self.playerO.moveHistory[tuple(temp)] = move
                else:
                    continue           
            
            self.win = self.board.is_win()
            if self.watcher:
                time.sleep(1.5)
                self.board.display()
            if self.win:
                break
            turn += 1
        
        if self.watcher:
            if not self.win:
                print("It was a tie")
            else:
                print(self.win[1], " wins!!")

        self.playerX.update(self.win)
        self.playerO.update(self.win)
        
        if self.watcher:
            time.sleep(1.5)
            

class Experiment():
    def __init__(self, X, O, trials, logname=None, watcher=False):
        self.trials = trials
        self.playerX = X
        self.playerO = O
        self.xwins = 0
        self.owins = 0
        self.ties = 0
        self.logname = logname
        self.watcher = watcher

    def logging(self):
        import os
        gp = self.xwins + self.owins + self.ties
        self.file.write(str(self.xwins/gp) + ',' + str(self.owins/gp) + ',' 
                + str(self.ties/gp) + ',' + str(len(self.playerX.playbook.keys())) 
                + ',' + str(len(self.playerO.playbook.keys())) + '\n')
        self.file.flush()
        os.fsync(self.file.fileno())
        
    def one_game(self):
        b = Board()
        g = Game(b, self.playerX, self.playerO, watcher=self.watcher)
        g.play()
        if not g.win:
            self.ties += 1
        elif g.win[1] == 'X':
            self.xwins += 1
        elif g.win[1] == 'O':
            self.owins += 1
        if self.watcher:
            print("X Wins:\t", self.xwins)
            print("O Wins:\t", self.owins) 
            print("Ties:\t", self.ties)
        
    def runExperiment(self):
        if self.logname:
            self.file = open(self.logname, 'a', buffering=1)
        for i in range(self.trials):
            self.one_game()
            if self.logname:
                self.logging()
        if self.logname:
            self.file.close()

        

## 0. Train X and O bots from scratch

![title](training.png)

In [2]:

x_menace = Agent(piece='X', mover='menace')
o_menace = Agent(piece='O', mover='menace')
E1 = Experiment(x_menace, o_menace, 100000, watcher=False, logname='tmp.txt')
E1.runExperiment()


## 1. Do more training for the X and O bots

In [None]:
E2 = Experiment(x_menace, o_menace, 100000, watcher=False, logname='tmp.txt')
E2.runExperiment()

## 2.  Play human X vs. trained O bot

In [None]:
me = Agent(piece='X', mover='human')
E3 = Experiment(me, o_menace, 5, watcher=True, logname='humanvstrained.txt')
E3.runExperiment()

## 3. Play human vs. beginner bot

In [None]:
me = Agent(piece='X', mover='human')
o_beginner = Agent(piece='O', mover='menace')
E4 = Experiment(me, o_beginner, 5, watcher=True, logname='humanvsbeg.txt')
E4.runExperiment()

## 4. Watch beginner X bot learning against a trained O bot

In [3]:
x_beginner = Agent(piece='X', mover='menace')
E5 = Experiment(x_beginner, o_menace, 50, watcher=True, logname='fromscratch.txt')
E5.runExperiment()

-	O	O
X	X	O
X	X	O
O  wins!!
X Wins:	 13
O Wins:	 23
Ties:	 14


In [3]:
len(o_menace.playbook)

967

In [8]:
p = tuple(['-','X','-','X','-','O','-','-','O','-'])
for i in range(1,10):
    print(i, '\t', o_menace.playbook[p].count(i))

KeyError: ('-', 'X', '-', 'X', '-', 'O', '-', '-', 'O', '-')

In [14]:
positions = ['-','-','-','-','X','O','X','-','-','-']
from random import choice
not_filled = [i for i,x in enumerate(positions) if x == '-']
choice(not_filled[1:])

1

In [21]:
choice([1,2,3,4,5,6,7,8,9])

1