# Learning TicTacToe

In [None]:
from copy import deepcopy
import time
import sys
import matplotlib.pyplot as plt
import numpy as np
import random
import itertools
from IPython import display

## Board Class

In [None]:
class Board(object):
    """A TicTacToe board with a configurable set of winning triples.

    Attributes:
        fields: length-9 integer array, 0 meaning "empty". Other code in this
            file writes 1 and -1 for the two players.
        num_moves: number of moves played so far on this board.
        winning_positions: the triples of field indices that count as a win.
    """
    def __init__(self, choose_random = True, win_pos = np.nan):
        """Create an empty board.

        Args:
            choose_random: if true, draw 8 winning triples at random from all
                84 three-element combinations of the fields 0..8. The draw is
                with replacement, so triples may repeat.
            win_pos: winning positions to use when ``choose_random`` is false.
        """
        self.fields = np.array(9*[0])
        self.num_moves = 0
        if choose_random:
            # NOTE: this reseeds NumPy's *global* generator, so every board
            # gets the same winning positions and the random stream restarts
            # whenever a board is constructed.
            np.random.seed(1)
            # contains all choose(9,3) == 84 possible combinations of three numbers between 0 and 8
            all_combinations_of_three = list(itertools.combinations(range(9), 3))
            # randint excludes its upper bound, so pass the list length
            # itself; otherwise the last combination could never be drawn.
            chosen = np.random.randint(0, len(all_combinations_of_three), 8)
            self.winning_positions = [all_combinations_of_three[i] for i in chosen]
            print(self.winning_positions)
        else:
            self.winning_positions = win_pos

    def reset(self):
        """Clear all fields and the move counter; winning positions are kept."""
        self.fields = np.array(9*[0])
        self.num_moves = 0
        

## Game Class

In [None]:

class Game(object):
    """Record of a single game as seen by one player.

    Attributes:
        prob: probability with which each chosen action was selected.
        game_state: board state seen before each decision.
        action: field index chosen at each decision.
        final_game_state: board state when the game ended.
        result: outcome reported at the end of the game (0 until then).
    """
    def __init__(self):
        # A new record starts out exactly like a freshly reset one.
        self.reset()

    def reset(self):
        """Forget everything recorded so far."""
        self.prob = []
        self.game_state = []
        self.action = []
        self.final_game_state = []
        self.result = 0

    def __str__(self):
        # __str__ must return a string; returning the list itself makes
        # str(game) raise TypeError.
        return str(self.prob)
        


## Table Class

In [None]:
class Table(object):
    """Referee that lets two players play repeated games on one board.

    Player 1's marks are stored as 1 and player 2's as -1 in ``board.fields``.
    Each player is shown the board from its own perspective (own marks
    positive), which is why player 2 receives the negated field array.

    Attributes:
        player1, player2: objects providing ``name``,
            ``action(state, winning_positions)`` and
            ``end_game(state, result, num_games)``.
        player1_won, player2_won: running win counters.
        credits: the win counters recorded after every finished game.
        output: when true, moves and results are printed.
        board: the ``Board`` being played on.
        game_is_on: true while the current game is undecided.
        num_games: number of finished games.
    """
    def __init__(self, player1, player2):
        self.player1 = player1
        self.player2 = player2
        self.player1_won = 0
        self.player2_won = 0
        self.credits = []
        self.output = True
        self.board = Board()
        self.game_is_on = True
        self.num_games = 0

    def reset(self):
        """Prepare the board for a new game; win counters are kept."""
        self.board.reset()
        self.game_is_on = True

    def play_a_game(self, game_id):
        """Play one complete game.

        Args:
            game_id: number of the game. Its parity decides who moves first,
                so the opening move alternates between consecutive ids.

        Raises:
            NameError: if a player picks a field outside 0..8 or one that is
                already occupied.
        """
        self.reset()
        while self.game_is_on:
            if (self.board.num_moves + game_id) % 2 == 0:  # player1's turn
                self._make_move(self.player1, 1)
            else:  # player2's turn
                self._make_move(self.player2, -1)
            self.board.num_moves += 1
            self.evaluate_board()

    def _make_move(self, player, mark):
        """Ask ``player`` for a move and place ``mark`` (1 or -1) on the board."""
        # Multiplying by the mark copies the array and flips the signs for
        # player 2, so each player always sees its own marks as +1.
        a = player.action(mark * self.board.fields, self.board.winning_positions)
        if self.output:
            print(player.name + " decided to play " + str(a))
        if not (0 <= a < 9):
            raise NameError('The fields are numbered from 0 to 8!')
        if self.board.fields[a] != 0:
            raise NameError('This field is already occupied!!')
        self.board.fields[a] = mark

    def end_game(self, result):
        """Record the score, stop the game and notify both players.

        Args:
            result: 1 if player 1 won, -1 if player 2 won, 0 for a draw.
                Player 2 is told the negated result.
        """
        if self.num_games == 0:
            self.credits = np.array([self.player1_won,self.player2_won])
        else:
            self.credits = np.vstack((self.credits, [self.player1_won,self.player2_won]))
        self.num_games += 1
        self.game_is_on = False
        self.player1.end_game(self.board.fields, result, self.num_games)
        self.player2.end_game(-self.board.fields, -result, self.num_games)

    def evaluate_board(self):
        """End the game if somebody has won or the board is full.

        Returns:
            0 when fewer than five moves have been played, otherwise None.
        """
        # if less than 5 fields are occupied, keep playing.
        if self.board.num_moves < 5:
            return 0

        # Check *every* winning position; a fixed range(7) would silently
        # ignore the last of the eight positions.
        for cur_win_pos in self.board.winning_positions:
            if not self.game_is_on:
                break
            field_evaluated = sum(self.board.fields[j] for j in cur_win_pos)
            if field_evaluated == 3:
                self.player1_won += 1
                if self.output:
                    print(self.player1.name + " won! Winning position: " + str(cur_win_pos))
                self.end_game(1) #player1 won!
            elif field_evaluated == -3:
                self.player2_won += 1
                if self.output:
                    print(self.player2.name + " won! Winning position: " + str(cur_win_pos))
                self.end_game(-1) #player2 won!

        if self.game_is_on and self.board.num_moves == 9:
            if self.output:
                print("draw!")
            self.end_game(0) #draw!

  

## Strategy Class

In [None]:
class Strategy(object):
    """Look-up table of a player's behaviour.

    ``table`` maps a hashed game state to the vector of probabilities with
    which each of the nine fields is chosen in that state.
    """
    def __init__(self):
        # Every strategy starts without any known state.
        self.table = dict()

## Player Class

In [None]:
class Player(object):
    """Base class for all players; every hook does nothing by default.

    Subclasses override ``action`` to choose a move and may override
    ``end_game`` / ``update_strategy`` to learn from finished games.
    """
    def __init__(self, name):
        self.name = name

    def reset(self):
        """Reset the record of the current game, if this player keeps one."""
        # The base class never creates ``self.game``; only some subclasses do.
        # Without this guard, reset() raises AttributeError for the others.
        game = getattr(self, "game", None)
        if game is not None:
            game.reset()

    def action(self, state, winningpos):
        """Return the index (0..8) of the field to play.

        Args:
            state: the board from this player's perspective.
            winningpos: the triples of field indices that count as a win.
        """
        pass

    def end_game(self, state, result, num_games):
        """Called once a game is over, with its final state and result."""
        pass

    def update_strategy(self, num_games):
        """Hook for adapting the strategy after a game."""
        pass
    

## History

In [None]:
class History(object):
    """Chronological list of the games a player has finished."""
    def __init__(self):
        self.game = []

    def add_game(self, g):
        """Append an independent copy of ``g`` to the history."""
        # Copy, so that later changes to ``g`` do not alter the stored record.
        self.game.append(deepcopy(g))

    def remove_almost_all_games(self, nn):
        """Keep only the ``nn`` most recent games.

        Args:
            nn: number of games to keep; zero or less empties the history.
        """
        if nn <= 0:
            # ``self.game[-0:]`` would be the whole list, so handle it here.
            self.game = []
        else:
            # Slice to the end of the list. Stopping at -1 would throw away
            # the most recent game and keep only nn - 1 entries.
            self.game = self.game[-nn:]

## Strategic Player Class

In [None]:
class StrategicPlayer(Player):
    """Player that draws its moves from a per-state probability table.

    The table lives in ``self.strategy`` and is keyed by the string form of
    the board state. A state seen for the first time gets a uniform
    distribution over its empty fields. Every decision is logged in
    ``self.game``, and finished games are archived in ``self.history``.
    """
    def __init__(self, name, cre=0):
        self.name = name
        self.game = Game()
        self.history = History()
        self.strategy = Strategy()
        # The nine field indices 0..8 that actions are drawn from.
        self.rangeeight = np.array(range(9))

    def hash_state(self, state):
        """Return the string form of ``state``, used as the table key."""
        # Not a real hash, just NumPy's textual rendering of the array.
        return np.array_str(state)

    def dehash_state(self, gs):
        """Parse a key produced by ``hash_state`` back into a numeric array."""
        without_open = gs.replace('[', '')
        without_brackets = without_open.replace(']','')
        return np.fromstring(without_brackets, sep = " ")

    def action(self, state, winningpos):
        """Sample a field index according to the table entry for ``state``.

        The probability of the sampled action, the state and the action
        itself are appended to the current game record.
        """
        key = self.hash_state(state)
        table = self.strategy.table
        if key not in table:
            # Unknown state: spread the probability evenly over empty fields.
            free = np.array(np.where(state == 0)).flatten()
            uniform = np.zeros(9)
            uniform[free] = 1.0 / len(free)
            table[key] = uniform
        probs = table[key]  # vector of probabilities

        move = np.random.choice(self.rangeeight, p = probs)

        record = self.game
        record.prob.append(probs[move])
        record.game_state.append(state)
        record.action.append(move)
        return move

    def end_game(self, state, result, num_games):
        """Archive the finished game, update the strategy, start a new record."""
        finished = self.game
        finished.final_game_state = state
        finished.result = result
        self.history.add_game(finished)
        self.update_strategy(num_games)
        self.game = Game()

    def update_strategy(self, num_games):
        """Hook for subclasses; the plain strategic player never adapts."""
        pass
        


## Explore-Exploit Player Class

In [None]:
class ExploreExploitPlayer(StrategicPlayer):
    """Strategic player meant to explore first and exploit afterwards.

    The actual learning step in ``update_strategy`` is still a skeleton: the
    places where the gains are accumulated and the table is rewritten are
    marked with TODO comments.
    """
    def __init__(self, name, cre=0):
        # Same state as the parent: name, game record, history, strategy.
        StrategicPlayer.__init__(self, name, cre)

    def update_strategy(self, num_games):
        """Re-estimate the strategy table from the recorded history.

        Args:
            num_games: number of games finished so far at the current table
                (1 after the first game).
        """
        # The driver loop's zero-based game index is always one less than the
        # table's count of finished games. Deriving it here removes the
        # dependency on a global ``game_id`` defined only in the driver script.
        game_id = num_games - 1
        if not ((num_games % 1000 == 0) and (0 < game_id < 1002)):
            return

        print('...updating strategy...')

        # One zero vector per known state, keyed like the strategy table.
        cum_gain_action_taken = {key: np.zeros(9) for key in self.strategy.table}

        # go through all games in history
        for g in self.history.game:
            # go through all game states in game g
            for visited_state in g.game_state:
                # get hashed game state
                hashed_state = self.hash_state(visited_state)
                # TODO: compute cum_gain_action_taken[hashed_state]

        # update strategy to always use the best action
        for game_state_hashed in list(self.strategy.table):
            state = self.dehash_state(game_state_hashed)
            # TODO: change self.strategy.table[game_state_hashed]
                

## Random Player Class

In [None]:
class RandomPlayer(Player):
    """ Random, a player with a predefined (completely random) strategy. """
    def __init__(self):
        self.name = "Rando M."

    def action(self, state, winningpos):
        """Return a uniformly chosen empty field.

        Args:
            state: board array in which 0 marks an empty field.
            winningpos: ignored.

        Raises:
            ValueError: if the board has no empty field left.
        """
        emptyfields = np.array(np.where(state == 0)).flatten()
        # randint excludes its upper bound, so pass the number of empty
        # fields itself; with "- 1" the last empty field is never picked.
        ind = np.random.randint(0, len(emptyfields))
        return emptyfields[ind]


## Value-by-Hand Player Class

In [None]:
class ValuePlayer(Player):
    """ A player who rates every possible move with a hand-written value
    function and plays the move with the highest value. """
    def __init__(self):
        self.name = "Val Hand"

    def provide_value(self, state, future_state, winningpos):
        """Rate the move that turns ``state`` into ``future_state``.

        Placeholder: every move is currently worth 0.
        """
        # change this
        return 0

    def action(self, state, winningpos):
        """Return the empty field whose resulting position scores highest."""
        # Occupied fields keep -inf and therefore can never be the argmax.
        scores = np.full(9, -np.inf)
        for field in np.array(np.where(state == 0)).flatten():
            candidate = np.array(state)
            candidate[field] = 1
            scores[field] = self.provide_value(state, candidate, winningpos)
        # argmax returns the first maximum, so ties go to the lowest index.
        return np.argmax(scores)


## AlwaysLeft Player Class

In [None]:
class AlwaysLeftPlayer(Player):
    """ A player who always puts a mark on the first free field. """
    def __init__(self):
        self.name = "Lefto"

    def action(self, state, winningpos):
        """Return the lowest index at which ``state`` is 0."""
        free_indices = np.where(state == 0)
        return np.array(free_indices).flatten()[0]


## Human Player Class

In [None]:
class HumanPlayer(Player):
    """A player whose moves are typed in on the console."""
    def __init__(self, name="Human"):
        self.name = name

    def action(self, state, winningpos):
        """Ask the user for a move until a valid one is entered.

        Args:
            state: board array from this player's perspective; 0 marks an
                empty field.
            winningpos: ignored.

        Returns:
            int: index (0..8) of an empty field.
        """
        # Read the answer as plain text on both Python 2 and 3. Python 2's
        # input() would *evaluate* whatever the user types, and Python 3's
        # input() returns a str that must be converted explicitly.
        try:
            read_line = raw_input
        except NameError:
            read_line = input

        print("The current position is: " + str(state) + ".")
        s = "where do you want to make the next cross?"
        prompt = self.name + ", " + s + " (number between 0 and 8): "
        while True:
            try:
                response = int(read_line(prompt))
            except ValueError:
                print("Please enter a whole number.")
                continue
            # Re-prompt instead of letting an invalid move abort the game.
            if 0 <= response < 9 and state[response] == 0:
                return response
            print("Please choose an empty field between 0 and 8.")
    
        

## Play TicTacToe

In [None]:
%matplotlib


random.seed(2)

do_plot = False

n_games = 2000


# create and add players
p1 = HumanPlayer("Human1")
p2 = RandomPlayer()
p3 = AlwaysLeftPlayer()
p6 = ValuePlayer()
p7 = ExploreExploitPlayer("DoubleExponential")

players = [p7, p6, p3]



num_players = len(players)
results = np.transpose(np.matrix(np.zeros(num_players)))*np.zeros(num_players)
for i in range(num_players):
    results[i,i] = np.nan




for j in range(num_players):
    for i in range(j):
        if (i != j): # not necessary
            p1t = players[i]
            p2t = players[j]
            
            Credits = np.zeros((2,n_games))
            tableETH = Table(p1t, p2t)
            tableETH.output = False

            if(do_plot):
                #plt.close("all")
                plt.axis([0, n_games, -10, 1000])
                lines = [plt.plot([], [])[0] for _ in range(2)]
                plt.show()

            print '\n'    
            print "Player", p1t.name, " is playing against ", p2t.name, "..."
            
            for game_id in range(n_games):
                
                #if(game_id == 1998):
                #   tableETH.output = True 
                
                #play game
                tableETH.play_a_game(game_id)
                if tableETH.output:
                    print "\rYou have finished %d games" % game_id,
                    print "Board: " + str(tableETH.board.fields)
                    sys.stdout.flush()
                

                if(do_plot):
                    # Update plot every 20 games
                    if ((game_id > 0) and (game_id % 20 == 0)):
                        Credits = np.transpose(tableETH.credits)
                        if (np.min(Credits) < plt.gca().get_ylim()[0]):
                            plt.gca().set_ylim([np.min(Credits)-10,100])
                        for i in range(2):
                            lines[i].set_xdata(range(game_id+1))
                            lines[i].set_ydata(Credits[i,0:(game_id+1)])
                        plt.draw()
                        time.sleep(0.1)



            print "Player", p1t.name, " has won ", tableETH.player1_won, " games."
            print "Player", p2t.name, " has won ", tableETH.player2_won, " games." 
            
            if tableETH.output:
                print np.transpose(tableETH.credits)

            results[i,j] = tableETH.player1_won - tableETH.player2_won
            results[j,i] = tableETH.player2_won - tableETH.player1_won
            
            print [p.name for p in players]
            print results
            
for i in range(num_players):
    results[i,i] = 0


print ''
print '---------'
print 'Total number of games the players have won:'
print [p.name for p in players]
print sum(np.transpose(results))
print '---------'
print 'Total number of duels the players have won:'
print [p.name for p in players]
print sum(np.transpose(results)>0)


# Plotting


In [None]:
import matplotlib.pyplot as plt

# Plot the running win counts of the most recent duel. ``tableETH`` is the
# last table created by the tournament loop, so the two curves belong to the
# two players seated at *that* table, not to the first two entries of
# ``players``.
plt.axis([0, n_games, -10, n_games])
seat_names = [tableETH.player1.name, tableETH.player2.name]
lines = [plt.plot([], [], label=seat_name)[0] for seat_name in seat_names]
plt.legend(loc='upper left', fontsize=10)

# One row per seat, one column per finished game.
Credits = np.transpose(tableETH.credits)
if (np.min(Credits) < plt.gca().get_ylim()[0]):
    plt.gca().set_ylim([np.min(Credits)-10,100])
for seat in range(2):
    lines[seat].set_xdata(list(range(game_id+1)))
    lines[seat].set_ydata(Credits[seat,0:(game_id+1)])
plt.draw()
# Show only after the data are in place, so that a blocking (non-interactive)
# backend displays the filled plot as well.
plt.show()