# TIC TAC TOE

In [5]:
# !pip3 install matplotlib

In [6]:
import numpy as np
import matplotlib.pyplot as plt

In [7]:
# Side length of the square game board (the board is LENGTH x LENGTH).
LENGTH = 3

## AGENT

In [12]:
class Agent:
    """Epsilon-greedy player that learns a value for every board state.

    The agent keeps a value table ``V`` (indexable by the state id returned
    by ``env.get_state()``), picks moves with an epsilon-greedy policy and,
    at the end of an episode, backs the final reward up through the states
    it recorded in ``state_history``.
    """

    def __init__(self, eps=0.1, alpha=0.5, verbose=False):
        """Create an agent.

        Args:
            eps: probability of playing a random move instead of the greedy one.
            alpha: learning rate used by ``update``.
            verbose: when true, print diagnostics while choosing a move.
        """
        self.eps = eps
        self.alpha = alpha
        self.verbose = verbose
        self.state_history = []  # states seen during the current episode

    def set_symbol(self, s):
        """Set the board marker this agent plays with (e.g. ``env.x``)."""
        self.sym = s

    def set_value(self, v):
        """Install the state-value table, indexed by state id."""
        self.V = v

    def reset_history(self):
        """Forget the states recorded for the current episode."""
        self.state_history = []

    def take_action(self, env):
        """Choose a move epsilon-greedily and play it on ``env.board``.

        With probability ``eps`` a uniformly random empty cell is chosen;
        otherwise every empty cell is tried in turn and the one whose
        resulting state has the highest value in ``V`` is played.

        Args:
            env: the environment; must provide ``board`` (2-D array),
                ``is_empty(i, j)`` and ``get_state()``.

        Raises:
            ValueError: if the board has no empty cell left.
        """
        rows, cols = env.board.shape
        empty_cells = [
            (i, j)
            for i in range(rows)
            for j in range(cols)
            if env.is_empty(i, j)
        ]
        if not empty_cells:
            raise ValueError("take_action called on a board with no empty cells")

        if np.random.rand() < self.eps:
            if self.verbose:
                print('Taking a random action !')
            next_move = empty_cells[np.random.choice(len(empty_cells))]
        else:
            pos2val = {}
            next_move = None
            best_value = float("-inf")
            for i, j in empty_cells:
                # Temporarily place our marker to look up the value of the
                # state this move would lead to, then undo it.
                env.board[i, j] = self.sym
                state = env.get_state()
                env.board[i, j] = 0

                value = self.V[state]
                pos2val[(i, j)] = value
                if value > best_value:
                    best_value = value
                    next_move = (i, j)

            # The value dump only makes sense for a greedy move: pos2val is
            # not computed when exploring.
            if self.verbose:
                print('Taking a greedy action !')
                self._print_values(env, pos2val)

        # The move is played regardless of the verbose flag.
        env.board[next_move[0], next_move[1]] = self.sym

    def _print_values(self, env, pos2val):
        """Print the board, showing candidate-move values in empty cells."""
        rows, cols = env.board.shape
        for i in range(rows):
            print("******************")
            for j in range(cols):
                if env.is_empty(i, j):
                    print('%.2f' % pos2val[(i, j)], end=" ")
                else:
                    print(" ", end=" ")
                    if env.board[i, j] == env.x:
                        print("x |", end=" ")
                    elif env.board[i, j] == env.o:
                        print("o |", end=" ")
                    else:
                        print(" |", end="")
            print()  # end the row once all of its cells are printed
        print("******************")

    def update_state_history(self, s):
        """Record state ``s`` as visited in the current episode."""
        self.state_history.append(s)

    def update(self, env):
        """Back the episode's reward up through the recorded states.

        Intended to be called once, at the end of an episode. Walking the
        history from the last state to the first, each value is moved
        towards the current target,
        ``V[s] <- V[s] + alpha * (target - V[s])``, and the freshly updated
        value becomes the target for the preceding state. The history is
        cleared afterwards.
        """
        target = env.reward(self.sym)

        for state in reversed(self.state_history):
            updated = self.V[state] + self.alpha * (target - self.V[state])
            self.V[state] = updated
            target = updated

        self.reset_history()

## Environment

In [23]:
class Environment:
    """Tic-tac-toe board together with the rules for ending a game.

    Cells of ``board`` hold ``0`` (empty), ``self.x`` (-1) or ``self.o``
    (+1), so a line fully owned by one player sums to ``size * player``.
    """

    def __init__(self):
        self.board = np.zeros((LENGTH, LENGTH))
        self.x = -1  # marker for player x
        self.o = +1  # marker for player o
        self.ended = False  # set once a finished game has been detected
        self.winner = None  # marker of the winner; None if no winner
        self.num_states = 3 ** (LENGTH * LENGTH)  # 3 possible values per cell

    def is_empty(self, i, j):
        """Return whether cell ``(i, j)`` is unoccupied."""
        return self.board[i, j] == 0

    def reward(self, sym):
        """Return 1 if the game is over and ``sym`` won, otherwise 0."""
        if not self.game_over():
            return 0
        return 1 if self.winner == sym else 0

    def get_state(self):
        """Return the board encoded as an integer in base 3.

        Cells are visited in row-major order; cell number ``k`` contributes
        ``digit * 3**k`` where the digit is 0 for empty, 1 for x and 2 for o.
        """
        h = 0
        for k, cell in enumerate(self.board.flat):
            if cell == self.x:
                digit = 1
            elif cell == self.o:
                digit = 2
            else:
                digit = 0
            h += (3 ** k) * digit
        return h

    # Original name of the method, kept so existing callers keep working.
    get_states = get_state

    def game_over(self, force_recalculate=False):
        """Return whether the game has ended, updating ``ended``/``winner``.

        Args:
            force_recalculate: when true, ignore the cached ``ended`` flag
                and re-examine the board.

        Returns:
            True if a player owns a full row, column or diagonal, or if the
            board is full (a draw); False otherwise.
        """
        if not force_recalculate and self.ended:
            return True

        size = self.board.shape[0]
        # Sums of every row, every column, then the two diagonals.
        line_sums = [
            *self.board.sum(axis=1),
            *self.board.sum(axis=0),
            self.board.trace(),
            np.fliplr(self.board).trace(),
        ]
        for total in line_sums:
            for player in (self.o, self.x):
                if total == size * player:
                    self.ended = True
                    self.winner = player
                    return True

        # Draw: no winning line and no empty cell left.
        if np.all(self.board != 0):
            self.winner = None
            self.ended = True
            return True

        # Still in progress; also clears a stale flag after a forced
        # recalculation.
        self.winner = None
        self.ended = False
        return False

    def is_draw(self):
        """Return whether the game has ended without a winner."""
        return self.ended and self.winner is None

    def display_board(self):
        """Print the board, one row per line, framed by separator lines."""
        rows, cols = self.board.shape
        for i in range(rows):
            print("************")
            for j in range(cols):
                print("  ", end="")
                if self.board[i, j] == self.x:
                    print("x ", end="")
                elif self.board[i, j] == self.o:
                    print("o ", end="")
                else:
                    print("  ", end="")
            print("")
            print("************")

### np.all

In [24]:
# print(np.all([[1,2],[0,3]]))
# print(np.all(([[1,2],[0,3]])==0))
# print(([[1,2],[0,3]])==0)
# print((([[1,2],[0,3]])==0)==False)