In [3]:
class HexBoard:
    """Square Hex board stored as a dict that maps (x, y) to a cell colour.

    BLUE wins by linking the column x == 0 to the column x == size - 1;
    RED wins by linking the row y == 0 to the row y == size - 1.
    """

    BLUE = 1
    RED = 2
    EMPTY = 3

    def __init__(self, board_size):
        """Create an empty board with board_size x board_size cells."""
        self.size = board_size
        self.game_over = False
        # x is the outer loop so the dict keeps the (0,0), (0,1), ... order.
        self.board = {
            (col, row): HexBoard.EMPTY
            for col in range(board_size)
            for row in range(board_size)
        }

    def is_game_over(self):
        """Return True once a winning move has been placed."""
        return self.game_over

    def is_empty(self, coordinates):
        """Return True when the cell at `coordinates` holds no stone."""
        return self.is_color(coordinates, HexBoard.EMPTY)

    def is_color(self, coordinates, color):
        """Return True when the cell at `coordinates` holds `color`."""
        return self.board[coordinates] == color

    def get_color(self, coordinates):
        """Return the colour of a cell; the pseudo-cell (-1, -1) counts as EMPTY."""
        return HexBoard.EMPTY if coordinates == (-1, -1) else self.board[coordinates]

    def place(self, coordinates, color):
        """Put `color` on an empty cell; ignored when the game is over or the cell is taken."""
        if self.game_over or self.board[coordinates] != HexBoard.EMPTY:
            return
        self.board[coordinates] = color
        # RED is tested first, then BLUE; the second test is skipped if RED won.
        if any(self.check_win(side) for side in (HexBoard.RED, HexBoard.BLUE)):
            self.game_over = True

    def get_opposite_color(self, current_color):
        """Return RED for BLUE and BLUE for anything else."""
        return HexBoard.RED if current_color == HexBoard.BLUE else HexBoard.BLUE

    def get_neighbors(self, coordinates):
        """Return the on-board hex neighbours of a cell, in a fixed order."""
        col, row = coordinates
        left_ok = col - 1 >= 0
        right_ok = col + 1 < self.size
        up_ok = row - 1 >= 0
        down_ok = row + 1 < self.size
        candidates = (
            (left_ok, (col - 1, row)),
            (right_ok, (col + 1, row)),
            (left_ok and down_ok, (col - 1, row + 1)),
            (right_ok and up_ok, (col + 1, row - 1)),
            (down_ok, (col, row + 1)),
            (up_ok, (col, row - 1)),
        )
        return [cell for allowed, cell in candidates if allowed]

    def border(self, color, move):
        """Return True when `move` lies on the far edge that `color` must reach."""
        col, row = move
        far_edge = self.size - 1
        if color == HexBoard.BLUE:
            return col == far_edge
        if color == HexBoard.RED:
            return row == far_edge
        return False

    def traverse(self, color, move, visited):
        """Depth-first search from `move` for a `color` chain reaching the far edge.

        `visited` is a dict that is updated in place with every cell explored.
        """
        if not self.is_color(move, color):
            return False
        if visited.get(move):
            return False
        if self.border(color, move):
            return True
        visited[move] = True
        return any(
            self.traverse(color, neighbor, visited)
            for neighbor in self.get_neighbors(move)
        )

    def check_win(self, color):
        """Return True when `color` connects its two edges."""
        if color == HexBoard.BLUE:
            starts = ((0, i) for i in range(self.size))
        else:
            starts = ((i, 0) for i in range(self.size))
        # Every start cell gets its own fresh visited dict.
        return any(self.traverse(color, start, {}) for start in starts)

    def print(self):
        """Write an ASCII picture of the board to stdout."""
        symbols = {HexBoard.BLUE: "b ", HexBoard.RED: "r "}
        columns = range(self.size)
        print("   " + "".join(chr(ord('a') + col) + " " for col in columns))
        print(" -----------------------")
        for row in range(self.size):
            cells = "".join(symbols.get(self.board[col, row], "- ") for col in columns)
            # Each row is shifted right by one more space to suggest the hex slant.
            print(str(row) + " |" + " " * row + cells + "|")
        print("   -----------------------")
  
  

In [65]:
import math #{(1,2):}
from itertools import permutations
import copy
import random
class Node:
    """One node of a Monte-Carlo search tree over Hex positions.

    The board passed in must look like HexBoard: it needs the colour
    constants BLUE, RED and EMPTY, a `board` dict mapping (x, y) to a colour,
    and the methods `place(coordinates, color)` and `check_win(color)`.

    Attributes:
        ID:          the node itself (kept for backward compatibility).
        parent:      the parent Node, or the sentinel string NO_PARENT for a root.
        children:    dict mapping a child's ID_tuple to the child Node.
        visit_count: number of times the node has been visited.
        value_sum:   accumulated reward, always from RED's perspective.
        state:       private deep copy of the board this node represents.
        state_empty, state_red, state_blue: coordinate lists per cell colour.
        ID_tuple:    node name: "root" followed by every move on the path.
    """

    # Sentinel stored in `parent` for a node without a parent.
    NO_PARENT = "root has no parent"

    def __init__(self, board, parent=NO_PARENT, ID_tuple=("root",)):
        """Create a node for `board`; the board is deep-copied, never shared."""
        self.ID = self
        self.parent = parent
        self.children = {}
        self.visit_count = 0
        self.value_sum = 0
        self.state = copy.deepcopy(board)
        cells = self.state.board.items()
        # Colour constants come from the board itself instead of literals 3/2/1.
        self.state_empty = [xy for xy, colour in cells if colour == self.state.EMPTY]
        self.state_red = [xy for xy, colour in cells if colour == self.state.RED]
        self.state_blue = [xy for xy, colour in cells if colour == self.state.BLUE]
        self.ID_tuple = ID_tuple

    def expanded(self):
        """Return True when the node already has children."""
        return len(self.children) > 0

    def freddy_get_root_Node(self):
        """Walk up the parent links and return the topmost node."""
        node = self
        while isinstance(node.parent, Node):
            node = node.parent
        return node

    def BestUCT_Childnode(self, cp=2):
        """Pick the child to explore next using the UCT rule.

        The first child that has never been visited is returned immediately.
        Otherwise every child is scored with
            value_sum / visit_count + cp * sqrt(ln(N) / visit_count)
        where N is this node's own visit count, and the best-scoring child
        wins (the first one on ties).

        Args:
            cp: exploration constant; larger values favour rarely visited children.

        Returns:
            The selected child Node.

        Raises:
            ValueError: if the node has no children.
        """
        if not self.children:
            raise ValueError(
                "BestUCT_Childnode() needs at least one child; call expand() first"
            )
        # max(..., 1) keeps log() defined even if this node was never counted.
        log_parent_visits = math.log(max(self.visit_count, 1))
        best_child = None
        best_score = -math.inf
        for child in self.children.values():
            if child.visit_count == 0:
                return child
            exploitation = child.value_sum / child.visit_count
            exploration = cp * math.sqrt(log_parent_visits / child.visit_count)
            score = exploitation + exploration
            if score > best_score:
                best_child, best_score = child, score
        return best_child

    def expand(self, player):
        """Create one child per empty cell, each with `player` placed on that cell.

        Children are stored in `self.children` under ID_tuple + (cell,); calling
        expand() again replaces children that have the same name.
        """
        for cell in self.state_empty:
            child_board = copy.deepcopy(self.state)
            child_board.place(cell, player)
            child_name = self.ID_tuple + (cell,)
            self.children[child_name] = Node(child_board, parent=self, ID_tuple=child_name)

    def _red_reward(self, winner):
        """Translate a winning colour into a reward from RED's perspective."""
        return 1 if winner == self.state.RED else -1

    def rollout(self, player):
        """Play random moves to the end of the game and record the result.

        The opponent of `player` moves first, then the two sides alternate on
        randomly ordered empty cells until one of them wins. The node's own
        `state` is not modified; the simulation runs on a copy.

        Args:
            player: board.BLUE or board.RED, the side that moved into this node.

        Returns:
            The reward from RED's perspective: 1 if RED wins, -1 if BLUE wins,
            0 if the cells run out without a winner. The same value overwrites
            `self.value_sum`, so rollout is meant for unvisited nodes only.
        """
        board = copy.deepcopy(self.state)
        opponent = board.RED if player == board.BLUE else board.BLUE

        reward = 0
        if board.check_win(opponent):
            reward = self._red_reward(opponent)
        elif board.check_win(player):
            reward = self._red_reward(player)
        else:
            empty_cells = [xy for xy, colour in board.board.items() if colour == board.EMPTY]
            # One shuffle yields the same uniformly random move order as
            # repeatedly drawing and removing a random cell.
            random.shuffle(empty_cells)
            mover, waiting = opponent, player
            for cell in empty_cells:
                board.place(cell, mover)
                if board.check_win(mover):
                    reward = self._red_reward(mover)
                    break
                mover, waiting = waiting, mover

        self.value_sum = reward
        return reward

    def _report(self):
        """Print the node's statistics (the eye-check output of backpropagate)."""
        print("The node ",self.ID_tuple,": its visit count is",self.visit_count)
        print("The node ",self.ID_tuple,": its value is",self.value_sum)
        print("The node ",self.ID_tuple,": its parent is ",self.parent )

    def backpropagate(self, reward=0, verbose=True):
        """Add one visit and a reward to every ancestor of this node.

        On the node's first visit (visit_count == 0) the node is marked as
        visited and the reward is taken from its own `value_sum`, i.e. the
        result of a preceding rollout(); the `reward` argument is ignored.
        On later calls `reward` is pushed to the ancestors and the node's own
        statistics stay unchanged. A node without a parent is handled without
        error.

        Args:
            reward: value to propagate when the node was already visited.
            verbose: print the statistics of every node that is touched.
        """
        if verbose:
            self._report()
        if self.visit_count == 0:
            self.visit_count = 1
            reward = self.value_sum

        node = self
        while isinstance(node.parent, Node):
            node = node.parent
            node.visit_count += 1
            node.value_sum += reward
            if verbose:
                node._report()
            
    
        
            
        
        
        
   
            

        
        
        
        
        

        
    

('A', 'b')

# Test whether the functions work

In [63]:
# Build the 4x4 test position: BLUE (loser) holds (1,1), (2,1) and (3,1);
# RED (winner) holds (0,0) and (0,1).
winner = HexBoard.RED 
loser = HexBoard.BLUE 
board = HexBoard(4)

board.place((1,1), loser)
board.place((2,1), loser)

board.place((3,1), loser)

board.place((0,0), winner)

board.place((0,1), winner)


board.print()
# End of the test position set-up.



root = Node(board)

# Expand the root: one child per empty cell, with a RED stone on that cell.
root.expand(winner)

# Check that the expansion worked.
# Each key is the child's name, which records all moves made so far in order;
# each value is the child's Node object.
for key,value in root.children.items():
    print("root children's neam: ",key)

    
print("Get BestUCT_Childnode. the output is a node object",root.BestUCT_Childnode())
print("BestUCT_Childnode's name",root.BestUCT_Childnode().ID_tuple)

# Roll out the unvisited child; the reward (1, -1 or 0) is stored in that child's value_sum.
root.BestUCT_Childnode().rollout(winner)

# backpropagate does two things:
#   1. it marks the unvisited child as visited (visit_count goes from 0 to 1);
#   2. it adds the reward and one visit to the child's parent, grandparent, ... up to the root.
# backpropagate() returns nothing, so the outer print shows None after the eye-check lines.
print("eye_check of backpropagate ",root.BestUCT_Childnode().backpropagate())


# Expand one of the root's children to check that expand() also works below the root.
# NOTE: BestUCT_Childnode() is re-evaluated on every call. The child used above now has
# visit_count == 1, so the calls below return the next unvisited child, not the same node.
# NOTE(review): the child is expanded with `winner` again, i.e. the same colour as the root
# expansion -- confirm whether `loser` was intended for the second ply.
root.BestUCT_Childnode().expand(winner)
print("the root's children we choose to expand its childnodes: ", root.BestUCT_Childnode().ID_tuple )
for key,value in root.BestUCT_Childnode().children.items():
    print("children's name of root's childnode",key)


   a b c d 
 -----------------------
0 |r - - - |
1 | r b b b |
2 |  - - - - |
3 |   - - - - |
   -----------------------
root children's neam:  ('root', (0, 2))
root children's neam:  ('root', (0, 3))
root children's neam:  ('root', (1, 0))
root children's neam:  ('root', (1, 2))
root children's neam:  ('root', (1, 3))
root children's neam:  ('root', (2, 0))
root children's neam:  ('root', (2, 2))
root children's neam:  ('root', (2, 3))
root children's neam:  ('root', (3, 0))
root children's neam:  ('root', (3, 2))
root children's neam:  ('root', (3, 3))
Get BestUCT_Childnode. the output is a node object <__main__.Node object at 0x7ff32930b850>
BestUCT_Childnode's name ('root', (0, 2))
The node  ('root', (0, 2)) : its visit count is 0
The node  ('root', (0, 2)) : its value is 1
The node  ('root', (0, 2)) : its parent is  <__main__.Node object at 0x7ff32930b340>
The node  ('root',) : its visit count is 1
The node  ('root',) : its value is 1
The node  ('root',) : its parent is  root has

# History

In [None]:
def expand(self,player, original_state):
        """Earlier multi-ply variant of expand, kept under the history heading.

        It is defined at module level although it takes `self`; presumably it
        was once a Node method -- TODO confirm before reusing it.

        The sequence length `self.i` is the number of empty cells in
        `original_state` minus the number of empty cells of this node, plus one.
        Every permutation of this node's empty cells of that length is played
        on a copy of the node's state, alternating `player` and the opposite
        colour, and stored as a child keyed by the whole permutation.

        Args:
            player: HexBoard.BLUE or HexBoard.RED; makes the even-indexed moves.
            original_state: HexBoard object used as the reference for counting
                empty cells.
        """
        
        # original_state is a HexBoard object
        self.i = len([k for k, v in original_state.board.items() if v == 3 ])-len(self.state_empty)  + 1
        if player == HexBoard.BLUE:
            self.player_enemy = HexBoard.RED
        else:
            self.player_enemy = HexBoard.BLUE
       
        self.movingstate = copy.deepcopy(self.state) # self.movingstate is a HexBoard object
        self.player_state = [k for k, v in self.state.board.items() if v == player ]
        self.emptycoordinate_2 = copy.deepcopy(self.state_empty) # work on a copy so self.state_empty is not replaced
           
        for a_tuple in list(permutations(self.emptycoordinate_2,self.i)):
            if self.i == 1:
                # Single move: place it, store the child, then reset the scratch board.
                self.movingstate.place(a_tuple[0], player)
                self.children[a_tuple]  = Node(self.movingstate,parent = self.ID,ID_tuple =a_tuple )
                self.movingstate = copy.deepcopy(self.state)

            else: 
                for j in range(len(a_tuple)):
                    if j % 2 == 0:
                        # Even index: the player's move. No child is stored and the board is not reset here.
                        self.movingstate.place(a_tuple[j], player)
                    elif j % 2 == 1:
                        # Odd index: the opponent's move; the child is stored under the full
                        # permutation and the scratch board is reset to this node's state.
                        # NOTE(review): for sequences longer than two moves the reset happens
                        # mid-sequence, and for odd lengths the final even-indexed move stays on
                        # self.movingstate for the next permutation -- verify before reuse.
                        self.movingstate.place(a_tuple[j], self.player_enemy)
                        self.children[a_tuple]  = Node(self.movingstate,parent = self.ID,ID_tuple =a_tuple)
                        self.movingstate = copy.deepcopy(self.state)