In [1]:
from utils_1.goenv import GoEnv,_format_state, create_env_copy
from utils_1.fnet import NeuralTrainer, AlphaNeural
import numpy as np
import goSim
from tqdm.auto import tqdm

In [2]:
class AlphaGoPlayer():
    def __init__(self, init_state, seed, player_color, path = "utils_1/nov8/net30.model",debug=True):
        # setup parameters
        self.init_state = init_state
        self.seed = seed
        self.player_color = player_color
        
        # init environment
        if player_color==1:
            self.env = GoEnv(player_color="black", board_size=13)
        else:
            self.env = GoEnv(player_color="white", board_size=13)
        self.env.reset()
        self.debug=debug
        
        # load neural network
        self.model = NeuralTrainer(res_blocks=10, board_size=13)
        self.model.load_model(path)
        
    def get_action(self, cur_state, opponent_action):
        if opponent_action>=0:
            self.env.step(opponent_action)
            if self.debug:
                print('-------------------------------------')
                print('opponent_action: ' + str(opponent_action))
                print('-------------------------------------')
        
        policy, value = self.model.predict(self.env.get_history())
        legal_moves = self.env.get_legal_moves()
        policy = policy*legal_moves
        action = np.argmax(policy)
        self.env.step(action)
        
        if self.debug:
            print('-------------------------------------')
            print('player_action: ' + str(action))
            print('-------------------------------------')
            self.env.print_board()
        
        return action

In [83]:
class MinMax:
    def __init__(self, board_size, player, max_depth=3, path=None):
        self.board_size=board_size
        self.max_depth = max_depth
        self.player = player
        
        # load neural network
        self.model = NeuralTrainer(res_blocks=10, board_size=13)
        self.model.load_model(path)
        
        
    def alpha_beta_search(self, board):
        # if player is black then do minimization at root
        if self.player==1:
            action_value = [np.inf]*(self.board_size**2+1)
            action_space = np.zeros(self.board_size**2+1)
            
            policy,value = self.model.predict(board.get_history())
            # select 50 best action
            policy = np.argsort(policy)[:50]
            action_space[policy]=1
            # amongst these action select legal actions
            action_space = np.nonzero(action_space*board.get_legal_moves())[0]
            if len(action_space)==0:
                action_space = np.nonzero(board.get_legal_moves())[0]
            
            for action in (action_space):
                child = create_env_copy(board)
                try:
                    child.step(action)
                except (Exception):
                    print(action_space)
                    print(action)

                action_value[action] = self.max_search(child, -np.inf, np.inf,1)
            return np.argmin(action_value)
        
        # if player is white then do maximization at root
        else:
            action_value = [-np.inf]*(self.board_size**2+1)
            action_space = np.zeros(self.board_size**2+1)
            
            policy,value = self.model.predict(board.get_history())
            # select 50 best action
            policy = np.argsort(policy)[:50]
            action_space[policy]=1
            # amongst these action select legal actions
            action_space = np.nonzero(action_space*board.get_legal_moves())[0]
            if len(action_space)==0:
                action_space = np.nonzero(board.get_legal_moves())[0]
            
            for action in (action_space):
                child = create_env_copy(board)
                child.step(action)
                action_value[action] = self.max_search(child, -np.inf, np.inf,1)
            return np.argmax(action_value)
    
    def max_search(self, board, alpha, beta, depth):
        if board.isComplete():
            return board.get_winner()*1000
        if depth==self.max_depth:
            policy,value = self.model.predict(board.get_history())
            return value
        
        v = -np.inf
        
        action_space = np.zeros(self.board_size**2+1)
            
        policy,value = self.model.predict(board.get_history())
        # select 50 best action
        policy = np.argsort(policy)[:50]
        action_space[policy]=1
        # amongst these action select legal actions
        action_space = np.nonzero(action_space*board.get_legal_moves())[0]
        if len(action_space)==0:
            action_space = np.random.choice(np.nonzero(board.get_legal_moves())[0],size=50)
        
        for action in action_space:
            child = create_env_copy(board)
            try:
                child.step(action)
            except (Exception):
                print(action_space)
                print(action)
            
            v = max(v,self.min_search(child,alpha,beta,depth+1))
            if v>= beta:
                return v
            alpha = max(alpha,v)
        return v
    
    def min_search(self, board, alpha, beta, depth):
        if board.isComplete():
            return board.get_winner()*1000
        if depth==self.max_depth:
            policy,value = self.model.predict(board.get_history())
            return value
        
        v = np.inf
        action_space = np.zeros(self.board_size**2+1)
            
        policy,value = self.model.predict(board.get_history())
        # select 50 best action
        policy = np.argsort(policy)[:50]
        action_space[policy]=1
        # amongst these action select legal actions
        action_space = np.nonzero(action_space*board.get_legal_moves())[0]
        if len(action_space)==0:
            action_space = np.random.choice(np.nonzero(board.get_legal_moves())[0],size=50)
        
        for action in action_space:
            child = create_env_copy(board)
            child.step(action)
            
            v = min(v,self.max_search(child,alpha,beta, depth+1))
            if v<= alpha:
                return v
            beta = min(beta,v)
        return v

In [84]:
class AlphaGoPlayer_MinMax():
    def __init__(self, init_state, seed, player_color,path = "utils_1/nov8/net30.model",debug=True):
        # setup parameters
        self.init_state = init_state
        self.seed = seed
        self.player_color = player_color
        
        self.minmax = MinMax(board_size=13, player=player_color, max_depth=1, path=path)
        
        # init environment
        if player_color==1:
            self.env = GoEnv(player_color="black", board_size=13)
        else:
            self.env = GoEnv(player_color="white", board_size=13)
        self.env.reset()
        
        
    def get_action(self, cur_state, opponent_action):
        if opponent_action>=0:
            self.env.step(opponent_action)
        
        action = self.minmax.alpha_beta_search(self.env)
        self.env.step(action)
        
        return action

In [85]:
env = goSim.GoEnv(player_color="black", observation_type='image3c', illegal_move_mode="raise", board_size=13, komi=7.5)

obs_t = env.reset()
opp_action=-1

In [86]:
a = AlphaGoPlayer_MinMax(obs_t.copy(), 1, 1)

In [87]:
env.set_player_color(1)
a_t = a.get_action(obs_t.copy(), opp_action)
obs_t, a_t, r_t, done, info, cur_score = env.step(a_t)

52


In [88]:
env.set_player_color(2)
opp_action = np.random.randint(0,170)
obs_t, a_t, r_t, done, info, cur_score = env.step(opp_action)

158


In [89]:
env.set_player_color(1)
a_t = a.get_action(obs_t.copy(), opp_action)
obs_t, a_t, r_t, done, info, cur_score = env.step(a_t)

89


In [90]:
env.set_player_color(2)
opp_action = np.random.randint(0,170)
obs_t, a_t, r_t, done, info, cur_score = env.step(opp_action)

108


In [91]:
env.set_player_color(1)
a_t = a.get_action(obs_t.copy(), opp_action)
obs_t, a_t, r_t, done, info, cur_score = env.step(a_t)

6


In [92]:
env = goSim.GoEnv(player_color="black", observation_type='image3c', illegal_move_mode="raise", board_size=13, komi=7.5)
obs_t = env.reset()
opp_action=-1
a = AlphaGoPlayer(obs_t.copy(), 2, 1)


env.set_player_color(1)
opp_action = np.random.randint(0,170)
obs_t, a_t, r_t, done, info, cur_score = env.step(opp_action)

13


In [93]:
env.set_player_color(2)
a_t = a.get_action(obs_t.copy(), opp_action)
obs_t, a_t, r_t, done, info, cur_score = env.step(a_t)

-------------------------------------
opponent_action: 13
-------------------------------------
-------------------------------------
player_action: 127
-------------------------------------
. . . . . . . . . . . . . 
X . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . O . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
127


In [94]:
env.set_player_color(1)
opp_action = np.random.randint(0,170)
obs_t, a_t, r_t, done, info, cur_score = env.step(opp_action)

env.set_player_color(2)
a_t = a.get_action(obs_t.copy(), opp_action)
obs_t, a_t, r_t, done, info, cur_score = env.step(a_t)


136
-------------------------------------
opponent_action: 136
-------------------------------------
-------------------------------------
player_action: 29
-------------------------------------
. . . . . . . . . . . . . 
X . . . . . . . . . . . . 
. . . O . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . O . . 
. . . . . . X . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
29


In [103]:
env_mank = GoEnv("black", board_size=13)
s = env_mank.reset();
opp_action=-1

In [104]:
a1 = AlphaGoPlayer_MinMax(s,1,1,"utils_1/nov8/net36.model", False)

In [105]:
a2 = AlphaGoPlayer(s, 1, 2, "utils_1/nov8/net36.model", False)

In [106]:
s = env_mank.get_history()

for _ in range(200):
    action = np.nonzero(env_mank.get_legal_moves())[0]
    action = np.random.choice(action)
    action = a1.get_action(s,opp_action)
    env_mank.step(action)
    if (env_mank.isComplete()):
        print(action, opp_action)
        print(env_mank.curr_score())
        print(env_mank.get_winner())
        break   

    opp_action = np.nonzero(env_mank.get_legal_moves())[0]
    opp_action = np.random.choice(opp_action)
    opp_action = a2.get_action(s,action)
    env_mank.step(opp_action)
    print(action, opp_action)
    print(env_mank.curr_score())
    env_mank.print_board()
    if (env_mank.isComplete()):
        print(env_mank.get_winner())
        break

58 46
7.5
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . O . . . . . 
. . . . . . X . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
79 56
7.5
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . O . . . . . 
. . . . O . X . . . . . . 
. . . . . . . . . . . . . 
. X . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
52 86
7.5
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . O . . . . . 
X . . . O . X . . . . . . 
. . . . . . . . . . . . . 
. X . . . . . . O . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . . 
. . . . . . . . . . . . .

. . X . . . . X . . . . . 
. . . . . . . . X . . . X 
. . . . O . . . . . O . . 
. . . . . . O O X O X . . 
X . . O O O X . . X . . . 
X . . . . O . . . O . . X 
. X O . O . . . O O . . . 
. . . O . O O X X X . . . 
. X O . O . O O . . X . . 
X . . . X . X O X . X . . 
. . . . . O . O . . . . . 
. . . . X . . . . . . . . 
. . . . . . X . . . . . . 
27 134
8.5
. . X . . . . X . . . . . 
. . . . . . . . X . . . X 
. X . . O . . . . . O . . 
. . . . . . O O X O X . . 
X . . O O O X . . X . . . 
X . . . . O . . . O . . X 
. X O . O . . . O O . . . 
. . . O . O O X X X . . . 
. X O . O . O O . . X . . 
X . . . X . X O X . X . . 
. . . . O O . O . . . . . 
. . . . X . . . . . . . . 
. . . . . . X . . . . . . 
84 85
8.5
. . X . . . . X . . . . . 
. . . . . . . . X . . . X 
. X . . O . . . . . O . . 
. . . . . . O O X O X . . 
X . . O O O X . . X . . . 
X . . . . O . . . O . . X 
. X O . O . X O O O . . . 
. . . O . O O X X X . . . 
. X O . O . O O . . X . . 
X . . . X . X O X . X . . 
. . . .

3 8
10.5
. . X X . . X X O O O . . 
. . . . . . X X X . . X X 
. X . . O . . X . . O . . 
X . . . . X O O X O X O . 
X . X O O O X . X X . O . 
X O X O . O . . . O X O X 
X X O . O O X O O O X O . 
. . O O . O O X X X O O . 
. X O . O O O O X . X . X 
X . O . X . X O X X X . . 
. . O O O O O O . O . X . 
X X . X X O X O . O . . . 
. . . . . . X O . . . . . 
122 53
10.5
. . X X . . X X O O O . . 
. . . . . . X X X . . X X 
. X . . O . . X . . O . . 
X . . . . X O O X O X O . 
X O X O O O X . X X . O . 
X O X O . O . . . O X O X 
X X O . O O X O O O X O . 
. . O O . O O X X X O O . 
. X O . O O O O X . X . X 
X . O . X X X O X X X . . 
. . O O O O O O . O . X . 
X X . X X O X O . O . . . 
. . . . . . X O . . . . . 
131 120
13.5
. . X X . . X X O O O . . 
. . . . . . X X X . . X X 
. X . . O . . X . . O . . 
X . . . . X O O X O X O . 
X O X O O O X . X X . O . 
X O X O . O . . . O X O X 
X X O . O O X O O O X O . 
. . O O . O O X X X O O . 
. X O . O O O O X . X . X 
X . O O . . . O X X X

158 13
21.5
X X X X O . X X O O O . . 
O . . O . . X X X . O X X 
O X X X O X . X X . O . . 
X O . X . X O O X O X O . 
X O X O O O X O X X . O . 
X O X O . O X O . O X O X 
X X O . O O . O O O X O O 
. . O O . O O X X X O O X 
O X O . O O O O X . X X X 
X X O O X . O O X X X X . 
. X O O O O O O O O O X O 
X X . X X O . O O O O O O 
. . X X X O . O . X O . O 
41 169
17.5
X X X X O . X X O O O . . 
O . . O . . X X X . O X X 
O X X X O X . X X . O . . 
X . X X . X O O X O X O . 
X . X O O O X O X X . O . 
X . X O . O X O . O X O X 
X X O . O O . O O O X O O 
. . O O . O O X X X O O X 
O X O . O O O O X . X X X 
X X O O X . O O X X X X . 
. X O O O O O O O O O X O 
X X . X X O . O O O O O O 
. . X X X O . O . X O . O 
35 38
15.5
X X X X O . X X O O O . . 
O . . O . . X X X . O X X 
O X X X O X . X X X O . O 
X . X X . X O O X . X O . 
X . X O O O X O X X . O . 
X . X O . O X O . O X O X 
X X O . O O . O O O X O O 
. . O O . O O X X X O O X 
O X O . O O O O X . X X X 
X X O O X . O O X X 

102 121
1.5
X X X X O . X X O O O . X 
X X . O . . X X X X O X X 
. X X X O X . X X X O X O 
X X X X O X O O X X X . . 
X X X O O O . O X X X . X 
X X X O O O O O O O X X X 
X X O O O O O O O O X X . 
X O O O O O O X X X X X X 
X X O O O O O O X . X X X 
X X O O O O O O X X X X O 
X X O O O O O O O O O X O 
X X X X X O O O O O O O O 
. X X X X O O O O O O O O 
18 51
1.5
X X X X O . X X O O O . X 
X X . O . X X X X X O X X 
. X X X O X . X X X O X O 
X X X X O X O O X X X . O 
X X X O O O . O X X X . X 
X X X O O O O O O O X X X 
X X O O O O O O O O X X . 
X O O O O O O X X X X X X 
X X O O O O O O X . X X X 
X X O O O O O O X X X X O 
X X O O O O O O O O O X O 
X X X X X O O O O O O O O 
. X X X X O O O O O O O O 
26 63
2.5
X X X X O . X X O O O . X 
X X . O . X X X X X O X X 
X X X X O X . X X X O X O 
X X X X O X O O X X X . O 
X X X O O O . O X X X O X 
X X X O O O O O O O X X X 
X X O O O O O O O O X X . 
X O O O O O O X X X X X X 
X X O O O O O O X . X X X 
X X O O O O O O X X X X

8 169
25.5
. X . . O X X X X X . X X 
X X O O . X X X X X X X X 
. . X O O X O X X X X X X 
. O X O O X O O X X X X X 
O O O O O O . O X X X . X 
. O O O O O O O O O X X X 
O O O O O O O O O O X X . 
. O O O O O O X X X X X X 
O O O O O O O O X . X X X 
O O O O O O O O X X X X O 
X X O O O O O O O O O X O 
X . X X O O O O O O O O O 
X X . X . O O O O O O O O 
158 169
25.5
. X . . O X X X X X . X X 
X X O O . X X X X X X X X 
. . X O O X O X X X X X X 
. O X O O X O O X X X X X 
O O O O O O . O X X X . X 
. O O O O O O O O O X X X 
O O O O O O O O O O X X . 
. O O O O O O X X X X X X 
O O O O O O O O X . X X X 
O O O O O O O O X X X X O 
X X O O O O O O O O O X O 
X . X X O O O O O O O O O 
X X X X . O O O O O O O O 
0 169
25.5
X X . . O X X X X X . X X 
X X O O . X X X X X X X X 
. . X O O X O X X X X X X 
. O X O O X O O X X X X X 
O O O O O O . O X X X . X 
. O O O O O O O O O X X X 
O O O O O O O O O O X X . 
. O O O O O O X X X X X X 
O O O O O O O O X . X X X 
O O O O O O O O X X 

In [50]:
vic = 0
matches = 10
for jk in tqdm(range(matches)):
    env_mank = GoEnv("black", board_size=13)
    s = env_mank.reset();
    opp_action=-1
    a1 = AlphaGoPlayer_MinMax(s,1,1,"utils_1/nov8/net36.model", False)
    a2 = AlphaGoPlayer_MinMax(s, 1, 2, "utils_1/nov8/net36.model", False)
    for i in range(400):
        s = env_mank.get_history()
        action = np.nonzero(env_mank.get_legal_moves())[0]
        action = np.random.choice(action)
#         action = a1.get_action(s,opp_action)
        env_mank.step(action)
        if (env_mank.isComplete()):
#             print(action, opp_action)
#             print(env_mank.curr_score())
#             print(env_mank.get_winner())
            break   

        opp_action = np.nonzero(env_mank.get_legal_moves())[0]
        opp_action = np.random.choice(opp_action)
        opp_action = a2.get_action(s,action)
        env_mank.step(opp_action)
    #     print(action, opp_action)
    #     print(env_mank.curr_score())
    #     env_mank.print_board()
        if (env_mank.isComplete()):
#             print(env_mank.curr_score())
#             print(env_mank.get_winner())
            break
    if(env_mank.curr_score()<0):
        vic += 1
#     print("Loop ended")
print(vic/matches)

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


0.8
