# Testing various models by playing match against each other

In [2]:
# Imports
from utils_1.goenv import GoEnv,_format_state, create_env_copy
from utils_1.fnet import NeuralTrainer, AlphaNeural
import numpy as np
from tqdm.auto import tqdm

## Simple Policy Player

In [3]:
class AlphaGoPlayer():
    def __init__(self, player_color, path = "utils_1/nov8/net30.model", play_random=False, debug=True, **useless_args):
        # setup parameters
        self.player_color = player_color
        self.random_player = play_random
        
        # init environment
        if player_color==1:
            self.env = GoEnv(player_color="black", board_size=13)
        else:
            self.env = GoEnv(player_color="white", board_size=13)
        self.env.reset()
        self.debug=debug
        
        # load neural network
        self.model = NeuralTrainer(res_blocks=10, board_size=13)
        self.model.load_model(path)
        
    def get_action(self, cur_state, opponent_action):
        if opponent_action>=0:
            self.env.step(opponent_action)
            if self.debug:
                print('-------------------------------------')
                print('opponent_action: ' + str(opponent_action))
                print('-------------------------------------')
        
        legal_moves = self.env.get_legal_moves()
        if self.random_player:
                actions = np.nonzero(self.env.get_legal_moves())[0]
                action = np.random.choice(actions)
        else:
            policy, value = self.model.predict(self.env.get_history())
            policy = policy*legal_moves
            action = np.argmax(policy)
            
        self.env.step(action)
        
        if self.debug:
            print('-------------------------------------')
            print('player_action: ' + str(action))
            print('-------------------------------------')
            self.env.print_board()
        
        return action

# 1DepthPlayer (in testing phase)

In [4]:
class MaxPValPlayer:
    def __init__(self, board_size, player_color, max_depth=3, path=None):
        self.board_size = board_size
        self.max_depth = max_depth
        self.player_color = player_color
        
        # load neural network
        self.model = NeuralTrainer(res_blocks=10, board_size=13)
        self.model.load_model(path)
        
        
    def search(self, state):
        # Which player to play in this state
        player_turn = state.player_turn()
        
        stack = state.get_history()
        p, v = self.model.predict(stack)
        
        valid_moves = state.get_legal_moves()
        
        children = []; actions = []
        for a in np.nonzero(valid_moves)[0]:
            try:
                state_copy = create_env_copy(state)
                state_copy.step(a)
                children.append(state_copy)
                actions.append(a)
            except:
                continue
                
        stack_list = [state.get_history() for state in children]
        pi_list, v_list = self.model.predict(stack_list)
        
#         print (len(actions), type(actions))
#         print (len(p), type(p))
#         print (len(v_list), type(v_list))
        
        # The values are w.r.t to the children - for parent it's the negative
        v_list = -1 * v_list
        v_list = np.exp(v_list)/np.sum(np.exp(v_list))
        
        values = np.zeros(len(p))
        for v, a in zip(v_list, actions):
            values[a] = v
            
#         print (len(values), type(values))
#         print (len(actions), type(actions))
        
        probs = p * values
        
#         print (len(probs))
#         print ('-------------------------------------------------')
        return np.argmax(probs)


In [5]:
class Depth1Player():
    def __init__(self, player_color, path = "utils_1/nov8/net30.model", debug=True, **useless_args):
        # setup parameters
        self.player_color = player_color
        
        self.player = MaxPValPlayer(board_size=13, player_color=player_color, max_depth=1, path=path)
        
        # init environment
        if player_color==1:
            self.env = GoEnv(player_color="black", board_size=13)
        else:
            self.env = GoEnv(player_color="white", board_size=13)
        self.env.reset()
        
        
    def get_action(self, cur_state, opponent_action):
        if opponent_action>=0:
            self.env.step(opponent_action)
        
        action = self.player.search(self.env)
        self.env.step(action)
        
        return action

# Playing Matches

In [6]:
def play_matches(a1_configs, a2_configs, num_matches=20,
                 player_colors={'black': 'a1', 'white': 'a2'}, tournament_no=1):
    # Play matches between a1 and a2
    print ('Starting Tournament #%d' % tournament_no)
    print (player_colors)
    
    wins = {
        'a1': 0,
        'a2': 0
    }

    for match_no in tqdm(range(num_matches)):
        state = GoEnv("black", board_size=13)
        state.reset()
        
        if 'depth1' in a1_configs:
            a1 = Depth1Player(**a1_configs)
        else:
            a1 = AlphaGoPlayer(**a1_configs)
        
        if 'depth1' in a2_configs:
            a2 = Depth1Player(**a2_configs)
        else:
            a2 = AlphaGoPlayer(**a2_configs)

        a1_action = -1
        a2_action = -1

        for i in range(400):
            if(i==399):
                print("Limit reached")

            a1_action = a1.get_action('null', a2_action)
            _, _, done = state.step(a1_action)
            if (done or state.isComplete()):
                break   

            a2_action = a2.get_action('null', a1_action)
            _, _, done = state.step(a2_action)
            if (done or state.isComplete()):
                break

        winner = -1 * state.get_winner()
        if winner == 1:
            # It's black's victory
            wins[player_colors['black']] += 1
        else:
            # It's white victory
            wins[player_colors['white']] += 1

#         if ((match_no+1) / num_matches >= 0.5):
        if ((match_no+1) % 10 == 0):
            # Log after every 10 matches
            print (wins)

    print(wins)
    print ('-----------------------------------------------------------------------------')

In [7]:
# Match 1
player_colors = {'black': 'a1', 'white': 'a2'}
a1_configs = {'player_color': 1, 'path': "utils_nov8/nov8/net37.model", 'debug': False}
a2_configs = {'player_color': 2, 'path': "utils_nov8/nov8/net55.model", 'debug': False}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=1)


Starting Tournament #1
{'black': 'a1', 'white': 'a2'}


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

KeyboardInterrupt: 

In [None]:
# Match 2
player_colors = {'black': 'a2', 'white': 'a1'}
a1_configs = {'player_color': 2, 'path': "utils_nov8/nov8/net37.model", 'debug': False}
a2_configs = {'player_color': 1, 'path': "utils_nov8/nov8/net55.model", 'debug': False}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=1)

In [None]:
# Match 1=3
player_colors = {'black': 'a1', 'white': 'a2'}
a1_configs = {'player_color': 1, 'path': "utils_nov8/nov8/net37.model", 'debug': False}
a2_configs = {'player_color': 2, 'path': "utils_1/nov9-correct/net11.model", 'debug': False}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=3)


In [None]:
# Match 4
player_colors = {'black': 'a2', 'white': 'a1'}
a1_configs = {'player_color': 2, 'path': "utils_nov8/nov8/net37.model", 'debug': False}
a2_configs = {'player_color': 1, 'path': "utils_1/nov9-correct/net11.model", 'debug': False}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=4)


In [None]:
# Match 5
player_colors = {'black': 'a1', 'white': 'a2'}
a1_configs = {'player_color': 1, 'path': "utils_nov8/nov8/net37.model", 'debug': False}
a2_configs = {'player_color': 2, 'path': "utils_1/nov9-correct/net11.model", 'debug': False, 'play_random': True}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=5)


In [None]:
# Match 6
player_colors = {'black': 'a2', 'white': 'a1'}
a1_configs = {'player_color': 2, 'path': "utils_nov8/nov8/net37.model", 'debug': False}
a2_configs = {'player_color': 1, 'path': "utils_1/nov9-correct/net11.model", 'debug': False, 'play_random': True}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=6)


In [None]:
# Match 7
player_colors = {'black': 'a1', 'white': 'a2'}
a1_configs = {'player_color': 1, 'path': "utils_nov8/nov8/net37.model", 'debug': False, 'play_random': True}
a2_configs = {'player_color': 2, 'path': "utils_1/nov9-correct/net11.model", 'debug': False}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=5)


In [None]:
# Match 7
player_colors = {'black': 'a2', 'white': 'a1'}
a1_configs = {'player_color': 2, 'path': "utils_nov8/nov8/net37.model", 'debug': False, 'play_random': True}
a2_configs = {'player_color': 1, 'path': "utils_1/nov9-correct/net11.model", 'debug': False}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=5)


In [None]:
player_colors = {'black': 'a1', 'white': 'a2'}
a1_configs = {'player_color': 1, 'path': "utils_nov8/nov8/net55.model", 'debug': False}
a2_configs = {'player_color': 2, 'path': "utils_nov8/nov10/net1.model", 'debug': False}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=1)


In [None]:
player_colors = {'black': 'a2', 'white': 'a1'}
a1_configs = {'player_color': 2, 'path': "utils_nov8/nov8/net55.model", 'debug': False}
a2_configs = {'player_color': 1, 'path': "utils_nov8/nov10/net1.model", 'debug': False}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=1)


In [None]:
player_colors = {'black': 'a2', 'white': 'a1'}
a1_configs = {'player_color': 2, 'path': "utils_nov8/nov8/net55.model", 'debug': False, 'play_random': True}
a2_configs = {'player_color': 1, 'path': "utils_nov8/nov10/net1.model", 'debug': False}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=1)


In [18]:
player_colors = {'black': 'a1', 'white': 'a2'}
a1_configs = {'player_color': 1, 'path': "utils_nov8/nov8/net55.model", 'debug': False, 'play_random': True}
a2_configs = {'player_color': 2, 'path': "utils_nov8/nov10/net1.model", 'debug': False}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=1)


Starting Tournament #1
{'black': 'a1', 'white': 'a2'}


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

{'a1': 2, 'a2': 8}
{'a1': 3, 'a2': 17}

{'a1': 3, 'a2': 17}
-----------------------------------------------------------------------------


In [6]:
player_colors = {'black': 'a1', 'white': 'a2'}
a1_configs = {'player_color': 1, 'path': "utils_nov8/nov8/net55.model", 'debug': False, 'play_random': True}
a2_configs = {'player_color': 2, 'path': "utils_nov8/nov10/net1.model", 'debug': False, 'depth1': True}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=1, num_matches=2)


Starting Tournament #1
{'black': 'a1', 'white': 'a2'}


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

{'a1': 0, 'a2': 1}
{'a1': 1, 'a2': 1}

{'a1': 1, 'a2': 1}
-----------------------------------------------------------------------------


In [8]:
# Match 7
player_colors = {'black': 'a2', 'white': 'a1'}
a1_configs = {'player_color': 2, 'path': "utils_nov8/nov8/net37.model", 'debug': False, 'play_random': True}
a2_configs = {'player_color': 1, 'path': "utils_1/nov9-correct/net11.model", 'debug': False, 'depth1': True}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=5, num_matches=6)


Starting Tournament #5
{'black': 'a2', 'white': 'a1'}


HBox(children=(IntProgress(value=0, max=6), HTML(value='')))

{'a1': 1, 'a2': 2}
{'a1': 2, 'a2': 2}
{'a1': 3, 'a2': 2}
{'a1': 4, 'a2': 2}

{'a1': 4, 'a2': 2}
-----------------------------------------------------------------------------


In [10]:
# Match 7
player_colors = {'black': 'a2', 'white': 'a1'}
a1_configs = {'player_color': 2, 'path': "utils_nov8/nov8/net37.model", 'debug': False}
a2_configs = {'player_color': 1, 'path': "utils_1/nov9-correct/net11.model", 'debug': False, 'depth1': True}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=5, num_matches=10)


Starting Tournament #5
{'black': 'a2', 'white': 'a1'}


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

{'a1': 5, 'a2': 0}
{'a1': 6, 'a2': 0}
{'a1': 7, 'a2': 0}
{'a1': 8, 'a2': 0}
{'a1': 9, 'a2': 0}
{'a1': 10, 'a2': 0}

{'a1': 10, 'a2': 0}
-----------------------------------------------------------------------------


In [9]:
# Match 7
player_colors = {'black': 'a2', 'white': 'a1'}
a1_configs = {'player_color': 2, 'path': "utils_nov8/nov8/net37.model", 'debug': False}
a2_configs = {'player_color': 1, 'path': "utils_nov10/nov10/net4.model", 'debug': False}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=5, num_matches=20)


Starting Tournament #5
{'black': 'a2', 'white': 'a1'}


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

{'a1': 10, 'a2': 0}
{'a1': 20, 'a2': 0}

{'a1': 20, 'a2': 0}
-----------------------------------------------------------------------------


In [10]:
# Match 7
player_colors = {'black': 'a1', 'white': 'a2'}
a1_configs = {'player_color': 1, 'path': "utils_nov8/nov8/net37.model", 'debug': False}
a2_configs = {'player_color': 2, 'path': "utils_nov10/nov10/net4.model", 'debug': False}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=5, num_matches=20)


Starting Tournament #5
{'black': 'a1', 'white': 'a2'}


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

{'a1': 10, 'a2': 0}
{'a1': 20, 'a2': 0}

{'a1': 20, 'a2': 0}
-----------------------------------------------------------------------------


In [8]:
# Match 7
player_colors = {'black': 'a2', 'white': 'a1'}
a1_configs = {'player_color': 2, 'path': "utils_nov8/nov8/net37.model", 'debug': False, 'play_random': True}
a2_configs = {'player_color': 1, 'path': "utils_nov10/nov10/net8.model", 'debug': False}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=5, num_matches=20)


Starting Tournament #5
{'black': 'a2', 'white': 'a1'}


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

{'a1': 8, 'a2': 2}
{'a1': 16, 'a2': 4}

{'a1': 16, 'a2': 4}
-----------------------------------------------------------------------------


In [9]:
# Match 7
player_colors = {'black': 'a1', 'white': 'a2'}
a1_configs = {'player_color': 1, 'path': "utils_nov8/nov8/net37.model", 'debug': False, 'play_random': True}
a2_configs = {'player_color': 2, 'path': "utils_nov10/nov10/net8.model", 'debug': False}
play_matches(a1_configs, a2_configs, player_colors=player_colors, tournament_no=5, num_matches=20)


Starting Tournament #5
{'black': 'a1', 'white': 'a2'}


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

{'a1': 8, 'a2': 2}
{'a1': 16, 'a2': 4}

{'a1': 16, 'a2': 4}
-----------------------------------------------------------------------------
