In [2]:
from u import *
from expres.src.config import Config

from model import Model
import util

%load_ext autoreload
%autoreload 2

In [21]:
# Main experiment configuration; the Config is rooted at the 'results' path.
# The keyword groups below are organised by parameter name only -- their exact
# semantics are defined by the project code that reads the Config.
util.config = config = Config(
    Path('results').mk(),
    # board / win condition
    board_dim=20, n_win=5,
    # res_* network settings
    res_basic_block=True,
    res_inplanes=[16, 32, 64, 128],
    res_num_blocks=[3, 4, 6, 3],
    # tree search
    c_puct=4, mcts_iterations=1600,
    mcts_eps=0.25,
    mcts_alpha=0.03,  # 0.6 / board_dim
    temp=1, move_temp_decay=25,
    # learning rate / L2
    lr=1e-2, l2_reg=1e-4,
    # training loop and checkpoint cadence
    train_epochs=50000, train_batch=1024, pred_batch=48,
    early_stop=False,
    time_model_update=30.0,
    epoch_model_update=10, epoch_model_save=100, max_save=40,
    # mcts queue / process settings
    max_mcts_queue=200000,
    time_reset_eval=60 * 60,
    min_num_states=20480,
    num_mcts_processes=60,
    # evaluation
    eval_mcts_iterations=5000,
).save(force=True)

In [23]:
# Debug configuration rooted at 'results_debug': same board and network
# settings as the 20x20 run, but with very small search / batch / process
# counts (mcts_iterations=10, pred_batch=2, num_mcts_processes=2, ...).
# Keyword groups are organised by parameter name only.
util.config = config = Config(
    Path('results_debug').mk(),
    # board / win condition
    board_dim=20, n_win=5,
    # res_* network settings
    res_basic_block=True,
    res_inplanes=[16, 32, 64, 128],
    res_num_blocks=[3, 4, 6, 3],
    # tree search
    c_puct=4, mcts_iterations=10,
    mcts_eps=0.25,
    mcts_alpha=0.03,  # 0.6 / board_dim
    temp=1, move_temp_decay=25,
    # learning rate / L2
    lr=1e-2, l2_reg=1e-4,
    # training loop and checkpoint cadence
    train_epochs=50000, train_batch=1024, pred_batch=2,
    early_stop=False,
    time_model_update=30.0,
    epoch_model_update=10, epoch_model_save=50, max_save=20,
    # mcts queue / process settings
    max_mcts_queue=200000,
    time_reset_eval=10,
    min_num_states=2,
    num_mcts_processes=2,
    # evaluation
    eval_mcts_iterations=5000,
).save(force=True)

In [26]:
# Small-board configuration (5x5, four in a row) rooted at 'results_small'.
#
# Like every other configuration cell in this notebook, the new Config is
# bound to BOTH the notebook-level `config` and `util.config`; previously this
# cell only rebound `config`, leaving `util.config` pointing at whichever
# configuration had been created before it.
util.config = config = Config(Path('results_small').mk(),
    board_dim=5,
    n_win=4,

    res_basic_block=True,
    res_inplanes=[16, 32],
    res_num_blocks=[3, 4],

    c_puct=4,
    mcts_iterations=300,
    mcts_eps=0.1,
    mcts_alpha=0.12, # 0.6 / board_dim
    temp=1,
    move_temp_decay=25,

    lr=1e-2,
    l2_reg=1e-4,

    train_epochs=50000,
    train_batch=1024,
    pred_batch=32,
    early_stop=False,
    time_model_update=30.0,
    epoch_model_update=1,
    epoch_model_save=100,
    max_save=40,

    max_mcts_queue=100000,
    time_reset_eval=60 * 60,
    min_num_states=20480,
    num_mcts_processes=48,

    eval_mcts_iterations=5000
).save(force=True)

In [24]:
# Tic-tac-toe configuration (3x3, three in a row) rooted at
# 'results_tic_tac_toe'.
#
# mcts_alpha follows the rule used by the other configurations in this
# notebook (0.6 / board_dim: 0.03 for 20, 0.12 for 5).  The previous value
# here, 0.03, was the 20x20 number carried over unchanged; for board_dim=3
# the rule gives 0.2.
util.config = config = Config(Path('results_tic_tac_toe').mk(),
    board_dim=3,
    n_win=3,

    res_basic_block=True,
    res_inplanes=[16],
    res_num_blocks=[3],

    c_puct=4,
    mcts_iterations=100,
    mcts_eps=0.1,
    mcts_alpha=0.2, # 0.6 / board_dim
    temp=1,
    move_temp_decay=25,

    lr=1e-2,
    l2_reg=1e-4,

    train_epochs=50000,
    train_batch=1024,
    pred_batch=48,
    early_stop=False,
    time_model_update=30.0,
    epoch_model_update=10,
    epoch_model_save=100,
    max_save=40,

    max_mcts_queue=200000,
    time_reset_eval=60 * 60,
    min_num_states=20480,
    num_mcts_processes=60,

    eval_mcts_iterations=5000
).save(force=True)

In [3]:
# Select the device recorded on the active config before the model is built.
config.device = 'cuda:1'

# Build the network first, then hand it the state returned by
# config.load_max_model_state(min_epoch=-1).  `model` ends up bound to
# whatever set_state() returns, exactly as in the one-line chained form.
model = Model(config)
model = model.set_state(config.load_max_model_state(min_epoch=-1))

In [4]:
def ev(state):
    """Evaluate a single position with the notebook-global ``model``.

    ``state`` is wrapped into a one-element batch and passed to
    ``model.fit_batch(..., train=False)`` with autograd disabled.  Whatever
    ``fit_batch`` returns is handed back unchanged.
    """
    single_batch = (np.array([state]),)
    with torch.no_grad():
        return model.fit_batch(single_batch, train=False)

In [5]:
class MCTSNode:
    """One position in the search tree, seen from the side that is about to move.

    ``state`` is a stack of two board planes: ``state[0]`` holds the stones of
    the player to move and ``state[1]`` those of the opponent (``select`` swaps
    the planes whenever it creates a child).  ``value`` is the outcome estimate
    for the player to move, which is why a parent always negates a child's
    value.

    Args:
        state: the two-plane board array for this position.
        value: outcome stored on a terminal node (ignored when ``evaluator``
            is given).
        evaluator: callable ``state -> (value, P)``.  When omitted the node is
            terminal.  NOTE(review): the code assumes ``P`` is a flat array
            with one entry per board cell, matching
            ``state.sum(axis=0).reshape(-1)`` -- confirm against the model.

    Attributes set on non-terminal nodes:
        P: prior returned by the evaluator (never modified by the search).
        N, W: per-move visit counts and accumulated values.
        mask: True for cells that are already occupied on either plane.
        score: current per-move selection score (prior before the first
            visit, the UCB expression afterwards).
        next: children created so far, keyed by move index.
        next_total: number of backups that went through this node.
    """

    def __init__(self, state, value=0, evaluator=None):
        self.state = state
        if evaluator is None:
            self.value = value  # the terminal value
            self.terminal = True
        else:
            self.evaluator = evaluator
            self.value, self.P = evaluator(state)
            self.terminal = False
            self.N = np.zeros_like(self.P)
            self.W = np.zeros_like(self.P)
            # Built-in bool: the np.bool alias was removed in NumPy 1.24.
            self.mask = state.sum(axis=0).reshape(-1).astype(bool)
            # Copy the prior: select() writes -inf into the score array in
            # place, which must not leak into self.P.
            self.score = np.array(self.P)
            self.next = {}
            self.next_total = 0

    def select(self):
        """Run one search step from this node and return the backed-up value.

        Picks the legal move with the highest score.  If that child already
        exists the search recurses into it; otherwise the child is created
        (terminal on a win or a full board, evaluated otherwise).  The visit
        statistics and scores of this node are then updated.

        Returns:
            The value of this step from the point of view of the player to
            move at this node (the terminal value itself for terminal nodes).
        """
        if self.terminal:
            return self.value
        score = self.score
        score[self.mask] = -np.inf  # occupied cells can never be selected
        index = score.argmax()
        if index in self.next:
            new_node = self.next[index]
            value = -new_node.select()
        else:
            move = index_to_move(index)

            # Place the stone, then swap the planes so the child sees the
            # board from the opponent's point of view.
            this_state, opp_state = self.state.copy()
            this_state[move] = 1
            new_state = np.stack([opp_state, this_state])

            if check_win(this_state, move):
                # The mover just won, so the side to move in the child lost.
                new_node = MCTSNode(new_state, value=-1)
            elif new_state.sum() == config.board_dim ** 2:
                # Board is full without a winner.
                new_node = MCTSNode(new_state, value=0)
            else:
                new_node = MCTSNode(new_state, evaluator=self.evaluator)
            self.next[index] = new_node
            value = -new_node.value
        self.N[index] += 1
        self.W[index] += value
        self.next_total += 1
        # Unvisited moves give 0/0; that is expected and mapped to 0 below, so
        # silence the warning instead of emitting it on every backup.
        with np.errstate(divide='ignore', invalid='ignore'):
            mean_value = np.nan_to_num(self.W / self.N)
        self.score = mean_value + config.c_puct * self.P * np.sqrt(self.next_total) / (1 + self.N)  # UCB
        return value

class MCTS:
    """Plays one game by repeated tree search and records training targets.

    Args:
        state: starting two-plane board array (passed to ``MCTSNode``).
        evaluator: callable forwarded to every non-terminal ``MCTSNode``.
    """

    def __init__(self, state, evaluator):
        self.state = state
        self.evaluator = evaluator

    def run(self):
        """Play until a terminal node is reached.

        For every move, ``config.mcts_iterations`` search steps are run from
        the current node, a move is sampled from the visit counts raised to an
        inverse temperature, and the search continues from the chosen child.

        Returns:
            A tuple ``(states, policies, values, indices)`` of arrays with one
            entry per move played:
              * states   -- the position before the move,
              * policies -- the distribution the move was sampled from,
              * values   -- the final result from the point of view of the
                            player to move in that position (float32),
              * indices  -- the sampled move index.
        """
        head = MCTSNode(self.state, evaluator=self.evaluator)
        states = []
        policies = []
        indices = []
        while not head.terminal:
            for _ in range(config.mcts_iterations):
                head.select()
            inv_temp = 1 / config.temp
            if len(states) > config.move_temp_decay:
                inv_temp = np.sqrt(len(states) - config.move_temp_decay)
            # Scale the counts into [0, 1] before exponentiating.  The
            # normalised result is mathematically the same, but large counts
            # raised to a growing exponent can no longer overflow to inf
            # (which would yield a NaN policy), e.g. if N is float32.
            policy = (head.N / head.N.max()) ** inv_temp
            policy /= policy.sum()
            index = np.random.choice(len(policy), p=policy)

            states.append(head.state)
            policies.append(policy)
            indices.append(index)

            head = head.next[index]

        # head.value is the result for the side to move in the terminal
        # position.  That side is also the side to move in states[0] exactly
        # when an even number of moves was played; otherwise the sign flips.
        # (The previous code had this parity test inverted, labelling the
        # player who made the winning move with -1.)
        value = head.value if len(states) % 2 == 0 else -head.value
        values = []
        for _ in states:
            values.append(value)
            value = -value  # players alternate, so the perspective alternates
        return np.array(states), np.array(policies), np.array(values, dtype=np.float32), np.array(indices)

In [None]:
import q  # provides q.d(), the interactive debugging console

# Start from an all-zero two-plane board of the configured size and play one
# game with `ev` as the position evaluator; the cell displays run()'s result.
board_shape = (2, config.board_dim, config.board_dim)
mcts = MCTS(np.zeros(board_shape, dtype=np.float32), ev)
mcts.run()

Python console opened by q.d() in forward


In : p_


tensor([[34971.6016, 38997.4961, 30575.4883,  1828.9645,  8054.6699, 31628.5098,
          2439.4307,  1727.4076, 48544.7383, 23254.5039,  5099.1948, 10516.6670,
         18361.9785, 33203.1641, 32461.6836, 37883.7617, 14901.2432, 40952.2773,
         20274.1680, 16065.7773,  4356.8135,  9946.3672, 10933.2646, 17263.2988,
         17732.8652, 13900.1885, -1175.9673, 19179.5254, 23814.2910, 50947.1992,
          9514.8906, 51763.0898, 24615.8066, 33687.2617,  8682.8760, 43384.2930,
           179.4453, 25002.5820, 28101.4629, 36334.0625, 21709.2617, 26670.4727,
         44625.0977, 29342.4980, 11964.3203,   681.0692, 16647.7559, 24346.8574,
          3595.1174]], device='cuda:1')

In : mask


tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0]], device='cuda:1', dtype=torch.uint8)

In : p


tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], device='cuda:1')

In : c


NameError: name 'c' is not defined

In : continue


SyntaxError: 'continue' not properly in loop (<console>, line 1)