In [1]:
from MCTS import MCTS
from sogo.SogoGame import SogoGame, display as display_board
import numpy as np
from sogo.keras.NNet import NNetWrapper as NNet
from Timer import Timer

class Config:
    """Plain container for the search and checkpoint settings used in this notebook.

    Every attribute is set to a fixed default in ``__init__``; later cells
    adjust individual values (for example ``num_mcts_sims``) on the shared
    instance.
    """

    def __init__(self):
        # Search budget and game-length limits.
        self.num_sampling_moves = 30
        self.max_moves = 512  # for chess and shogi, 722 for Go.
        self.num_mcts_sims = 514

        # Exploration noise applied to the root prior.
        # A fraction of 0.0 presumably disables the noise -- confirm in MCTS.
        self.root_dirichlet_alpha = 0.3  # for chess, 0.03 for Go and 0.15 for shogi.
        self.root_exploration_fraction = 0.0

        # Constants of the UCB formula.
        self.pb_c_base = 19652
        self.pb_c_init = 1.25

        # Checkpoint to load: (folder, file name).
        self.load_model = True
        checkpoint_folder, checkpoint_file = './save/', 'new_mcts_15.pth.tar'
        self.load_folder_file = (checkpoint_folder, checkpoint_file)

# Shared settings object; later cells change config.num_mcts_sims in place.
config = Config()
# Game instance used by every helper below (the boards printed in this
# notebook are 4x4x4, so 4 is presumably the board edge length -- confirm).
game = SogoGame(4)

def setup_board(plays, do_print=True):
    """Replay a sequence of actions from the initial position.

    Player 1 moves first; each action in `plays` is applied with
    `game.next_state`, which also supplies the next player to move.

    Args:
        plays: iterable of actions to apply in order.
        do_print: when true, display the resulting board.

    Returns:
        Tuple `(board, player)`: the final board and the player to move next.
    """
    current, to_move = game.init_board(), 1
    for action in plays:
        current, to_move = game.next_state(current, to_move, action)
    if do_print:
        display_board(current)
    return current, to_move


Using TensorFlow backend.


In [2]:
from NeuralNet import NeuralNet
from Game import Game

class NN(NeuralNet):
    """Stand-in network that knows nothing about the game.

    It lets MCTS run on search alone: every action gets the same prior and
    every position is valued at 0.
    """

    def __init__(self, game: Game):
        self.game = game

    def predict(self, board):
        """Return `(policy, value)`; `board` is ignored.

        The policy is a uniform vector of length `game.action_size()` and the
        value is always 0.
        """
        n_actions = self.game.action_size()
        return np.ones(n_actions) / n_actions, 0


# Search driven by the uniform-prior stub network, used by the
# "MCTS only" helpers below as a baseline without a trained network.
dummy_nn = NN(game)
dummy_mcts = MCTS(game, dummy_nn, config)

def mcts_only_pred(plays, root=None):
    """Search the position reached by `plays` with the uniform-prior MCTS.

    The board and the player to move are passed to `dummy_mcts` as they are
    (no canonical conversion here). The search is timed and the resulting
    action probabilities are printed.

    Args:
        plays: actions replayed from the initial position.
        root: optional existing tree node handed to `get_action_prob`.

    Returns:
        The root node returned by `dummy_mcts.get_action_prob`.
    """
    board, to_move = setup_board(plays, False)
    with Timer("MCTS only prediction"):
        policy, searched_root = dummy_mcts.get_action_prob(board, to_move, root)
    policy_text = np.array2string(
        np.array(policy),
        precision=2,
        separator=',',
        suppress_small=True,
        max_line_width=200,
    )
    print(f"Probs: {policy_text}")
    return searched_root
          
def mcts_c_only_pred(plays, root=None):
    """Search the canonical form of the position with the uniform-prior MCTS.

    Unlike `mcts_only_pred`, the board is first converted with
    `game.canonical_board` and no player argument is passed to the search.

    Args:
        plays: actions replayed from the initial position.
        root: optional existing tree node handed to `get_action_prob`.

    Returns:
        The root node returned by `dummy_mcts.get_action_prob`.
    """
    board, to_move = setup_board(plays, False)
    canonical = game.canonical_board(board, to_move)

    with Timer("MCTS canonical only prediction"):
        policy, searched_root = dummy_mcts.get_action_prob(canonical, root=root)
    policy_text = np.array2string(
        np.array(policy),
        precision=2,
        separator=',',
        suppress_small=True,
        max_line_width=200,
    )
    print(f"Probs: {policy_text}")
    return searched_root

In [3]:
# Trained network wrapper; weights come from the (folder, file) pair stored
# in config.load_folder_file.
nn = NNet(game)
nn.load_checkpoint(*(config.load_folder_file))

def nn_pred(plays):
    """Print the raw network policy and value for the position after `plays`.

    The position is converted to its canonical form before it is passed to
    `nn.predict`; the prediction is timed with `Timer`. Nothing is returned.
    """
    position, to_move = setup_board(plays, False)
    canonical = game.canonical_board(position, to_move)
    with Timer("NN prediction"):
        policy, value = nn.predict(canonical)
    policy_text = np.array2string(
        policy,
        precision=2,
        separator=',',
        suppress_small=True,
        max_line_width=200,
    )
    print(f"Probs: {policy_text} Value: {value[0]:0.2f}")

# Search guided by the trained network; shares `config` with dummy_mcts.
mcts = MCTS(game, nn, config)

def mcts_player(x, player):
    """Choose an action for `player` on board `x` with the network-guided MCTS.

    The board is converted to canonical form before the search.

    Returns:
        Tuple `(action, root)`: the index of the highest search probability
        and the root node returned by the search.
    """
    policy, tree_root = mcts.get_action_prob(game.canonical_board(x, player))
    best_action = np.argmax(policy)
    return best_action, tree_root

def test_mcts(plays, expected):
    """Let the network-guided MCTS move in a test position and report the result.

    The position after `plays` is displayed, `mcts_player` picks an action
    (timed with `Timer`), and the board after that action is displayed too.
    A one-line verdict is printed: "correct play X" when the action equals
    `expected`, otherwise "incorrect play X instead of Y".

    Args:
        plays: actions replayed from the initial position.
        expected: the action regarded as correct for this position.

    Returns:
        The search root returned by `mcts_player`.
    """
    board, player = setup_board(plays)
    with Timer() as t:
        play, root = mcts_player(board, player)
    # Only the resulting board is needed; the next player is discarded.
    new_board, _ = game.next_state(board, player, play)
    display_board(new_board)
    # "instead of" only makes sense when the chosen action differs.
    if play == expected:
        verdict = f"correct play {play}"
    else:
        verdict = f"incorrect play {play} instead of {expected}"
    print(f"MCTS made {verdict} in  {t.interval:0.3f} sec")
    return root
          
def mcts_pred(plays, root=None):
    """Search the position reached by `plays` with the network-guided MCTS.

    The board and the player to move are passed to `mcts` as they are (no
    canonical conversion here). The search is timed and the resulting action
    probabilities are printed.

    Args:
        plays: actions replayed from the initial position.
        root: optional existing tree node handed to `get_action_prob`.

    Returns:
        The root node returned by `mcts.get_action_prob`.
    """
    board, to_move = setup_board(plays, False)
    with Timer("MCTS prediction"):
        policy, searched_root = mcts.get_action_prob(board, to_move, root)
    policy_text = np.array2string(
        np.array(policy),
        precision=2,
        separator=',',
        suppress_small=True,
        max_line_width=200,
    )
    print(f"Probs: {policy_text}")
    return searched_root

def mcts_c_pred(plays, root=None):
    """Search the canonical form of the position with the network-guided MCTS.

    Unlike `mcts_pred`, the board is first converted with
    `game.canonical_board` and no player argument is passed to the search.

    Args:
        plays: actions replayed from the initial position.
        root: optional existing tree node handed to `get_action_prob`.

    Returns:
        The root node returned by `mcts.get_action_prob`.
    """
    board, to_move = setup_board(plays, False)
    canonical = game.canonical_board(board, to_move)
    with Timer("MCTS canonical prediction"):
        policy, searched_root = mcts.get_action_prob(canonical, root=root)
    policy_text = np.array2string(
        np.array(policy),
        precision=2,
        separator=',',
        suppress_small=True,
        max_line_width=200,
    )
    print(f"Probs: {policy_text}")
    return searched_root

          

### State that requires defense against a 2-step win as player 2

In [4]:
# Very small budget (3 simulations) so the resulting trees are short enough
# to print in full in the next cell.
config.num_mcts_sims = 3
# `expected` is the action regarded as correct here; it is not used in this cell.
play, expected = [0,7,3,11,5,15,13], 1
nn_pred(play)
r1 = mcts_pred(play) # differs from test_mcts, direct canonical vs this
rc = mcts_c_pred(play)

NN prediction took 0.632 sec
Probs: [0.02,0.19,0.17,0.18,0.02,0.03,0.03,0.02,0.04,0.05,0.05,0.05,0.1 ,0.04,0.02,0.01] Value: 0.51
MCTS prediction took 0.248 sec
Probs: [0.,1.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.]
MCTS canonical prediction took 0.071 sec
Probs: [0.,1.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.]


In [5]:
# Dump both trees from the previous cell: r1 (board + player search) and
# rc (canonical-board search).
r1.print()
rc.print()

 -> v:-0.66 n:3 p:1.0 tp:1
  1 -> v:1.0 n:2 p:0.19 tp:-1
    9 -> v:-0.99 n:1 p:0.28 tp:1
 -> v:-0.66 n:3 p:1.0 tp:-1
  1 -> v:1.0 n:2 p:0.19 tp:1
    9 -> v:-0.99 n:1 p:0.28 tp:-1


In [7]:
# Same position with a large budget. In the recorded run each of the two
# searches took more than four minutes.
config.num_mcts_sims = 10000
# `expected` is only referenced by the commented-out test_mcts call below.
play, expected = [0,7,3,11,5,15,13], 1
nn_pred(play)
r1 = mcts_pred(play) # differs from test_mcts, direct canonical vs this
rc = mcts_c_pred(play)
# r = test_mcts(play,expected); # differs

NN prediction took 0.022 sec
Probs: [0.02,0.19,0.17,0.18,0.02,0.03,0.03,0.02,0.04,0.05,0.05,0.05,0.1 ,0.04,0.02,0.01] Value: 0.51
MCTS prediction took 257.355 sec
Probs: [0.08,0.71,0.06,0.01,0.  ,0.01,0.  ,0.01,0.01,0.04,0.04,0.01,0.01,0.01,0.  ,0.02]
MCTS canonical prediction took 271.486 sec
Probs: [0.08,0.71,0.06,0.01,0.  ,0.01,0.  ,0.01,0.01,0.04,0.04,0.01,0.01,0.01,0.  ,0.02]


In [18]:
def pref_path(r):
    """Follow the most-visited child from node `r` down to a leaf.

    At each node the child with the highest `visit_count` is chosen; ties are
    broken in favour of the larger action key. The walk stops at the first
    node without children.

    Returns:
        List of the action keys along that path (empty if `r` has no children).
    """
    path = []
    node = r
    while len(node.children) != 0:
        action, node = max(
            node.children.items(),
            key=lambda item: (item[1].visit_count, item[0]),
        )
        path.append(action)
    return path

In [41]:
# Print selected parts of both search trees side by side.
# The loop variable is named `node_filter` so the builtin `filter` is not
# shadowed for the rest of the kernel session.
# NOTE(review): the meaning of each filter shape (list / nested dict / None)
# is defined by Node.print in the MCTS module -- confirm there.
for node_filter in [
        list(range(16)),
        {1:{a: [2,9] for a in range(16)}},
        {9:list(range(16))},
        {10:None}
    ]:
    for d,r in [('canonical', rc), ('usual',r1)]:
        print(d, node_filter)
        r.print(filter=node_filter)

# Second view of both trees; the positional argument 2 is presumably a depth
# limit -- confirm against Node.print.
print('canonical')
rc.print(2)
print('usual')
r1.print(2)

canoical [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
 -> v:-0.28 n:1000 p:1.0 tp:-1
  0 -> v:0.032 n:6 p:0.024 tp:1
  1 -> v:-0.0068 n:41 p:0.19 tp:1
  2 -> v:-0.14 n:26 p:0.17 tp:1
  3 -> v:-0.19 n:26 p:0.18 tp:1
  4 -> v:0.25 n:35 p:0.016 tp:1
  5 -> v:0.21 n:15 p:0.027 tp:1
  6 -> v:0.26 n:39 p:0.027 tp:1
  7 -> v:0.24 n:28 p:0.017 tp:1
  8 -> v:0.17 n:17 p:0.036 tp:1
  9 -> v:0.37 n:630 p:0.052 tp:1
  10 -> v:-0.049 n:8 p:0.045 tp:1
  11 -> v:-0.031 n:10 p:0.052 tp:1
  12 -> v:0.21 n:56 p:0.097 tp:1
  13 -> v:-0.21 n:5 p:0.04 tp:1
  14 -> v:0.27 n:45 p:0.023 tp:1
  15 -> v:0.21 n:12 p:0.012 tp:1
usual [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
 -> v:0.33 n:1000 p:1.0 tp:1
  0 -> v:-0.26 n:233 p:0.024 tp:-1
  1 -> v:-0.61 n:41 p:0.19 tp:-1
  2 -> v:-0.44 n:72 p:0.17 tp:-1
  3 -> v:-0.62 n:39 p:0.18 tp:-1
  4 -> v:-0.66 n:4 p:0.016 tp:-1
  5 -> v:-0.56 n:7 p:0.027 tp:-1
  6 -> v:-0.79 n:4 p:0.027 tp:-1
  7 -> v:-0.31 n:26 p:0.017 tp:-1
  8 -> v:-0.38 n:25 p:0.0

In [38]:
# Most-visited line of play in each tree (usual first, then canonical).
print(pref_path(r1))
print(pref_path(rc))

[10, 9, 1, 1, 10, 15, 15]
[9, 10, 14, 1, 2, 2, 12, 2, 15]


In [39]:
# Replay the test position plus the first four moves of the preferred path
# from r1, and display the resulting board.
play = [0,7,3,11,5,15,13]
ext = pref_path(r1)
# Paths noted from earlier runs, kept for reference:
# [1, 8, 11, 15, 11, 11, 4, 6, 2, 4, 3, 15] # 4k usual
   
#[9, 10, 7, 1, 2, 1, 7, 7, 14, 13, 15] #4k canonical
setup_board(play+ext[:4]);


z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- X - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- X - O |
2 |- X O O |
1 |- X - O |
0 |X O - X |
z0+--------+
   0 1 2 3 
--


In [6]:
# Show the test position from both viewpoints: the board as played and its
# canonical form for the player to move.
play, expected = [0,7,3,11,5,15,13], 1
b,p = setup_board(play,False)
print('Player:',p)
print('Usual board:')
display_board(b)
print('Canonical board:')
# In the recorded output the player is -1 and the canonical board shows the
# same stones with X and O swapped.
display_board(game.canonical_board(b,p))

Player: -1
Usual board:
z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- X - O |
2 |- - - O |
1 |- X - O |
0 |X - - X |
z0+--------+
   0 1 2 3 
--
Canonical board:
z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- O - X |
2 |- - - X |
1 |- O - X |
0 |O - - O |
z0+--------+
   0 1 2 3 
--


In [10]:
# Search the same position without the trained network (uniform prior),
# using a much larger budget. This overwrites r1 and rc from earlier cells.
config.num_mcts_sims = 40000
play = [0,7,3,11,5,15,13]

r1 = mcts_only_pred(play) # differs from test_mcts, direct canonical vs this
rc = mcts_c_only_pred(play)

MCTS only prediction took 97.231 sec
Probs: [0.05,0.24,0.05,0.05,0.05,0.05,0.05,0.05,0.05,0.09,0.05,0.05,0.05,0.05,0.05,0.05]
MCTS canonical only prediction took 95.508 sec
Probs: [0.05,0.24,0.05,0.05,0.05,0.05,0.05,0.05,0.05,0.09,0.05,0.05,0.05,0.05,0.05,0.05]


# ###### Stuff

In [9]:
# Compare raw network, network-guided search and search-only output on the
# position after 0,1,0,1,0.
config.num_mcts_sims = 1000
play = [0,1,0,1,0]
nn_pred(play)
mcts_pred(play)
# Last expression of the cell: its return value (the search root) is echoed.
mcts_only_pred(play)

z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |O - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |O X - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |O X - - |
z0+--------+
   0 1 2 3 
--
NN prediction took 0.699 sec
Probs: [0.9 ,0.  ,0.  ,0.01,0.  ,0.01,0.01,0.  ,0.01,0.  ,0.01,0.01,0.  ,0.  ,0.02,0.01] Value: -0.64
MCTS prediction took 22.706 sec
Probs: [0.95,0.02,0.  ,0.  ,0.  ,0.  ,0.  ,0.  ,0.  ,0.  ,0.  ,0.  ,0.  ,0.  ,0.  ,0.  ]
MCTS only prediction took 2.476 sec
Probs: [0.68,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02]


<MCTS.Node at 0x126a62f28>

### Required defense against a 1-step win

In [10]:
# Same three-way comparison on the position after 2,1,2,1,2, with a budget
# of 100 simulations.
config.num_mcts_sims = 100
play = [2,1,2,1,2]
nn_pred(play)
mcts_pred(play)
# Last expression of the cell: its return value (the search root) is echoed.
mcts_only_pred(play)

z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - O - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- X O - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- X O - |
z0+--------+
   0 1 2 3 
--
NN prediction took 0.040 sec
Probs: [0.01,0.  ,0.91,0.01,0.01,0.01,0.  ,0.  ,0.  ,0.01,0.  ,0.01,0.01,0.01,0.  ,0.01] Value: 0.81
MCTS prediction took 2.208 sec
Probs: [0.01,0.  ,0.85,0.02,0.02,0.01,0.01,0.  ,0.  ,0.02,0.  ,0.01,0.02,0.02,0.  ,0.01]
MCTS only prediction took 0.215 sec
Probs: [0.06,0.06,0.06,0.06,0.06,0.06,0.06,0.06,0.06,0.06,0.06,0.06,0.06,0.07,0.07,0.07]


<MCTS.Node at 0x12a7abfd0>

In [24]:
# Replace the weights of the shared `nn` with another checkpoint. `mcts` was
# built from the same `nn` object, so later searches presumably use these
# weights as well -- confirm that MCTS keeps a reference rather than a copy.
nn.load_checkpoint("temp/","best.pth.tar")

In [18]:
# Repeat the 2,1,2,1,2 comparison and keep both roots: r1 (network-guided)
# and r0 (search only). A later cell reuses their subtrees.
config.num_mcts_sims = 100
play = [2,1,2,1,2]
nn_pred(play)
r1 = mcts_pred(play)
r0 = mcts_only_pred(play)

z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - O - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- X O - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- X O - |
z0+--------+
   0 1 2 3 
--
NN prediction took 0.013 sec
Probs: [0.04,0.05,0.44,0.02,0.04,0.04,0.04,0.02,0.02,0.04,0.05,0.04,0.03,0.05,0.05,0.04] Value: 0.90
MCTS prediction took 1.370 sec
Probs: [0.01,0.15,0.41,0.01,0.05,0.01,0.01,0.01,0.03,0.04,0.11,0.03,0.01,0.05,0.01,0.06]
MCTS only prediction took 0.109 sec
Probs: [0.06,0.06,0.06,0.06,0.06,0.06,0.06,0.06,0.06,0.06,0.06,0.06,0.07,0.07,0.07,0.07]


In [19]:
# Set the budget through `num_mcts_sims`, the attribute that Config defines
# and the other cells use; `numMCTSSims` would only create an unused attribute.
config.num_mcts_sims = 100
play = [2,1,2,1,2,2]
nn_pred(play)
# Continue both searches from the subtree below action 2 of the roots kept
# in r1 and r0, instead of starting from an empty tree.
mcts_pred(play,r1.children[2])
mcts_only_pred(play,r0.children[2])

z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - X - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - O - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- X O - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- X O - |
z0+--------+
   0 1 2 3 
--
NN prediction took 0.024 sec
Probs: [0.06,0.06,0.  ,0.07,0.06,0.11,0.08,0.06,0.05,0.06,0.06,0.05,0.08,0.04,0.11,0.06] Value: -0.27
MCTS prediction took 0.839 sec
Probs: [0.2 ,0.02,0.  ,0.02,0.02,0.07,0.09,0.06,0.04,0.02,0.09,0.01,0.02,0.01,0.3 ,0.02]
MCTS only prediction took 0.133 sec
Probs: [0.06,0.06,0.  ,0.06,0.06,0.06,0.06,0.07,0.07,0.07,0.07,0.07,0.07,0.07,0.07,0.07]


<MCTS.Node at 0x1d1b72a3278>

In [20]:
# Set the budget through `num_mcts_sims`, the attribute that Config defines
# and the other cells use; `numMCTSSims` would only create an unused attribute.
config.num_mcts_sims = 100
play = [2,1,2,1,2,2]
nn_pred(play)
# Same position searched from a fresh tree (no root passed in).
mcts_pred(play)
mcts_only_pred(play)

z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - X - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - O - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- X O - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- X O - |
z0+--------+
   0 1 2 3 
--
NN prediction took 0.013 sec
Probs: [0.06,0.06,0.  ,0.07,0.06,0.11,0.08,0.06,0.05,0.06,0.06,0.05,0.08,0.04,0.11,0.06] Value: -0.27
MCTS prediction took 1.953 sec
Probs: [0.22,0.02,0.  ,0.02,0.02,0.07,0.09,0.06,0.04,0.02,0.09,0.01,0.02,0.01,0.29,0.02]
MCTS only prediction took 0.111 sec
Probs: [0.06,0.06,0.  ,0.06,0.06,0.06,0.07,0.07,0.07,0.07,0.07,0.07,0.07,0.07,0.07,0.07]


<MCTS.Node at 0x1d1b721e3c8>

In [21]:
# Visit counts of the children directly below the root kept in r1.
[x.visit_count for x in r1.children.values()]

[1, 15, 100, 1, 5, 1, 1, 1, 3, 4, 11, 3, 1, 5, 1, 6]

### States that require defense against a 1-step win

In [20]:
# Action 0 has been played three times by the first player; the expected
# reply is 0. The trailing `;` suppresses the echoed return value.
test_mcts([0,8,0,8,0],0);

z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |O - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |X - - - |
1 |- - - - |
0 |O - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- - - - |
2 |X - - - |
1 |- - - - |
0 |O - - - |
z0+--------+
   0 1 2 3 
--
z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |X - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |O - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |X - - - |
1 |- - - - |
0 |O - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- - - - |
2 |X - - - |
1 |- - - - |
0 |O - - - |
z0+--------+
   0 1 2 3 
--
MCTS made correct play in  11.332 sec


### State that requires defense against a 2-step win as player 1

In [22]:
# Same stones as the player-2 defense position, with an extra first move (12)
# so that the roles are swapped; the expected reply is action 1.
test_mcts([12,0,7,3,11,5,15,13],1);

z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |O X - O |
2 |- - - O |
1 |- X - O |
0 |X - - X |
z0+--------+
   0 1 2 3 
--
z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |O X - O |
2 |- O - O |
1 |- X - O |
0 |X - - X |
z0+--------+
   0 1 2 3 
--
MCTS made incorrect play in  11.554 sec


### State with a 2-step win as player 1

In [24]:
# The defense position after the opponent played 12 instead of defending;
# the expected move is action 1.
test_mcts([0,7,3,11,5,15,13,12],1);

z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |X O - X |
2 |- - - X |
1 |- O - X |
0 |O - - O |
z0+--------+
   0 1 2 3 
--
z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |X O - X |
2 |- - - X |
1 |- O - X |
0 |O O - O |
z0+--------+
   0 1 2 3 
--
MCTS made correct play in  5.456 sec


### State with a 2-step win as player 2

In [23]:
# Same idea with an extra first move (14) so that the side to move is the
# second player; the expected move is action 1.
test_mcts([14,0,7,3,11,5,15,13,12],1);

z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |O X O O |
2 |- - - O |
1 |- X - O |
0 |X - - X |
z0+--------+
   0 1 2 3 
--
z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |O X O O |
2 |- - - O |
1 |- X - O |
0 |X X - X |
z0+--------+
   0 1 2 3 
--
MCTS made correct play in  5.127 sec
