In [3]:
from MCTS import MCTS
from sogo.SogoGame import SogoGame, display as display_board
import numpy as np
from NeuralNet import NeuralNet
from Game import Game
from Timer import Timer

# Game instance shared by every cell below (the dummy network, the MCTS object
# and the board helpers all use it).
# NOTE(review): the argument 4 is presumably the board edge length, since the
# rendered boards are four 4x4 layers — confirm against SogoGame.
g = SogoGame(4)

class Config:
    """Hyper-parameters handed to the MCTS instance created in this notebook.

    All values are plain instance attributes, so a cell can override one
    (for example ``config.num_mcts_sims``) before running a search.
    """

    def __init__(self):
        # Cap on the number of moves: 512 is the chess/shogi value, Go uses 722.
        self.max_moves = 512
        # Number of MCTS simulations per search.
        self.num_mcts_sims = 2_000

        # Root prior exploration noise.
        # Alpha of 0.3 is the chess value; Go uses 0.03 and shogi 0.15.
        self.root_dirichlet_alpha = 0.3
        # NOTE(review): a fraction of 0.0 presumably switches the root noise
        # off entirely — confirm against the MCTS implementation.
        self.root_exploration_fraction = 0.0

        # Constants of the UCB formula.
        self.pb_c_base = 19_652
        self.pb_c_init = 1.25


class NN(NeuralNet):
    """Stand-in network that carries no knowledge of the game.

    It allows the search to be exercised without a trained model: every
    action gets the same prior and every position is valued at 0.
    """

    def __init__(self, game: Game):
        # Only the game is stored; it supplies the size of the action space.
        self.game = game

    def predict(self, board):
        """Return ``(policy, value)``; ``board`` itself is never inspected.

        ``policy`` is an array of ``game.action_size()`` equal entries that
        sum to 1, and ``value`` is the constant 0.
        """
        n_actions = self.game.action_size()
        uniform_policy = np.ones(n_actions) / n_actions
        return uniform_policy, 0

# Default hyper-parameters; later cells change num_mcts_sims on this same object.
config = Config()

# Uniform-prior stand-in network and the single MCTS instance that
# mcts_player uses for every search in this notebook.
nn = NN(g)
mcts1 = MCTS(g, nn, config)
def mcts_player(x, player):
    """Choose a play for ``player`` on board ``x`` using the shared ``mcts1``.

    The board is first converted to its canonical form for ``player`` and
    then handed to the search.

    Returns ``(action, root)``: the index of the largest entry in the policy
    produced by the search, and the root object the search returned.
    """
    canonical = g.canonical_board(x, player)
    policy, search_root = mcts1.get_action_prob(canonical)
    best_action = np.argmax(policy)
    return best_action, search_root

In [4]:
def setup_board(plays):
    """Replay ``plays`` from the initial board, display the result, return it.

    Player 1 makes the first play; after each play the board and the player
    to move are whatever ``g.next_state`` returns.

    Returns ``(board, player)``: the position reached and the player to move.
    """
    position, to_move = g.init_board(), 1
    for action in plays:
        position, to_move = g.next_state(position, to_move, action)
    display_board(position)
    return position, to_move

In [5]:
def test_mcts(plays, expected):
    """Run one MCTS search on the position reached by ``plays`` and report it.

    The starting position is displayed, the search is timed, the chosen play
    is applied and the resulting board displayed. A final line states whether
    the play equals ``expected`` and reports the timer's ``interval``.

    Returns the root object handed back by ``mcts_player``.
    """
    start_board, to_move = setup_board(plays)
    # Only the search itself is inside the timed region.
    with Timer() as t:
        chosen, search_root = mcts_player(start_board, to_move)
    # The player returned by next_state is not needed here.
    after_board, _ = g.next_state(start_board, to_move, chosen)
    display_board(after_board)
    verdict = 'correct' if chosen == expected else 'incorrect'
    print(f"MCTS made {verdict} play in  {t.interval:0.3f} sec")
    return search_root

### State with easy win

In [6]:
# Player 1 puts all four of its stones on cell 0; player 2 answers on 8, 1 and 3.
b, p = setup_board(plays=[0, 8, 0, 1, 0, 3, 0])

z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |X - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |X - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |X - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- - - - |
2 |O - - - |
1 |- - - - |
0 |X O - O |
z0+--------+
   0 1 2 3 
--


In [7]:
# Terminal value of the position built in the previous cell, queried for `p`,
# the player returned by setup_board.
# NOTE(review): presumably the value is from `p`'s point of view, so the -1
# shown below would mean the player to move has lost — confirm against SogoGame.
g.terminal_value(b,p)

-1

In [8]:
# Three stones each on cells 0 and 8 with player 1 to move; the expected play
# is cell 0. The returned root is kept in `r` for inspection.
r = test_mcts(plays=[0, 8, 0, 8, 0, 8], expected=0)


z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |O - - - |
1 |- - - - |
0 |X - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |O - - - |
1 |- - - - |
0 |X - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- - - - |
2 |O - - - |
1 |- - - - |
0 |X - - - |
z0+--------+
   0 1 2 3 
--
z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |X - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |O - - - |
1 |- - - - |
0 |X - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |O - - - |
1 |- - - - |
0 |X - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- - - - |
2 |O - - - |
1 |- - - - |
0 |X - - - |
z0+--------+
   0 1 2 3 
--
MCTS made correct play in  4.158 sec


In [5]:
# Dump the search tree stored in `r` (the root returned by test_mcts).
# NOTE(review): judging by the output, the argument 2 limits the dump to the
# root and its direct children — confirm against the node class.
r.print(2)

 -> v:-0.0035 n:2000 p:1.0 tp:-1
  0 -> v:-1.0 n:7 p:0.062 tp:1
  1 -> v:0.0075 n:133 p:0.062 tp:1
  2 -> v:0.0075 n:133 p:0.062 tp:1
  3 -> v:0.0075 n:134 p:0.062 tp:1
  4 -> v:0.0075 n:134 p:0.062 tp:1
  5 -> v:0.0075 n:134 p:0.062 tp:1
  6 -> v:0.0075 n:134 p:0.062 tp:1
  7 -> v:0.0075 n:134 p:0.062 tp:1
  8 -> v:0.0 n:118 p:0.062 tp:1
  9 -> v:0.0075 n:134 p:0.062 tp:1
  10 -> v:0.0075 n:134 p:0.062 tp:1
  11 -> v:0.0075 n:134 p:0.062 tp:1
  12 -> v:0.0075 n:134 p:0.062 tp:1
  13 -> v:0.0075 n:134 p:0.062 tp:1
  14 -> v:0.0075 n:134 p:0.062 tp:1
  15 -> v:0.0075 n:134 p:0.062 tp:1


### States that require defense against a 1-step win

In [22]:
# Player 1 has three stones on cell 0 and player 2 is to move; the expected
# reply is cell 0. The trailing ';' keeps the returned root from being echoed.
test_mcts(plays=[0, 8, 0, 8, 0], expected=0);


z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |O - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |X - - - |
1 |- - - - |
0 |O - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- - - - |
2 |X - - - |
1 |- - - - |
0 |O - - - |
z0+--------+
   0 1 2 3 
--
z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |O - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |X - - - |
1 |- - - - |
0 |O - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- - - - |
2 |X - - - |
1 |X - - - |
0 |O - - - |
z0+--------+
   0 1 2 3 
--
MCTS made incorrect play in  0.933 sec


In [7]:
# Line of play taken from 40k runs or so.
b, p = setup_board(plays=[0, 7, 3, 11, 5, 15, 13, 1, 0, 9])
# The last expression echoes the terminal value of that position.
g.terminal_value(b, p)


z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |O - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- O - X |
2 |- X - X |
1 |- O - X |
0 |O X - O |
z0+--------+
   0 1 2 3 
--


0

### State that requires defense against a 2-step win as player 2

In [8]:
# Change the simulation budget on the shared config before searching.
config.num_mcts_sims = 1_000
# Seven plays made, so player 2 is to move; the expected reply is cell 1.
test_mcts(plays=[0, 7, 3, 11, 5, 15, 13], expected=1);


z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- O - X |
2 |- - - X |
1 |- O - X |
0 |O - - O |
z0+--------+
   0 1 2 3 
--
Canonical Board:
z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |- X - O |
2 |- - - O |
1 |- X - O |
0 |X - - X |
z0+--------+
   0 1 2 3 
--
z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+

### State that requires defense against a 2-step win as player 1

In [23]:
# Change the simulation budget on the shared config before searching.
config.num_mcts_sims = 40_000
# Eight plays made, so player 1 is to move; the expected reply is cell 1.
root = test_mcts(plays=[12, 0, 7, 3, 11, 5, 15, 13], expected=1)


z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |O X - O |
2 |- - - O |
1 |- X - O |
0 |X - - X |
z0+--------+
   0 1 2 3 
--
z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - O |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |O X - O |
2 |- - - O |
1 |- X - O |
0 |X - - X |
z0+--------+
   0 1 2 3 
--
MCTS made incorrect play in  42.099 sec


### State with a 2-step win as player 1

In [16]:
config.num_mcts_sims = 4_000  # works with 20k
# Eight plays made, so player 1 is to move; the expected play is cell 1.
root = test_mcts(plays=[0, 7, 3, 11, 5, 15, 13, 12], expected=1)

z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |X O - X |
2 |- - - X |
1 |- O - X |
0 |O - - O |
z0+--------+
   0 1 2 3 
--
z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |X O - X |
2 |- - - X |
1 |- O - X |
0 |O O - O |
z0+--------+
   0 1 2 3 
--
MCTS made correct play in  4.610 sec


### State with a 2-step win as player 2

In [26]:
# Doesn't work as player 2: the search explores 1, 0, 2, which should be a
# win, but values it at 0.
config.num_mcts_sims = 100_000
# Nine plays made, so player 2 is to move; the expected play is cell 1.
root = test_mcts(plays=[14, 0, 7, 3, 11, 5, 15, 13, 12], expected=1)

z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |O X O O |
2 |- - - O |
1 |- X - O |
0 |X - - X |
z0+--------+
   0 1 2 3 
--
z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - X - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |O X O O |
2 |- - - O |
1 |- X - O |
0 |X - - X |
z0+--------+
   0 1 2 3 
--
MCTS made incorrect play in  115.808 sec


In [14]:
# Dump the root, then follow children 1 -> 0 -> 2, printing each node on the
# way down.
# NOTE(review): `root` is whatever the most recently executed test_mcts cell
# assigned; the execution counts are out of order, so confirm which run this is.
root.print(2)
root.children[1].print(2)
root.children[1].children[0].print(2)
root.children[1].children[0].children[2].print(2)

 -> v:-0.51 n:4000 p:0.0 tp:-1
  0 -> v:0.0 n:113 p:0.062 tp:1
  1 -> v:0.89 n:2291 p:0.062 tp:1
  2 -> v:0.0 n:114 p:0.062 tp:1
  3 -> v:0.0 n:114 p:0.062 tp:1
  4 -> v:0.0 n:114 p:0.062 tp:1
  5 -> v:0.0 n:114 p:0.062 tp:1
  6 -> v:0.0 n:114 p:0.062 tp:1
  7 -> v:0.0 n:114 p:0.062 tp:1
  8 -> v:0.0 n:114 p:0.062 tp:1
  9 -> v:0.0 n:114 p:0.062 tp:1
  10 -> v:0.0 n:114 p:0.062 tp:1
  11 -> v:0.0 n:114 p:0.062 tp:1
  12 -> v:0.0 n:114 p:0.062 tp:1
  13 -> v:0.0 n:114 p:0.062 tp:1
  14 -> v:0.0 n:114 p:0.062 tp:1
  15 -> v:0.0 n:114 p:0.062 tp:1
 -> v:0.89 n:2291 p:0.062 tp:1
  0 -> v:-0.89 n:142 p:0.062 tp:-1
  1 -> v:-0.89 n:142 p:0.062 tp:-1
  2 -> v:-0.89 n:149 p:0.062 tp:-1
  3 -> v:-0.89 n:142 p:0.062 tp:-1
  4 -> v:-0.89 n:142 p:0.062 tp:-1
  5 -> v:-0.89 n:142 p:0.062 tp:-1
  6 -> v:-0.89 n:142 p:0.062 tp:-1
  7 -> v:-0.89 n:142 p:0.062 tp:-1
  8 -> v:-0.89 n:142 p:0.062 tp:-1
  9 -> v:-0.89 n:149 p:0.062 tp:-1
  10 -> v:-0.89 n:142 p:0.062 tp:-1
  11 -> v:-0.89 n:142 p:0.062 tp

In [13]:
# The nine-play position searched above, extended by the plays 1, 0, 2.
b, p = setup_board(plays=[14, 0, 7, 3, 11, 5, 15, 13, 12, 1, 0, 2])
# The last expression echoes the terminal value of that position.
g.terminal_value(b, p)

z3+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z3+--------+
   0 1 2 3 
z2+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |- - - - |
z2+--------+
   0 1 2 3 
z1+--------+
3 |- - - - |
2 |- - - - |
1 |- - - - |
0 |O - - - |
z1+--------+
   0 1 2 3 
z0+--------+
3 |O X O O |
2 |- - - O |
1 |- X - O |
0 |X X X X |
z0+--------+
   0 1 2 3 
--


1