In [1]:
from stepPlay import * 
import numpy as np
import copy

In [2]:

def prn_obj(obj):
    """Print each instance attribute of ``obj`` as ``name:value``, one per line.

    Reads ``obj.__dict__`` directly, so only attributes stored on the
    instance itself are listed.
    """
    attribute_lines = []
    for name_and_value in obj.__dict__.items():
        attribute_lines.append('%s:%s' % name_and_value)
    print('\n'.join(attribute_lines))

In [3]:
def softmax(x):
    """Return the softmax of ``x`` computed over all of its elements.

    The maximum of ``x`` is subtracted before exponentiating so that large
    inputs do not overflow; this does not change the result.
    """
    shifted = x - np.max(x)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores)


def relu(X):
    """Element-wise rectified linear unit: negative entries of ``X`` become 0."""
    return np.maximum(X, 0)


def conv_forward(X, W, b, stride=1, padding=1):
    """Forward pass of a 2-D convolution implemented with im2col + one matmul.

    Args:
        X: input array unpacked as (n_x, d_x, h_x, w_x).
        W: filter array unpacked as (n_filters, d_filter, h_filter, w_filter).
        b: bias added per filter (broadcast against the filter axis).
        stride: step between neighbouring receptive fields.
        padding: zero padding applied on each spatial border.

    Returns:
        Array of shape (n_x, n_filters, h_out, w_out).
    """
    n_filters, _, h_filter, w_filter = W.shape
    n_x, _, h_x, w_x = X.shape

    # Theano's conv2d rotates the filters by 180 degrees before applying
    # them; flip both spatial axes here to reproduce that.
    flipped = W[:, :, ::-1, ::-1]

    h_out = int((h_x - h_filter + 2 * padding) / stride + 1)
    w_out = int((w_x - w_filter + 2 * padding) / stride + 1)

    # One column per receptive field, one row per filter.
    patches = im2col_indices(X, h_filter, w_filter,
                             padding=padding, stride=stride)
    kernels = flipped.reshape(n_filters, -1)

    # Transpose so the bias broadcasts along the filter axis, then undo it.
    response = (np.dot(kernels, patches).T + b).T

    # Columns are ordered (row, col, sample); move the sample axis to the front.
    return response.reshape(n_filters, h_out, w_out, n_x).transpose(3, 0, 1, 2)


def fc_forward(X, W, b):
    """Fully connected layer forward pass: ``X @ W`` plus the bias ``b``."""
    return np.dot(X, W) + b


def get_im2col_indices(x_shape, field_height,
                       field_width, padding=1, stride=1):
    """Build the fancy-indexing arrays used by ``im2col_indices``.

    Args:
        x_shape: shape of the (unpadded) input, unpacked as (N, C, H, W).
        field_height: receptive field (filter) height.
        field_width: receptive field (filter) width.
        padding: zero padding applied on each spatial border.
        stride: step between neighbouring receptive fields.

    Returns:
        Tuple ``(k, i, j)`` of integer arrays:
        ``k`` has shape (C * field_height * field_width, 1) and holds the
        channel index of every patch element; ``i`` and ``j`` have shape
        (C * field_height * field_width, out_height * out_width) and hold
        the row / column index into the padded input.

    Raises:
        AssertionError: if the receptive field does not tile the padded
            input exactly along the height or the width.
    """
    # First figure out what the size of the output should be
    N, C, H, W = x_shape
    assert (H + 2 * padding - field_height) % stride == 0, \
        "receptive field height does not tile the padded input"
    # The width check must use field_width; checking field_height here
    # wrongly rejected (or accepted) non-square receptive fields.
    assert (W + 2 * padding - field_width) % stride == 0, \
        "receptive field width does not tile the padded input"
    out_height = int((H + 2 * padding - field_height) / stride + 1)
    out_width = int((W + 2 * padding - field_width) / stride + 1)

    # Row offsets inside one patch (i0) and row origin of each patch (i1).
    i0 = np.repeat(np.arange(field_height), field_width)
    i0 = np.tile(i0, C)
    i1 = stride * np.repeat(np.arange(out_height), out_width)
    # Column offsets inside one patch (j0) and column origin of each patch (j1).
    j0 = np.tile(np.arange(field_width), field_height * C)
    j1 = stride * np.tile(np.arange(out_width), out_height)
    # Broadcast offsets against origins: one row per patch element,
    # one column per output position.
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)

    # Channel index of every patch element, as a column vector.
    k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)

    return (k.astype(int), i.astype(int), j.astype(int))


def im2col_indices(x, field_height, field_width, padding=1, stride=1):
    """Unfold every receptive field of ``x`` into one column (im2col).

    ``x`` is zero-padded on its last two axes, then gathered with the index
    arrays from ``get_im2col_indices``.

    Returns:
        2-D array with ``field_height * field_width * C`` rows, where ``C``
        is ``x.shape[1]``; the remaining elements form the columns.
    """
    # Pad only the two spatial axes.
    pad_spec = ((0, 0), (0, 0), (padding, padding), (padding, padding))
    padded = np.pad(x, pad_spec, mode='constant')

    chan_idx, row_idx, col_idx = get_im2col_indices(
        x.shape, field_height, field_width, padding, stride)

    # Gather all patches at once; result is (N, patch_size, n_positions).
    patches = padded[:, chan_idx, row_idx, col_idx]

    n_channels = x.shape[1]
    patch_size = field_height * field_width * n_channels
    # Put the sample axis last so that it is folded into the columns.
    return patches.transpose(1, 2, 0).reshape(patch_size, -1)

In [4]:
# Game configuration: n-in-a-row target and board size.
n = 5
width, height = 8, 8
model_file = 'best_policy_8_8_5.model'

board = Board(width=width, height=height, n_in_row=n)
game = Game(board)

# ############### human VS AI ###################
# load the trained policy_value_net in either Theano/Lasagne, PyTorch or TensorFlow

# best_policy = PolicyValueNet(width, height, model_file = model_file)
# mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

# load the provided model (trained in Theano/Lasagne) into a MCTS player written in pure numpy
# NOTE(review): `pickle` is not imported explicitly in this notebook; presumably
# it arrives through `from stepPlay import *` -- confirm.
# NOTE(security): pickle.load can execute arbitrary code; only load model
# files that come from a trusted source.
try:
    with open(model_file, 'rb') as model_fh:
        policy_param = pickle.load(model_fh)
except Exception:
    # Retry with encoding='bytes' to support python3. Catching Exception
    # (not a bare except) lets KeyboardInterrupt / SystemExit propagate.
    with open(model_file, 'rb') as model_fh:
        policy_param = pickle.load(model_fh, encoding='bytes')

# Build the policy from the loaded parameters.
best_policy = PolicyValueNetNumpy(width, height, policy_param)

# Both MCTS players are driven by the same policy-value function.
mcts_player1 = MCTSPlayer(best_policy.policy_value_fn,
                          c_puct=5,
                          n_playout=400)  # set larger n_playout for better performance

mcts_player2 = MCTSPlayer(best_policy.policy_value_fn,
                          c_puct=5,
                          n_playout=400)

# uncomment the following line to play with pure MCTS (it's much weaker even with a larger n_playout)
# mcts_player = MCTS_Pure(c_puct=5, n_playout=1000)

# human player, input your move in the format: 2,3
human = Human()

# This notebook lets the two MCTS players play each other.
player1 = mcts_player1
player2 = mcts_player2
start_player = 0
is_shown = 1

# Reset the board and bind each player object to its board-side player id.
game.board.init_board(start_player)
p1, p2 = game.board.players
player1.set_player_ind(p1)
player2.set_player_ind(p2)
players = {p1: player1, p2: player2}

In [5]:
## Compute the move probabilities
# Look up the player object whose turn it is and ask its MCTS for the
# (actions, probabilities) pair on the current board.
current_player = game.board.get_current_player()
player_in_turn = players[current_player]
board=game.board
# temp is passed straight to get_move_probs; the manual re-implementation in
# cell In [6] uses it as a softmax temperature (1.0/temp scaling).
temp=1e-3
# return_prob, sensible_moves and move_probs are assigned here but not used
# by the rest of this cell.
return_prob=0

sensible_moves = board.availables
move_probs = np.zeros(board.width*board.height)
acts, probs = player_in_turn.mcts.get_move_probs(board, temp)
print(acts)
print(probs)

(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)
[0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 5.91871068e-107
 1.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 1.53059365e-139
 2.92327048e-039 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.000

In [6]:
# Compute the move probabilities step by step, driving the MCTS playouts
# directly through the player's private MCTS attributes.
current_player = game.board.get_current_player()
player_in_turn = players[current_player]
board=game.board
temp=1e-3
return_prob=0

sensible_moves = board.availables
print("---------sensible_moves----------")
print(sensible_moves)
move_probs = np.zeros(board.width*board.height)

state=board

# Run the playouts. Each one gets its own deep copy because a playout is
# handed a board it may modify. The loop variable is deliberately NOT called
# `n`: that name holds the n-in-row setting defined in the setup cell.
# NOTE(review): playouts appear to accumulate on the existing search tree --
# the root shows 800 visits in a later cell after two 400-playout rounds.
for _playout_idx in range(player_in_turn.mcts._n_playout):
    state_copy = copy.deepcopy(state)
    player_in_turn.mcts._playout(state_copy)

# calc the move probabilities based on visit counts at the root node
act_visits = [(act, node._n_visits)
              for act, node in player_in_turn.mcts._root._children.items()]
acts, visits = zip(*act_visits)
# The 1e-10 offset keeps log() finite for children with zero visits.
act_probs = softmax(1.0/temp * np.log(np.array(visits) + 1e-10))
print("---------acts----------")
print(acts)
print(visits)
print(act_probs)

# Scatter the visit counts into a flat vector indexed by move number.
move_visits = np.zeros(board.width*board.height)
move_visits[list(acts)] = visits

print(move_visits)

---------sensible_moves----------
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]
---------acts----------
(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)
(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 4, 2, 12, 0, 0, 0, 0, 2, 157, 214, 4, 0, 0, 0, 0, 3, 164, 206, 4, 0, 0, 0, 0, 7, 3, 3, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
[0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.00000000e+000
 0.00000000e+000 0.00000000e+000 0.00000000e+000 0.

In [7]:
# Dump the (action, child node) pairs stored under the MCTS root node.
print( player_in_turn.mcts._root._children.items())

dict_items([(0, <mcts_alphaZero.TreeNode object at 0x0000020875DA32E8>), (1, <mcts_alphaZero.TreeNode object at 0x0000020875DA3320>), (2, <mcts_alphaZero.TreeNode object at 0x0000020875DA3358>), (3, <mcts_alphaZero.TreeNode object at 0x0000020875DA3390>), (4, <mcts_alphaZero.TreeNode object at 0x0000020875DA33C8>), (5, <mcts_alphaZero.TreeNode object at 0x0000020875DA3400>), (6, <mcts_alphaZero.TreeNode object at 0x0000020875DA3438>), (7, <mcts_alphaZero.TreeNode object at 0x0000020875DA3470>), (8, <mcts_alphaZero.TreeNode object at 0x0000020875DA34A8>), (9, <mcts_alphaZero.TreeNode object at 0x0000020875DA34E0>), (10, <mcts_alphaZero.TreeNode object at 0x0000020875DA3518>), (11, <mcts_alphaZero.TreeNode object at 0x0000020875DA3550>), (12, <mcts_alphaZero.TreeNode object at 0x0000020875DA3588>), (13, <mcts_alphaZero.TreeNode object at 0x0000020875DA35C0>), (14, <mcts_alphaZero.TreeNode object at 0x0000020875DA35F8>), (15, <mcts_alphaZero.TreeNode object at 0x0000020875DA3630>), (16, <

In [8]:
def printNode(node, level=0, act=0):
    """Recursively print the part of a search tree visited at least twice.

    Each printed line is indented with one tab per ``level`` and shows the
    action leading to ``node`` together with its ``_n_visits``, ``_Q``,
    ``_u`` and ``_P`` attributes. Nodes with fewer than two visits are
    skipped together with their whole subtree.
    """
    if node._n_visits >= 2:
        indent = '\t' * level
        stats = (indent, act, node._n_visits, node._Q, node._u, node._P)
        print("%s, act=%d,_n_visits=%d ,_Q=%f ,_u=%f ,_P=%f " % stats)

        for child_act, child in node._children.items():
            printNode(child, level + 1, child_act)

# Show the board's attributes, then the visited part of the search tree
# starting from the current player's MCTS root.
prn_obj(game.board)
printNode(player_in_turn.mcts._root)

players:[1, 2]
n_in_row:5
availables:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]
current_player:1
height:8
width:8
last_move:-1
states:{}
, act=0,_n_visits=800 ,_Q=-0.455971 ,_u=0.000000 ,_P=1.000000 
	, act=18,_n_visits=8 ,_Q=-0.185447 ,_u=0.781758 ,_P=0.049782 
		, act=27,_n_visits=3 ,_Q=-0.035337 ,_u=1.092187 ,_P=0.330246 
		, act=36,_n_visits=2 ,_Q=0.365520 ,_u=0.683974 ,_P=0.155111 
	, act=19,_n_visits=4 ,_Q=0.219712 ,_u=0.339472 ,_P=0.012010 
		, act=28,_n_visits=3 ,_Q=-0.214190 ,_u=1.373586 ,_P=0.475824 
			, act=27,_n_visits=2 ,_Q=0.232468 ,_u=1.274235 ,_P=0.360408 
	, act=20,_n_visits=2 ,_Q=0.080553 ,_u=0.501082 ,_P=0.010636 
	, act=21,_n_visits=12 ,_Q=0.058928 ,_u=0.458884 ,_P=0.042209 
		, act=27,_n_visits=2 ,_Q=-0.063028 ,_u=0.487490 ,_P=0.088190 
		, act=28,_n_visits=5 ,_Q=

In [9]:
# Execute one step of the game.
current_player = game.board.get_current_player()
player_in_turn = players[current_player]
# get_action returns the move chosen by the player for the current board.
move = player_in_turn.get_action(game.board)
print("current_player=%d , move=%d " %(current_player,move))
# Apply the move, then dump the board attributes to inspect the new state.
game.board.do_move(move)
prn_obj(game.board)

current_player=1 , move=35 
players:[1, 2]
n_in_row:5
availables:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]
current_player:2
height:8
width:8
last_move:35
states:{35: 1}


In [10]:
# Print the array returned by the board's current_state(); the recorded
# output below shows four 8x8 planes.
print(game.board.current_state())

[[[0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]]]


In [11]:
# Draw the board as text, passing the `player` attribute of both players
# (presumably the ids assigned via set_player_ind -- confirm).
game.graphic(game.board, player1.player, player2.player)

Player 1 with X
Player 2 with O

       0       1       2       3       4       5       6       7

   7   _       _       _       _       _       _       _       _    


   6   _       _       _       _       _       _       _       _    


   5   _       _       _       _       _       _       _       _    


   4   _       _       _       X       _       _       _       _    


   3   _       _       _       _       _       _       _       _    


   2   _       _       _       _       _       _       _       _    


   1   _       _       _       _       _       _       _       _    


   0   _       _       _       _       _       _       _       _    




In [12]:
# Build the network input X from the board state.
# `board` is the alias of game.board created in an earlier cell.
legal_positions = board.availables
current_state = board.current_state()

# Reshape to (batch, 4, board_width, board_height); -1 lets numpy infer the
# batch size (1 in the recorded output).
X = current_state.reshape(-1, 4, best_policy.board_width, best_policy.board_height)
print(X)

[[[[0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]]

  [[0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 1. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]]

  [[0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 1. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]]

  [[0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]]]]


In [13]:
# Walk through the first steps of conv_forward by hand for one layer.
# i selects the parameter pair: params[i] is used as the filter array W and
# params[i+1] as the bias b.
# NOTE(review): a later cell rebinds `i` to an index array, so re-running
# cells out of order changes what params[i] means.
i=0
stride=1
padding=1
    
W = best_policy.params[i]
b =  best_policy.params[i+1]

n_filters, d_filter, h_filter, w_filter = W.shape
print("%d,%d,%d,%d" %(n_filters, d_filter, h_filter, w_filter ))

n_x, d_x, h_x, w_x = X.shape
print("%d,%d,%d,%d" %(n_x, d_x, h_x, w_x ))

# Under Python 3 these are floats (true division); the %d format below is
# what truncates them for display.
h_out = (h_x - h_filter + 2 * padding) / stride + 1
w_out = (w_x - w_filter + 2 * padding) / stride + 1

print("%d,%d"%( h_out, w_out))

# Unfold the input into columns; len(X_col) is the number of rows of the
# resulting 2-D array, and X_col[0] is its first row.
X_col = im2col_indices(X, h_filter, w_filter,
                           padding=padding, stride=stride)
print(len(X_col))    
print(X_col[0])    
#print(W)
#print(b)


32,4,3,3
1,4,8,8
8,8
36
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [14]:
# Print the parameter array selected by the notebook-level index `i`.
# NOTE(review): relies on `i` still being the integer set in the previous
# cell; a later cell rebinds `i` to an array.
print(best_policy.params[i])

[[[[ 3.00282031e-01  7.93916807e-02  1.04310252e-01]
   [-2.32803449e-02  4.39492881e-01 -1.46985695e-01]
   [ 7.46263936e-02  1.61980420e-01  5.00340879e-01]]

  [[-1.50764182e-01  9.13940221e-02 -6.68839067e-02]
   [-1.11774810e-01  2.15447694e-01  3.71383615e-02]
   [ 2.60537509e-02  1.37014072e-02  5.10720573e-02]]

  [[-6.64655939e-02 -8.79024416e-02  9.44483057e-02]
   [-1.87298078e-02 -2.23822589e-03  3.32160555e-02]
   [-3.61888446e-02 -4.40173298e-02 -5.63976282e-05]]

  [[ 5.94474003e-02 -9.88958105e-02 -2.14816071e-02]
   [-2.01959927e-02  2.27627177e-02  1.19268738e-01]
   [ 2.24035811e-02 -2.85499338e-02 -8.97876024e-02]]]


 [[[ 1.74412161e-01 -2.87367523e-01 -3.80795151e-02]
   [-1.11669218e-02 -1.62341457e-03  2.17930842e-02]
   [ 7.55131468e-02  1.05582893e-01 -1.08373843e-01]]

  [[ 1.31837398e-01  9.13798437e-02  6.94752336e-01]
   [-9.68147442e-02  2.46946234e-02  8.13015401e-02]
   [ 1.99509144e-01  8.53026062e-02  8.33786428e-02]]

  [[-2.55347416e-02 -1.09474093e

In [15]:
# Walk through im2col_indices by hand: pad the input, then build the
# gather indices.
x=X
padding=1
stride=1

# Zero-pad only the two spatial axes.
p = padding
x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')
print(X)
print(x_padded )
field_height =  h_filter
field_width  =w_filter
print("field_height= %d,field_width=%d" %(field_height,field_width))

# NOTE(review): this rebinds the notebook-level `i` (previously the integer
# parameter index) to an index array; the following cells rely on the names
# k, i, j, so they are kept.
k, i, j = get_im2col_indices(x.shape, field_height,
                             field_width, padding, stride)
print(k)
# First and last row of the row-index array.
print(len(i))
print(i[0])
print(i[len(i)-1])
# First and last row of the column-index array; the last row is selected
# with len(j), not len(i) as before (same value here, but a copy-paste slip).
print(len(j))
print(j[0])
print(j[len(j)-1])



[[[[0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]]

  [[0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 1. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]]

  [[0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 1. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]]

  [[0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0.]]]]
[[[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
   [0. 0. 0. 0. 0. 0. 0. 0. 

In [16]:
# Gather every receptive field from the padded input in one fancy-indexing
# step, using the k / i / j index arrays built in the previous cell.
cols = x_padded[:, k, i, j]
print(cols)

[[[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]]


In [17]:
# Flatten the gathered patches into the 2-D im2col matrix.
print(cols.shape)
C = x.shape[1]
print(C)    
# NOTE(review): `cols` is rebound to a 2-D array here, so this cell is not
# idempotent -- re-running it without first re-running the gather cell
# would call transpose(1, 2, 0) on a 2-D array.
cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1)
print(cols)

(1, 36, 64)
4
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [21]:
# Shape of a single 2-D slice of the first parameter array
# (index 0 along each of its first two axes).
print(best_policy.params[0][0][0].shape)

(3, 3)


In [24]:
# Print the whole first parameter array (the one used as W in the
# convolution walkthrough when i was 0).
print(best_policy.params[0] )

[[[[ 3.00282031e-01  7.93916807e-02  1.04310252e-01]
   [-2.32803449e-02  4.39492881e-01 -1.46985695e-01]
   [ 7.46263936e-02  1.61980420e-01  5.00340879e-01]]

  [[-1.50764182e-01  9.13940221e-02 -6.68839067e-02]
   [-1.11774810e-01  2.15447694e-01  3.71383615e-02]
   [ 2.60537509e-02  1.37014072e-02  5.10720573e-02]]

  [[-6.64655939e-02 -8.79024416e-02  9.44483057e-02]
   [-1.87298078e-02 -2.23822589e-03  3.32160555e-02]
   [-3.61888446e-02 -4.40173298e-02 -5.63976282e-05]]

  [[ 5.94474003e-02 -9.88958105e-02 -2.14816071e-02]
   [-2.01959927e-02  2.27627177e-02  1.19268738e-01]
   [ 2.24035811e-02 -2.85499338e-02 -8.97876024e-02]]]


 [[[ 1.74412161e-01 -2.87367523e-01 -3.80795151e-02]
   [-1.11669218e-02 -1.62341457e-03  2.17930842e-02]
   [ 7.55131468e-02  1.05582893e-01 -1.08373843e-01]]

  [[ 1.31837398e-01  9.13798437e-02  6.94752336e-01]
   [-9.68147442e-02  2.46946234e-02  8.13015401e-02]
   [ 1.99509144e-01  8.53026062e-02  8.33786428e-02]]

  [[-2.55347416e-02 -1.09474093e

In [25]:
# Printing the board only shows the default object repr (see output below);
# prn_obj gives a readable attribute listing instead.
print(game.board)

<game.Board object at 0x0000020875DA6E10>


In [26]:
# List the board's instance attributes as name:value lines.
prn_obj(game.board)

players:[1, 2]
n_in_row:5
availables:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]
current_player:2
height:8
width:8
last_move:35
states:{35: 1}


In [29]:
# Print the board's current_state() array again, after the move played in
# an earlier cell.
print(game.board.current_state())

[[[0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0.]]]
