In [1]:
import os
import sys

# Location of the pyhexz sources. Set the PYHEXZ_SRC environment variable to
# point at a different checkout; the default is the path this notebook was
# originally run with.
PYHEXZ_SRC = os.environ.get(
    "PYHEXZ_SRC",
    "/Users/denniswalter/git/github.com/dnswlt/hexz/pyhexz/src",
)
# Avoid piling up duplicate sys.path entries when this cell is re-run.
if PYHEXZ_SRC not in sys.path:
    sys.path.append(PYHEXZ_SRC)

from pyhexz.board import Board
from pyhexz.hexz import NeuralMCTS, HexzNeuralNetwork
from pyhexz.timing import print_perf_stats
from pyhexz import hexc

In [2]:
# Create a HexzNeuralNetwork with default arguments and hand it to a NeuralMCTS searcher.
# NOTE(review): a later cell constructs NeuralMCTS(Board(), model) instead -- confirm which
# constructor signature matches the pyhexz version on sys.path.
model = HexzNeuralNetwork()
m = NeuralMCTS(model)

In [3]:
# Play one game and discard the return value.
# NOTE(review): the recorded run failed with a TypeError because NeuralMCTS.run()
# was called without its 'board' and 'player' arguments.
_ = m.play_game()

TypeError: NeuralMCTS.run() missing 2 required positional arguments: 'board' and 'player'

In [15]:
# Setup for the shuffle micro-benchmark: the integers 0..99 as a Python list (x)
# and as a NumPy array (n), plus an unseeded NumPy random Generator.
import random
import numpy as np
x = list(range(100))
n = np.arange(100)
rng = np.random.default_rng()

In [16]:
%%timeit
# Baseline: in-place shuffle of the Python list with the standard library.
random.shuffle(x)

11.9 µs ± 34 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [18]:
%%timeit
# NumPy variant. The measured time includes converting the list x to an array
# on every iteration, not just the shuffle itself.
n = np.array(x)
rng.shuffle(n)

3.91 µs ± 8.96 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [4]:
# Print the per-method timing table provided by pyhexz.timing.
print_perf_stats()

method                   total_sec      count      ops/s
CBoard.__init__             0.021s      35001  1642468.3
NeuralMCTS.predict         10.324s      10044      972.8
NeuralMCTS.__init__         0.004s          1      235.2
run_find_leaf               4.236s      35000     8263.0
PurePyBoard.next_moves      0.117s      61308   522833.7
NeuralMCTS.run             14.981s      35000     2336.3
CBoard.make_move            0.000s         69   242105.3


In [5]:
# Display the repr of the searcher's root node.
m.root

CNN(self._visit_count=22456, self._move=(1, 5, 6, 5.0), self._player=1, nchildren=0)

In [5]:
# Rebuild the searcher, this time passing a new Board as well, and call run() 1000 times.
model = HexzNeuralNetwork()
m = NeuralMCTS(Board(), model)
for i in range(1000):
    m.run()

In [7]:
%%prun
# Profile 1000 calls of hexc.c_find_leaf on the searcher's board and root node.
for i in range(1000):
    hexc.c_find_leaf(m.board, m.root)

 

# NeuralMCTS the Cython way

In [29]:
import cython
import numpy as np

In [16]:
%load_ext cython

In [27]:
%%cython -a

import cython
import numpy as np

@cython.cclass
class CBoard:
    """Hexz game board backed by a (9, 11, 10) float32 ndarray.

    Channel layout of `b` (p is the player index, 0 or 1):
      0 + 4*p: flags placed by player p
      1 + 4*p: values of normal cells placed by player p
      2 + 4*p: cells blocked for player p
      3 + 4*p: next value player p could place on a cell
      8:       grass cell values

    The board has 11 rows. Even rows have 10 cells and odd rows only 9, so
    column 9 of every odd row is marked as blocked for both players.
    """
    b: np.ndarray
    b_view: cython.float[:,:,:]
    flags_0: cython.int
    flags_1: cython.int
    # Remaining flags per player. It has to be declared here: a cclass has no
    # instance __dict__, and c_make_move decrements board.nflags[player].
    nflags: list
    
    @cython.boundscheck(False)
    def __init__(self, other: 'CBoard' = None):
        """Creates a random start position, or an independent copy of `other`.

        Args:
          other: if given, its array and flag counters are copied and no
            random setup takes place.
        """
        b_view: cython.float[:, :, :]
        if other is not None:
            self.b = other.b.copy()
            # The view must wrap the copied array, not other's buffer.
            b_view = self.b
            self.b_view = b_view
            self.flags_0 = other.flags_0
            self.flags_1 = other.flags_1
            # Copy the list so the two boards do not share flag counters.
            self.nflags = list(other.nflags)
            return
        self.b = np.zeros((9, 11, 10), dtype=np.float32)
        b_view = self.b
        self.b_view = b_view
        self.nflags = [3, 3]  # number of flags remaining per player
        # Even rows have 10 cells, odd rows only 9, so mark the last cell in odd rows as blocked for P1+P2.
        i: cython.size_t = 1
        for i in range(1, 10, 2):
            b_view[2, i, 9] = 1
            b_view[6, i, 9] = 1
        # 2-tuple of valid indices in each slice.
        free_cells = (1 - self.b[2]).nonzero()
        # 15 randomly placed stones, blocked for both players.
        rng = np.random.default_rng()
        stones = rng.choice(np.arange(0, len(free_cells[0])), replace=False, size=15)
        self.b[2, free_cells[0][stones], free_cells[1][stones]] = 1
        self.b[6, free_cells[0][stones], free_cells[1][stones]] = 1
        # Recompute the free cells so grass never lands on a stone.
        free_cells = (1 - self.b[2]).nonzero()
        # 5 grass cells with the values 1..5, also blocked for both players.
        grass = rng.choice(np.arange(0, len(free_cells[0])), replace=False, size=5)
        self.b[8, free_cells[0][grass], free_cells[1][grass]] = [1, 2, 3, 4, 5]
        self.b[2, free_cells[0][grass], free_cells[1][grass]] = 1
        self.b[6, free_cells[0][grass], free_cells[1][grass]] = 1
    
    def b_for(self, player: cython.int):
        """Returns the board array oriented for the given player.

        For player 0 this is the underlying array itself (no copy). For any
        other player a copy is returned in which channels 0-3 and 4-7 are
        swapped; the grass channel 8 is unchanged.
        """
        if player == 0:
            return self.b
        b = self.b.copy()
        # The right-hand side reads from self.b, so the swap is not affected by
        # the order in which the two slices of the copy are written.
        b[0:4], b[4:8] = self.b[4:8], self.b[0:4]
        return b




Content of stderr:
  if (((__pyx_v_c_mode[0]) == 'f')) {
       ~~~~~~~~~~~~~~~~~~~~^~~~~~
  if (((__pyx_v_c_mode[0]) == 'f')) {
      ~                    ^     ~
/Users/denniswalter/.cache/ipython/cython/_cython_magic_638829fae6a419251d925b3523c30c0c633c20d7.c:5713:28: note: use '=' to turn this equality comparison into an assignment
  if (((__pyx_v_c_mode[0]) == 'f')) {
                           ^~
                           =

In [102]:
from __future__ import annotations

from typing import Optional

from cython.cimports.libc.math import sqrt, log


@cython.cclass
class CNN:
    """A node of the neural MCTS search tree.

    Attributes:
      parent: the parent node, or None for the root.
      player: the player (0 or 1) who made `move`.
      move: 4-tuple (typ, r, c, val) with typ = 0 (flag) or 1 (normal).
      wins: accumulated win share, updated by cnn_backpropagate.
      visit_count: number of times the node was visited during backpropagation.
      move_probs: (2, 11, 10) float32 array of move probabilities.
      children: child nodes.
    """
    parent: Optional[CNN]
    player: cython.int
    move: tuple[cython.size_t, cython.size_t, cython.size_t, cython.float]
    wins: cython.float
    visit_count: cython.int
    move_probs: np.ndarray
    children: list[CNN]
    
    def __init__(self, parent: Optional[CNN], player: int, move: tuple[int, int, int, float]):
        self.parent = parent
        self.player = player
        self.move = move
        self.wins = 0.0
        self.visit_count = 0
        self.children = []
        # float32 is required because puct() reads this array through a
        # cython.float[:,:,:] view. Zero-initialised so that puct() never sees
        # uninitialised memory before real probabilities are stored.
        self.move_probs = np.zeros((2, 11, 10), dtype=np.float32)
        
    
    @cython.cfunc
    def puct(self):
        """Returns the PUCT score of this node.

        Computed as q + p * sqrt(parent.visit_count) / (1 + visit_count), where
        q is wins / visit_count (0 for an unvisited node) and p is the entry of
        move_probs at this node's move. Must only be called on a node that has
        a parent.
        """
        typ: cython.size_t = self.move[0]
        r: cython.size_t = self.move[1]
        c: cython.size_t = self.move[2]
        pr: cython.float[:,:,:]  = self.move_probs
        if self.visit_count == 0:
            q = 0.0
        else:
            q = self.wins / self.visit_count
        return q + pr[typ, r, c] * sqrt(self.parent.visit_count) / (1 + self.visit_count)
   
    def move_likelihoods(self, dtype=np.float32):
        """Returns the move likelihoods for all children as a (2, 11, 10) ndarray.

        The ndarray indicates the likelihoods (based on visit count) for flags
        and normal moves. It sums to 1, unless there are no children or none of
        them has been visited yet, in which case all entries are 0.
        """
        # Typed so that the cdef attributes of the children are accessible.
        child: CNN
        p = np.zeros((2, 11, 10), dtype=dtype)
        for child in self.children:
            typ, r, c, _ = child.move
            p[typ, r, c] = child.visit_count
        total = p.sum()
        if total == 0:
            # Nothing to normalise; dividing would yield NaN everywhere.
            return p
        return p / total

    def best_child(self) -> CNN:
        """Returns the best among all children, or None if there are none.

        The best child is the one with the greatest visit count, a common
        choice in the MCTS literature. Ties go to the earliest child.
        """
        # Explicit typed loop instead of max(..., key=lambda ...): a lambda
        # receives an untyped object and cannot read the cdef attribute.
        best: CNN = None
        child: CNN
        for child in self.children:
            if best is None or child.visit_count > best.visit_count:
                best = child
        return best

    
@cython.cfunc
def cnn_backpropagate(node: CNN, result: cython.float):
    """Propagates a game result from `node` up through all of its ancestors.

    Each node on the path gets one more visit. Its wins grow by
    (result + 1) / 2 if the node belongs to player 0 and by (1 - result) / 2
    otherwise. Presumably result lies in [-1, 1] -- confirm against the caller.
    """
    current: CNN = node
    while current:
        current.visit_count += 1
        if current.player != 0:
            current.wins += (1 - result) / 2
        else:
            current.wins += (result + 1) / 2
        current = current.parent



def c_valid_idx(r_c):
    """Tells whether the (row, column) pair r_c lies on the hexz board.

    The board has 11 rows; even rows hold 10 cells and odd rows hold 9.
    """
    row, col = r_c
    cells_in_row = 10 - row % 2
    return 0 <= row < 11 and 0 <= col < cells_in_row


def c_neighbors_map():
    """Builds the neighbor lookup table for every valid board cell.

    Returns:
      A dict keyed by (r, c). Each value is a pair (rows, cols) of NumPy
      arrays holding the row and column indices of the cell's on-board
      neighbors.
    """
    neighbors = {}
    for row in range(11):
        # The columns of the neighbors in the rows above and below depend on
        # the parity of the current row.
        offset = row % 2
        for col in range(10 - offset):
            candidates = (
                (row, col + 1),
                (row - 1, col + offset),
                (row - 1, col - 1 + offset),
                (row, col - 1),
                (row + 1, col - 1 + offset),
                (row + 1, col + offset),
            )
            # Drop candidates that fall off the board.
            on_board = [cell for cell in candidates if c_valid_idx(cell)]
            rows = np.array([cell[0] for cell in on_board])
            cols = np.array([cell[1] for cell in on_board])
            neighbors[(row, col)] = (rows, cols)

    return neighbors


_C_NEIGHBORS = c_neighbors_map()


@cython.cfunc
def c_occupy_grass(board: CBoard, player: cython.int, r: cython.size_t, c: cython.size_t):
    """Occupies the grass cells neighboring (r, c) for player.

    A neighboring grass cell is occupied if its grass value is positive and
    does not exceed the value the player holds on (r, c). The grass marker is
    removed and the grass value is then played on the grass cell itself as a
    normal move, which in turn may occupy further grass cells.

    Expects that the move at (r, c) has already been played.

    Args:
      board: the board to modify in place.
      player: 0 or 1.
      r: row of the cell that was just played.
      c: column of the cell that was just played.
    """
    b: cython.float[:, :, :] = board.b
    i: cython.size_t = 0
    x: cython.size_t = 0
    y: cython.size_t = 0
    
    nx, ny = _C_NEIGHBORS[(r, c)]
    for i in range(len(nx)):
        x = nx[i]
        y = ny[i]
        grass_val = b[8, x, y]
        if grass_val > 0 and grass_val <= b[1 + player*4, r, c]:
            # Occupy: first remove grass
            b[8, x, y] = 0                
            # The rest is exactly like playing a move on the grass cell (x, y).
            # Playing it on (r, c) instead would overwrite the value of the
            # cell that was just played and leave the grass cell unowned.
            c_make_move(board, player, (1, x, y, grass_val))


@cython.cfunc
def c_make_move(board: CBoard, player: int, move: tuple[int, int, int, float]):
    """Makes the given move on the board, modifying it in place.

    Args:
      board: the CBoard to play on.
      player: 0 or 1
      move: a 4-tuple of (typ, r, c, val), where typ = 0 (flag) or 1 (normal)
    Does not check that it is a valid move. Should be called only
    with moves returned from `next_moves`.
    """
    b: cython.float[:, :, :] = board.b
    typ: cython.size_t = move[0]
    r: cython.size_t = move[1]
    c: cython.size_t = move[2]
    val: cython.float = move[3]
    next_val: cython.float = 0
    cur: cython.float = 0
    i: cython.size_t = 0
    nr: cython.size_t = 0
    nc: cython.size_t = 0
    nx: cython.long[:]
    ny: cython.long[:]
    
    b[typ + player*4, r, c] = val
    played_flag = typ == 0
    # Block played cell for both players.
    b[2, r, c] = 1
    b[6, r, c] = 1
    # Set next value to 0 for occupied cell.
    b[3, r, c] = 0
    b[7, r, c] = 0
    nx, ny = _C_NEIGHBORS[(r, c)]
    # Value the player may place next to this cell. If we played a flag, the next value is 1.
    if played_flag:
        next_val = 1
        board.nflags[player] -= 1
    else:
        next_val = val + 1
    if next_val <= 5:
        # Update the next value of every neighbor that is not blocked for the
        # player: set it if the cell has none yet, lower it if next_val is smaller.
        for i in range(len(nx)):
            nr = nx[i]
            nc = ny[i]
            if b[2 + player*4, nr, nc] == 0:
                cur = b[3 + player*4, nr, nc]
                if cur == 0 or cur > next_val:
                    b[3 + player*4, nr, nc] = next_val
    else:
        for i in range(len(nx)):
            # Played a 5: block neighboring cells and clear next value.
            b[2 + player*4, nx[i], ny[i]] = 1
            b[3 + player*4, nx[i], ny[i]] = 0  # Clear next value.
    if not played_flag:
        c_occupy_grass(board, player, r, c)


@cython.cfunc
def c_find_leaf(board: CBoard, n: CNN):
    """Descends from n to a leaf, following the child with the highest PUCT score.

    Every move along the chosen path is played on `board`, so on return the
    board reflects the position of the returned leaf.

    Args:
      board: the board matching the position of n; modified in place.
      n: the node to start the descent from.
    Returns:
      The first node on the path that has no children (n itself if it has none).
    """
    # Typed so that the cfunc method puct() can be called on the children.
    child: CNN
    best: CNN
    best_uct: cython.float
    while n.children:
        best = None
        best_uct = -1
        for child in n.children:
            # Score the child, not the parent: n.puct() is the same for every
            # child and, at the root, dereferences a parent that is None.
            u = child.puct()
            # The `best is None` test guarantees progress even if no score
            # compares greater than the initial best_uct.
            if best is None or u > best_uct:
                best = child
                best_uct = u
        c_make_move(board, best.player, best.move)
        n = best
    return n


@cython.cfunc
def c_foo(x: cython.int) -> cython.int:
    """Returns x multiplied by itself."""
    squared: cython.int = x * x
    return squared

In [101]:
# Smoke test of the cfunc: 3 * 3 should give 9.
c_foo(3)

9

In [95]:
# A node without a parent, for player 0, with move (typ=1, r=1, c=1, val=1.0).
c = CNN(None, 0, (1, 1, 1, 1.0))

In [99]:
# NOTE(review): this passes None as the board and the int 0 where a CNN node is
# expected, which is why the recorded run ends in an AttributeError.
c_find_leaf(None, 0)

AttributeError: 'int' object has no attribute 'children'

In [83]:
# np.empty does not initialise the memory it returns, so the values shown are arbitrary.
np.empty((1,2))

array([[5.e-324, 1.e-323]])