In [3]:
import numpy as np
import pandas as pd
import torch
from matplotlib import pyplot as plt

# Report whether CUDA is usable and which torch/numpy versions this notebook ran with.
print(f"cuda available: {torch.cuda.is_available()}")
print(f"torch version: {torch.__version__}")
print(f"numpy version: {np.__version__}")

cuda available: False
torch version: 2.1.0
numpy version: 1.26.0


# Hexz

A board is represented by a (9, 11, 10) numpy array, i.e. 9 slices of 11 rows by 10 columns. Each 11x10 slice encodes one specific type of piece, obstacle, or value, either as a 0/1 indicator or as a cell value 1-5. The slices are:

* 0: flags by P1
* 1: cell value 1-5 for P1
* 2: cells blocked for P1 (any occupied cell or a cell next to a 5)
* 3: next value for P1 
* 4: flags by P2
* 5: cell value 1-5 for P2
* 6: cells blocked for P2
* 7: next value for P2
* 8: grass cells with value 1-5

An action is specified by a (2, 11, 10) numpy array. The first 11x10 slice represents a flag move,
the second one represents a regular cell move. A flag move must have a single 1 set; a regular move
must have a single value 1-5 set.

In [123]:
def valid_idx(r_c):
    """Tells whether the (row, column) pair r_c addresses a cell on the hexz board.

    The board has 11 rows; even rows hold 10 cells, odd rows hold 9.
    """
    row, col = r_c
    if not 0 <= row < 11:
        return False
    cells_in_row = 10 - row % 2
    return 0 <= col < cells_in_row


def neighbors_map():
    """Builds the lookup table from every valid (r, c) index to its neighbor cells.

    Each value is a 2-tuple (rows, cols) of numpy arrays holding the row and the
    column indices of the neighbors that lie on the board, so it can be used
    directly to index a board slice.
    """
    mapping = {}
    for row in range(11):
        # Odd rows are shifted, which moves the neighbors in the rows above and below.
        offset = row % 2
        for col in range(10 - offset):
            candidates = [
                (row, col + 1),
                (row - 1, col + offset),
                (row - 1, col - 1 + offset),
                (row, col - 1),
                (row + 1, col - 1 + offset),
                (row + 1, col + offset),
            ]
            # Keep only the candidates that are on the board, in their original order.
            on_board = [cell for cell in candidates if valid_idx(cell)]
            neighbor_rows = np.array([cell[0] for cell in on_board])
            neighbor_cols = np.array([cell[1] for cell in on_board])
            mapping[(row, col)] = (neighbor_rows, neighbor_cols)

    return mapping

In [156]:
class Board:
    """Numpy representation of a hexz board.

    The state is held in ``self.b``, a (9, 11, 10) float array: 9 slices of
    11 rows by 10 columns. Players are numbered 0 and 1, and the slices that
    belong to player ``p`` start at index ``4 * p``:

    * 0 / 4: flags of player 0 / 1
    * 1 / 5: cell values of player 0 / 1
    * 2 / 6: cells blocked for player 0 / 1
    * 3 / 7: next values for player 0 / 1
    * 8: grass cells and their values

    ``self.nflags`` holds the number of flags each player has left.
    """

    # Used to quickly get the indices of neighbor cells.
    neighbors = neighbors_map()

    def __init__(self, other=None):
        """Creates a randomly initialized board, or a copy of `other` if it is given."""
        if other is not None:
            self.b = other.b.copy()
            self.nflags = list(other.nflags)
            return
        self.b = np.zeros((9, 11, 10))
        self.nflags = [3, 3]  # number of flags remaining per player
        # Even rows have 10 cells, odd rows only 9, so mark the last cell in odd rows as blocked for P1+P2.
        self.b[2, [1, 3, 5, 7, 9], 9] = 1
        self.b[6, [1, 3, 5, 7, 9], 9] = 1
        # 2-tuple of (rows, cols) of all cells that are not blocked yet.
        free_cells = (1 - self.b[2]).nonzero()
        # 15 randomly placed stones, blocked for both players.
        stones = np.random.choice(np.arange(0, len(free_cells[0])), replace=False, size=15)
        self.b[2, free_cells[0][stones], free_cells[1][stones]] = 1
        self.b[6, free_cells[0][stones], free_cells[1][stones]] = 1
        # Recompute the free cells so that grass never lands on a stone.
        free_cells = (1 - self.b[2]).nonzero()
        # 5 grass cells with the values 1 to 5, also blocked for both players.
        grass = np.random.choice(np.arange(0, len(free_cells[0])), replace=False, size=5)
        self.b[8, free_cells[0][grass], free_cells[1][grass]] = [1, 2, 3, 4, 5]
        self.b[2, free_cells[0][grass], free_cells[1][grass]] = 1
        self.b[6, free_cells[0][grass], free_cells[1][grass]] = 1

    # Helpers to retrieve slices of the board "by name".
    def flags(self, player):
        """Returns the flags slice of `player` (0 or 1)."""
        return self.b[0 + player * 4]

    def values(self, player):
        """Returns the cell values slice of `player` (0 or 1)."""
        return self.b[1 + player * 4]

    def blocked(self, player):
        """Returns the slice of cells blocked for `player` (0 or 1)."""
        return self.b[2 + player * 4]

    def next_values(self, player):
        """Returns the next values slice of `player` (0 or 1)."""
        return self.b[3 + player * 4]

    def grass(self):
        """Returns the grass slice."""
        return self.b[8]

    def quickview(self):
        """Returns a single 11x10 array summarizing both players' pieces.

        Player 0 appears with positive numbers and player 1 with negative
        numbers; a flag counts as 8, any other cell as its value.
        """
        return (self.b[0] * 8) + self.b[1] - (self.b[4] * 8) - self.b[5]

    def score(self):
        """Returns the current score as a 2-tuple."""
        return (self.b[1].sum(), self.b[5].sum())

    def make_move(self, player, move):
        """Makes the given move.

        Does not check that it is a valid move. Should be called only
        with moves returned from `next_moves`.

        Args:
            player: 0 or 1, the player making the move.
            move: a (2, 11, 10) ndarray with a single non-zero element, in
                slice 0 for a flag move or in slice 1 for a normal move.
        """
        if player == 0:
            self.b[0:2] += move
        else:
            self.b[4:6] += move
        # The only non-zero element of the move identifies (kind, row, column).
        idx = np.unravel_index(move.argmax(), move.shape)
        played_flag = idx[0] == 0
        r, c = idx[1], idx[2]
        # Block played cell for both players.
        self.b[2, r, c] = 1
        self.b[6, r, c] = 1
        # Set next value to 0 for occupied cell.
        self.b[3, r, c] = 0
        self.b[7, r, c] = 0
        nx, ny = Board.neighbors[(r, c)]
        # Update next value of neighboring cells. If we played a flag, the next value is 1.
        if played_flag:
            next_val = 1
            self.nflags[player] -= 1
        else:
            next_val = move[idx] + 1
        # Views into the board, so writes below modify self.b directly.
        blocked = self.b[2 + player*4]
        next_vals = self.b[3 + player*4]
        if next_val <= 5:
            # Plain iteration was measured to be faster than a vectorized expression here.
            for nr, nc in zip(nx, ny):
                if blocked[nr, nc] == 0:
                    current = next_vals[nr, nc]
                    # Use next_val if the cell has no next value yet, else keep the smaller one.
                    if current == 0 or current > next_val:
                        next_vals[nr, nc] = next_val
        else:
            # Played a 5: block neighboring cells and clear next value.
            blocked[nx, ny] = 1
            next_vals[nx, ny] = 0

        # Occupy neighboring grass cells.
        if not played_flag:
            self.occupy_grass(player, idx)

    def occupy_grass(self, player, move_idx):
        """Occupies the neighboring grass cells of move_idx (a 3-tuple index into a move) for player.

        Expects that the move has already been played. A neighboring grass cell
        is occupied if its value does not exceed the value of the played cell.
        Occupying a grass cell is itself played as a move on that cell, so
        grass cells next to it may be occupied in turn.
        """
        _, r, c = move_idx
        nx, ny = Board.neighbors[(r, c)]
        for i, j in zip(nx, ny):
            grass_val = self.b[8, i, j]
            if grass_val > 0 and grass_val <= self.b[1 + player*4, r, c]:
                # Occupy: first remove grass
                self.b[8, i, j] = 0
                # the rest is exactly like playing a move on the grass cell itself
                # (not on the cell that was just played).
                grass_move = np.zeros((2, 11, 10))
                grass_move[1, i, j] = grass_val
                self.make_move(player, grass_move)

    def next_moves(self, player):
        """Returns all possible next moves.

        A move is represented as a (2, 11, 10) ndarray. The first slice represents
        flag moves, the second one represents normal moves. A flag move will have exactly
        one element set to 1 in slice 0. A normal move will have exactly one element set to
        1-5 in slice 1.
        """
        moves = []
        # Do we have unoccupied cells and flags left? Then we can place another one.
        if self.nflags[player] > 0:
            # Flag in any cell that is not blocked for the player.
            rs, cs = np.nonzero(self.b[2 + player*4] == 0)
            for r, c in zip(rs, cs):
                m = np.zeros((2, 11, 10))
                m[0, r, c] = 1
                moves.append(m)
        # Collect all cells with a non-zero next value.
        rs, cs = np.nonzero(self.b[3 + player*4])
        for r, c in zip(rs, cs):
            m = np.zeros((2, 11, 10))
            m[1, r, c] = self.b[3 + player*4, r, c]
            moves.append(m)
        return moves


In [162]:
# %%timeit
# Play one complete game with randomly chosen moves, starting with player 0.
b = Board()
player = 0
moves = b.next_moves(player)
num_moves = 0
while moves:
    # Pick one of the available moves at random and play it.
    b.make_move(player, moves[np.random.randint(len(moves))])
    num_moves += 1
    # Hand the turn to the other player.
    player = 1 - player
    moves = b.next_moves(player)
    if not moves:
        # No more moves for the player. See if the other player can continue.
        # If neither player has a move left, the loop ends.
        player = 1 - player
        moves = b.next_moves(player)
print(f"Done after {num_moves} moves. Flags left: {b.nflags}. Score: {b.score()}")
b.quickview()

Done after 80 moves. Flags left: [0, 0]. Score: (95.0, 91.0)


array([[-1.,  4.,  3.,  0.,  8.,  1., -2., -2., -3.,  2.],
       [-8., -1.,  2.,  1.,  1.,  0., -1.,  8.,  1.,  0.],
       [-1., -1., -2.,  2., -2., -1., -8.,  1., -3., -4.],
       [-2., -2.,  3.,  0.,  0., -1.,  0., -2., -3.,  0.],
       [-3., -3., -3.,  4.,  5.,  0.,  0., -1., -2.,  0.],
       [ 4.,  3.,  2.,  0.,  0.,  0., -8., -1., -2.,  0.],
       [ 4.,  3.,  2.,  1.,  0.,  2.,  0.,  0., -2.,  0.],
       [ 3.,  4.,  1.,  8.,  1.,  2., -3., -2., -8.,  0.],
       [ 4.,  3.,  0.,  1.,  1., -5., -4.,  0., -2.,  0.],
       [ 4.,  0.,  2.,  2.,  2.,  0., -5.,  0., -3.,  0.],
       [ 5.,  0.,  3.,  3.,  3.,  0.,  0.,  0., -4., -4.]])