In [1]:
import random
import collections
import jieqi_game

def run_direct_determinization_game():
    """
    Initializes a single determinized game using the direct constructor
    and plays a random game within that reality for up to 30 plies.
    """
    # 1. Create the initial game state
    # This history object is the canonical, non-randomized starting point.
    history = jieqi_game.PositionHistory()
    start_board = jieqi_game.ChessBoard.hStartposBoard
    history.Reset(start_board, 0, 0)

    # 2. Construct a DeterminizedGame directly from the history.
    # The py::keep_alive policy ensures 'history' is not garbage-collected
    # while 'det_game' is in use.
    det_game = jieqi_game.DeterminizedGame(history)

    # Manually call determinize() to populate the board with a random sample
    # of the hidden pieces.
    det_game.determinize()
    print("--- Starting a game in a single, directly-created determinization ---")
    print("-" * 60)

    # Helper for printing piece names
    piece_names = {
        'p': "Pawn", 'c': "Cannon", 'r': "Rook",
        'a': "Advisor", 'b': "Bishop", 'n': "Knight"
    }

    # 3. Main game loop within the single determinization
    for ply in range(30):
        # Get state from our determinized game instance.
        det_history = det_game.get_position_history()
        current_pos = det_history.Last()
        board = current_pos.GetBoard()

        # --- Print Information ---
        player = "Black" if det_history.IsBlackToMove() else "White"
        print(f"Ply {ply + 1: >2} ({player: <5})")

        # Get the sampled dark pieces for the current player.
        our_dark_pieces_info = current_pos.our_dark()
        sampled_pieces_str = our_dark_pieces_info.pieces
        piece_counts = collections.Counter(sampled_pieces_str)
        print(f"  Dark Pieces ({our_dark_pieces_info.nleft} left): ", end="")
        
        sorted_pieces = sorted(piece_counts.items())
        print(", ".join([f"{piece_names.get(p, p)}: {c}" for p, c in sorted_pieces]))

        # --- Advance the Game ---
        legal_moves = board.GenerateLegalMoves()
        if not legal_moves:
            print(f"\nGame over at ply {ply}. No legal moves available.")
            break

        chosen_move = random.choice(legal_moves)
        print(f"  Move Played: {chosen_move.as_string()}")
        print("-" * 60)

        # Use the append method on the DeterminizedGame object directly.
        det_game.append(chosen_move)
        print(f"  Board:\n {det_game.get_position_history().Last().DebugString()}")

        result = det_history.ComputeGameResult()
        if result != jieqi_game.GameResult.UNDECIDED:
            print(f"\nGame over. Result: {result}")
            break

    # 4. Print final state
    final_history = det_game.get_position_history()
    print("--- Game Finished ---")
    print(f"Final Ply Count: {final_history.GetLength() - 1}")
    print(f"  Board:\n {det_game.get_position_history().Last().DebugString()}")
    print(f"Final FEN: {jieqi_game.GetFen(final_history.Last())}")


if __name__ == "__main__":
    # Initialize C++ magic bitboards.
    jieqi_game.InitializeMagicBitboards()
    run_direct_determinization_game()

--- Starting a game in a single, directly-created determinization ---
------------------------------------------------------------
Ply  1 (White)
  Dark Pieces (15 left):  : 1, Advisor: 2, Bishop: 2, Cannon: 2, Knight: 2, Pawn: 5, Rook: 2
  Move Played: f0e1
------------------------------------------------------------
  Board:
 hhhhk.hhh
....r....
.h.....h.
h.h.h.h.h
.........
.........
H.H.H.H.H
.H.....H.
.........
HHHHKHHHH (from black's eyes) Hash: 13364866146517348023

Ply  2 (Black)
  Dark Pieces (15 left):  : 1, Advisor: 2, Bishop: 2, Cannon: 2, Knight: 2, Pawn: 5, Rook: 2
  Move Played: b2a2
------------------------------------------------------------
  Board:
 hhhhkhhhh
.........
a......h.
h.h.h.h.h
.........
.........
H.H.H.H.H
.H.....H.
....R....
HHHHK.HHH (from white's eyes) Hash: 10065226390803547169

Ply  3 (White)
  Dark Pieces (14 left):  : 1, Advisor: 2, Bishop: 2, Cannon: 2, Knight: 2, Pawn: 5, Rook: 1
  Move Played: g0e2
-----------------------------------------------

In [None]:
import numpy as np
import jieqi_game

def simulate_nn_evaluation(nn_input_batch: np.ndarray):
    """
    Mocks the behavior of a neural network.

    Takes a batch of encoded game states and returns random policy and value tensors.
    """
    # The batch size can vary depending on the number of leaf nodes found
    batch_size = nn_input_batch.shape[0]
    
    # The number of possible moves in the policy head.
    # Move::as_nn_index() produces an index from 0 to 2061.
    num_possible_moves = 2062

    # 1. Mock Policy Head Output
    # Create random logits and apply softmax to get a probability distribution.
    dummy_policy_logits = np.random.rand(batch_size, num_possible_moves).astype(np.float32)
    policy_exp = np.exp(dummy_policy_logits)
    policy_output = policy_exp / np.sum(policy_exp, axis=1, keepdims=True)

    # 2. Mock Value Head Output
    # Create random logits for (Loss, Draw, Win) and apply softmax.
    dummy_value_logits = np.random.randn(batch_size, 3).astype(np.float32)
    value_exp = np.exp(dummy_value_logits)
    value_output = value_exp / np.sum(value_exp, axis=1, keepdims=True)
    
    # The C++ side expects flattened arrays for the data spans.
    return policy_output.flatten(), value_output.flatten()


def run_mcts_test():
    """
    Sets up a position, runs an MCTS search with a mock NN,
    and prints the resulting move evaluations.
    """
    # 1. Set up a non-trivial game position
    history = jieqi_game.PositionHistory()
    history.Reset(jieqi_game.ChessBoard.hStartposBoard, 0, 0)

    print("--- Testing MCTS on the following position ---")
    print(f"FEN: {jieqi_game.GetFen(history.Last())}")
    print(f"Player to move: {'Black' if history.IsBlackToMove() else 'White'}")
    print("-" * 50)

    # 2. Initialize MCTS search
    batch_size = 8
    num_simulations = 400
    mcts = jieqi_game.MCTS(history, batch_size=batch_size, cpuct=1.5) #
    
    # 3. Main search loop
    print(f"Running {num_simulations} simulations with batch size {batch_size}...")
    for _ in range(num_simulations // batch_size):
        # Get a batch of encoded leaf nodes for the NN.
        nn_input = mcts.run_search_batch()

        # If the batch is empty, the tree is fully explored.
        if nn_input.size == 0:
            print("Search tree fully explored, stopping early.")
            break
            
        # Get mock NN predictions.
        mock_policy_output, mock_value_output = simulate_nn_evaluation(nn_input)
        
        # Apply the mock evaluations to the tree.
        mcts.apply_evaluations(mock_policy_output, mock_value_output)

    print("Search complete.")
    print("-" * 50)

    # 4. Print results
    print("--- MCTS Root Move Evaluations ---")
    
    # Get the detailed search statistics for each move from the root.
    move_evals = mcts.get_root_move_evaluations()
    
    # Sort by visit count in descending order for readability
    sorted_evals = sorted(move_evals, key=lambda x: x.visit_count, reverse=True)

    # Print a formatted table header
    print(f"{'Move':<10} | {'Visits':>8} | {'Policy':>8} | {'Win%':>7} | {'Draw%':>7} | {'Loss%':>7}")
    print(f"{'-'*10:s}-+-{'-'*8:s}-+-{'-'*8:s}-+-{'-'*7:s}-+-{'-'*7:s}-+-{'-'*7:s}")

    for evaluation in sorted_evals:
        win_pct = evaluation.win_prob * 100
        draw_pct = evaluation.draw_prob * 100
        loss_pct = evaluation.loss_prob * 100
        
        print(f"{evaluation.move.as_string():<10} | {evaluation.visit_count:>8d} | {evaluation.policy_prior:8.4f} | "
              f"{win_pct:6.2f}% | {draw_pct:6.2f}% | {loss_pct:6.2f}%")

    # Get and print the best move found.
    best_move = mcts.get_best_move()
    print("-" * 50)
    print(f"🏆 Best Move according to search: {best_move.as_string()}")


if __name__ == "__main__":
    jieqi_game.InitializeMagicBitboards()
    run_mcts_test()

In [1]:
import factory

In [2]:
factory.EXAMPLE_CONFIGS

{'default': {'input_channels': 167,
  'embedding_size': 768,
  'dff_size': 1024,
  'num_encoder_blocks': 15,
  'num_heads': 12,
  'policy_embedding_size': 256,
  'policy_d_model': 128,
  'activation_type': 1,
  'is_wdl': True,
  'has_smolgen': True,
  'embedding_dense_size': 32,
  'num_possible_policies': 2550,
  'smolgen_config': {'smolgen_hidden_channels': 32,
   'smolgen_hidden_sz': 256,
   'smolgen_gen_sz': 256,
   'activation_type': 1}},
 'small': {'input_channels': 167,
  'embedding_size': 512,
  'dff_size': 768,
  'num_encoder_blocks': 10,
  'num_heads': 8,
  'policy_embedding_size': 256,
  'policy_d_model': 128,
  'activation_type': 1,
  'is_wdl': True,
  'has_smolgen': True,
  'embedding_dense_size': 16,
  'num_possible_policies': 2550,
  'smolgen_config': {'smolgen_hidden_channels': 32,
   'smolgen_hidden_sz': 128,
   'smolgen_gen_sz': 128,
   'activation_type': 1}}}

In [3]:
import jieqi_game as game

In [4]:
model_config = factory.EXAMPLE_CONFIGS["small"].copy()

In [5]:
model_config

{'input_channels': 167,
 'embedding_size': 512,
 'dff_size': 768,
 'num_encoder_blocks': 10,
 'num_heads': 8,
 'policy_embedding_size': 256,
 'policy_d_model': 128,
 'activation_type': 1,
 'is_wdl': True,
 'has_smolgen': True,
 'embedding_dense_size': 16,
 'num_possible_policies': 2550,
 'smolgen_config': {'smolgen_hidden_channels': 32,
  'smolgen_hidden_sz': 128,
  'smolgen_gen_sz': 128,
  'activation_type': 1}}

In [6]:
model_config["policy_index_array"]=numpy(game.K_ATTN_POLICY_MAP)

NameError: name 'numpy' is not defined

In [7]:
import numpy as np

In [22]:
model_config["policy_index_array"]=list([int(a) for a in game.K_ATTN_POLICY_MAP])

In [23]:
model_config

{'input_channels': 167,
 'embedding_size': 512,
 'dff_size': 768,
 'num_encoder_blocks': 10,
 'num_heads': 8,
 'policy_embedding_size': 256,
 'policy_d_model': 128,
 'activation_type': 1,
 'is_wdl': True,
 'has_smolgen': True,
 'embedding_dense_size': 16,
 'num_possible_policies': 2550,
 'smolgen_config': {'smolgen_hidden_channels': 32,
  'smolgen_hidden_sz': 128,
  'smolgen_gen_sz': 128,
  'activation_type': 1},
 'policy_index_array': [-1,
  9,
  12,
  15,
  16,
  17,
  18,
  19,
  20,
  0,
  10,
  13,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  1,
  11,
  14,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  2,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  3,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  4,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  5,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  6,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  7,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  8,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  -1,
  238,
  -1,
  250,
  253,
  25

In [15]:
model = factory.create_leela_model(model_config)
print(f"Created model with {sum(p.numel() for p in model.parameters())} parameters")

Created model with 37509059 parameters


In [24]:
factory.save_leela_model(
    model, 
    "eval_small", 
    config=model_config,
    metadata={"version": "0.0", "training_steps": 0}
)


In [None]:
game.DeterminizedGame(