In [1]:
# imports: evaluation helpers, evaluator, and MCTS
import json, pandas as pd
from chessrl.eval.helpers import (
    vi_move_from_policy_map, vi_move_from_values,
    mcts_move_from_instance,
)
from chessrl.eval.evaluator import evaluate
from chessrl.algorithms.mcts import MCTS
from chessrl.eval.helpers import optimal_moves_syzygy

# load artifacts (from disk)
from chessrl.utils.io import load_vf_parquet, load_policy_jsonl

2025-08-26 19:18:01,114 - INFO - Loading config file...


In [2]:
TB_PATH = "../../../tablebase/krk/"

# DTM oracle: built from the "dtz" column of krk_full.csv, keyed by FEN.
# NOTE(review): the absolute DTZ value is used as the distance-to-mate
# reference here; presumably that is valid for KRK, but please confirm.
dtz_oracle = pd.read_csv(TB_PATH + "krk_full.csv").set_index("fen")["dtz"].to_dict()


def dtm_oracle(fen: str) -> int:
    """Return the reference distance for ``fen`` as ``abs(int(dtz))``.

    Parameters
    ----------
    fen : str
        Position key; must be present in ``dtz_oracle``.

    Returns
    -------
    int
        Absolute value of the tablebase entry stored for ``fen``.

    Raises
    ------
    KeyError
        If ``fen`` is not a key of ``dtz_oracle``.
    """
    return abs(int(dtz_oracle[fen]))


# optimal moves source
opt_moves = optimal_moves_syzygy(TB_PATH)

# Positions passed to evaluate() (the same evaluate function is used for all agents).
# NOTE(review): the first and third FEN are identical, so this list holds only
# two distinct positions. It looks like a copy-paste slip; confirm the intended
# third position. Left unchanged so existing results stay comparable.
test_fens = [
    "8/8/8/8/8/8/k7/2K4R w - - 0 1",
    "8/8/8/8/8/8/k7/2K2R2 w - - 0 1",
    "8/8/8/8/8/8/k7/2K4R w - - 0 1",
]

# MCTS

In [3]:
# Build the MCTS searcher and wrap it as a move function.
# NOTE(review): the "seconds" argument presumably selects time-based
# budgeting inside mcts_move_from_instance; confirm against the helper.
mcts = MCTS(iterations=10000, seconds=0.0)
move_fn = mcts_move_from_instance(mcts, "seconds")

# Run the shared evaluator; budget is in seconds.
df_mcts = evaluate(
    move_fn,
    test_fens,
    TB_PATH,
    dtm_oracle,
    optimal_moves=opt_moves,
    budget=6.0,
)
df_mcts

KeyboardInterrupt: 

# Value Iteration

## from policy

In [None]:
# choose version to test
policy_map = load_policy_jsonl("../../../artifacts/policies/vi_krk_greedy_intermediate_13.jsonl")

# Hand-picked KRK positions for manual spot checks. They are NOT used by the
# evaluation below; they previously lived in `test_fens` and were overwritten
# before use, so they now have their own name.
# NOTE(review): the old label said "mate in one", which does not appear to
# hold for every entry; verify before relying on that.
manual_test_fens = [
    "k7/2R5/1K6/8/8/8/8/8 w - - 0 1",
    "6k1/8/6K1/8/8/8/1R6/8 w - - 0 1",
    "6k1/8/8/6K1/8/8/1R6/8 w - - 0 1",
    "8/1k6/3R4/8/8/4K3/8/8 w - - 0 1",
    "8/8/8/8/8/8/4k3/3R2K1 w - - 0 1",
    "7k/8/8/8/8/4R3/4K3/8 w - - 0 1",
    "4k3/8/8/8/8/8/1R6/4K3 w - - 0 1",
    "8/8/8/4k3/8/3K4/8/1R6 w - - 0 1",
]

# Load the test data
test_df = pd.read_csv(TB_PATH + "krk_test.csv")

# Calculate DTM (absolute value of DTZ)
test_df['dtm'] = test_df['dtz'].abs()

# Sample up to 2 positions for each unique DTM value.
# The "fen" column is selected *before* apply so the lambda never operates on
# the grouping column (which pandas deprecates). Each group is sampled with a
# fresh random_state=42, so the drawn rows match sampling the full frame and
# selecting "fen" afterwards.
test_fens = (
    test_df.groupby('dtm')['fen']
    .apply(lambda fens: fens.sample(min(2, len(fens)), random_state=42))
    .reset_index(drop=True)
)

# Wrap the loaded policy map as a move function and run the shared evaluator.
vi_agent = vi_move_from_policy_map(policy_map)
df_vi = evaluate(vi_agent, test_fens, TB_PATH, dtm_oracle, optimal_moves=opt_moves)
df_vi

  test_fens = test_df.groupby('dtm').apply(lambda x: x.sample(min(2, len(x)), random_state=42)).reset_index(drop=True)["fen"]


Unnamed: 0,fen,dtm_policy,dtm_oracle,gap,success,ms_per_move,top1,top1_decisions,budget
0,8/8/k1K5/8/8/8/8/7R w - - 0 1,1,1,0.0,1,0.003636,1.0,1,
1,5k2/8/5K2/8/8/8/7R/8 w - - 0 1,1,1,0.0,1,0.003556,1.0,1,
2,4R3/8/8/8/8/7K/8/6k1 w - - 0 1,3,3,0.0,1,0.003515,1.0,2,
3,8/8/8/3R4/5K2/8/8/4k3 w - - 0 1,3,3,0.0,1,0.003601,1.0,2,
4,k2K4/8/5R2/8/8/8/8/8 w - - 0 1,5,5,0.0,1,0.002893,1.0,3,
5,8/8/8/6R1/8/k7/3K4/8 w - - 0 1,5,5,0.0,1,0.003562,1.0,3,
6,8/8/8/8/8/4K3/4R3/k7 w - - 0 1,7,7,0.0,1,0.004251,1.0,4,
7,R3K1k1/8/8/8/8/8/8/8 w - - 0 1,7,7,0.0,1,0.005213,1.0,4,
8,8/3R4/8/8/8/4K3/8/1k6 w - - 0 1,9,9,0.0,1,0.004156,1.0,5,
9,8/8/6K1/6R1/8/8/7k/8 w - - 0 1,9,9,0.0,1,0.007469,1.0,5,


## from value function

In [None]:
# Load the stored value function from its parquet artifact.
V = load_vf_parquet("../../../artifacts/values/vi_krk_values.parquet")

# Build a move function from the values; the helper also receives TB_PATH.
vi_v_agent = vi_move_from_values(V, TB_PATH)

# Evaluate with the same evaluator and oracle used for the other agents.
df_vi_v = evaluate(
    vi_v_agent,
    test_fens,
    TB_PATH,
    dtm_oracle,
    optimal_moves=opt_moves,
)
df_vi_v

Unnamed: 0,fen,dtm_policy,dtm_oracle,gap,success,ms_per_move,top1,top1_decisions,budget
0,8/8/k1K5/8/8/8/8/7R w - - 0 1,1,1,0,1,25.856577,1.0,1,
1,5k2/8/5K2/8/8/8/7R/8 w - - 0 1,1,1,0,1,25.288614,1.0,1,
2,4R3/8/8/8/8/7K/8/6k1 w - - 0 1,3,3,0,1,21.080375,1.0,2,
3,8/8/8/3R4/5K2/8/8/4k3 w - - 0 1,3,3,0,1,30.670188,1.0,2,
4,k2K4/8/5R2/8/8/8/8/8 w - - 0 1,5,5,0,1,18.113335,1.0,3,
5,8/8/8/6R1/8/k7/3K4/8 w - - 0 1,5,5,0,1,28.339297,1.0,3,
6,8/8/8/8/8/4K3/4R3/k7 w - - 0 1,7,7,0,1,21.968189,1.0,4,
7,R3K1k1/8/8/8/8/8/8/8 w - - 0 1,7,7,0,1,18.504751,1.0,4,
8,8/3R4/8/8/8/4K3/8/1k6 w - - 0 1,9,9,0,1,27.736651,1.0,5,
9,8/8/6K1/6R1/8/8/7k/8 w - - 0 1,9,9,0,1,25.457139,1.0,5,


# TD methods

In [4]:
# Load Q-Learning policy and wrap it with the same policy-map helper used for VI.
qlearning_policy = load_policy_jsonl("../../../artifacts/policies/TD_QLearning_krk_greedy.jsonl")
qlearning_agent = vi_move_from_policy_map(qlearning_policy)

# Evaluate on whatever `test_fens` currently holds in the kernel.
# NOTE(review): `test_fens` is rebound in more than one cell, so the positions
# used here depend on execution order. The saved metrics for this cell look
# like they came from a much smaller set than the per-DTM sample; re-run the
# notebook top to bottom before comparing against the VI tables.
df_qlearning = evaluate(qlearning_agent, test_fens, TB_PATH, dtm_oracle, optimal_moves=opt_moves)

# Quick performance check; the position count makes the evaluated set explicit.
print(f"Positions evaluated: {len(df_qlearning)}")
print(f"Q-Learning Success Rate: {df_qlearning['success'].mean():.2%}")
print(f"Average DTM Gap: {df_qlearning['gap'].mean():.2f}")
print(f"Top-1 Accuracy: {df_qlearning['top1'].mean():.2%}")

Q-Learning Success Rate: 100.00%
Average DTM Gap: 1.33
Top-1 Accuracy: 77.78%
