In [1]:
import os
from pprint import pprint
# Run-once guard: move the working directory up one level the first time this
# cell executes. `_fixed` only acts as a marker that the move already happened
# (it is bound to whatever os.chdir returns), so re-running the cell does not
# climb a second level.
# NOTE(review): presumably the notebook lives one directory below the repo root
# so that relative paths such as 'local_data/...' resolve — confirm.
if '_fixed' not in locals():
    _fixed = os.chdir(os.path.dirname(os.getcwd()))
from pathlib import Path
import json, yaml
import inspect
import jsonschema
from tabulate import tabulate
from tqdm.notebook import tqdm
from ludwig.util import PromptTemplate, repo_root
from ludwig.tictactoe import TakeTheMiddle
from ludwig.util import vllm_Client
from ludwig.util import extract_code_blocks
from ludwig.jsonutils import unflatten, flatten


In [11]:
# Job directory whose log is inspected below. Exactly one assignment is live;
# the other runs are kept as commented-out alternatives (trailing tag = variant).
# NOTE(review): the formalizers defined later in this notebook build tic-tac-toe
# states — confirm the selected job matches before writing 'log-formal.jsonl'.
# jobdir = Path(r'local_data/tree-build-ChessPuzzle-')
# jobdir = Path(r'local_data/tree-build-ChessPuzzle-1f77_250808-174050') # active
# jobdir = Path(r'local_data/tree-build-ChessPuzzle-1f77_250809-234455') # fen
jobdir = Path(r'local_data/tree-build-ChessPuzzle-1f77_250809-230620') # pgn
# jobdir = Path(r'local_data/tree-build-TakeTheMiddle-1f77_250809-003845') # moves
# jobdir = Path(r'local_data/tree-build-TakeTheMiddle-1f77_250809-005349') # compact
# jobdir = Path(r'local_data/tree-build-TakeTheMiddle-8fc9_250813-230409')

# The JSON-lines log sits directly inside the job directory.
path = jobdir / 'log.jsonl'

# Displayed as the cell result: does the selected job directory exist?
jobdir.exists()

True

In [12]:
# Load the job log: one JSON document per line, each passed through unflatten().
# The file is iterated line by line instead of using str.splitlines(), because
# splitlines() also breaks on characters such as U+2028/U+2029 that may appear
# unescaped inside JSON strings. Blank lines are skipped rather than handed to
# json.loads, and the file is decoded as UTF-8 independent of the locale.
with path.open('r', encoding='utf-8') as log_file:
    full = [unflatten(json.loads(line)) for line in log_file if line.strip()]

# Displayed as the cell result: number of entries loaded.
len(full)

10

In [13]:
# Inspect the first log entry: tabulate receives its (key, value) pairs, so the
# printed table has one row per top-level key.
item = full[0]
print(tabulate(item.items()))

----------  ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [14]:
# List the field names stored under the entry's 'table' key.
item['table'].keys()

dict_keys(['index', 'seed', 'task', 'context', 'rep', 'expand_code', 'extract_code', 'extract_approach', 'evaluate_code', 'evaluate_approach', 'chat'])

In [15]:
# Print the text of the first message of the chat stored with this entry.
print(item['table']['chat'][0]['content'])

We will solve the following task using a structured tree-search framework.

**Task Description:**
Can you solve this chess puzzle? You will be given a board position and you must find the best move for the side to move. 

**Example Problem:**
```
White just played a2a7. Given the resulting board position:

PGN: 1. b4 d5 2. Bb2 Nf6 3. b5 g6 4. e3 Bg7 5. Be2 O-O 6. d4 c6 7. Nd2 cxb5 8. a4 bxa4 9. Rxa4 Bd7 10. Ra2 Nc6 11. Ngf3 e5 12. dxe5 Ng4 13. Qa1 Re8 14. h3 Ngxe5 15. Nxe5 Bxe5 16. Bxe5 Rxe5 17. Bb5 Qf6 18. Bxc6 Bxc6 19. Rxa7

What is the best move for black? Answer using the UCI or SAN format.
```

Your first task is to design an optimal state representation for this kind of problem. The representation must be:
1.  **High-Fidelity:** It must capture all details necessary to solve the task unambiguously.
2.  **Efficient:** It should contain minimal redundancy and exclude irrelevant "nuisance" details.
3.  **Conducive to Search:** The structure should make it easy to implement the searc

In [16]:
# Dump every entry's 'rep' as YAML, each followed by a 100-star rule and a blank
# line. The loop deliberately keeps the names `item` and `rep`, which stay bound
# to the last entry once the loop finishes.
for item in full:
    rep = item['table']['rep']
    rep_yaml = yaml.dump(rep, sort_keys=False, allow_unicode=True, default_flow_style=None)
    print(rep_yaml, '*'*100, '', sep='\n')

$schema: http://json-schema.org/draft-07/schema#
title: Chess Board State
type: object
properties:
  board:
    type: array
    items:
      type: array
      items:
        type: string
        enum: [R, N, B, Q, K, P, r, n, b, q, k, p, '-']
      minItems: 8
      maxItems: 8
    minItems: 8
    maxItems: 8
    description: 'An 8x8 grid representing the chess board. Each cell contains a piece
      (R: rook, N: knight, B: bishop, Q: queen, K: king, P: pawn, and their lowercase
      counterparts for black pieces) or ''-'' for an empty cell.'
  turn:
    type: string
    enum: [white, black]
    description: The side to move.
  castlingRights:
    type: object
    properties:
      whiteKingSide: {type: boolean}
      whiteQueenSide: {type: boolean}
      blackKingSide: {type: boolean}
      blackQueenSide: {type: boolean}
    required: [whiteKingSide, whiteQueenSide, blackKingSide, blackQueenSide]
    description: Castling rights for each side.
  enPassantTarget:
    type: [string, '

In [17]:

# Keys removed from every dict in a schema before it is handed on as a grammar.
# NOTE(review): presumably these keywords are not supported by the decoding
# backend — confirm.
_schema_bad_keys = {'uniqueItems', '$schema'}


class SchemaInvalidError(ValueError):
    """Raised when a schema is structurally unusable (e.g. an array without 'items')."""


def filter_schema(data):
    """Return a cleaned copy of a (nested) JSON-schema structure.

    The structure is walked recursively:

    * dicts lose every key listed in ``_schema_bad_keys``;
    * a list-valued ``'items'`` entry holding exactly one element is replaced
      by that element;
    * lists are rebuilt element by element; any other value is returned as is.

    The input is never modified: the ``'items'`` unwrapping is applied to a
    shallow copy, so the caller's schema keeps its original shape.

    Raises:
        SchemaInvalidError: a dict has ``'type': 'array'`` but no ``'items'`` key.
        AssertionError: a list-valued ``'items'`` does not hold exactly one element.
    """
    if isinstance(data, dict):
        if data.get('type') == 'array' and 'items' not in data:
            raise SchemaInvalidError("Array schema must have 'items' key")
        items = data.get('items')
        if isinstance(items, list):
            assert len(items) == 1, "list-valued 'items' must hold exactly one schema"
            # Copy instead of assigning into `data`, keeping the key order intact.
            data = {**data, 'items': items[0]}
        return {key: filter_schema(val) for key, val in data.items() if key not in _schema_bad_keys}
    if isinstance(data, list):
        return [filter_schema(item) for item in data]
    return data

def publish(fn):
    """Return the source text of ``fn`` with the function renamed to ``formalize``.

    ``None`` is passed through unchanged so that missing formalizers stay
    ``None``. Only whole-identifier occurrences of the function's name are
    renamed; a plain ``str.replace`` would also rewrite the name where it is
    merely a substring of a longer identifier (e.g. ``fit`` inside ``profit``).

    Raises whatever ``inspect.getsource`` raises when no source is available.
    """
    if fn is None:
        return fn
    import re  # local import: the notebook's import cell does not provide `re`
    whole_name = r'(?<!\w)' + re.escape(fn.__name__) + r'(?!\w)'
    return re.sub(whole_name, 'formalize', inspect.getsource(fn))

In [9]:
from ludwig.util import vllm_Client, SAIA_Client
# client = vllm_Client('8001')

# Read the API credentials from the repo's secrets file. The handle is opened in
# a `with` block so it is closed as soon as the YAML has been parsed.
secrets_path = repo_root().joinpath('config', 'secrets', 'saia.yml')
with secrets_path.open('r') as secrets_file:
    api_secrets_info = yaml.safe_load(secrets_file)

client = SAIA_Client(api_key=api_secrets_info['api-key'], model='openai-gpt-oss-120b')
client.prepare()

# Displayed as the cell result: the client's identifier.
client.ident



'openai-gpt-oss-120b'

In [10]:
# Take the log entry at index 9 and clean its 'rep' schema with filter_schema
# (defined earlier in this notebook); the bare `rep` on the last line displays
# the cleaned schema as the cell result.
item = full[9]
rep = filter_schema(item['table']['rep'])
rep

{'title': 'TicTacToeGameState',
 'description': 'Represents the state of a Tic-Tac-Toe game.',
 'type': 'object',
 'properties': {'board': {'type': 'array',
   'description': 'A 2D array representing the Tic-Tac-Toe board.',
   'items': {'type': 'array',
    'items': {'type': 'string',
     'enum': ['.', 'X', 'O'],
     'description': "Represents a cell on the board. '.' for empty, 'X' for player X, and 'O' for player O."}},
   'minItems': 3,
   'maxItems': 3},
  'currentPlayer': {'type': 'string',
   'enum': ['X', 'O'],
   'description': "Indicates which player's turn it is."},
  'availableMoves': {'type': 'array',
   'description': 'An array of coordinate pairs representing the empty cells on the board.',
   'items': {'type': 'array',
    'items': {'type': 'integer', 'minimum': 0, 'maximum': 2},
    'minItems': 2,
    'maxItems': 2}}},
 'required': ['board', 'currentPlayer', 'availableMoves']}

In [13]:
# Ask the model for an example state; the prompt embeds the cleaned schema as
# pretty-printed JSON and the same schema is passed to the client as `grammar`.
# NOTE(review): presumably `grammar=` constrains the reply to that schema and
# client.step appends the reply to `chat` — confirm against ludwig.util.
chat = client.begin_chat(f'Give me a random state of a Tic Tac Toe game using this representation?\n{json.dumps(rep, indent=2)}')
# `resp` is not used below; the reply text is read from the last chat message.
resp = client.step(chat, grammar=rep)
raw = chat[-1]['content']
# Parse the reply as JSON; the bare `example` displays the parsed state.
example = json.loads(raw)
example

{'board': [['X', 'O', 'X'], ['O', 'X', '.'], ['.', 'O', '.']],
 'currentPlayer': 'X',
 'lastMove': {'row': 2, 'col': 1}}

In [1]:
def formalize1(board, active_player):
    """Build the nested state dict from a flat, row-major 9-cell board.

    Blank cells (' ') are written as '.'; every other cell value is kept.
    The result holds the 3x3 'board' and the 'currentPlayer'.
    """
    blank_to_dot = {' ': '.'}
    cells = [blank_to_dot.get(mark, mark) for mark in board]
    # Row r of the grid is made of flat indices 3*r, 3*r + 1 and 3*r + 2.
    grid = [[cells[3 * row + col] for col in range(3)] for row in range(3)]
    return {'board': grid, 'currentPlayer': active_player}
def formalize2(board, active_player):
    """Build the nested state dict, including a move counter, from a flat board.

    Blank cells (' ') are written as '.'; every other cell value is kept.
    The result holds the 3x3 'board', the 'currentPlayer' and 'moveNumber',
    the number of cells that are not '.' after the blank replacement.
    """
    blank_to_dot = {' ': '.'}
    cells = [blank_to_dot.get(mark, mark) for mark in board]
    # Row r of the grid is made of flat indices 3*r, 3*r + 1 and 3*r + 2.
    grid = [[cells[3 * row + col] for col in range(3)] for row in range(3)]
    occupied = [mark for mark in cells if mark != '.']
    return {
        'board': grid,
        'currentPlayer': active_player,
        'moveNumber': len(occupied),
    }
def formalize3(board, active_player):
    """Build the nested state dict from a flat, row-major 9-cell board.

    Blank cells (' ') are written as '.'; every other cell value is kept.
    The result holds the 3x3 'board' and the 'currentPlayer'. The winner
    detection helper is defined but not called: the terminal/winner fields
    are commented out below.
    """
    def check_winner(state: str):
        """Return 'X' or 'O' for the first line of ``state`` fully held by that mark."""
        triples = (
            state[0:3], state[3:6], state[6:9],      # rows
            state[0::3], state[1::3], state[2::3],   # columns
            state[0::4], state[2:7:2],               # diagonals
        )
        for triple in triples:
            if triple in ('XXX', 'OOO'):
                return triple[0]

    # winner = check_winner(board)
    blank_to_dot = {' ': '.'}
    cells = [blank_to_dot.get(mark, mark) for mark in board]
    # Row r of the grid is made of flat indices 3*r, 3*r + 1 and 3*r + 2.
    grid = [[cells[3 * row + col] for col in range(3)] for row in range(3)]
    return {
        'board': grid,
        'currentPlayer': active_player,
        # 'isTerminal': winner is not None or all(cell != '.' for cell in board),
        # 'winner': winner,
    }
def formalize4(board, active_player):
    """Build the nested state dict, including the open cells, from a flat board.

    Blank cells (' ') are written as '.'; every other cell value is kept.
    The result holds the 3x3 'board', the 'currentPlayer' and
    'availableMoves', the [row, col] pair of every cell that is '.' after the
    blank replacement, in row-major order.
    """
    # Flat index -> [row, col] for a row-major 3x3 grid.
    coords = [[0,0], [0,1], [0,2], [1,0], [1,1], [1,2], [2,0], [2,1], [2,2]]
    symbols = {' ':'.'}
    board = [symbols.get(cell, cell) for cell in board]
    state = {'board': [[board[0], board[1], board[2]], [board[3], board[4], board[5]], [board[6], board[7], board[8]]],
             'currentPlayer': active_player,
             'availableMoves': [coords[i] for i, cell in enumerate(board) if cell == '.']}
    # The state was previously built but never handed back to the caller.
    return state

In [15]:
# One formalizer per log entry, matched by position: the export cell at the end
# of this notebook zips this list with `full`. A None entry has no formalizer;
# publish() passes None through, so None is what gets stored for that entry.
gt_formalizations = [
    formalize1,
    formalize2,
    formalize2,
    formalize2,
    None,
    formalize2,
    formalize3,
    formalize2,
    formalize2,
    formalize4,
]

In [16]:
# Look at the first formalizer; the bare `fn` displays its repr as the cell result.
fn = gt_formalizations[0]
fn

<function __main__.formalize1(board, active_player)>

In [17]:
# Write an annotated copy of the log: every entry gets the published source of
# its formalizer (or None) under item['table']['formalize_code'], one JSON
# document per line.
outpath = jobdir.joinpath('log-formal.jsonl')

# zip() stops at the shorter input, which would silently drop log entries from
# the output. Check before the file is opened so a mismatch cannot truncate an
# existing 'log-formal.jsonl'.
if len(full) != len(gt_formalizations):
    raise ValueError(
        f'expected one formalization per log entry, got '
        f'{len(gt_formalizations)} formalizations for {len(full)} entries'
    )

# NOTE(review): entries were passed through unflatten() when loaded and are
# written here without flatten() — confirm readers of this file expect that form.
with outpath.open('w') as f:
    for item, fn in zip(full, gt_formalizations):
        item['table']['formalize_code'] = publish(fn)
        f.write(json.dumps(item) + '\n')