In [20]:
# Inputs for this run: the markdown rendering of the charges document and the
# scenario (passenger count + season) the computation graph is requested for.
tiny_markdown_path = "output/2025-airport-charges-terms-and-conditions/tinychargesmarkdown.md"
condition = "transfer_passenger_count: 30, season: winter airline scheduling season"

# Read the whole document as UTF-8 text; it is interpolated into the prompt.
with open(tiny_markdown_path, mode="r", encoding="utf-8") as markdown_file:
    content = markdown_file.read()

In [21]:
# Few-shot instruction prompt: describes the Node schema and shows one worked
# example. The example has to be valid JSON that fits the Node model, because the
# model imitates it: leaf values are JSON numbers (Node.value accepts only
# int/float) and unused fields are simply omitted rather than written as
# `value=None`, which is not JSON.
prompt = """You are an expert aviation cost analyst. Your task is to extract aircraft charging rules from the provided document text.

You must then convert each rule into a computation graph represented as a recursive JSON object. 
The computation graph follows this structure:

Node:
    - operator (str, optional): one of ["+", "-", "*", "/", "and", "or", "=="]
    - value (int | float, optional): only for leaf nodes
    - childA (Node, optional): left operand
    - childB (Node, optional): right operand

Return only the computation graph as a valid JSON matching this schema. Do not return any explanation.

---

# EXAMPLE

Document Text:
# 3.5. Transfer Passenger Charge  
<html> <table><thead><tr><th>Charging Basis (€)</th><th>Summer Airline<br>Scheduling Season</th><th>Winter Airline<br>Scheduling Season</th></tr></thead><tbody><tr><td>Transfer Passenger Charge</td><td><span style="background-color: yellow;">3.90</span></td><td><span style="background-color: yellow;">2.80</span></td></tr></tbody></table></html>  
• Transfer Passenger information shall be provided via PTM.  
• A QRF will have its Transfer Passenger Charge exempted on first departure.  
This fee contributes to security and baggage screening.

condition: "transfer_passenger_count: 30, season: winter"
---

Computation Graph (JSON):

{
  "operator": "*",
  "childA": {
    "value": 30
  },
  "childB": {
    "value": 2.80
  }
}
"""
# Append the run-specific section: the document text to analyse, then the
# scenario conditions, then a final reminder to emit only the JSON graph.
# The pieces are joined with newlines; the empty strings produce the leading,
# blank and trailing lines of the section.
prompt += "\n".join(
    [
        "",
        "---",
        "",
        "Now process the following Document Text:",
        "",
        f"{content}",
        "",
        "Following the below conditions:",
        "",
        f"{condition}",
        "Return only the computation graph JSON.",
        "",
    ]
)

In [25]:
from outlines import Generator
from outlines.types import JsonSchema
from pydantic import BaseModel, Field
from typing import Literal, Union, Optional
import outlines

# Recursive expression-tree node used as the structured-output schema.
# Per the prompt's description, an inner node carries an `operator` with
# `childA`/`childB` operands and a leaf carries a numeric `value`.
# Fields are declared in the same order as before so the exported schema lists
# its properties in the same order.
# NOTE(review): every field is optional, so an operator node without children
# (or a completely empty node) also validates -- confirm that this is intended.
class Node(BaseModel):
    childA: Optional["Node"] = None
    childB: Optional["Node"] = None
    operator: Optional[Literal["+", "-", "*", "/", "and", "or", "=="]] = Field(default=None)
    value: Union[int, float, None] = Field(default=None)


# Rebuild the model after the class body (it refers to itself through the
# "Node" string annotations), then export the JSON schema for generation.
Node.model_rebuild()
schema = Node.model_json_schema()

# Checkpoint used for schema-constrained generation.
MODEL_ID = "Qwen/Qwen3-30B-A3B"

import torch
import outlines
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the causal-LM weights through Hugging Face in bfloat16, letting
# `device_map="auto"` decide device placement and keeping the CPU-memory
# footprint low while loading.
hf_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    low_cpu_mem_usage=True,
)

# Pair the model with its tokenizer and wrap both for Outlines.
tok = AutoTokenizer.from_pretrained(MODEL_ID)
model = outlines.from_transformers(hf_model, tok)
# NOTE(review): `json_match` is not referenced again in the visible cells; it is
# kept in case a later cell relies on it.
json_match = outlines.json_schema(schema)

# Build the schema-constrained generator once, then run it on the full prompt
# with a cap of 512 newly generated tokens.
to_node = JsonSchema(schema)
generator = Generator(model, to_node)
graph_json = generator(prompt, max_new_tokens=512)

# Parse the generated JSON text back into a validated Node tree.
node_obj = Node.model_validate_json(graph_json)

Loading checkpoint shards:   0%|          | 0/16 [00:00<?, ?it/s]

In [26]:
# Display the Node tree validated from the generated JSON.
node_obj

Node(childA=None, childB=None, operator='*', value=None)

In [24]:
# Display the JSON schema exported from the Node model (used to constrain generation).
schema

{'$defs': {'Node': {'properties': {'childA': {'anyOf': [{'$ref': '#/$defs/Node'},
      {'type': 'null'}],
     'default': None},
    'childB': {'anyOf': [{'$ref': '#/$defs/Node'}, {'type': 'null'}],
     'default': None},
    'operator': {'anyOf': [{'enum': ['+', '-', '*', '/', 'and', 'or', '=='],
       'type': 'string'},
      {'type': 'null'}],
     'default': None,
     'title': 'Operator'},
    'value': {'anyOf': [{'type': 'integer'},
      {'type': 'number'},
      {'type': 'null'}],
     'default': None,
     'title': 'Value'}},
   'title': 'Node',
   'type': 'object'}},
 '$ref': '#/$defs/Node'}