In [16]:
import torch
torch.cuda.empty_cache()


In [2]:

import json
from transformers import AutoTokenizer, AutoModelForCausalLM
from pydantic import BaseModel, Field, RootModel
from typing import Optional, Union, Literal, ForwardRef
from enum import Enum
from guidance import models, system, user, assistant, json as gen_json
import torch, outlines
from transformers import AutoModelForCausalLM, AutoTokenizer
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from pydantic import BaseModel, Field, RootModel
from typing import Optional, Union
from enum import Enum
from guidance import models, system, user, assistant, json as gen_json
import guidance

In [3]:

with open("../output/2025-airport-charges-terms-and-conditions/tinychargesmarkdown.md", "r") as f:
    markdown_content = f.read()


In [4]:

transfer_passenger_count = 30

condition = [
    {"charge_name": "transfer passenger charge", "transfer_passenger_count": transfer_passenger_count, "period": "summer airline scheduling season", "rate": "?"},
    # {"charge_name": "transfer passenger charge", "transfer_passenger_count": transfer_passenger_count, "period": "winter airline scheduling season", "rate": "?"},
    # {"charge_name": "runway landing and takeoff charge", "period": "summer airline scheduling season", "atm": "landing", "per tonne MTOW": 1, "rate": "?"},
    # {"charge_name": "runway landing and takeoff charge", "period": "summer airline scheduling season", "atm": "takeoff", "per tonne MTOW": 1, "rate": "?"},
    # {"charge_name": "runway landing and takeoff charge", "period": "winter airline scheduling season", "atm": "landing", "per tonne MTOW": 1, "rate": "?"},
    # {"charge_name": "runway landing and takeoff charge", "period": "winter airline scheduling season", "atm": "takeoff", "per tonne MTOW": 1, "rate": "?"}
]


In [5]:

MODEL_ID = "Qwen/Qwen3-30B-A3B"

hf_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",               
    low_cpu_mem_usage=True,          
)


tok        = AutoTokenizer.from_pretrained(MODEL_ID)


Loading checkpoint shards:   0%|          | 0/16 [00:00<?, ?it/s]

In [6]:

model = guidance.models.Transformers(hf_model, tok)

gpustat is not installed, run `pip install gpustat` to collect GPU stats.


In [7]:


class Op(str, Enum):
    ADD = "ADD"
    MULTIPLY = "MULTIPLY"

class ValueNode(BaseModel):
    """A leaf node with a numeric value"""
    type: Literal["VALUE"] = "VALUE"
    value: float
    description: str = Field(description="Explanation of what this value represents")
    # unit: 

class OpNode(BaseModel):
    """An operation node with two children"""
    type: Literal["OPERATION"] = "OPERATION"
    operator: Op
    # The 'left' and 'right' fields will be added dynamically

# the Union for the recursive fields
AnyNode = Union[OpNode, ValueNode]

# Dynamically update the OpNode to add the recursive fields
OpNode.model_fields.update({
    'left': (AnyNode, Field(..., discriminator='type')),
    'right': (AnyNode, Field(..., discriminator='type')),
})

# The root of the expression tree must be an operation.
class Node(RootModel):
    """The root of the expression tree, which must be an OpNode."""
    root: OpNode


In [8]:

@guidance
def create_expression_tree(llm, markdown_content, cond, pydantic_class):
    
    with system():
        llm = llm + """You are a world-class algorithm for building expression trees from text. Your goal is to construct a JSON object that represents the calculation logic for a 'rate' based on a document and a set of conditions.
        
You MUST follow the Node schema exactly. It requires either:
1. A ValueNode with 'type': 'VALUE' and a 'value' field containing a number, OR
2. An OpNode with 'type': 'OPERATION', an 'operator' which must be 'ADD' or 'MULTIPLY', and 'left'/'right' fields containing other nodes.

Here are examples of valid expression trees:
1. Simple value: {"type": "VALUE", "value": 3.9, "description": "Transfer passenger charge rate in pounds"}
2. Simple multiplication: {"type": "OPERATION", "operator": "MULTIPLY", "left": {"type": "VALUE", "value": 3.9, "description": "Transfer passenger charge per passenger"}, "right": {"type": "VALUE", "value": 30.0, "description": "Number of transfer passengers"}}"""

    with user():
        llm += f"""Here is the document:
---
{markdown_content}
---

Given the following condition:
{cond}

Construct the expression tree for the rate based on the document and condition."""

    with assistant():
        llm += gen_json(
            name="expression_tree", 
            schema=pydantic_class,
            max_tokens=200,
        )

    return llm


In [9]:
for cond in condition:
    print(f"--- Condition: {cond} ---")
    try:
        result_lm = model + create_expression_tree(markdown_content=markdown_content, cond=cond, pydantic_class = Node)
        expression_tree = result_lm["expression_tree"]
        

    except Exception as e:
        print(f"An error occurred: {e}")
        cond["rate"] = None



--- Condition: {'charge_name': 'transfer passenger charge', 'transfer_passenger_count': 30, 'period': 'summer airline scheduling season', 'rate': '?'} ---


StitchWidget(initial_height='auto', initial_width='100%', srcdoc='<!doctype html>\n<html lang="en">\n<head>\n …

In [10]:
expression_tree

'{"type": "OPERATION", "operator": "MULTIPLY", "left": {"type": "VALUE", "value": 3.9, "description": "Transfer passenger charge rate in pounds"}, "right": {"type": "VALUE", "value": 30.0, "description": "Number of transfer passengers"}}'

In [11]:
loaded_json = json.loads(expression_tree)

In [12]:
loaded_json

{'type': 'OPERATION',
 'operator': 'MULTIPLY',
 'left': {'type': 'VALUE',
  'value': 3.9,
  'description': 'Transfer passenger charge rate in pounds'},
 'right': {'type': 'VALUE',
  'value': 30.0,
  'description': 'Number of transfer passengers'}}

In [13]:
print(json.dumps(loaded_json, indent=4))

{
    "type": "OPERATION",
    "operator": "MULTIPLY",
    "left": {
        "type": "VALUE",
        "value": 3.9,
        "description": "Transfer passenger charge rate in pounds"
    },
    "right": {
        "type": "VALUE",
        "value": 30.0,
        "description": "Number of transfer passengers"
    }
}


In [14]:
result = Node.model_validate_json(expression_tree)

In [15]:
print(result.model_dump_json(indent=8))

{
        "type": "OPERATION",
        "operator": "MULTIPLY"
}
