In [1]:
import torch
torch.cuda.empty_cache()

import time
import json
from transformers import AutoTokenizer, AutoModelForCausalLM
from pydantic import BaseModel, Field, RootModel
from typing import Optional, Union, Literal, ForwardRef, List, Any
from enum import Enum
from guidance import models, system, user, assistant, json as gen_json, gen
import torch, outlines
from transformers import AutoModelForCausalLM, AutoTokenizer
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from pydantic import BaseModel, Field, RootModel
from typing import Optional, Union
from enum import Enum
from guidance import models, system, user, assistant, json as gen_json
import guidance
from utils import timing_decorator

In [2]:

# Load the rules document that is later passed to the graph builder as
# `document_content`. The encoding is given explicitly so the read does not
# depend on the machine's locale default (the generated output further down
# contains the euro sign, so the text is not pure ASCII).
with open("../output/2025-airport-charges-terms-and-conditions/tinychargesmarkdown.md", "r", encoding="utf-8") as f:
    markdown_content = f.read()


In [3]:

# Hugging Face checkpoint id shared by the weights and the tokenizer below.
MODEL_ID = "Qwen/Qwen3-30B-A3B"

hf_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",               # let the library decide device placement of the weights
    low_cpu_mem_usage=True,          # request the reduced-host-memory loading path
)


# Tokenizer loaded from the same checkpoint id as the model.
tok        = AutoTokenizer.from_pretrained(MODEL_ID)


Loading checkpoint shards:   0%|          | 0/16 [00:00<?, ?it/s]

In [4]:

# Wrap the loaded Hugging Face model and tokenizer in a guidance model; this is
# the object handed to ComputationGraphBuilder in the cells below.
model = guidance.models.Transformers(hf_model, tok)

gpustat is not installed, run `pip install gpustat` to collect GPU stats.


In [5]:
from pydantic import BaseModel, Field
from typing import List, Any

class DomainVariable(BaseModel):
    """Defines a single variable the LLM can use in the computation graph."""
    name: str = Field(..., description="The unique identifier for the variable.")
    description: str = Field(..., description="A detailed explanation of what this variable represents.")
    # `Field(...)` makes `unit` required even though it is Optional: callers must
    # pass it explicitly and use None when the variable has no unit.
    unit: Optional[str] = Field(..., description="The unit of the variable")
    # Holds a Python type object (the registry passes float, str or bool), not a type name.
    data_type : type = Field(..., description="The data type of the variable")


In [6]:
# 1. Registry of every domain variable, keyed by variable name (each key repeats
# the entry's `name`).
ALL_VARIABLES = {
    
    'transfer_passenger_count': DomainVariable(name='transfer_passenger_count', description='Total number of transferring passengers.', unit=None, data_type=float),
    # NOTE(review): the description speaks of a summer/winter season but data_type is float — confirm the intended type.
    'airline_scheduling_season': DomainVariable(name='airline_scheduling_season', description='Whether summer/winter airline scheduling season.', unit=None, data_type=float),
    'takeoff_aircraft_mtow_tonnes': DomainVariable(name='takeoff_aircraft_mtow_tonnes', description='The Maximum Take-Off Weight in tonnes.', unit='tonne', data_type=float),
    # NOTE(review): the name says MTOW (take-off weight) while the description says Maximum Landing Weight — confirm which is meant.
    'landing_aircraft_mtow_tonnes': DomainVariable(name='landing_aircraft_mtow_tonnes', description='The Maximum Landing Weight in tonnes.', unit='tonne', data_type=float),
    
    'parking_duration_hours': DomainVariable(
        name='parking_duration_hours',
        description='Total duration of parking in hours. Surcharges apply at 48 and 72 hours.',
        unit='hours',
        data_type=float
    ),
    'aircraft_stand_type': DomainVariable(
        name='aircraft_stand_type',
        description='The type of aircraft stand used for parking. E.g., "Wide Contact", "Narrow Remote", "LAP", "Long Term Remote".',
        unit=None,
        data_type=str
    ),
    # Defined here but not listed under any charge category in
    # CHARGE_CATEGORY_VARIABLES, where the location is implied by the category.
    'parking_location': DomainVariable(
        name='parking_location',
        description='The location of the parking stand, either "EAP" (East Aerodrome Parking) or "WAP" (West Aerodrome Parking).',
        unit=None,
        data_type=str
    ),
    'is_overnight_parking': DomainVariable(
        name='is_overnight_parking',
        description='True if the parking occurs during the free overnight period (2300-0600hrs).',
        unit=None,
        data_type=bool
    )
}
# 2. Map charge types to the variable names they are allowed to use.
# Keys are the `charge_category` strings accepted by create_dynamic_variable_enum;
# values are lists of keys into ALL_VARIABLES.
CHARGE_CATEGORY_VARIABLES = {
    # --- Existing Categories ---
    "transfer_passenger_charge": [
        'transfer_passenger_count', 
        'airline_scheduling_season'
    ],
    "runway_landing_charge": [
        'landing_aircraft_mtow_tonnes', 
        'airline_scheduling_season'
    ],
    "runway_takeoff_charge": [
        'takeoff_aircraft_mtow_tonnes', 
        'airline_scheduling_season'
    ],

    # --- New Categories for Parking Charges ---
    "east_aerodrome_parking_charge": [
        'parking_duration_hours',
        'aircraft_stand_type',
        'is_overnight_parking'
        # 'parking_location' is implicitly 'EAP' for this category
    ],
    "west_aerodrome_parking_charge": [
        'parking_duration_hours',
        'aircraft_stand_type',
        'is_overnight_parking'
        # 'parking_location' is implicitly 'WAP' for this category
    ]
}



In [7]:
def create_dynamic_variable_enum(charge_category: str) -> type(Enum):
    """
    Build an Enum class named "Var" whose members are the variables registered
    for `charge_category` in CHARGE_CATEGORY_VARIABLES.

    Each member is named after the upper-cased variable name and carries the
    original variable name as its value.

    Raises:
        ValueError: If the category is not registered or has no variables.
    """
    allowed = CHARGE_CATEGORY_VARIABLES.get(charge_category)
    if allowed:
        # Functional Enum API: mapping of MEMBER_NAME -> value.
        members = dict((var_name.upper(), var_name) for var_name in allowed)
        return Enum("Var", members)

    raise ValueError(f"Unknown charge category: {charge_category}")

# Smoke test: build the enum for one category and read back a member's value.
Var = create_dynamic_variable_enum("transfer_passenger_charge")
print(Var.TRANSFER_PASSENGER_COUNT.value)

transfer_passenger_count


In [8]:
# Plain variable-name strings carried by the members of the `Var` enum.
allowed_variables = [el.value for el in list(Var)]
allowed_variables

['transfer_passenger_count', 'airline_scheduling_season']

In [9]:
from pydantic import BaseModel, Field, RootModel
from typing import Union, Literal
from enum import Enum

# Separate enums for clarity and type safety
# Operators allowed in BINARY_OPERATION nodes. There is no subtraction member.
class MathOperator(str, Enum):
    ADD = "ADD"
    MULTIPLY = "MULTIPLY"
    DIVIDE = "DIVIDE"

# Operators allowed in COMPARISON nodes.
class Comparator(str, Enum):
    GREATER_THAN = "GREATER_THAN"
    LESS_THAN = "LESS_THAN"
    EQUAL_TO = "EQUAL_TO"

# Units that VALUE and VARIABLE nodes may declare.
# NOTE(review): the variable registry also uses a 'tonne' unit for the MTOW
# variables, which has no member here — confirm whether a weight unit is needed.
class Units(str, Enum):
    HOURS = "HOURS"
    MINUTES = "MINUTES"
    EUROS = "EUROS"
    PERCENT = "PERCENT"
    UNITLESS = "UNITLESS"
    
# --- Node Definitions ---

class ValueNode(BaseModel):
    # Leaf node: a numeric constant with a description and a unit.
    type: Literal["VALUE"] = "VALUE"
    value: float
    description: str
    unit: Units

class VariableNode(BaseModel):
    # Leaf node: a named input. `name` is a free string in this schema; it is
    # not restricted to the per-category variable list.
    type: Literal["VARIABLE"] = "VARIABLE"
    name: str 
    description: str
    unit: Units

class BinaryOpNode(BaseModel):
    """Node for mathematical operations that produce a number."""
    type: Literal["BINARY_OPERATION"] = "BINARY_OPERATION"
    operator: MathOperator
    # 'AnyNode' is a forward reference, resolved by model_rebuild() below.
    left: 'AnyNode'
    right: 'AnyNode'

class ComparisonNode(BaseModel):
    """Node for comparison operations that produce a boolean."""
    type: Literal["COMPARISON"] = "COMPARISON"
    operator: Comparator
    left: 'AnyNode'
    right: 'AnyNode'

class ConditionalNode(BaseModel):
    """Node for if-then-else logic."""
    type: Literal["CONDITIONAL"] = "CONDITIONAL"
    condition: ComparisonNode # Condition must be a comparison
    if_true: 'AnyNode'
    if_false: 'AnyNode'

# --- Recursive Setup ---

# Every node kind that may appear as an operand or branch. ComparisonNode is
# not a member: it can only appear as ConditionalNode.condition.
AnyNode = Union[
    ValueNode, 
    VariableNode, 
    BinaryOpNode, 
    ConditionalNode
]

# Use model_rebuild() to safely resolve all forward references
BinaryOpNode.model_rebuild()
ConditionalNode.model_rebuild()
ComparisonNode.model_rebuild()

# Schema of a complete graph: the top-level node must be a BINARY_OPERATION.
# NOTE(review): this may be why the generated graphs shown later wrap a
# conditional in an ADD (the printed expression ends in "+ 0") — confirm whether
# the root should accept AnyNode instead.
class Node(RootModel):
    root: BinaryOpNode

In [65]:
from pydantic import BaseModel, Field, field_validator
from typing import List, Dict, Any, Optional
from enum import Enum


class ParameterStatus(str, Enum):
    """An enumeration for clear, explicit parameter statuses."""
    KNOWN = "KNOWN"
    SYMBOLIC = "SYMBOLIC"

class ParameterDetail(BaseModel):
    """A structured model to describe each parameter identified from the query."""
    name: str = Field(..., description="The name of the parameter.")
    status: ParameterStatus = Field(..., description="Whether the parameter's value is known from the query or is a symbolic variable.")
    # The "null when SYMBOLIC" rule is stated only in the description text; no
    # validator on this model enforces it.
    value: Optional[Any] = Field(None, description="The actual value of the parameter, if its status is 'KNOWN'. Must be null if status is 'SYMBOLIC'.")


# One schema per reasoning step of create_graph_with_cot. Each step's output is
# generated as a JSON object constrained by the matching schema.

# Step 1: parameters found in the query, each marked KNOWN or SYMBOLIC.
class ReasoningSchemaStep1(BaseModel):
    query_parameters: List[ParameterDetail] = Field(
        ...,
        description="A structured list of all parameters identified from the query and their status."
    )
# Step 2: facts, constants and rules extracted from the document.
class ReasoningSchemaStep2(BaseModel):
    """
    A simplified schema for Step 2 that captures all constants and rules as a simple list of descriptive strings.
    """
    identified_constants_and_rules: List[str] = Field(
        ...,
        description="A comprehensive list of all facts, constants, and conditional rules extracted from the document that are necessary for the final calculation. Each string in the list should be a self-contained, clear statement. For example: 'The rate for a Narrow Satellite stand is €32.90 per 15 minutes' or 'A 100% surcharge is applied if parking duration is between 48 and 72 hours'."
    )
# Step 3: free-text plan for combining variables and constants.
class ReasoningSchemaStep3(BaseModel):
    synthesis_plan: str = Field(
        ...,
        description="A concise, step-by-step plan describing how the variables and constants are combined into the final computation graph."
    )
# Step 4: free-text final check before the graph is generated.
class ReasoningSchemaStep4(BaseModel):
    rethink: str = Field(
        ...,
        description="Final check to ensure the plan correctly uses variables and constants and handles all logic from the document."
    )


In [66]:
@guidance
def create_graph_with_cot(llm, allowed_variables_prompt, document, query, output_schema):
    """Guidance program: reason in four schema-constrained steps, then emit the graph JSON.

    Appends a system turn (task rules plus the allowed-variable list), a user
    turn (document and query) and an assistant turn in which each reasoning
    step and the final answer are generated as JSON constrained by a schema.

    Args:
        llm: Guidance model state the program is appended to.
        allowed_variables_prompt: Pre-formatted text listing the variables the
            model may use; interpolated into the system prompt.
        document: Text of the rules document; interpolated into the user turn.
        query: Natural-language request; quoted in the user turn.
        output_schema: Schema passed to the final JSON generation.

    Returns:
        The model state carrying five captures: "thought1" to "thought4" (one
        per reasoning step, each limited to 600 tokens) and "result_graph"
        (limited to 2000 tokens).
    """
    
    with system():
        llm += f"""You are an expert system that converts textual calculation rules into structured JSON expression trees.
        You MUST think step-by-step and reason before generating the final JSON.
        
        **Reasoning Guidelines:**
        1.  **Analyze Query Parameters:** Identify all relevant parameters from the user's query. For each parameter, create a structured object specifying its 'name', its 'status' ('KNOWN' if the value is given, or 'SYMBOLIC' if it's a variable), and its 'value' (or null if symbolic). For example: `[ {{"name": "aircraft_stand_type", "status": "KNOWN", "value": "Wide Remote"}}, {{"name": "parking_duration_hours", "status": "SYMBOLIC", "value": null}} ]`
        **Allowed Variables for this Task:**
        ---
        {allowed_variables_prompt}
        ---
        2. **Identify All Relevant Information**: Review the document and extract every fact, constant, and conditional rule needed for the calculation. Each piece of information should be written as a clear, self-contained sentence and collected into a list of strings.
        3.  **Synthesize Plan:** Briefly describe how you will combine these pieces into a final expression tree.
        4. **Rethink and Finalize Approach**: Before processing with generation, rethink your progress so far and make adjustments if necessary, then finalize and proceed to generate the expression tree.

        **Crucial Rule 1:** If a parameter from the 'Allowed Variables' list is given a specific value in the query, you MUST treat it as a fixed value to find constants. You MUST NOT include it as a `VARIABLE` node in the final JSON.
        **Crucial Rule 2**: If a calculation path or value depends on the value of a symbolic variable, you MUST capture the rules for all possible values and represent this logic using CONDITIONAL nodes in the final expression tree. You MUST NOT assume a default value for the variable to simplify the logic.
        
        After writing your reasoning, you WILL generate the JSON object.


        """

    with user():
        llm += f"""
        **Document:**
        ---
        {document}
        ---

        **Query:**
        
        Based on the document, construct the computation graph for the following request:
        
        "{query}"
        
        """

    with assistant():
        # Each step header is written by the program itself; only the JSON that
        # follows it is generated, constrained by that step's schema.
        llm += "I will now follow the reasoning guidelines step-by-step before generating the final JSON.\n"
        llm += "Step1. Analyze Query Parameters:\n"
        llm += gen_json(
            name="thought1", 
            schema=ReasoningSchemaStep1, 
            max_tokens=600)

        llm += "Step2. Identify All Relevant Information:\n"
        llm += gen_json(
            name="thought2", 
            schema=ReasoningSchemaStep2, 
            max_tokens=600)

        llm += "Step3. Synthesize Plan:\n"
        llm += gen_json(
            name="thought3", 
            schema=ReasoningSchemaStep3, 
            max_tokens=600)

        llm += "Step4. Rethink and Finalize Approach:\n"
        llm += gen_json(
            name="thought4", 
            schema=ReasoningSchemaStep4, 
            max_tokens=600)

        # After thinking, it generates the JSON.
        llm += "\n\nFinal JSON object:\n"
        llm += gen_json(
            name="result_graph", 
            schema=output_schema,
            max_tokens=2000 
        )
        
    return llm

In [67]:

class ComputationGraphBuilder:
    """
    Runs one graph generation end to end: picks the variables allowed for a
    charge category, renders them as prompt text and executes the
    `create_graph_with_cot` guidance program on the wrapped model.
    """

    def __init__(self, model):
        """Keep the guidance model that every `build` call starts from."""
        self.model = model

    @timing_decorator
    def build(self, document_content: str, query: str, charge_category: str) -> dict:
        """
        Generate a computation graph for a query against a rules document.

        Args:
            document_content: The text containing the rules.
            query: A natural language question about what to calculate.
            charge_category: Key into CHARGE_CATEGORY_VARIABLES selecting which
                variables are offered to the model.

        Returns:
            On success, the guidance model state produced by appending the
            program to `self.model`; later cells read its captures by key
            (e.g. ``['result_graph']``). On an unknown category or any
            exception during generation, ``{"error": <message>}``.
            NOTE(review): the ``dict`` annotation only matches the error case.
        """
        print(f"--- Building graph for charge category: '{charge_category}' ---")

        # Resolve the category into its enum of permitted variables.
        try:
            category_enum = create_dynamic_variable_enum(charge_category)
        except ValueError as err:
            print(f"Error: {err}")
            return {"error": str(err)}

        # Render one bullet per permitted variable, in ALL_VARIABLES order.
        permitted = [member.value for member in category_enum]
        bullet_lines = []
        for key, variable in ALL_VARIABLES.items():
            if key in permitted:
                bullet_lines.append(f"- **{variable.name}**: {variable.description}")
        allowed_variables_prompt = "\n".join(bullet_lines)

        # Any failure while running the program (including GPU errors) is
        # reported and turned into an error dict rather than raised.
        try:
            program = create_graph_with_cot(
                allowed_variables_prompt=allowed_variables_prompt,
                document=document_content,
                query=query,
                output_schema=Node,
            )
            return self.model + program
        except Exception as err:
            print(f"\nAn error occurred while building the graph for '{query}': {err}")
            return {"error": str(err)}

In [78]:
import sympy
from sympy import Symbol, Piecewise, sympify, Add, Mul, Pow

def compose_expression(node: dict):
    """
    Recursively convert a JSON computation-graph node into a SymPy expression.

    Arithmetic nodes are built with ``evaluate=False`` so the printed
    expression keeps the structure of the graph instead of being simplified.

    Args:
        node: A dictionary representing a node in the computation graph. Its
            ``type`` key selects the node kind: ``VALUE``, ``VARIABLE``,
            ``BINARY_OPERATION``, ``COMPARISON`` or ``CONDITIONAL``.

    Returns:
        The sympified value for ``VALUE``, a ``Symbol`` for ``VARIABLE``, an
        unevaluated ``Add``/``Mul`` for ``BINARY_OPERATION``, a relational for
        ``COMPARISON`` and a ``Piecewise`` for ``CONDITIONAL``.

    Raises:
        ValueError: If the node type, binary operator or comparison operator
            is not one of the supported values.
        KeyError: If a key required by the node type is missing.
    """
    node_type = node.get('type')

    if node_type == "VALUE":
        return sympify(node['value'])

    if node_type == "VARIABLE":
        return Symbol(node['name'])

    if node_type == "BINARY_OPERATION":
        left = compose_expression(node['left'])
        right = compose_expression(node['right'])
        operator = node['operator']

        # Use class constructors with evaluate=False to prevent simplification
        if operator == "ADD":
            return Add(left, right, evaluate=False)
        if operator == "MULTIPLY":
            return Mul(left, right, evaluate=False)
        if operator == "DIVIDE":
            # Division (a/b) is represented as a * (b**-1)
            reciprocal = Pow(right, -1, evaluate=False)
            return Mul(left, reciprocal, evaluate=False)
        raise ValueError(f"Unsupported binary operator: {operator}")

    if node_type == "COMPARISON":
        left = compose_expression(node['left'])
        right = compose_expression(node['right'])
        operator = node['operator']

        if operator == "GREATER_THAN":
            return left > right
        if operator == "LESS_THAN":
            return left < right
        if operator == "EQUAL_TO":
            # The graph schema's Comparator enum allows EQUAL_TO, so it must be
            # accepted here. `==` on SymPy objects is a structural test that
            # returns a plain bool, so a symbolic equality needs Eq().
            return sympy.Eq(left, right)
        raise ValueError(f"Unsupported comparison operator: {operator}")

    if node_type == "CONDITIONAL":
        condition = compose_expression(node['condition'])
        if_true_expr = compose_expression(node['if_true'])
        if_false_expr = compose_expression(node['if_false'])

        # The (expr, True) pair is the fallback branch of the Piecewise.
        return Piecewise((if_true_expr, condition), (if_false_expr, True))

    raise ValueError(f"Unknown node type: {node_type}")

In [69]:
import json
import traceback


def create_computation_graph(model, query, charge_category):
    """
    Build a computation graph for `query` and time the call.

    Creates a fresh ComputationGraphBuilder around `model` and runs it against
    the notebook-level `markdown_content` document.

    Returns:
        A 2-tuple ``(response, seconds)``: whatever `build` returned, and the
        wall-clock duration of the call measured with ``time.perf_counter``.
        NOTE(review): later cells index ``result[2]`` and treat ``result[0]`` /
        ``result[1]`` as plain dicts, which does not match this 2-tuple —
        confirm which version of this function produced those outputs.
    """
    builder = ComputationGraphBuilder(model=model)

    started = time.perf_counter()
    response = builder.build(
        document_content=markdown_content,
        query=query,
        charge_category=charge_category,
    )
    elapsed = time.perf_counter() - started

    return response, elapsed
    

1. Changes:
- Added a structured schema for our steps of reasoning to counter the problems with variable length reasoning. Tokens capped at 1k.
2. Observations:
- The reasoning produced is pretty good, although the model makes a mistake in one of the steps, where it thinks that the surcharge may not be required; it quickly corrects this in the final reconsideration step and evaluates the problem correctly.
- The graph generated is perfection! :') 
3. Discussions:
- Structured generation for the win! Guiding the model to think in a structured way works, assuming 1k tokens is enough inference-time compute. (There is a paper which says that any problem solvable by boolean circuits of size T can be solved by constant-size transformers if they generate O(T) INTERMEDIATE tokens, whereas directly generating the final answer either requires a huge depth or cannot solve the problem at all. Source: reasoning talk by Google reasoning team lead Denny Zhou, https://youtu.be/ebnX5Ur1hBk?si=cfl9k0_CGLuxeN5y)
- Above implies that if we simply scale the token budget for reasoning which is now in our control, we could potentially come up with such a computation graph for any problem? 
- To note: we may still be bypassing the natural training of the model by using such structured generation! During SFT and RLFT, the models may be trained to generate their thinking between <think> </think> tokens. It remains to be seen how they stop the thinking process, and whether it ends naturally or only when the model outputs </think>. Next question: how can thinking simply be turned off or on, for example in Qwen models?
4. Next Steps:
- See if other entities from the pdf are extractable.
- Modify the SymPy solver to handle these additional nodes.

In [13]:
# Builder bound to the guidance model created earlier in the notebook.
graph_builder = ComputationGraphBuilder(model=model)

# The query fixes stand type, location and overnight status in its text and
# leaves only the parking duration as a variable.
my_query = "Calculate the total parking charge for a Long Term Remote stand type at West Aerodrome Parking with no overnight parking. The parking duration is a variable with unit hours."
my_charge_category = "west_aerodrome_parking_charge"

# 3. Call the build method to generate the graph
# Wall-clock timing around the call (build is also wrapped by timing_decorator).
start_time = time.perf_counter()
expression_tree_dict = graph_builder.build(
    document_content=markdown_content,
    query=my_query,
    charge_category=my_charge_category
)
end_time = time.perf_counter()
execution_time = end_time - start_time
# print(f"Execution time for '{func.__name__}': {execution_time:.4f} seconds")

--- Building graph for charge category: 'west_aerodrome_parking_charge' ---


StitchWidget(initial_height='auto', initial_width='100%', srcdoc='<!doctype html>\n<html lang="en">\n<head>\n …

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [14]:
# Elapsed wall-clock seconds of the build call (difference of two perf_counter readings).
execution_time
# print(expression_tree_dict['thought'])
# loaded_json = json.loads(expression_tree_dict['result_graph'])
# print(json.dumps(loaded_json, indent=4))

261.31297062599333

In [17]:
# execution_time
# NOTE(review): the guidance program defined in this notebook captures
# "thought1".."thought4"; the single "thought" key read here (and the keys in
# this cell's output) appear to come from an earlier version of the program.
loaded_json = json.loads(expression_tree_dict['thought'])
print(json.dumps(loaded_json, indent=4))
# loaded_json = json.loads(expression_tree_dict['result_graph'])
# print(json.dumps(loaded_json, indent=4))

{
    "identified_values": [
        "parking_duration_hours",
        "aircraft_stand_type",
        "is_overnight_parking"
    ],
    "identified_variables": [
        "parking_duration_hours",
        "aircraft_stand_type",
        "is_overnight_parking"
    ],
    "synthesis_plan": "The total parking charge depends on the stand type (West Aerodrome Parking Long Term Remote), parking duration, and whether it's overnight. The base rate is \u20ac180.00 per day or part thereof. Since it's not overnight, we don't apply the free night-time charge. We need to calculate the number of days (ceiling of duration/24) and multiply by the base rate. No surcharges apply since the duration is not 48+ hours.",
    "rethink": "I need to verify the exact charging rules for Long Term Remote in WAP. The document says it's charged per day or part thereof. Also, check if there are any surcharges for extended periods. The surcharge applies for 48-72 hours (100% surcharge) and 72+ hours (200% surcharge). H

In [25]:
# execution_time
# print(expression_tree_dict['thought'])
# Parse the captured final answer (a JSON string) and pretty-print it.
loaded_json = json.loads(expression_tree_dict['result_graph'])
print(json.dumps(loaded_json, indent=4))

{
    "type": "BINARY_OPERATION",
    "operator": "ADD",
    "left": {
        "type": "CONDITIONAL",
        "condition": {
            "type": "COMPARISON",
            "operator": "GREATER_THAN",
            "left": {
                "type": "VARIABLE",
                "name": "parking_duration_hours",
                "description": "Total duration of parking in hours. Surcharges apply at 48 and 72 hours.",
                "unit": "HOURS"
            },
            "right": {
                "type": "VALUE",
                "value": 72,
                "description": "Threshold for 200% surcharge",
                "unit": "HOURS"
            }
        },
        "if_true": {
            "type": "BINARY_OPERATION",
            "operator": "MULTIPLY",
            "left": {
                "type": "BINARY_OPERATION",
                "operator": "MULTIPLY",
                "left": {
                    "type": "BINARY_OPERATION",
                    "operator": "DIVIDE",
               

1. Changes:
- Expanding problem sets
- Expanded equation composer functionality to include newer nodes

In [45]:


# Print the resulting expression
# Parse the captured graph JSON and convert it into a SymPy expression.
loaded_json = json.loads(expression_tree_dict['result_graph'])
symbolic_expr = compose_expression(loaded_json)
print(f"Sympy Expression: {symbolic_expr}")

Sympy Expression: Piecewise((((parking_duration_hours/24)*180)*3, parking_duration_hours > 72), (Piecewise(((180*2)*(parking_duration_hours/24), parking_duration_hours > 48), ((parking_duration_hours/24)*180, True)), True)) + 0


In [46]:
from sympy import Symbol

# Define the symbol that you want to replace
# (the name must match the VARIABLE node name used in the graph)
variable_to_replace = Symbol('parking_duration_hours')

# Define the value you want to substitute
value_to_substitute = 60

# Substituting collapses the Piecewise to the matching branch. For the
# expression printed in the previous cell, 60 hours falls in the "> 48" branch:
# (180*2)*(60/24) = 900.
final_charge_cost = symbolic_expr.subs(variable_to_replace, value_to_substitute)
print(f"The parking cost for {value_to_substitute} hours is: {final_charge_cost} EUR") 

The parking cost for 60 hours is: 900 EUR


1. Changes:
- Expanding problem sets
- Expanded equation composer functionality to include newer nodes [DONE]
- Functionalize creating computation graphs

In [56]:


# Print the resulting expression
# Same statements as the earlier "Print the resulting expression" cell, re-run
# against whatever `expression_tree_dict` currently holds.
loaded_json = json.loads(expression_tree_dict['result_graph'])
symbolic_expr = compose_expression(loaded_json)
print(f"Sympy Expression: {symbolic_expr}")

1. Changes:
- Expanding problem sets
- Expanded equation composer functionality to include newer nodes [DONE]
- Functionalize creating computation graphs [DONE]

In [16]:
# Same request as the Long Term Remote run, but for a Narrow Remote stand.
query = "Calculate the total parking charge for a Narrow Remote stand type at West Aerodrome Parking with no overnight parking. The parking duration is a variable with unit hours."
charge_category = "west_aerodrome_parking_charge"

graph_builder = ComputationGraphBuilder(model=model)

# Inline copy of what create_computation_graph does: time one build call.
# On failure, build returns {"error": ...} instead of raising (the recorded
# run of this cell hit a CUDA out-of-memory error).
start_time = time.perf_counter()
llm_structured_response = graph_builder.build(
    document_content=markdown_content,
    query=query,
    charge_category=charge_category
)
end_time = time.perf_counter()
build_time = end_time - start_time

# computation_graph, llm_structured_thought, build_time = create_computation_graph(model, query, charge_category)


--- Building graph for charge category: 'west_aerodrome_parking_charge' ---


StitchWidget(initial_height='auto', initial_width='100%', srcdoc='<!doctype html>\n<html lang="en">\n<head>\n …


An error occurred while building the graph for 'Calculate the total parking charge for a Narrow Remote stand type at West Aerodrome Parking with no overnight parking. The parking duration is a variable with unit hours.': CUDA out of memory. Tried to allocate 1.16 GiB. GPU 1 has a total capacity of 44.45 GiB of which 612.56 MiB is free. Process 236628 has 41.19 GiB memory in use. Including non-PyTorch memory, this process has 2.65 GiB memory in use. Of the allocated memory 1.58 GiB is allocated by PyTorch, and 780.21 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Execution time for 'build': 85.2672 seconds


In [52]:

# NOTE(review): no cell above assigns `llm_structured_thought` or
# `computation_graph`; the only statement that sets both is commented out in
# the previous cell, so this cell relies on leftover kernel state.
print(json.dumps(llm_structured_thought, indent=4))

print(json.dumps(computation_graph, indent=4))

# Convert the graph dict into a SymPy expression and show it.
symbolic_expr = compose_expression(computation_graph)
print(f"Sympy Expression: {symbolic_expr}")

{
    "identified_values": [
        "parking_duration_hours",
        "aircraft_stand_type",
        "is_overnight_parking"
    ],
    "identified_variables": [
        "parking_duration_hours",
        "aircraft_stand_type",
        "is_overnight_parking"
    ],
    "synthesis_plan": "The total parking charge calculation requires: 1. Determining the base rate for Narrow Remote stands at WAP 2. Calculating the number of 15-minute intervals in the parking duration 3. Applying surcharges if parking duration exceeds 48 or 72 hours 4. Considering overnight parking status (though this case specifies no overnight parking) The formula would be: Total Charge = (Base Rate \u00d7 Number of Intervals) \u00d7 (1 + Surcharges) The surcharge depends on duration: - No surcharge for <48 hours - 100% surcharge for 48-72 hours - 200% surcharge for >72 hours",
    "rethink": "I need to make sure I correctly interpret the surcharge structure from the document. The WAP surcharge table shows 48-72 hours ha

In [59]:
# Scratch cell — presumably a check that the kernel still responds; safe to delete.
print('hi')

hi


In [71]:
# Narrow Remote stand at West Aerodrome Parking, duration left symbolic.
query = "Calculate the total parking charge for a Narrow Remote stand type at West Aerodrome Parking with no overnight parking. The parking duration is a variable with unit hours."
charge_category = "west_aerodrome_parking_charge"

# `result` is the tuple returned by create_computation_graph.
result = create_computation_graph(model, query, charge_category)


--- Building graph for charge category: 'west_aerodrome_parking_charge' ---


StitchWidget(initial_height='auto', initial_width='100%', srcdoc='<!doctype html>\n<html lang="en">\n<head>\n …

In [72]:
# Display the whole tuple returned by create_computation_graph.
result

({'type': 'BINARY_OPERATION',
  'operator': 'MULTIPLY',
  'left': {'type': 'VARIABLE',
   'name': 'parking_duration_hours',
   'description': 'Total duration of parking in hours.',
   'unit': 'HOURS'},
  'right': {'type': 'BINARY_OPERATION',
   'operator': 'DIVIDE',
   'left': {'type': 'VALUE',
    'value': 4.0,
    'description': 'Number of 15-minute intervals per hour.',
    'unit': 'UNITLESS'},
   'right': {'type': 'VALUE',
    'value': 1.0,
    'description': 'Conversion from hours to 15-minute intervals.',
    'unit': 'UNITLESS'}}},
 {'fixed_parameters': {'aircraft_stand_type': 'Narrow Remote',
   'parking_duration_hours': 'variable',
   'is_overnight_parking': 'False'},
  'symbolic_variables': ['parking_duration_hours'],
  'constants_from_document': {'standard_charge_per_aircraft_stand_type': 7.7,
   'surcharges': {'48_hours_up_to_72_hours': 0, '72_hours_and_over': 0}},
  'synthesis_plan': 'The total parking charge is calculated by multiplying the standard charge per aircraft/sta

In [15]:
# Scratch cell — presumably a check that the kernel still responds; safe to delete.
print('hi')

hi


In [16]:
# Re-run of the Narrow Remote / West Aerodrome Parking query (same text as the
# earlier cell); `result` is overwritten with the new tuple.
query = "Calculate the total parking charge for a Narrow Remote stand type at West Aerodrome Parking with no overnight parking. The parking duration is a variable with unit hours."
charge_category = "west_aerodrome_parking_charge"

result = create_computation_graph(model, query, charge_category)


--- Building graph for charge category: 'west_aerodrome_parking_charge' ---


StitchWidget(initial_height='auto', initial_width='100%', srcdoc='<!doctype html>\n<html lang="en">\n<head>\n …

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [22]:
# NOTE(review): create_computation_graph as defined in this notebook returns a
# 2-tuple, so index 2 would not exist; the recorded output looks like the build
# time in seconds from a 3-tuple version — confirm.
result[2]

388.95055588299874

In [18]:
# NOTE(review): the recorded output is a dict of reasoning fields, whereas the
# current create_computation_graph puts the elapsed seconds at index 1 — confirm
# which version of the function produced this output.
result[1]

{'query_parameters': [{'name': 'aircraft_stand_type',
   'status': 'KNOWN',
   'value': 'Narrow Remote'},
  {'name': 'parking_duration_hours', 'status': 'SYMBOLIC', 'value': None},
  {'name': 'is_overnight_parking', 'status': 'KNOWN', 'value': 'False'}],
 'constants_from_document': {'global_constants': [{'name': 'parking_surcharge_48_to_72_hours',
    'value': 100},
   {'name': 'parking_surcharge_72_hours_and_over', 'value': 200},
   {'name': 'overnight_parking_free_period', 'value': '2300-0600hrs'}],
  'conditional_constants': [{'name': 'standard_charge_wap_narrow_remote',
    'value': 7.7},
   {'name': 'surcharge_48_to_72_hours_wap', 'value': 100},
   {'name': 'surcharge_72_hours_and_over_wap', 'value': 200}]},
 'synthesis_plan': 'The total parking charge is calculated by first determining the standard charge per 15 minutes for the Narrow Remote stand at WAP. Then, based on the parking duration, apply the appropriate surcharge percentage. Since the duration is symbolic, we need to cr

In [23]:
# First element of the result tuple; json.dumps requires it to be plain
# JSON-serialisable data (the recorded output shows a nested dict).
computation_graph = result[0]
print(json.dumps(computation_graph, indent=4))

{
    "type": "BINARY_OPERATION",
    "operator": "ADD",
    "left": {
        "type": "CONDITIONAL",
        "condition": {
            "type": "COMPARISON",
            "operator": "LESS_THAN",
            "left": {
                "type": "VARIABLE",
                "name": "parking_duration_hours",
                "description": "Total duration of parking in hours. Surcharges apply at 48 and 72 hours.",
                "unit": "HOURS"
            },
            "right": {
                "type": "VALUE",
                "value": 48,
                "description": "Threshold for 48-hour surcharge.",
                "unit": "HOURS"
            }
        },
        "if_true": {
            "type": "BINARY_OPERATION",
            "operator": "MULTIPLY",
            "left": {
                "type": "BINARY_OPERATION",
                "operator": "MULTIPLY",
                "left": {
                    "type": "VARIABLE",
                    "name": "parking_duration_hours",
          

In [30]:
# Inputs for the East Aerodrome Parking scenario: the stand type is fixed,
# while the parking duration is left symbolic (in hours).
charge_category = "east_aerodrome_parking_charge"
query = (
    "Calculate the total parking charge for a Narrow Satellite stand type "
    "at East Aerodrome Parking with no overnight parking. "
    "The parking duration is a variable with unit hours."
)

# Build the computation graph for this query with the loaded model.
result = create_computation_graph(model, query, charge_category)


--- Building graph for charge category: 'east_aerodrome_parking_charge' ---


StitchWidget(initial_height='auto', initial_width='100%', srcdoc='<!doctype html>\n<html lang="en">\n<head>\n …

In [33]:
# The first element of `result` is the computation graph: a JSON-serializable
# nested structure of typed nodes. Pretty-print it for inspection.
computation_graph = result[0]
print(json.dumps(computation_graph, indent=4))

{
    "type": "BINARY_OPERATION",
    "operator": "ADD",
    "left": {
        "type": "CONDITIONAL",
        "condition": {
            "type": "COMPARISON",
            "operator": "LESS_THAN",
            "left": {
                "type": "VARIABLE",
                "name": "parking_duration_hours",
                "description": "Total duration of parking in hours. Surcharges apply at 48 and 72 hours.",
                "unit": "HOURS"
            },
            "right": {
                "type": "VALUE",
                "value": 48,
                "description": "Time thresholds for surcharges",
                "unit": "HOURS"
            }
        },
        "if_true": {
            "type": "BINARY_OPERATION",
            "operator": "MULTIPLY",
            "left": {
                "type": "VALUE",
                "value": 32.9,
                "description": "Standard charge per aircraft/stand type for Narrow Satellite at East Aerodrome Parking",
                "unit": "EUROS"

In [31]:
# Display the second element of `result`. In this run it is a dict of
# intermediate reasoning with the keys 'query_parameters',
# 'constants_from_document' and 'synthesis_plan' (see the output below).
result[1]

{'query_parameters': [{'name': 'aircraft_stand_type',
   'status': 'KNOWN',
   'value': 'Narrow Satellite'},
  {'name': 'parking_duration_hours', 'status': 'SYMBOLIC', 'value': None},
  {'name': 'is_overnight_parking', 'status': 'KNOWN', 'value': 'False'}],
 'constants_from_document': {'global_constants': [{'description': 'Standard charge per aircraft/stand type for Narrow Satellite at East Aerodrome Parking',
    'value': 32.9,
    'unit': 'EUROS'},
   {'description': 'Minimum charge for light aircraft',
    'value': 3.2,
    'unit': 'EUROS'},
   {'description': 'Surcharges for parking duration',
    'value': 100,
    'unit': 'EUROS'},
   {'description': 'Surcharges for parking duration',
    'value': 200,
    'unit': 'EUROS'},
   {'description': 'Time thresholds for surcharges',
    'value': 48,
    'unit': 'HOURS'},
   {'description': 'Time thresholds for surcharges',
    'value': 72,
    'unit': 'HOURS'}],
  'conditional_constants': []},
 'synthesis_plan': 'The total parking charge

In [32]:
# Display the third element of `result`. The value shown is a float and is
# presumably the elapsed build time in seconds — TODO confirm against the
# return value of create_computation_graph.
result[2]

253.98508896899875

In [25]:
# Inputs for the East Aerodrome Parking scenario: the stand type is fixed,
# while the parking duration is left symbolic (in hours).
charge_category = "east_aerodrome_parking_charge"
query = (
    "Calculate the total parking charge for a Narrow Satellite stand type "
    "at East Aerodrome Parking with no overnight parking. "
    "The parking duration is a variable with unit hours."
)

# Build the computation graph for this query with the loaded model.
result = create_computation_graph(model, query, charge_category)


--- Building graph for charge category: 'east_aerodrome_parking_charge' ---


StitchWidget(initial_height='auto', initial_width='100%', srcdoc='<!doctype html>\n<html lang="en">\n<head>\n …

Execution time for 'build': 597.8819 seconds


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [27]:
# Display the second element of `result`. In this run it is a dict of
# intermediate reasoning; the visible output includes 'query_parameters' and
# 'constants_from_document' (with 'global_constants' and 'lookup_constants').
result[1]

{'query_parameters': [{'name': 'aircraft_stand_type',
   'status': 'KNOWN',
   'value': 'Narrow Satellite'},
  {'name': 'parking_duration_hours', 'status': 'SYMBOLIC', 'value': None},
  {'name': 'is_overnight_parking', 'status': 'KNOWN', 'value': 'False'}],
 'constants_from_document': {'global_constants': [{'description': 'Standard charge per aircraft/stand type for Narrow Satellite at East Aerodrome Parking',
    'value': 32.9,
    'unit': 'EUROS'},
   {'description': 'Minimum charge for light aircraft',
    'value': 3.2,
    'unit': 'EUROS'},
   {'description': 'Surcharges for parking duration',
    'value': 100,
    'unit': 'PERCENT'},
   {'description': 'Surcharges for parking duration',
    'value': 200,
    'unit': 'PERCENT'},
   {'description': 'Time thresholds for surcharges',
    'value': 48,
    'unit': 'HOURS'},
   {'description': 'Time thresholds for surcharges',
    'value': 72,
    'unit': 'HOURS'}],
  'lookup_constants': [{'condition': {'aircraft_stand_type': 'Narrow Sat

In [34]:
# Inputs for the East Aerodrome Parking scenario: the stand type is fixed,
# while the parking duration is left symbolic (in hours).
charge_category = "east_aerodrome_parking_charge"
query = (
    "Calculate the total parking charge for a Narrow Satellite stand type "
    "at East Aerodrome Parking with no overnight parking. "
    "The parking duration is a variable with unit hours."
)

# Build the computation graph for this query with the loaded model.
result = create_computation_graph(model, query, charge_category)


--- Building graph for charge category: 'east_aerodrome_parking_charge' ---


StitchWidget(initial_height='auto', initial_width='100%', srcdoc='<!doctype html>\n<html lang="en">\n<head>\n …

Execution time for 'build': 267.1873 seconds


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [39]:
# Display the second element of `result`. In this run it is a float that
# matches the "Execution time for 'build'" figure logged by the preceding
# create_computation_graph call, i.e. the elapsed build time in seconds.
result[1]

267.1874566380284

In [50]:
# result[0]["thought1"] holds a JSON string; parse it and re-serialize with
# indentation for readability. In the output shown it holds 'query_parameters'.
print(json.dumps(json.loads(result[0]["thought1"]), indent=4))

{
    "query_parameters": [
        {
            "name": "aircraft_stand_type",
            "status": "KNOWN",
            "value": "Narrow Satellite"
        },
        {
            "name": "parking_duration_hours",
            "status": "SYMBOLIC",
            "value": null
        },
        {
            "name": "is_overnight_parking",
            "status": "KNOWN",
            "value": "False"
        }
    ]
}


In [51]:
# result[0]["thought2"] holds a JSON string; parse it and re-serialize with
# indentation for readability. In the output shown it holds
# 'constants_from_document'.
print(json.dumps(json.loads(result[0]["thought2"]), indent=4))

{
    "constants_from_document": {
        "global_constants": [
            {
                "description": "Standard charge per aircraft/stand type for Narrow Satellite at East Aerodrome Parking",
                "value": 32.9,
                "unit": "EUROS"
            },
            {
                "description": "Surcharges for parking duration",
                "value": 100,
                "unit": "PERCENT"
            },
            {
                "description": "Surcharges for parking duration",
                "value": 200,
                "unit": "PERCENT"
            },
            {
                "description": "Time thresholds for surcharges",
                "value": 48,
                "unit": "HOURS"
            },
            {
                "description": "Time thresholds for surcharges",
                "value": 72,
                "unit": "HOURS"
            }
        ],
        "lookup_constants": [
            {
                "condition": {
         

In [52]:
# result[0]["thought3"] holds a JSON string with the model's synthesis plan;
# parse it and re-serialize with indentation for readability.
# ensure_ascii=False keeps non-ASCII characters (e.g. the euro sign) readable
# instead of emitting \uXXXX escapes in the printed plan.
print(json.dumps(json.loads(result[0]["thought3"]), indent=4, ensure_ascii=False))

{
    "synthesis_plan": "The total parking charge is calculated by first determining the standard charge for the Narrow Satellite stand at East Aerodrome Parking, which is \u20ac32.90 per 15 minutes or part thereof. Since the parking duration is a variable, we need to apply surcharges based on the duration. If the duration is between 48 and 72 hours, a 100% surcharge is applied. If it's over 72 hours, a 200% surcharge is applied. The final charge is the standard charge multiplied by (1 + surcharge percentage)."
}


In [53]:
# result[0]["thought4"] holds a JSON string; parse it and re-serialize with
# indentation for readability. In the output shown it holds the 'rethink' text.
print(json.dumps(json.loads(result[0]["thought4"]), indent=4))

{
    "rethink": "The approach correctly identifies the standard charge for the Narrow Satellite stand at East Aerodrome Parking. The surcharge logic is properly structured with conditional checks for the parking duration. The calculation formula is accurate, using the standard charge multiplied by (1 + surcharge percentage). No adjustments are needed to the approach."
}


In [54]:
# result[0]["result_graph"] holds the final computation graph as a JSON string;
# parse it and pretty-print the nested node structure.
print(json.dumps(json.loads(result[0]["result_graph"]), indent=4))

{
    "type": "BINARY_OPERATION",
    "operator": "MULTIPLY",
    "left": {
        "type": "BINARY_OPERATION",
        "operator": "ADD",
        "left": {
            "type": "VALUE",
            "value": 1,
            "description": "Base multiplier for standard charge",
            "unit": "UNITLESS"
        },
        "right": {
            "type": "CONDITIONAL",
            "condition": {
                "type": "COMPARISON",
                "operator": "EQUAL_TO",
                "left": {
                    "type": "VARIABLE",
                    "name": "parking_duration_hours",
                    "description": "Total duration of parking in hours",
                    "unit": "HOURS"
                },
                "right": {
                    "type": "VALUE",
                    "value": 48,
                    "description": "Time threshold for surcharge",
                    "unit": "HOURS"
                }
            },
            "if_true": {
                "

In [70]:
# Inputs for the East Aerodrome Parking scenario: the stand type is fixed,
# while the parking duration is left symbolic (in hours).
charge_category = "east_aerodrome_parking_charge"
query = (
    "Calculate the total parking charge for a Narrow Satellite stand type "
    "at East Aerodrome Parking with no overnight parking. "
    "The parking duration is a variable with unit hours."
)

# Build the computation graph for this query with the loaded model.
result = create_computation_graph(model, query, charge_category)


--- Building graph for charge category: 'east_aerodrome_parking_charge' ---


StitchWidget(initial_height='auto', initial_width='100%', srcdoc='<!doctype html>\n<html lang="en">\n<head>\n …

In [71]:
# result[0]["thought1"] holds a JSON string; parse it and re-serialize with
# indentation for readability. In the output shown it holds 'query_parameters'.
print(json.dumps(json.loads(result[0]["thought1"]), indent=4))

{
    "query_parameters": [
        {
            "name": "aircraft_stand_type",
            "status": "KNOWN",
            "value": "Narrow Satellite"
        },
        {
            "name": "parking_duration_hours",
            "status": "SYMBOLIC",
            "value": null
        },
        {
            "name": "is_overnight_parking",
            "status": "KNOWN",
            "value": false
        }
    ]
}


In [72]:
# result[0]["thought2"] holds a JSON string listing the constants and rules the
# model identified; parse it and re-serialize with indentation for readability.
# ensure_ascii=False keeps non-ASCII characters (e.g. the euro sign) readable
# instead of emitting \uXXXX escapes in the printed text.
print(json.dumps(json.loads(result[0]["thought2"]), indent=4, ensure_ascii=False))

{
    "identified_constants_and_rules": [
        "The standard charge per aircraft/stand type for Narrow Satellite at East Aerodrome Parking (EAP) is \u20ac32.90 per 15 minutes or part thereof.",
        "Parking surcharges apply for extended periods at EAP: 48 hours up to 72 hours (including night-time) attract a surcharge of Standard rate +100%, and 72 hours and over (including night-time) attract a surcharge of Standard rate +200%.",
        "Overnight parking from 2300-0600hrs is free of charge. The query specifies no overnight parking (is_overnight_parking = false).",
        "The parking duration is a variable with unit hours, meaning the calculation must account for all possible values of parking_duration_hours."
    ]
}


In [73]:
# result[0]["thought3"] holds a JSON string with the model's synthesis plan;
# parse it and re-serialize with indentation for readability.
# ensure_ascii=False keeps non-ASCII characters (the euro sign and the
# less-than-or-equal / greater-than-or-equal signs) readable instead of
# emitting \uXXXX escapes in the printed plan.
print(json.dumps(json.loads(result[0]["thought3"]), indent=4, ensure_ascii=False))

{
    "synthesis_plan": "The total parking charge calculation for a Narrow Satellite stand at EAP involves: 1. Determining the base rate (\u20ac32.90 per 15 minutes or part thereof). 2. Calculating the number of 15-minute intervals in the parking duration. 3. Applying surcharges based on the duration: - If parking_duration_hours < 48: no surcharge. - If 48 \u2264 parking_duration_hours < 72: add 100% surcharge. - If parking_duration_hours \u2265 72: add 200% surcharge. 4. Since the duration is symbolic, the expression tree must include conditional logic for all possible duration ranges."
}


In [74]:
# result[0]["thought4"] holds a JSON string; parse it and re-serialize with
# indentation for readability. In the output shown it holds the 'rethink' text.
print(json.dumps(json.loads(result[0]["thought4"]), indent=4))

{
    "rethink": "The approach correctly identifies the base rate, the surcharge conditions based on duration, and the need for conditional logic due to the symbolic duration. The is_overnight_parking parameter is set to false, so no adjustment for free overnight parking is needed. The expression tree will accurately represent the calculation logic for any given parking duration."
}


In [75]:
# result[0]["result_graph"] holds the final computation graph as a JSON string;
# parse it and pretty-print the nested node structure.
print(json.dumps(json.loads(result[0]["result_graph"]), indent=4))

{
    "type": "BINARY_OPERATION",
    "operator": "ADD",
    "left": {
        "type": "CONDITIONAL",
        "condition": {
            "type": "COMPARISON",
            "operator": "LESS_THAN",
            "left": {
                "type": "VARIABLE",
                "name": "parking_duration_hours",
                "description": "Total duration of parking in hours. Surcharges apply at 48 and 72 hours.",
                "unit": "HOURS"
            },
            "right": {
                "type": "VALUE",
                "value": 48,
                "description": "Threshold for 48-hour surcharge.",
                "unit": "HOURS"
            }
        },
        "if_true": {
            "type": "BINARY_OPERATION",
            "operator": "MULTIPLY",
            "left": {
                "type": "VALUE",
                "value": 32.9,
                "description": "Standard charge per aircraft/stand type for Narrow Satellite at East Aerodrome Parking (EAP).",
                "unit"

In [76]:
# Display the second element of `result`. In this run it is a float and is
# presumably the elapsed build time in seconds — TODO confirm against the
# return value of create_computation_graph.
result[1]

352.01867974799825

In [79]:



# Parse the JSON-encoded graph stored under result[0]['result_graph'], convert
# it with compose_expression (defined elsewhere in this notebook), and print
# the resulting expression. Judging by the printed Piecewise(...) form, the
# result is presumably a SymPy expression — TODO confirm.
loaded_json = json.loads(result[0]['result_graph'])
symbolic_expr = compose_expression(loaded_json)
print(f"Sympy Expression: {symbolic_expr}")

Sympy Expression: Piecewise((32.9*(parking_duration_hours/0.25), parking_duration_hours < 48), (Piecewise(((32.9*2)*(parking_duration_hours/0.25), parking_duration_hours < 72), ((32.9*3)*(parking_duration_hours/0.25), True)), True)) + 0
