In [16]:
import torch
torch.cuda.empty_cache()

import time
import json
from transformers import AutoTokenizer, AutoModelForCausalLM
from pydantic import BaseModel, Field, RootModel
from typing import Optional, Union, Literal, ForwardRef, List, Any
from enum import Enum
from guidance import models, system, user, assistant, json as gen_json, gen
import torch, outlines
from transformers import AutoModelForCausalLM, AutoTokenizer
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from pydantic import BaseModel, Field, RootModel
from typing import Optional, Union
from enum import Enum
from guidance import models, system, user, assistant, json as gen_json
import guidance
from utils import timing_decorator

In [17]:

with open("../output/2025-airport-charges-terms-and-conditions/tinychargesmarkdown.md", "r") as f:
    markdown_content = f.read()


In [18]:

MODEL_ID = "Qwen/Qwen3-30B-A3B"

hf_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",               
    low_cpu_mem_usage=True,          
)


tok        = AutoTokenizer.from_pretrained(MODEL_ID)


Loading checkpoint shards:   0%|          | 0/16 [00:00<?, ?it/s]

In [4]:

model = guidance.models.Transformers(hf_model, tok)

gpustat is not installed, run `pip install gpustat` to collect GPU stats.


In [5]:
from pydantic import BaseModel, Field
from typing import List, Any

class DomainVariable(BaseModel):
    """Defines a single variable the LLM can use in the computation graph."""
    name: str = Field(..., description="The unique identifier for the variable.")
    description: str = Field(..., description="A detailed explanation of what this variable represents.")
    # Optional: You could add type hints, units, etc. for more advanced validation
    unit: Optional[str] = Field(..., description="The unit of the variable")
    data_type : type = Field(..., description="The data type of the variable")


In [6]:
ALL_VARIABLES = {
    
    'transfer_passenger_count': DomainVariable(name='transfer_passenger_count', description='Total number of transferring passengers.', unit=None, data_type=float),
    'airline_scheduling_season': DomainVariable(name='airline_scheduling_season', description='Whether summer/winter airline scheduling season.', unit=None, data_type=float),
    'takeoff_aircraft_mtow_tonnes': DomainVariable(name='takeoff_aircraft_mtow_tonnes', description='The Maximum Take-Off Weight in tonnes.', unit='tonne', data_type=float),
    'landing_aircraft_mtow_tonnes': DomainVariable(name='landing_aircraft_mtow_tonnes', description='The Maximum Landing Weight in tonnes.', unit='tonne', data_type=float),
    
    'parking_duration_hours': DomainVariable(
        name='parking_duration_hours',
        description='Total duration of parking in hours. Surcharges apply at 48 and 72 hours.',
        unit='hours',
        data_type=float
    ),
    'aircraft_stand_type': DomainVariable(
        name='aircraft_stand_type',
        description='The type of aircraft stand used for parking. E.g., "Wide Contact", "Narrow Remote", "LAP", "Long Term Remote".',
        unit=None,
        data_type=str
    ),
    'parking_location': DomainVariable(
        name='parking_location',
        description='The location of the parking stand, either "EAP" (East Aerodrome Parking) or "WAP" (West Aerodrome Parking).',
        unit=None,
        data_type=str
    ),
    'is_overnight_parking': DomainVariable(
        name='is_overnight_parking',
        description='True if the parking occurs during the free overnight period (2300-0600hrs).',
        unit=None,
        data_type=bool
    )
}
# 2. Map charge types to the variable names they are allowed to use
CHARGE_CATEGORY_VARIABLES = {
    # --- Existing Categories ---
    "transfer_passenger_charge": [
        'transfer_passenger_count', 
        'airline_scheduling_season'
    ],
    "runway_landing_charge": [
        'landing_aircraft_mtow_tonnes', 
        'airline_scheduling_season'
    ],
    "runway_takeoff_charge": [
        'takeoff_aircraft_mtow_tonnes', 
        'airline_scheduling_season'
    ],

    # --- New Categories for Parking Charges ---
    "east_aerodrome_parking_charge": [
        'parking_duration_hours',
        'aircraft_stand_type',
        'is_overnight_parking'
        # 'parking_location' is implicitly 'EAP' for this category
    ],
    "west_aerodrome_parking_charge": [
        'parking_duration_hours',
        'aircraft_stand_type',
        'is_overnight_parking'
        # 'parking_location' is implicitly 'WAP' for this category
    ]
}



In [7]:
def create_dynamic_variable_enum(charge_category: str) -> type(Enum):
    """
    Creates a new Enum class containing only the variables relevant
    to the specified charge category.
    """
    variable_names = CHARGE_CATEGORY_VARIABLES.get(charge_category)
    if not variable_names:
        raise ValueError(f"Unknown charge category: {charge_category}")
    
    # The dictionary for the Enum must have {MEMBER_NAME: value}
    # We'll use uppercase for the member name for convention.
    enum_dict = {name.upper(): name for name in variable_names}
    
    # Create the Enum class dynamically
    return Enum("Var", enum_dict)

Var = create_dynamic_variable_enum("transfer_passenger_charge")
print(Var.TRANSFER_PASSENGER_COUNT.value)

transfer_passenger_count


In [8]:
allowed_variables = [el.value for el in list(Var)]
allowed_variables

['transfer_passenger_count', 'airline_scheduling_season']

In [9]:
from pydantic import BaseModel, Field, RootModel
from typing import Union, Literal
from enum import Enum

# Separate enums for clarity and type safety
class MathOperator(str, Enum):
    ADD = "ADD"
    MULTIPLY = "MULTIPLY"
    DIVIDE = "DIVIDE"

class Comparator(str, Enum):
    GREATER_THAN = "GREATER_THAN"
    LESS_THAN = "LESS_THAN"
    EQUAL_TO = "EQUAL_TO"

class Units(str, Enum):
    HOURS = "HOURS"
    MINUTES = "MINUTES"
    EUROS = "EUROS"
    PERCENT = "PERCENT"
    UNITLESS = "UNITLESS"
    
# --- Node Definitions ---

class ValueNode(BaseModel):
    type: Literal["VALUE"] = "VALUE"
    value: float
    description: str
    unit: Units

class VariableNode(BaseModel):
    type: Literal["VARIABLE"] = "VARIABLE"
    name: str 
    description: str
    unit: Units

class BinaryOpNode(BaseModel):
    """Node for mathematical operations that produce a number."""
    type: Literal["BINARY_OPERATION"] = "BINARY_OPERATION"
    operator: MathOperator
    left: 'AnyNode'
    right: 'AnyNode'

class ComparisonNode(BaseModel):
    """Node for comparison operations that produce a boolean."""
    type: Literal["COMPARISON"] = "COMPARISON"
    operator: Comparator
    left: 'AnyNode'
    right: 'AnyNode'

class ConditionalNode(BaseModel):
    """Node for if-then-else logic."""
    type: Literal["CONDITIONAL"] = "CONDITIONAL"
    condition: ComparisonNode # Condition must be a comparison
    if_true: 'AnyNode'
    if_false: 'AnyNode'

# --- Recursive Setup ---

AnyNode = Union[
    ValueNode, 
    VariableNode, 
    BinaryOpNode, 
    ConditionalNode
]

# Use model_rebuild() to safely resolve all forward references
BinaryOpNode.model_rebuild()
ConditionalNode.model_rebuild()
ComparisonNode.model_rebuild()

class Node(RootModel):
    root: BinaryOpNode

In [10]:
from pydantic import BaseModel, Field, field_validator
from typing import List, Dict, Any, Optional
from enum import Enum


class ParameterStatus(str, Enum):
    """An enumeration for clear, explicit parameter statuses."""
    KNOWN = "KNOWN"
    SYMBOLIC = "SYMBOLIC"

class ParameterDetail(BaseModel):
    """A structured model to describe each parameter identified from the query."""
    name: str = Field(..., description="The name of the parameter.")
    status: ParameterStatus = Field(..., description="Whether the parameter's value is known from the query or is a symbolic variable.")
    value: Optional[Any] = Field(None, description="The actual value of the parameter, if its status is 'KNOWN'. Must be null if status is 'SYMBOLIC'.")


class ReasoningSchemaStep1(BaseModel):
    query_parameters: List[ParameterDetail] = Field(
        ...,
        description="A structured list of all parameters identified from the query and their status."
    )
class ReasoningSchemaStep2(BaseModel):
    """
    A simplified schema for Step 2 that captures all constants and rules as a simple list of descriptive strings.
    """
    identified_constants_and_rules: List[str] = Field(
        ...,
        description="A comprehensive list of all facts, constants, and conditional rules extracted from the document that are necessary for the final calculation. Each string in the list should be a self-contained, clear statement. For example: 'The rate for a Narrow Satellite stand is €32.90 per 15 minutes' or 'A 100% surcharge is applied if parking duration is between 48 and 72 hours'."
    )
class ReasoningSchemaStep3(BaseModel):
    synthesis_plan: str = Field(
        ...,
        description="A concise, step-by-step plan describing how the variables and constants are combined into the final computation graph."
    )
class ReasoningSchemaStep4(BaseModel):
    rethink: str = Field(
        ...,
        description="Final check to ensure the plan correctly uses variables and constants and handles all logic from the document."
    )


In [11]:
@guidance
def create_graph_with_cot(llm, allowed_variables_prompt, document, query, output_schema):
    
    with system():
        llm += f"""You are an expert system that converts textual calculation rules into structured JSON expression trees.
        You MUST think step-by-step and reason before generating the final JSON.
        
        **Reasoning Guidelines:**
        1.  **Analyze Query Parameters:** Identify all relevant parameters from the user's query. For each parameter, create a structured object specifying its 'name', its 'status' ('KNOWN' if the value is given, or 'SYMBOLIC' if it's a variable), and its 'value' (or null if symbolic). For example: `[ {{"name": "aircraft_stand_type", "status": "KNOWN", "value": "Wide Remote"}}, {{"name": "parking_duration_hours", "status": "SYMBOLIC", "value": null}} ]`
        **Allowed Variables for this Task:**
        ---
        {allowed_variables_prompt}
        ---
        2. **Identify All Relevant Information**: Review the document and extract every fact, constant, and conditional rule needed for the calculation. Each piece of information should be written as a clear, self-contained sentence and collected into a list of strings.
        3.  **Synthesize Plan:** Briefly describe how you will combine these pieces into a final expression tree.
        4. **Rethink and Finalize Approach**: Before processing with generation, rethink your progress so far and make adjustments if necessary, then finalize and proceed to generate the expression tree.

        **Crucial Rule 1:** If a parameter from the 'Allowed Variables' list is given a specific value in the query, you MUST treat it as a fixed value to find constants. You MUST NOT include it as a `VARIABLE` node in the final JSON.
        **Crucial Rule 2**: If a calculation path or value depends on the value of a symbolic variable, you MUST capture the rules for all possible values and represent this logic using CONDITIONAL nodes in the final expression tree. You MUST NOT assume a default value for the variable to simplify the logic.
        
        After writing your reasoning, you WILL generate the JSON object.


        """

    with user():
        llm += f"""
        **Document:**
        ---
        {document}
        ---

        **Query:**
        
        Based on the document, construct the computation graph for the following request:
        
        "{query}"
        
        """

    with assistant():
        llm += "I will now follow the reasoning guidelines step-by-step before generating the final JSON.\n"
        llm += "Step1. Analyze Query Parameters:\n"
        llm += gen_json(
            name="thought1", 
            schema=ReasoningSchemaStep1, 
            max_tokens=600)

        llm += "Step2. Identify All Relevant Information:\n"
        llm += gen_json(
            name="thought2", 
            schema=ReasoningSchemaStep2, 
            max_tokens=600)

        llm += "Step3. Synthesize Plan:\n"
        llm += gen_json(
            name="thought3", 
            schema=ReasoningSchemaStep3, 
            max_tokens=600)

        llm += "Step4. Rethink and Finalize Approach:\n"
        llm += gen_json(
            name="thought4", 
            schema=ReasoningSchemaStep4, 
            max_tokens=600)

        # After thinking, it generates the JSON.
        llm += "\n\nFinal JSON object:\n"
        llm += gen_json(
            name="result_graph", 
            schema=output_schema,
            max_tokens=2000 
        )
        
    return llm

In [12]:

class ComputationGraphBuilder:
    """
    Orchestrates the creation of a computation graph by preparing dynamic
    constraints and prompting the LLM.
    """
    
    def __init__(self, model):
        """
        Initializes the builder with a guidance model.
        """
        self.model = model
        # Set the default LLM for all guidance programs
        # guidance.llm = self.model
    @timing_decorator
    def build(self, document_content: str, query: str, charge_category: str) -> dict:
        """
        Generates a computation graph for a given query and document.

        Args:
            document_content: The text containing the rules.
            query: A natural language question about what to calculate.
            charge_category: The specific charge context used to filter variables.

        Returns:
            A dictionary representing the computation graph or an error.
        """
        print(f"--- Building graph for charge category: '{charge_category}' ---")
        
        # 1. Dynamically create the filtered Enum for this specific task
        try:
            Var = create_dynamic_variable_enum(charge_category)
        except ValueError as e:
            print(f"Error: {e}")
            return {"error": str(e)}

        # 3. Create a formatted prompt string of allowed variables for the LLM
        allowed_variables = [el.value for el in list(Var)]
        allowed_variables_prompt = "\n".join(
            [f"- **{v.name}**: {v.description}" for name, v in ALL_VARIABLES.items() if name in allowed_variables]
        )

        try:
            # 4. Execute the guidance program with all dynamic components
            result_lm = self.model + create_graph_with_cot(
                allowed_variables_prompt=allowed_variables_prompt,
                document=document_content,
                query=query,
                output_schema=Node
            )
            
            
            # print("\nSuccessfully generated graph:")
            # # Use model_dump_json for Pydantic v2
            # print(pydantic_graph.model_dump_json(indent=2)) 
            return result_lm
            
        except Exception as e:
            print(f"\nAn error occurred while building the graph for '{query}': {e}")
            return {"error": str(e)}

In [13]:
import sympy
from sympy import Symbol, Piecewise, sympify, Add, Mul, Pow

def compose_expression(node: dict):
    """
    Recursively parses a JSON graph into a SymPy expression,
    preventing automatic simplification.

    Args:
        node: A dictionary representing a node in the computation graph.

    Returns:
        A non-evaluated sympy expression representing the computation.
    """

    node_type = node.get('type')

    if node_type == "VALUE":
        return sympify(node['value'])

    elif node_type == "VARIABLE":
        return Symbol(node['name'])

    elif node_type == "BINARY_OPERATION":
        left = compose_expression(node['left'])
        right = compose_expression(node['right'])
        operator = node['operator']
        
        # Use class constructors with evaluate=False to prevent simplification
        if operator == "ADD":
            return Add(left, right, evaluate=False)
        elif operator == "MULTIPLY":
            return Mul(left, right, evaluate=False)
        elif operator == "DIVIDE":
            # Division (a/b) is represented as a * (b**-1)
            power = Pow(right, -1, evaluate=False)
            return Mul(left, power, evaluate=False)
        else:
            raise ValueError(f"Unsupported binary operator: {operator}")

    elif node_type == "COMPARISON":
        left = compose_expression(node['left'])
        right = compose_expression(node['right'])
        operator = node['operator']

        if operator == "GREATER_THAN":
            return left > right
        if operator == "LESS_THAN":
            return left < right
        else:
            raise ValueError(f"Unsupported comparison operator: {operator}")

    elif node_type == "CONDITIONAL":
        condition = compose_expression(node['condition'])
        if_true_expr = compose_expression(node['if_true'])
        if_false_expr = compose_expression(node['if_false'])
        
        return Piecewise((if_true_expr, condition), (if_false_expr, True))

    else:
        raise ValueError(f"Unknown node type: {node_type}")

In [14]:
import json
import traceback


def create_computation_graph(model, query, charge_category):
    graph_builder = ComputationGraphBuilder(model=model)

    start_time = time.perf_counter()
    llm_structured_response = graph_builder.build(
        document_content=markdown_content,
        query=query,
        charge_category=charge_category
    )
    end_time = time.perf_counter()
    build_time = end_time - start_time

    return llm_structured_response, build_time
    

In [46]:
# from sympy import Symbol

# # Define the symbol that you want to replace
# variable_to_replace = Symbol('parking_duration_hours')

# # Define the value you want to substitute
# value_to_substitute = 60

# final_charge_cost = symbolic_expr.subs(variable_to_replace, value_to_substitute)
# print(f"The parking cost for {value_to_substitute} hours is: {final_charge_cost} EUR") 

The parking cost for 60 hours is: 900 EUR


In [70]:
query = "Calculate the total parking charge for a Narrow Satellite stand type at East Aerodrome Parking with no overnight parking. The parking duration is a variable with unit hours."
charge_category = "east_aerodrome_parking_charge"

result = create_computation_graph(model, query, charge_category)


--- Building graph for charge category: 'east_aerodrome_parking_charge' ---


StitchWidget(initial_height='auto', initial_width='100%', srcdoc='<!doctype html>\n<html lang="en">\n<head>\n …

In [71]:
print(json.dumps(json.loads(result[0]["thought1"]), indent=4))

{
    "query_parameters": [
        {
            "name": "aircraft_stand_type",
            "status": "KNOWN",
            "value": "Narrow Satellite"
        },
        {
            "name": "parking_duration_hours",
            "status": "SYMBOLIC",
            "value": null
        },
        {
            "name": "is_overnight_parking",
            "status": "KNOWN",
            "value": false
        }
    ]
}


In [72]:
print(json.dumps(json.loads(result[0]["thought2"]), indent=4))

{
    "identified_constants_and_rules": [
        "The standard charge per aircraft/stand type for Narrow Satellite at East Aerodrome Parking (EAP) is \u20ac32.90 per 15 minutes or part thereof.",
        "Parking surcharges apply for extended periods at EAP: 48 hours up to 72 hours (including night-time) attract a surcharge of Standard rate +100%, and 72 hours and over (including night-time) attract a surcharge of Standard rate +200%.",
        "Overnight parking from 2300-0600hrs is free of charge. The query specifies no overnight parking (is_overnight_parking = false).",
        "The parking duration is a variable with unit hours, meaning the calculation must account for all possible values of parking_duration_hours."
    ]
}


In [73]:
print(json.dumps(json.loads(result[0]["thought3"]), indent=4))

{
    "synthesis_plan": "The total parking charge calculation for a Narrow Satellite stand at EAP involves: 1. Determining the base rate (\u20ac32.90 per 15 minutes or part thereof). 2. Calculating the number of 15-minute intervals in the parking duration. 3. Applying surcharges based on the duration: - If parking_duration_hours < 48: no surcharge. - If 48 \u2264 parking_duration_hours < 72: add 100% surcharge. - If parking_duration_hours \u2265 72: add 200% surcharge. 4. Since the duration is symbolic, the expression tree must include conditional logic for all possible duration ranges."
}


In [74]:
print(json.dumps(json.loads(result[0]["thought4"]), indent=4))

{
    "rethink": "The approach correctly identifies the base rate, the surcharge conditions based on duration, and the need for conditional logic due to the symbolic duration. The is_overnight_parking parameter is set to false, so no adjustment for free overnight parking is needed. The expression tree will accurately represent the calculation logic for any given parking duration."
}


In [75]:
print(json.dumps(json.loads(result[0]["result_graph"]), indent=4))

{
    "type": "BINARY_OPERATION",
    "operator": "ADD",
    "left": {
        "type": "CONDITIONAL",
        "condition": {
            "type": "COMPARISON",
            "operator": "LESS_THAN",
            "left": {
                "type": "VARIABLE",
                "name": "parking_duration_hours",
                "description": "Total duration of parking in hours. Surcharges apply at 48 and 72 hours.",
                "unit": "HOURS"
            },
            "right": {
                "type": "VALUE",
                "value": 48,
                "description": "Threshold for 48-hour surcharge.",
                "unit": "HOURS"
            }
        },
        "if_true": {
            "type": "BINARY_OPERATION",
            "operator": "MULTIPLY",
            "left": {
                "type": "VALUE",
                "value": 32.9,
                "description": "Standard charge per aircraft/stand type for Narrow Satellite at East Aerodrome Parking (EAP).",
                "unit"

In [76]:
result[1]

352.01867974799825

In [79]:



# Print the resulting expression
loaded_json = json.loads(result[0]['result_graph'])
symbolic_expr = compose_expression(loaded_json)
print(f"Sympy Expression: {symbolic_expr}")

Sympy Expression: Piecewise((32.9*(parking_duration_hours/0.25), parking_duration_hours < 48), (Piecewise(((32.9*2)*(parking_duration_hours/0.25), parking_duration_hours < 72), ((32.9*3)*(parking_duration_hours/0.25), True)), True)) + 0



# Changes: 
- Tried on a new problem

# Observations: 
- The reasoning traces are decent, except that I may need to add a ceil function to account for cases when one may need to round up.
- The graph produced is not perfect. It makes a potential mistake in computing surcharge, however, at this point, it is not clear whether this is a mistake as the document is quite confusing by itself. It does not clarify whether the surcharge is applicable for Long Term Remote, and whether that is on a per day basis.
- The left hand side branch does not use the variable parking hours to compute the surcharge even in cases where the hours are > 24. One potential reason for this could be that I mentioned there is no overnight parking, which may have led it to assume that the hours would be less than 24.
- The overnight parking charges also have ambiguity in their application. It is not clear, whether one has to discount the free period from the computed total fees or whether the surcharges are still applicable during that period, or it could be that aircrafts which strictly use the airport services starting and ending strictly during this period are exempt from any fees? 

# Discussion:
- More testing is required to understand where the model may be making mistakes.
- Larger models may perform better, as we currently only use the humble 32B A3B parameter MoE model.

# Next Steps:
- Continue testing

In [16]:
query = "Calculate the total parking charge for a Long Term Remote stand type at East Aerodrome Parking with no overnight parking. The parking duration is a variable with unit hours."
charge_category = "east_aerodrome_parking_charge"

result = create_computation_graph(model, query, charge_category)


--- Building graph for charge category: 'east_aerodrome_parking_charge' ---


StitchWidget(initial_height='auto', initial_width='100%', srcdoc='<!doctype html>\n<html lang="en">\n<head>\n …

Execution time for 'build': 213.9923 seconds


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [17]:
print(json.dumps(json.loads(result[0]["thought1"]), indent=4))

{
    "query_parameters": [
        {
            "name": "aircraft_stand_type",
            "status": "KNOWN",
            "value": "Long Term Remote"
        },
        {
            "name": "parking_duration_hours",
            "status": "SYMBOLIC",
            "value": null
        },
        {
            "name": "is_overnight_parking",
            "status": "KNOWN",
            "value": false
        }
    ]
}


In [18]:
print(json.dumps(json.loads(result[0]["thought2"]), indent=4))

{
    "identified_constants_and_rules": [
        "The standard charge per aircraft/stand type for Long Term Remote at East Aerodrome Parking (EAP) is \u20ac234.50 per day or part thereof.",
        "Aircraft parking for extended periods in EAP attract surcharges based on duration: 48-72 hours (including night-time) is standard rate +100%, 72 hours and over is standard rate +200%.",
        "Overnight parking from 2300-0600hrs is free of charge.",
        "The parking duration is a variable with unit hours.",
        "The parking duration is not during overnight hours (is_overnight_parking = false).",
        "The parking is at East Aerodrome Parking (EAP).",
        "The stand type is Long Term Remote."
    ]
}


In [19]:
print(json.dumps(json.loads(result[0]["thought3"]), indent=4))

{
    "synthesis_plan": "The total parking charge calculation will start with the standard rate for Long Term Remote at EAP (\u20ac234.50/day). Since the duration is variable, we need to create conditional branches based on the duration in hours. The surcharge percentages depend on whether the duration is between 48-72 hours or over 72 hours. We'll convert the duration from hours to days (considering part thereof) and apply the appropriate surcharge. Since it's not overnight parking, we don't need to adjust for free night-time periods."
}


In [20]:
print(json.dumps(json.loads(result[0]["thought4"]), indent=4))

{
    "rethink": "The approach correctly identifies the standard rate and the surcharge conditions based on duration. The conversion from hours to days needs to be handled as 'per day or part thereof' which means any fraction of a day counts as a full day. The surcharge conditions are correctly identified as dependent on the duration. The is_overnight_parking parameter is correctly set to false so no adjustments for free overnight periods are needed."
}


In [21]:
print(json.dumps(json.loads(result[0]["result_graph"]), indent=4))

{
    "type": "BINARY_OPERATION",
    "operator": "ADD",
    "left": {
        "type": "CONDITIONAL",
        "condition": {
            "type": "COMPARISON",
            "operator": "GREATER_THAN",
            "left": {
                "type": "VARIABLE",
                "name": "parking_duration_hours",
                "description": "Total duration of parking in hours. Surcharges apply at 48 and 72 hours.",
                "unit": "HOURS"
            },
            "right": {
                "type": "VALUE",
                "value": 72,
                "description": "Threshold for 200% surcharge",
                "unit": "HOURS"
            }
        },
        "if_true": {
            "type": "BINARY_OPERATION",
            "operator": "MULTIPLY",
            "left": {
                "type": "VALUE",
                "value": 234.5,
                "description": "Standard charge per day for Long Term Remote at East Aerodrome Parking",
                "unit": "EUROS"
            }


# Changes: 
- Tried on a new problem

# Observations: 

# Discussion:

# Next Steps:


In [22]:
query = "Calculate the total parking charge for a Wide Satellite stand type at East Aerodrome Parking with no overnight parking. The parking duration is a variable with unit hours."
charge_category = "east_aerodrome_parking_charge"

result = create_computation_graph(model, query, charge_category)


--- Building graph for charge category: 'east_aerodrome_parking_charge' ---


StitchWidget(initial_height='auto', initial_width='100%', srcdoc='<!doctype html>\n<html lang="en">\n<head>\n …

Execution time for 'build': 298.1813 seconds


In [23]:
print(json.dumps(json.loads(result[0]["thought1"]), indent=4))

{
    "query_parameters": [
        {
            "name": "aircraft_stand_type",
            "status": "KNOWN",
            "value": "Wide Satellite"
        },
        {
            "name": "parking_duration_hours",
            "status": "SYMBOLIC",
            "value": null
        },
        {
            "name": "is_overnight_parking",
            "status": "KNOWN",
            "value": false
        }
    ]
}


In [24]:
print(json.dumps(json.loads(result[0]["thought2"]), indent=4))

{
    "identified_constants_and_rules": [
        "The standard charge per aircraft/stand type for Wide Satellite at East Aerodrome Parking (EAP) is \u20ac42.00 per 15 minutes or part thereof.",
        "Parking surcharges apply for extended periods at EAP: 48 hours up to 72 hours (including night-time) attract a surcharge of Standard rate +100%, and 72 hours and over (including night-time) attract Standard rate +200%.",
        "Overnight parking from 2300-0600hrs is free of charge, but the query specifies no overnight parking (is_overnight_parking = false).",
        "The parking duration is a variable with unit hours, meaning the calculation must account for all possible values of parking_duration_hours.",
        "The parking charge is calculated based on the number of 15-minute intervals in the parking duration, with each interval charged at the standard rate or surcharged rate depending on duration.",
        "The minimum charge for light aircraft is \u20ac3.20, but this does not

In [25]:
print(json.dumps(json.loads(result[0]["thought3"]), indent=4))

{
    "synthesis_plan": "The total parking charge for a Wide Satellite stand at EAP depends on the parking duration. The base rate is \u20ac42.00 per 15 minutes or part thereof. For durations between 48 and 72 hours, the rate is doubled (100% surcharge), and for durations over 72 hours, the rate is tripled (200% surcharge). Since the parking duration is symbolic, the calculation must include conditional logic to handle all possible values of parking_duration_hours. The formula will calculate the number of 15-minute intervals, apply the appropriate rate based on duration, and sum the charges."
}


In [26]:
print(json.dumps(json.loads(result[0]["thought4"]), indent=4))

{
    "rethink": "The approach correctly identifies the base rate, surcharge conditions, and the need for conditional logic due to the symbolic parking duration. The calculation must account for the number of 15-minute intervals, apply surcharges based on duration, and handle all possible values of parking_duration_hours. The final expression tree will include conditional nodes for the surcharge tiers and a calculation for the total charge."
}


In [27]:
print(json.dumps(json.loads(result[0]["result_graph"]), indent=4))

{
    "type": "BINARY_OPERATION",
    "operator": "ADD",
    "left": {
        "type": "CONDITIONAL",
        "condition": {
            "type": "COMPARISON",
            "operator": "LESS_THAN",
            "left": {
                "type": "VARIABLE",
                "name": "parking_duration_hours",
                "description": "Total duration of parking in hours. Surcharges apply at 48 and 72 hours.",
                "unit": "HOURS"
            },
            "right": {
                "type": "VALUE",
                "value": 48,
                "description": "Threshold for 100% surcharge on parking duration.",
                "unit": "HOURS"
            }
        },
        "if_true": {
            "type": "BINARY_OPERATION",
            "operator": "MULTIPLY",
            "left": {
                "type": "BINARY_OPERATION",
                "operator": "DIVIDE",
                "left": {
                    "type": "VARIABLE",
                    "name": "parking_duration_hou

In [None]:
query = "Calculate the total parking charge for a Light Aircraft Parking (LAP) stand type at East Aerodrome Parking with no overnight parking. The parking duration is a variable with unit hours."
charge_category = "east_aerodrome_parking_charge"

result = create_computation_graph(model, query, charge_category)


--- Building graph for charge category: 'east_aerodrome_parking_charge' ---


StitchWidget(initial_height='auto', initial_width='100%', srcdoc='<!doctype html>\n<html lang="en">\n<head>\n …

Execution time for 'build': 839.8929 seconds


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [20]:
print(json.dumps(json.loads(result[0]["thought1"]), indent=4))

{
    "query_parameters": [
        {
            "name": "aircraft_stand_type",
            "status": "KNOWN",
            "value": "Light Aircraft Parking (LAP)"
        },
        {
            "name": "parking_duration_hours",
            "status": "SYMBOLIC",
            "value": null
        },
        {
            "name": "is_overnight_parking",
            "status": "KNOWN",
            "value": false
        }
    ]
}


In [21]:
print(json.dumps(json.loads(result[0]["thought2"]), indent=4))

{
    "identified_constants_and_rules": [
        "The parking charge for Light Aircraft Parking (LAP) at East Aerodrome Parking (EAP) is \u20ac3.50 per 15 minutes or part thereof.",
        "The parking duration is a variable with unit hours.",
        "Overnight parking from 2300-0600hrs is free of charge, but the query specifies no overnight parking.",
        "Aircraft parking for extended periods in EAP attract surcharges: 48 hours up to 72 hours (including night-time) has a surcharge of Standard rate +100%, and 72 hours and over has Standard rate +200%.",
        "The parking charge is calculated from the actual time of arrival to the actual time of departure minus 30 minutes.",
        "The minimum charge of \u20ac3.20 applies for light aircraft.",
        "The parking duration is a variable, so we need to account for all possible values."
    ]
}


In [22]:
print(json.dumps(json.loads(result[0]["thought3"]), indent=4))

{
    "synthesis_plan": "The total parking charge for LAP at EAP is calculated by first determining the base rate of \u20ac3.50 per 15 minutes or part thereof. Since the parking duration is a variable, we need to calculate the number of 15-minute intervals. However, we must also consider the surcharges for durations over 48 and 72 hours. The calculation should include a conditional check for the duration: if it's less than 48 hours, use the base rate; if between 48-72 hours, apply 100% surcharge; if over 72 hours, apply 200% surcharge. Additionally, the minimum charge of \u20ac3.20 must be considered, but since the base rate is already higher than this, it may not affect the calculation. The overnight parking is not applicable here as per the query."
}


In [23]:
print(json.dumps(json.loads(result[0]["thought4"]), indent=4))

{
    "rethink": "The approach is correct, but I need to ensure that the surcharge conditions are properly represented as conditional nodes in the expression tree. Also, the calculation of 15-minute intervals needs to be accurately represented. The minimum charge of \u20ac3.20 is lower than the base rate, so it should be considered as a minimum floor in the calculation."
}


In [25]:
result[0]["result_graph"]

'{"type": "BINARY_OPERATION", "operator": "ADD", "left": {"type": "CONDITIONAL", "condition": {"type": "COMPARISON", "operator": "GREATER_THAN", "left": {"type": "VARIABLE", "name": "parking_duration_hours", "description": "Total duration of parking in hours. Surcharges apply at 48 and 72 hours.", "unit": "HOURS"}, "right": {"type": "VALUE", "value": 72, "description": "Threshold for 200% surcharge on parking duration.", "unit": "HOURS"}}, "if_true": {"type": "BINARY_OPERATION", "operator": "MULTIPLY", "left": {"type": "VALUE", "value": 3.50, "description": "Standard charge per 15 minutes or part thereof for Light Aircraft Parking (LAP) at East Aerodrome Parking (EAP).", "unit": "EUROS"}, "right": {"type": "BINARY_OPERATION", "operator": "DIVIDE", "left": {"type": "VARIABLE", "name": "parking_duration_hours", "description": "Total duration of parking in hours. Surcharges apply at 48 and 72 hours.", "unit": "HOURS"}, "right": {"type": "VALUE", "value": 0.25, "description": "Conversion f