In [1]:
from plan_width import calc_width

# Sanity check on a single Blocksworld instance before sweeping the whole
# instance directory.
domain_path = (
    "test_data/PDDL/Blocksworld/domain.pddl"
)
problem_path = (
    "test_data/PDDL/Blocksworld/instances/instance-1.pddl"
)

# calc_width takes the domain file path first, then the problem file path.
width = calc_width(domain_path, problem_path)
print(f"Width of the problem is: {width}")

Width of the problem is: 1


In [2]:
import os
from plan_width import calc_width

# Compute the width of every Blocksworld instance and report the average.
domain_path = 'test_data/PDDL/Blocksworld/domain.pddl'
instances_dir = 'test_data/PDDL/Blocksworld/instances'

widths = []
# os.listdir returns entries in arbitrary, platform-dependent order; sorting
# makes the printed report reproducible across runs and machines.
for fname in sorted(os.listdir(instances_dir)):
    # Skip anything in the directory that is not a PDDL problem file.
    if not fname.endswith('.pddl'):
        continue
    problem_path = os.path.join(instances_dir, fname)
    width = calc_width(domain_path, problem_path)
    print(f"Width of {fname} is: {width}")
    widths.append(width)

# Guard the mean: with no .pddl files the division would raise
# ZeroDivisionError and hide the real problem (an empty or wrong directory).
if widths:
    print(f"Average width of all instances is: {sum(widths) / len(widths)}")
else:
    print(f"No .pddl instances found in {instances_dir}")

Width of instance-1.pddl is: 1
Width of instance-10.pddl is: 2
Width of instance-2.pddl is: 2
Width of instance-3.pddl is: 2
Width of instance-4.pddl is: 2
Width of instance-5.pddl is: 1
Width of instance-6.pddl is: 2
Width of instance-7.pddl is: 2
Width of instance-8.pddl is: 3
Width of instance-9.pddl is: 3
Average width of all instances is: 2.0


In [1]:
from llm_engine import Engine
from dotenv import load_dotenv
import os
from openai import AsyncOpenAI

# Backend selector: "openai" talks to the OpenAI API, "vllm" talks to an
# OpenAI-compatible server reached through the URL below.
mode = "vllm"

# Fail fast on an unknown backend before doing any setup work.
if mode not in ("openai", "vllm"):
    raise ValueError("Invalid mode. Choose 'openai' or 'vllm'")

if mode == "vllm":
    NGROK_LINK = "https://" + "fitting-shrimp-pleasantly.ngrok-free.app"

    # The client is given the literal string "None" as its API key here;
    # only the base URL (the tunnel address plus "/v1") varies.
    client_vllm = AsyncOpenAI(api_key="None", base_url=NGROK_LINK + "/v1")

    model = "Qwen/Qwen3-14B"  # "Qwen/Qwen2.5-32B-Instruct-GPTQ-Int8"

    engine = Engine(client_vllm, chat=True, model=model)
else:
    # mode == "openai": the API key is read from the environment after
    # loading the local .env file.
    load_dotenv(".env")

    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

    client_openai = AsyncOpenAI(api_key=OPENAI_API_KEY)

    model = "gpt-4o-mini"

    engine = Engine(client_openai, chat=True, model=model)

In [2]:
import logging
import sys
# Route INFO-and-above log records to stdout so they appear in the cell
# output. force=True removes any handlers already attached to the root
# logger, so re-running this cell does not duplicate log lines.
logging.basicConfig(
    level=logging.INFO,
    handlers=[logging.StreamHandler(sys.stdout)],
    force=True,
)

# Optional: enable DEBUG output for the logger named "nov_tot" only.
# logger = logging.getLogger("nov_tot")
# logger.setLevel(logging.DEBUG)
# logger.addHandler(logging.StreamHandler(sys.stdout))

In [2]:
from task_tester import TaskRunner, TaskType
from pddl_translation.translator import StandardTranslator, NatLangTranslator

# Natural-language description of the Blocksworld domain: the four available
# actions followed by the restrictions on when each may be applied. It is
# handed to TaskRunner below as the task context.
context = "I am playing with a set of blocks where I need to arrange the blocks into stacks. Here are the actions I can do\n\nPick up a block\nUnstack a block from on top of another block\nPut down a block\nStack a block on top of another block\n\nI have the following restrictions on my actions:\nI can only pick up or unstack one block at a time.\nI can only pick up or unstack a block if my hand is empty.\nI can only pick up a block if the block is on the table and the block is clear. A block is clear if the block has no other blocks on top of it and if the block is not picked up.\nI can only unstack a block from on top of another block if the block I am unstacking was really on top of the other block.\nI can only unstack a block from on top of another block if the block I am unstacking is clear.\nOnce I pick up or unstack a block, I am holding the block.\nI can only put down a block that I am holding.\nI can only stack a block on top of another block if I am holding the block being stacked.\nI can only stack a block on top of another block if the block onto which I am stacking the block is clear.\nOnce I put down or stack a block, my hand becomes empty.\nOnce you stack a block on top of a second block, the second block is no longer clear."
# Translator instance passed to the runner; its `type` attribute is also used
# later to name the results directory.
translator = NatLangTranslator()
# NOTE(review): `thinking` is not passed to the engine or the runner in this
# cell; in the visible cells it only selects the "thinking"/"non_thinking"
# results sub-directory. Confirm it matches how the model is actually served.
thinking = False
# `engine` comes from the backend-selection cell above.
runner = TaskRunner(engine, translator, context)

In [5]:
results_novelty = await runner.run(TaskType.NOVELTY, num_tasks=50)

In [7]:
import json
import os

# Results are stored under results/<model>/<thinking|non_thinking>/<translator type>.
# `model` may itself contain "/" (e.g. "Qwen/Qwen3-14B"), which adds one more
# directory level.
thinking_dir = "/thinking/" if thinking else "/non_thinking/"
results_dir = "results/" + model + thinking_dir + translator.type

# exist_ok=True replaces the check-then-create pattern: it is a single call,
# is safe to re-run, and cannot fail if the directory appears between the
# existence check and the creation.
os.makedirs(results_dir, exist_ok=True)

# Save the results as JSON. The encoding is pinned so the file does not depend
# on the platform's default locale encoding.
with open(os.path.join(results_dir, "results_novelty.json"), "w", encoding="utf-8") as f:
    # Each result is serialised through its model_dump() method
    # (presumably a pydantic model -- confirm against task_tester).
    json.dump([result.model_dump() for result in results_novelty], f, indent=4)

In [6]:
# Count the successful tasks by summing each result's `success` attribute;
# a generator is used so no temporary list is built.
num_correct = sum(result.success for result in results_novelty)

# Report the number of correct tasks against the total number of tasks run.
print(f"Novelty task results: {num_correct} out of {len(results_novelty)} tasks were correct.")

Novelty task results: 25 out of 50 tasks were correct.
