In [9]:
# %% Cell 1: Imports
from datasets import load_dataset
import json
import random
import numpy as np
from sklearn.model_selection import train_test_split


In [10]:
# %% Cell 2: Load Dataset
# Load the VeriGUI dataset via `datasets.load_dataset`; only its "test" split
# is kept in `ds` for the cells that follow.
verigui_splits = load_dataset("2077AIDataFoundation/VeriGUI")
ds = verigui_splits["test"]
print("Total trajectories:", len(ds))


Total trajectories: 302


In [11]:
# %% Cell 3: Task Type Heuristic
def classify_task(text):
    """Assign a coarse task-type id to an instruction via keyword matching.

    Matching is case-insensitive and substring-based. Keyword groups are
    checked in a fixed order and the first group with a hit wins.

    Args:
        text: Instruction string to classify.

    Returns:
        0 if any of "filter" / "select" / "identify" occurs,
        otherwise 1 if any of "search" / "find" / "look up" occurs,
        otherwise 2 if any of "compare" / "analyze" / "extract" occurs,
        otherwise 1 (the fallback when nothing matches).
    """
    lowered = text.lower()

    # Order matters: earlier groups take priority over later ones.
    keyword_groups = (
        (0, ("filter", "select", "identify")),
        (1, ("search", "find", "look up")),
        (2, ("compare", "analyze", "extract")),
    )
    for type_id, keywords in keyword_groups:
        for keyword in keywords:
            if keyword in lowered:
                return type_id

    # No keyword matched: fall back to type 1.
    return 1


In [12]:
# %% Cell 4: Complexity Score
def compute_complexity(task, max_subtasks=10, max_instruct_words=200, max_subtask_words=500):
    """Compute a heuristic complexity score for a task, capped at 1.0.

    The score is a weighted sum of three normalized terms:

    * 0.4 * (number of sub-tasks / ``max_subtasks``)
    * 0.3 * (word count of the task instruction / ``max_instruct_words``)
    * 0.3 * (total word count of all sub-task instructions / ``max_subtask_words``)

    The sub-task term used to be ``0.4 * len(sub_tasks)`` with no
    normalization, so any task with three or more sub-tasks saturated at 1.0.
    It is now scaled like the other two terms.

    Args:
        task: Mapping with an ``"instruct"`` string and a ``"sub_tasks"``
            sequence of mappings that each have an ``"instruct"`` string.
        max_subtasks: Sub-task count that maps the first term to its full
            weight. NOTE(review): the default of 10 is an assumed upper
            bound -- confirm against the dataset's sub-task distribution.
            Passing 1 reproduces the previous un-normalized behavior.
        max_instruct_words: Word count that maps the second term to its
            full weight.
        max_subtask_words: Total sub-task word count that maps the third
            term to its full weight.

    Returns:
        A float no greater than 1.0.

    Raises:
        ZeroDivisionError: If any of the normalization constants is 0.
    """
    sub_tasks = task["sub_tasks"]
    instruct_words = len(task["instruct"].split())
    subtask_words = sum(len(st["instruct"].split()) for st in sub_tasks)

    score = (
        0.4 * len(sub_tasks) / max_subtasks
        + 0.3 * instruct_words / max_instruct_words
        + 0.3 * subtask_words / max_subtask_words
    )
    # Individual terms are not clamped; only the total is capped.
    return min(1.0, score)


In [13]:
# %% Cell 5: Oracle Mapping (USED ONLY FOR REWARD)
def map_subtask_to_agent(text):
    """Map a sub-task instruction to an agent id via keyword matching.

    Matching is case-insensitive and substring-based; the first rule that
    hits decides the result.

    Args:
        text: Sub-task instruction string.

    Returns:
        0 if "click", "open", "navigate" or "filter" occurs,
        otherwise 1 if "preference" or "personalized" occurs,
        otherwise 2.
    """
    lowered = text.lower()

    if (
        "click" in lowered
        or "open" in lowered
        or "navigate" in lowered
        or "filter" in lowered
    ):
        return 0

    if "preference" in lowered or "personalized" in lowered:
        return 1

    # Neither keyword group matched.
    return 2


In [14]:
# %% Cell 6: Build Clean RL Tasks
# One record per trajectory in `ds`: an observation dict plus the oracle
# agent sequence.
clean_tasks = []

for i, task in enumerate(ds):
    instruction = task["instruct"]
    sub_tasks = task["sub_tasks"]

    # Oracle agent id for each sub-task, in sub-task order.
    oracle_seq = []
    for st in sub_tasks:
        oracle_seq.append(map_subtask_to_agent(st["instruct"]))

    # Fraction of the three possible agent ids (0, 1, 2) that the oracle uses.
    diversity = len(set(oracle_seq)) / 3

    obs = {
        "instruction_length": len(instruction.split()),
        "num_subtasks": len(sub_tasks),
        "task_type": classify_task(instruction),
        "task_complexity": compute_complexity(task),
        "agent_diversity_required": diversity
    }

    record = {
        "task_id": f"verigui_{i}",
        "observation": obs,
        "oracle": {"optimal_agent_sequence": oracle_seq}
    }
    clean_tasks.append(record)

print("Prepared tasks:", len(clean_tasks))


Prepared tasks: 302


In [15]:
# %% Cell 7: Split
# First cut: hold out 30% of the tasks in `temp`; the rest is `train`.
train, temp = train_test_split(
    clean_tasks,
    test_size=0.30,
    random_state=42,
)
# Second cut: halve the held-out portion into `val` and `test`.
val, test = train_test_split(
    temp,
    test_size=0.50,
    random_state=42,
)


In [16]:
# %% Cell 8: Save
# Write all three splits to a single JSON file, keyed by split name.
splits_payload = {"train": train, "val": val, "test": test}

with open("verigui_rl_clean.json", "w") as out_file:
    json.dump(splits_payload, out_file, indent=2)

print("Saved verigui_rl_clean.json")


Saved verigui_rl_clean.json


ZEFT ZEFT 