In [1]:
from datasets import load_dataset

def load_verigui():
    """Load the VeriGUI dataset and return its ``test`` split.

    The dataset is fetched with ``load_dataset`` under the identifier
    ``2077AIDataFoundation/VeriGUI``. A one-line summary with the number
    of loaded tasks is printed as a quick sanity check.

    Returns:
        The object stored under the ``"test"`` key of the loaded dataset
        collection.
    """
    # "test" is the only split VeriGUI ships with, so index it directly.
    tasks = load_dataset("2077AIDataFoundation/VeriGUI")["test"]

    print(f"Loaded VeriGUI with {len(tasks)} tasks.")
    return tasks

# Load the dataset once; `ds` is the input to prepare_rl_transitions in a later cell.
ds = load_verigui()
# Bare last expression: the notebook displays the first record so its fields can be inspected.
ds[0]


  from .autonotebook import tqdm as notebook_tqdm


Loaded VeriGUI with 302 tasks.


{'folder': '675',
 'instruct': 'Find the Italian film that won the Grand Prix du Jury at the 1989 Cannes Film Festival, and was written and directed by Giuseppe Tornatore. For this film, please provide the following information: the name of the actor who played the blind projectionist Alfredo and won the BAFTA Award for Best Actor for this role; a famous quote about "life" spoken by this actor in the film; the specific award jointly received by the film\'s composer and his son; the duration of the film\'s longest version; the film\'s initial release date in Italy; the name of the designer who won the César Award for Best Poster; and the film\'s box office gross in the United States and Canada.',
 'result': ["film's title:Cinema Paradiso",
  "the actor's name:Philippe Noiret",
  'line spoken:Life isn’t like in the movies. Life… is much harder',
  'the names of the composer and his son:Ennio Morricone,Andrea Morricone,specific award:BAFTA Award for For Best Original Film Music',
  'durat

In [2]:
import json

def prepare_rl_transitions(ds):
    """Flatten tasks into one RL transition per subtask.

    Each item of ``ds`` must provide ``"instruct"`` (the overall task text)
    and ``"sub_tasks"`` (a sequence of mappings, each with ``"instruct"``
    and ``"result"``).

    Every transition is a dict with:
        state:       what is visible as input: the task instruction, the
                     instructions (never the results) of the subtasks that
                     precede the current one, and the current subtask
                     instruction.
        gold_output: the current subtask's ``"result"``; kept out of
                     ``state`` so it can serve as the hidden target.
        reward:      always ``None`` here; a placeholder to be filled later.
        done:        ``True`` only for the last subtask of its task.

    Prints the total number of transitions produced.

    Args:
        ds: Iterable of task records as described above.

    Returns:
        list[dict]: Transitions in task order, then subtask order.
    """
    records = []

    for task in ds:
        overall_goal = task["instruct"]
        steps = task["sub_tasks"]

        for pos, step in enumerate(steps):
            # Earlier steps are exposed by instruction text only; their
            # results are deliberately left out of the visible state.
            history = [{"instruction": prior["instruct"]} for prior in steps[:pos]]

            # Visible state: no labels and no future subtasks.
            visible_state = {
                "task_instruction": overall_goal,
                "completed_subtasks": history,
                "current_subtask_instruction": step["instruct"],
            }

            records.append(
                {
                    "state": visible_state,
                    # Hidden target, kept outside `state`.
                    "gold_output": step["result"],
                    # Placeholder; filled in later.
                    "reward": None,
                    # The episode ends on the task's final subtask.
                    "done": pos + 1 == len(steps),
                }
            )

    print(f"Prepared {len(records)} RL transitions (safe, masked).")
    return records

# Build the transitions from the loaded dataset; `rl_data` is what a later cell writes to disk.
rl_data = prepare_rl_transitions(ds)
# Display only the first two transitions to keep the cell output short.
rl_data[:2]


Prepared 1300 RL transitions (safe, masked).


[{'state': {'task_instruction': 'Find the Italian film that won the Grand Prix du Jury at the 1989 Cannes Film Festival, and was written and directed by Giuseppe Tornatore. For this film, please provide the following information: the name of the actor who played the blind projectionist Alfredo and won the BAFTA Award for Best Actor for this role; a famous quote about "life" spoken by this actor in the film; the specific award jointly received by the film\'s composer and his son; the duration of the film\'s longest version; the film\'s initial release date in Italy; the name of the designer who won the César Award for Best Poster; and the film\'s box office gross in the United States and Canada.',
   'completed_subtasks': [],
   'current_subtask_instruction': "Collect a list of films that won the Grand Prix du Jury at the 1989 Cannes Film Festival, recording each film's title, director, screenwriter, and country of production."},
  'gold_output': ['Cinema Paradiso,Giuseppe Tornatore,Giu

In [4]:
import os
import json

# Destination for the serialized transitions. The path is relative, so it
# resolves against the notebook's current working directory.
OUTPUT_FILE = "backend/agents/RL/data/verigui_coordinator_rl2.json"

# Ensure directory exists. A bare filename has an empty dirname, and
# os.makedirs("") raises FileNotFoundError, so only create a real parent.
output_dir = os.path.dirname(OUTPUT_FILE)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes,
# which is why the file is opened with an explicit UTF-8 encoding.
with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
    json.dump(rl_data, f, indent=2, ensure_ascii=False)

print("Saved preprocessed RL dataset to:", OUTPUT_FILE)


Saved preprocessed RL dataset to: backend/agents/RL/data/verigui_coordinator_rl2.json
