In [19]:
import pandas as pd

In [20]:
# GCS prefix used by the cells directly below to read samples.jsonl and to write
# samples_fixed.jsonl. NOTE: PATH is rebound to a different prefix in a later cell.
PATH = "gs://induction-labs/evals/checkpoints/merged_step_-1_uUhioFvR_step_868_Nik8u94a/2025-08-04T04-31-39/osworld_eval_8yv5xv_j"
# Line-delimited JSON file with one sample per line (read with lines=True below).
DATA_FILE = f"{PATH}/samples.jsonl"

In [3]:
# Load the raw samples, then derive the cleaned frame in one chain:
#   1. strip a trailing "-2" from eval_task_id
#   2. keep only rows whose reward is exactly 0 or 1
ORIGINAL_TRAJECTORIES = pd.read_json(DATA_FILE, lines=True)
FIXED_TRAJECTORIES = (
    ORIGINAL_TRAJECTORIES
    .assign(
        eval_task_id=lambda frame: frame["eval_task_id"].str.replace(r"-2$", "", regex=True)
    )
    .loc[lambda frame: frame["reward"].eq(0) | frame["reward"].eq(1)]
)

In [4]:
from google.cloud import storage
import asyncio
import json
import re

_GS_RE = re.compile(r"^gs://([^/]+)/(.+)$")


def load_turns_gcs(gs_uri: str):
    """
    Download a JSON array from Google Cloud Storage and project each record
    onto its "image" and "text" fields.

    The final record of the array is always left out, so an array of N
    records yields N - 1 dicts of the form {"image": ..., "text": ...}.

    Raises ValueError when *gs_uri* is not of the form gs://<bucket>/<object>.
    """
    parsed = _GS_RE.match(gs_uri)
    if parsed is None:
        raise ValueError(f"Not a valid gs:// URI: {gs_uri}")
    bucket_name = parsed.group(1)
    blob_name = parsed.group(2)

    # Fetch the whole object in one request and decode it as text.
    blob = storage.Client().bucket(bucket_name).blob(blob_name)
    payload = json.loads(blob.download_as_text())  # list of dicts

    turns = []
    for record in payload[:-1]:  # every record except the last one
        turns.append({"image": record["image"], "text": record["text"]})
    return turns

async def load_turns_gcs_async(semaphore: asyncio.Semaphore, gs_uri: str):
    """
    Run load_turns_gcs in a worker thread while holding *semaphore*.

    The semaphore is acquired before the blocking load starts and released
    once it finishes or raises, so it bounds how many loads run at once.
    """
    await semaphore.acquire()
    try:
        turns = await asyncio.to_thread(load_turns_gcs, gs_uri)
    finally:
        semaphore.release()
    return turns

In [5]:
legacy = False
if legacy:
    semaphore = asyncio.Semaphore(64)  # limit concurrent loads
    async def check_if_fail(semaphore, attempt_id):
        try:
            return (await load_turns_gcs_async(semaphore, f"{PATH}/metadata/{attempt_id}.json"))[-1]["text"] is None
        except:
            return True

    failed = await asyncio.gather(*[
        check_if_fail(semaphore, row["attempt_id"])
        for _i, row in FIXED_TRAJECTORIES.iterrows()
    ])

In [6]:
if legacy:
    # Attach the per-row failure flags gathered by the legacy check, then drop the
    # failed rows. .assign returns a new frame rather than writing a column into
    # the boolean-filtered FIXED_TRAJECTORIES, which avoids SettingWithCopyWarning.
    FIXED_TRAJECTORIES = FIXED_TRAJECTORIES.assign(failed=failed)
    FIXED_TRAJECTORIES = FIXED_TRAJECTORIES[~FIXED_TRAJECTORIES["failed"]]

In [7]:
# Persist the filtered frame under the same PATH prefix as the input file,
# one JSON record per line.
FIXED_TRAJECTORIES.to_json(f"{PATH}/samples_fixed.jsonl", orient="records", lines=True)

In [21]:
# Row count of FIXED_TRAJECTORIES as of the run that produced the output below.
len(FIXED_TRAJECTORIES)

749

In [18]:
# Row count of the unfiltered frame read from DATA_FILE, for comparison.
len(ORIGINAL_TRAJECTORIES)

1290

In [13]:
# Alternative starting point (disabled): reload the previously saved filtered file.
# FIXED_TRAJECTORIES = pd.read_json(f"{PATH}/samples_fixed.jsonl", lines=True)
import pandas as pd

# From here on PATH points at this dataset; everything below reads/writes under it.
PATH = "gs://induction-labs/passive_data/2025-08-14/reprocess_all_no_loading-04-30-15/".rstrip("/")
# PREFIX = "hard"
FIXED_TRAJECTORIES = pd.read_json(f"{PATH}/samples.jsonl", lines=True)

# NUM_HOTKEYS = total occurrences of the literal 'hotkey(key=' across a row's action strings.
FIXED_TRAJECTORIES = FIXED_TRAJECTORIES.assign(
    NUM_HOTKEYS=FIXED_TRAJECTORIES['actions'].map(
        lambda acts: sum(act.count('hotkey(key=') for act in acts)
    )
)
print(FIXED_TRAJECTORIES['NUM_HOTKEYS'].describe())

# Keep only rows with at most 5 hotkey occurrences.
FIXED_TRAJECTORIES = FIXED_TRAJECTORIES.loc[FIXED_TRAJECTORIES['NUM_HOTKEYS'].le(5)]
FIXED_TRAJECTORIES
# FIXED_TRAJECTORIES['actions'].iloc[0][0]
# FIXED_TRAJECTORIES = FIXED_TRAJECTORIES.drop(columns=["thinking", "actions"])

count    677.000000
mean       0.850812
std        1.246256
min        0.000000
25%        0.000000
50%        0.000000
75%        2.000000
max        4.000000
Name: NUM_HOTKEYS, dtype: float64


Unnamed: 0,attempt_id,eval_task_id,actions,thinking,instruction,trajectory_length,source_dir,image_turns_start,image_turns_end,text_turns_start,text_turns_end,unmask_last_only,NUM_HOTKEYS
0,3RPNEpUpBt8B,3RPNEpUpBt8B,"[click(start_box='(687,0)'), click(start_box='...","[The deals list is still in the foreground, an...","On the YC Bookface website, navigate to the De...",6,gs://induction-labs-data-ext/action_capture/jo...,0,5,0,5,False,0
1,ysBGeJaIcIg0,ysBGeJaIcIg0,"[click(start_box='(762,0)'), click(start_box='...",[The browser is still on the ChatGPT share pag...,"1. In Chrome, switch from the ChatGPT tab to t...",6,gs://induction-labs-data-ext/action_capture/jo...,0,5,0,5,False,0
2,WM1VRv1ZGWA0,WM1VRv1ZGWA0,"[click(start_box='(1559,227)'), scroll(directi...",[The GPU.net deal page is open; pricing detail...,"1. In your browser, open bookface.ycombinator....",6,gs://induction-labs-data-ext/action_capture/jo...,0,5,0,5,False,0
3,4NhwPtQL8ePo,4NhwPtQL8ePo,"[click(start_box='(657,629)'), hotkey(key='z',...","[The Notion page shows the Outbound list, and ...",Open Notion and go to the page titled “Q2-25 R...,6,gs://induction-labs-data-ext/action_capture/ar...,0,5,0,5,False,2
4,T34h7cPwrGwd,T34h7cPwrGwd,"[click(start_box='(569,0)'), click(start_box='...","[The SFCompute “Getting started” doc is open, ...","In Google Chrome, review the documentation on ...",6,gs://induction-labs-data-ext/action_capture/jo...,0,5,0,5,False,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
672,SBYyzRpCFX2T,SBYyzRpCFX2T,"[type(content=''), click(start_box='(1004,918...","[The Notion page is still open, and the cursor...",Research current H100 GPU pricing for alternat...,11,gs://induction-labs-data-ext/action_capture/jo...,0,10,0,10,False,0
673,v6D59d7Dpymt,v6D59d7Dpymt,"[type(content='\n'), hotkey(key='t', modifiers...",[The diff is ready and the integrated terminal...,Open Visual Studio Code (with Vim key-bindings...,11,gs://induction-labs-data-ext/action_capture/je...,0,10,0,10,False,2
674,6jVs1SHFLU1B,6jVs1SHFLU1B,"[hotkey(key='a', modifiers='ctrl'), click(star...",[LibreOffice Writer is open with the file disp...,"Open the file ""loa-one-time-submission-sealand...",11,gs://induction-labs-data-ext/action_capture/jo...,0,10,0,10,False,1
675,HahLlXsiEkeV,HahLlXsiEkeV,"[type(content='Time'), click(start_box='(988,4...",[The Options window is open to “Basic Fonts (W...,Open LibreOffice Writer and change the default...,11,gs://induction-labs-data-ext/action_capture/jo...,0,10,0,10,False,0


In [14]:
# Lower every trajectory_length by one.
# NOTE: not idempotent — each run of this cell subtracts one more, so run it
# exactly once after (re)loading FIXED_TRAJECTORIES.
# .assign builds a new frame instead of writing a column into the frame produced
# by the boolean NUM_HOTKEYS selection, which avoids SettingWithCopyWarning.
FIXED_TRAJECTORIES = FIXED_TRAJECTORIES.assign(
    trajectory_length=FIXED_TRAJECTORIES['trajectory_length'] - 1
)

In [29]:
# Re-display the frame to inspect the adjusted trajectory_length column.
FIXED_TRAJECTORIES

Unnamed: 0,attempt_id,eval_task_id,actions,thinking,instruction,trajectory_length,source_dir,image_turns_start,image_turns_end,text_turns_start,text_turns_end,unmask_last_only,NUM_HOTKEYS
0,3RPNEpUpBt8B,3RPNEpUpBt8B,"[click(start_box='(687,0)'), click(start_box='...","[The deals list is still in the foreground, an...","On the YC Bookface website, navigate to the De...",5,gs://induction-labs-data-ext/action_capture/jo...,0,5,0,5,False,0
1,ysBGeJaIcIg0,ysBGeJaIcIg0,"[click(start_box='(762,0)'), click(start_box='...",[The browser is still on the ChatGPT share pag...,"1. In Chrome, switch from the ChatGPT tab to t...",5,gs://induction-labs-data-ext/action_capture/jo...,0,5,0,5,False,0
2,WM1VRv1ZGWA0,WM1VRv1ZGWA0,"[click(start_box='(1559,227)'), scroll(directi...",[The GPU.net deal page is open; pricing detail...,"1. In your browser, open bookface.ycombinator....",5,gs://induction-labs-data-ext/action_capture/jo...,0,5,0,5,False,0
3,4NhwPtQL8ePo,4NhwPtQL8ePo,"[click(start_box='(657,629)'), hotkey(key='z',...","[The Notion page shows the Outbound list, and ...",Open Notion and go to the page titled “Q2-25 R...,5,gs://induction-labs-data-ext/action_capture/ar...,0,5,0,5,False,2
4,T34h7cPwrGwd,T34h7cPwrGwd,"[click(start_box='(569,0)'), click(start_box='...","[The SFCompute “Getting started” doc is open, ...","In Google Chrome, review the documentation on ...",5,gs://induction-labs-data-ext/action_capture/jo...,0,5,0,5,False,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
672,SBYyzRpCFX2T,SBYyzRpCFX2T,"[type(content=''), click(start_box='(1004,918...","[The Notion page is still open, and the cursor...",Research current H100 GPU pricing for alternat...,10,gs://induction-labs-data-ext/action_capture/jo...,0,10,0,10,False,0
673,v6D59d7Dpymt,v6D59d7Dpymt,"[type(content='\n'), hotkey(key='t', modifiers...",[The diff is ready and the integrated terminal...,Open Visual Studio Code (with Vim key-bindings...,10,gs://induction-labs-data-ext/action_capture/je...,0,10,0,10,False,2
674,6jVs1SHFLU1B,6jVs1SHFLU1B,"[hotkey(key='a', modifiers='ctrl'), click(star...",[LibreOffice Writer is open with the file disp...,"Open the file ""loa-one-time-submission-sealand...",10,gs://induction-labs-data-ext/action_capture/jo...,0,10,0,10,False,1
675,HahLlXsiEkeV,HahLlXsiEkeV,"[type(content='Time'), click(start_box='(988,4...",[The Options window is open to “Basic Fonts (W...,Open LibreOffice Writer and change the default...,10,gs://induction-labs-data-ext/action_capture/jo...,0,10,0,10,False,0


In [18]:
import pandas as pd
import numpy as np
from numpy.random import default_rng

rng = default_rng(42)  # shared generator; fixed seed for reproducibility


def expand_with_sliding_windows(df: pd.DataFrame, width: int,
                                length_col: str = "trajectory_length",
                                seed: "int | None" = None) -> pd.DataFrame:
    """
    Expand every row of *df* into one row per sliding window of size *width*.

    A row whose length is n produces (n - width + 1) rows; the i-th of them
    (i = 0, 1, ...) describes the window that starts at turn i. Rows whose
    length is smaller than *width* produce nothing. If no row is long enough
    the result is an empty frame that still carries all output columns.

    The following columns are written (overwriting any existing ones):

    * ``image_turns_end``, ``text_turns_end``  -- window start + width
    * ``rand_turns_start``   -- random integer drawn uniformly from
                                [0, image_turns_end - 1]
    * ``text_turns_start``   -- equal to ``rand_turns_start``
    * ``image_turns_start``  -- max(window start, ``rand_turns_start``)
    * ``unmask_last_only``   -- None
    * ``unmask_last_n_turns``-- image_turns_end - image_turns_start

    Parameters
    ----------
    df : pd.DataFrame                 Original data (not modified).
    width : int                       Size of the sliding window.
    length_col : str, default 'trajectory_length'
                                      Column holding the total length.
    seed : int or None, default None  When given, random starts are drawn from
                                      a fresh generator seeded with this value,
                                      so repeated calls return the same result.
                                      When None, the module-level ``rng`` is
                                      used and its state advances on each call.

    Returns
    -------
    pd.DataFrame                      Expanded frame with a fresh 0..n-1 index.

    Raises
    ------
    ValueError                        If *width* is not positive.
    """
    if width <= 0:
        raise ValueError("width must be a positive integer")

    generator = rng if seed is None else default_rng(seed)

    # how many windows each row will produce
    n_windows = df[length_col] - width + 1

    # keep only rows where at least one window fits
    valid_mask = n_windows > 0
    df_valid = df[valid_mask]
    counts = n_windows[valid_mask].to_numpy().astype(np.int64)

    # Repeat rows by position, not by label: label-based .loc returns extra rows
    # whenever the index contains duplicate labels.
    row_positions = np.repeat(np.arange(len(df_valid)), counts)
    expanded = df_valid.iloc[row_positions].reset_index(drop=True)

    # Window start offsets 0..k-1 for each source row; np.concatenate rejects an
    # empty list, so the "no valid rows" case is handled explicitly.
    if counts.size:
        starts = np.concatenate([np.arange(k) for k in counts])
    else:
        starts = np.empty(0, dtype=np.int64)

    ends = starts + width
    if ends.size:
        # endpoint=True makes the upper bound (ends - 1) inclusive
        rand_start = generator.integers(0, ends - 1, endpoint=True)
    else:
        rand_start = np.empty(0, dtype=np.int64)

    expanded["image_turns_end"] = ends
    expanded["text_turns_end"] = ends
    expanded["rand_turns_start"] = rand_start

    # Text context starts at the random turn; image context never starts before
    # the window itself.
    expanded["text_turns_start"] = rand_start
    expanded["image_turns_start"] = np.maximum(starts, rand_start)

    expanded["unmask_last_only"] = None
    expanded["unmask_last_n_turns"] = expanded["image_turns_end"] - expanded["image_turns_start"]

    return expanded

In [19]:
# Expand a copy of every trajectory into its width-5 sliding windows.
# (The reward == 1 restriction is currently switched off, so all rows are expanded.)
CORRECT_TRAJECTORIES_EXPANDED = expand_with_sliding_windows(
    FIXED_TRAJECTORIES.copy(),
    width=5,
    length_col="trajectory_length",
)
# Optional cap on window end, currently disabled:
# num = 50
# CORRECT_TRAJECTORIES_EXPANDED = CORRECT_TRAJECTORIES_EXPANDED[CORRECT_TRAJECTORIES_EXPANDED["text_turns_end"] <= num]
CORRECT_TRAJECTORIES_EXPANDED

Unnamed: 0,attempt_id,eval_task_id,actions,thinking,instruction,trajectory_length,source_dir,image_turns_start,image_turns_end,text_turns_start,text_turns_end,unmask_last_only,NUM_HOTKEYS,rand_turns_start,unmask_last_n_turns
0,3RPNEpUpBt8B,3RPNEpUpBt8B,"[click(start_box='(687,0)'), click(start_box='...","[The deals list is still in the foreground, an...","On the YC Bookface website, navigate to the De...",5,gs://induction-labs-data-ext/action_capture/jo...,0,5,0,5,,0,0,5
1,ysBGeJaIcIg0,ysBGeJaIcIg0,"[click(start_box='(762,0)'), click(start_box='...",[The browser is still on the ChatGPT share pag...,"1. In Chrome, switch from the ChatGPT tab to t...",5,gs://induction-labs-data-ext/action_capture/jo...,3,5,3,5,,0,3,2
2,WM1VRv1ZGWA0,WM1VRv1ZGWA0,"[click(start_box='(1559,227)'), scroll(directi...",[The GPU.net deal page is open; pricing detail...,"1. In your browser, open bookface.ycombinator....",5,gs://induction-labs-data-ext/action_capture/jo...,3,5,3,5,,0,3,2
3,4NhwPtQL8ePo,4NhwPtQL8ePo,"[click(start_box='(657,629)'), hotkey(key='z',...","[The Notion page shows the Outbound list, and ...",Open Notion and go to the page titled “Q2-25 R...,5,gs://induction-labs-data-ext/action_capture/ar...,2,5,2,5,,2,2,3
4,T34h7cPwrGwd,T34h7cPwrGwd,"[click(start_box='(569,0)'), click(start_box='...","[The SFCompute “Getting started” doc is open, ...","In Google Chrome, review the documentation on ...",5,gs://induction-labs-data-ext/action_capture/jo...,2,5,2,5,,0,2,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3393,ixVnKchUDibi,ixVnKchUDibi,"[type(content='lxjjjo}'), type(content='kkkko'...",[Just under the Report Infeasible </button> I ...,Open VS Code and load the file `frontend/src/a...,10,gs://induction-labs-data-ext/action_capture/jo...,1,6,1,6,,1,1,5
3394,ixVnKchUDibi,ixVnKchUDibi,"[type(content='lxjjjo}'), type(content='kkkko'...",[Just under the Report Infeasible </button> I ...,Open VS Code and load the file `frontend/src/a...,10,gs://induction-labs-data-ext/action_capture/jo...,2,7,2,7,,1,2,5
3395,ixVnKchUDibi,ixVnKchUDibi,"[type(content='lxjjjo}'), type(content='kkkko'...",[Just under the Report Infeasible </button> I ...,Open VS Code and load the file `frontend/src/a...,10,gs://induction-labs-data-ext/action_capture/jo...,6,8,6,8,,1,6,2
3396,ixVnKchUDibi,ixVnKchUDibi,"[type(content='lxjjjo}'), type(content='kkkko'...",[Just under the Report Infeasible </button> I ...,Open VS Code and load the file `frontend/src/a...,10,gs://induction-labs-data-ext/action_capture/jo...,4,9,4,9,,1,4,5


In [None]:
# Write every expanded window to one JSONL file under PATH, then report how many
# rows were written and where.
save_path = f"{PATH}/expanded_random_truncate_full.jsonl"
CORRECT_TRAJECTORIES_EXPANDED.to_json(save_path, orient="records", lines=True)
print(f"Saved {len(CORRECT_TRAJECTORIES_EXPANDED)} expanded trajectories to {save_path}")

Saved 3398 expanded trajectories to gs://induction-labs/passive_data/2025-08-14/reprocess_all_no_loading-04-30-15/expanded_random_truncate_full.jsonl


In [None]:
# The output file names are built from PREFIX and num. Both must be defined in an
# earlier cell; check up front so a fresh kernel fails with an actionable message
# before any work is done.
_missing = [name for name in ("PREFIX", "num") if name not in globals()]
if _missing:
    raise NameError(
        f"{', '.join(_missing)} must be defined before writing the train/test split"
    )

# Shuffle deterministically, then hold out the last `test_size` rows as the test set.
shuffled = CORRECT_TRAJECTORIES_EXPANDED.sample(frac=1, random_state=248239)
test_size = 32
# Clamp at 0: a negative slice bound would otherwise count from the end of the
# frame and produce a wrong split when there are fewer rows than test_size.
train_size = max(len(shuffled) - test_size, 0)
TRAIN_SET = shuffled.iloc[:train_size]
TEST_SET = shuffled.iloc[train_size:].reset_index(drop=True)

split_stem = f"{PATH}/{PREFIX}_samples_correct_trajectories_expanded_under_{num}"
TRAIN_SET.to_json(f"{split_stem}_train.jsonl", orient="records", lines=True)
TEST_SET.to_json(f"{split_stem}_test.jsonl", orient="records", lines=True)

In [None]:
# pd.concat([TRAIN_SET] * 10, ignore_index=True).to_json(f"{PATH}/{PREFIX}_samples_correct_trajectories_expanded_under_{num}_train_10x.jsonl", orient="records", lines=True)

In [14]:
# TRAIN_SET size in units of 32 rows (32 is also test_size in the split cell;
# presumably a batch size — TODO confirm).
len(TRAIN_SET)/32

46.0