In [1]:
import pandas as pd

In [2]:
PATH = "gs://induction-labs/evals/checkpoints/merged_step_-1_uUhioFvR_step_868_Nik8u94a/2025-08-04T04-31-39/osworld_eval_8yv5xv_j"
DATA_FILE = f"{PATH}/samples.jsonl"

In [3]:
# fix samples
ORIGINAL_TRAJECTORIES = pd.read_json(DATA_FILE, lines=True)
FIXED_TRAJECTORIES = ORIGINAL_TRAJECTORIES.copy()
FIXED_TRAJECTORIES["eval_task_id"] = FIXED_TRAJECTORIES["eval_task_id"].str.replace(r"-2$", "", regex=True)
FIXED_TRAJECTORIES = FIXED_TRAJECTORIES[(FIXED_TRAJECTORIES["reward"] == 0) | (FIXED_TRAJECTORIES["reward"] == 1)]

In [4]:
from google.cloud import storage
import asyncio
import json
import re

_GS_RE = re.compile(r"^gs://([^/]+)/(.+)$")
def load_turns_gcs(gs_uri: str):
    """
    Reads a JSON array from Google Cloud Storage and returns
    [{"image": ..., "text": ...}, ...] minus the last row.
    """
    m = _GS_RE.match(gs_uri)
    if not m:
        raise ValueError(f"Not a valid gs:// URI: {gs_uri}")
    bucket_name, blob_name = m.groups()

    client = storage.Client()
    bucket = client.bucket(bucket_name)
    blob = bucket.blob(blob_name)

    # download the whole object as one string
    data_str = blob.download_as_text()

    records = json.loads(data_str)  # list-of-dicts
    return [
        {"image": r["image"], "text": r["text"]} for r in records[:-1]
    ]  # mimic .iloc[0:-1]

async def load_turns_gcs_async(semaphore: asyncio.Semaphore, gs_uri: str):
    """
    Asynchronous version of load_turns_gcs.
    """
    async with semaphore:
        return await asyncio.to_thread(load_turns_gcs, gs_uri)

In [5]:
legacy = False
if legacy:
    semaphore = asyncio.Semaphore(64)  # limit concurrent loads
    async def check_if_fail(semaphore, attempt_id):
        try:
            return (await load_turns_gcs_async(semaphore, f"{PATH}/metadata/{attempt_id}.json"))[-1]["text"] is None
        except:
            return True

    failed = await asyncio.gather(*[
        check_if_fail(semaphore, row["attempt_id"])
        for _i, row in FIXED_TRAJECTORIES.iterrows()
    ])

In [6]:
if legacy:
    FIXED_TRAJECTORIES["failed"] = failed
    FIXED_TRAJECTORIES = FIXED_TRAJECTORIES[~FIXED_TRAJECTORIES["failed"]]

In [7]:
FIXED_TRAJECTORIES.to_json(f"{PATH}/samples_fixed.jsonl", orient="records", lines=True)

In [21]:
len(FIXED_TRAJECTORIES)

749

In [18]:
len(ORIGINAL_TRAJECTORIES)

1290

In [None]:
# FIXED_TRAJECTORIES = pd.read_json(f"{PATH}/samples_fixed.jsonl", lines=True)
import pandas as pd
PATH = "gs://induction-labs/passive_data/2025-08-13/joyce_data-05-39-48".rstrip("/")
# PREFIX = "hard"
FIXED_TRAJECTORIES = pd.read_json(f"{PATH}/samples.jsonl", lines=True)
FIXED_TRAJECTORIES['NUM_HOTKEYS'] = FIXED_TRAJECTORIES['actions'].apply(lambda x: sum(k.count('hotkey(key=') for k in x))
print(FIXED_TRAJECTORIES['NUM_HOTKEYS'].describe())
FIXED_TRAJECTORIES = FIXED_TRAJECTORIES[FIXED_TRAJECTORIES['NUM_HOTKEYS'] <= 5]
# FIXED_TRAJECTORIES['actions'].iloc[0][0]
# FIXED_TRAJECTORIES = FIXED_TRAJECTORIES.drop(columns=["thinking", "actions"])

count    233.000000
mean       0.081545
std        0.379715
min        0.000000
25%        0.000000
50%        0.000000
75%        0.000000
max        4.000000
Name: NUM_HOTKEYS, dtype: float64


In [13]:
FIXED_TRAJECTORIES = FIXED_TRAJECTORIES[FIXED_TRAJECTORIES['NUM_HOTKEYS'] <= 5]
FIXED_TRAJECTORIES['NUM_HOTKEYS'].describe()

count    233.000000
mean       0.081545
std        0.379715
min        0.000000
25%        0.000000
50%        0.000000
75%        0.000000
max        4.000000
Name: NUM_HOTKEYS, dtype: float64

In [6]:
FIXED_TRAJECTORIES['trajectory_length'] = FIXED_TRAJECTORIES['trajectory_length'] - 1

In [14]:
FIXED_TRAJECTORIES

Unnamed: 0,attempt_id,eval_task_id,actions,thinking,instruction,trajectory_length,source_dir,image_turns_start,image_turns_end,text_turns_start,text_turns_end,unmask_last_only,NUM_HOTKEYS
0,4EMcPZpeDX7s,4EMcPZpeDX7s,"[click(start_box='(485,0)'), click(start_box='...",[I need to see the exact wording of the Papers...,Open the YC Bookface page for the Paperspace d...,11,gs://induction-labs-data-ext/action_capture/jo...,0,10,0,10,False,0
1,1t7ltbe9Uvmq,1t7ltbe9Uvmq,"[click(start_box='(349,509)'), left_double(sta...",[I need to change the memo on the 07/08/2025 P...,"In YNAB, open your Scotiabank Visa register, d...",11,gs://induction-labs-data-ext/action_capture/jo...,0,10,0,10,False,0
2,d1ZICwaD5VXq,d1ZICwaD5VXq,"[click(start_box='(1182,0)'), click(start_box=...",[I still need to add the Porter Airlines split...,"In the Split Transaction window, add a new lin...",11,gs://induction-labs-data-ext/action_capture/jo...,0,10,0,10,False,0
3,TPSFiCYMpiBY,TPSFiCYMpiBY,"[type(content='YC deals for compute, the prici...",[The sentence in Background currently cuts off...,On the “GPU Compute Deals/Pricing” Notion page...,11,gs://induction-labs-data-ext/action_capture/jo...,0,10,0,10,False,0
4,lsAcHl0Yp4PD,lsAcHl0Yp4PD,[type(content='$350k in Azure credits for 4 ye...,[I need the Azure section to list the headline...,"In the GPU Compute Deals/Pricing Notion page, ...",11,gs://induction-labs-data-ext/action_capture/jo...,0,10,0,10,False,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
228,LWTk6ZYbjgPY,LWTk6ZYbjgPY,"[click(start_box='(1241,828)'), type(content='...",[I still need to add the extra note to the Str...,Open the 27 Jul 2025 “Stamp House” split trans...,11,gs://induction-labs-data-ext/action_capture/jo...,0,10,0,10,False,0
229,HEl4EUb9sIQW,HEl4EUb9sIQW,"[right_single(start_box='(1558,504)'), click(s...",[I still need the exact spot for 2800 on this ...,Switch to the Wanderlog “Trip to SF” tab and a...,11,gs://induction-labs-data-ext/action_capture/jo...,0,10,0,10,False,0
230,vjn7uH3zh1cI,vjn7uH3zh1cI,"[type(content='GPU Compute Deals/Pricing'), cl...",[I still need to add the new document record b...,"In the “Document Hub” database, create a new e...",11,gs://induction-labs-data-ext/action_capture/jo...,0,10,0,10,False,0
231,1Wdkd07hWGYX,1Wdkd07hWGYX,"[type(content='the painted ladies'), click(sta...",[I still need to add The Painted Ladies to the...,Add “The Painted Ladies” to today’s plan and r...,11,gs://induction-labs-data-ext/action_capture/jo...,0,10,0,10,False,0


In [15]:
import pandas as pd
import numpy as np

def expand_with_sliding_windows(df: pd.DataFrame, width: int,
                                length_col: str = "trajectory_length") -> pd.DataFrame:
    """
    For every row in *df*, create (trajectory_length – width + 1) copies,
    one for every sliding window of size *width*.
    Adds two new columns:  `window_start` and `window_end` (inclusive).
    Rows whose `trajectory_length` < width are silently dropped.

    Parameters
    ----------
    df : pd.DataFrame                 Original data.
    width : int                       Size of the sliding window.
    length_col : str, default 'trajectory_length'
                                      Column holding the total length.

    Returns
    -------
    pd.DataFrame                      Expanded frame.
    """
    if width <= 0:
        raise ValueError("width must be a positive integer")

    # how many windows each row will produce
    n_windows = df[length_col] - width + 1

    # keep only rows where at least one window fits
    valid_mask = n_windows > 0
    df_valid   = df[valid_mask].copy()
    n_windows  = n_windows[valid_mask]

    # repeat the rows the right number of times
    expanded = df_valid.loc[df_valid.index.repeat(n_windows)].reset_index(drop=True)

    # build the window start indices in one NumPy shot
    starts = np.concatenate([np.arange(k) for k in n_windows])
    expanded["image_turns_start"] = starts
    expanded["image_turns_end"]   = starts + width        # inclusive upper bound
    expanded["text_turns_start"] = 0
    expanded["text_turns_end"]   = starts + width
    expanded["unmask_last_only"] = True
    expanded["unmask_last_only"][expanded["image_turns_end"] <= 5] = False

    return expanded

In [16]:
CORRECT_TRAJECTORIES_EXPANDED = FIXED_TRAJECTORIES.copy()
CORRECT_TRAJECTORIES_EXPANDED = CORRECT_TRAJECTORIES_EXPANDED #[CORRECT_TRAJECTORIES_EXPANDED["reward"] == 1]
CORRECT_TRAJECTORIES_EXPANDED = expand_with_sliding_windows(
    CORRECT_TRAJECTORIES_EXPANDED,
    width=5,
    length_col="trajectory_length"
)
# num = 50
# CORRECT_TRAJECTORIES_EXPANDED = CORRECT_TRAJECTORIES_EXPANDED[CORRECT_TRAJECTORIES_EXPANDED["text_turns_end"] <= num]
CORRECT_TRAJECTORIES_EXPANDED

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  expanded["unmask_last_only"][expanded["image_turns_end"] <= 5] = False
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a

Unnamed: 0,attempt_id,eval_task_id,actions,thinking,instruction,trajectory_length,source_dir,image_turns_start,image_turns_end,text_turns_start,text_turns_end,unmask_last_only,NUM_HOTKEYS
0,4EMcPZpeDX7s,4EMcPZpeDX7s,"[click(start_box='(485,0)'), click(start_box='...",[I need to see the exact wording of the Papers...,Open the YC Bookface page for the Paperspace d...,11,gs://induction-labs-data-ext/action_capture/jo...,0,5,0,5,False,0
1,4EMcPZpeDX7s,4EMcPZpeDX7s,"[click(start_box='(485,0)'), click(start_box='...",[I need to see the exact wording of the Papers...,Open the YC Bookface page for the Paperspace d...,11,gs://induction-labs-data-ext/action_capture/jo...,1,6,0,6,True,0
2,4EMcPZpeDX7s,4EMcPZpeDX7s,"[click(start_box='(485,0)'), click(start_box='...",[I need to see the exact wording of the Papers...,Open the YC Bookface page for the Paperspace d...,11,gs://induction-labs-data-ext/action_capture/jo...,2,7,0,7,True,0
3,4EMcPZpeDX7s,4EMcPZpeDX7s,"[click(start_box='(485,0)'), click(start_box='...",[I need to see the exact wording of the Papers...,Open the YC Bookface page for the Paperspace d...,11,gs://induction-labs-data-ext/action_capture/jo...,3,8,0,8,True,0
4,4EMcPZpeDX7s,4EMcPZpeDX7s,"[click(start_box='(485,0)'), click(start_box='...",[I need to see the exact wording of the Papers...,Open the YC Bookface page for the Paperspace d...,11,gs://induction-labs-data-ext/action_capture/jo...,4,9,0,9,True,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1626,RzqnS7Ywf3Vj,RzqnS7Ywf3Vj,"[type(content='wanderlog sf\n'), click(start_b...",[I need Google results for Wanderlog’s San Fra...,"Search Google for “wanderlog sf”, open the Wan...",11,gs://induction-labs-data-ext/action_capture/jo...,2,7,0,7,True,0
1627,RzqnS7Ywf3Vj,RzqnS7Ywf3Vj,"[type(content='wanderlog sf\n'), click(start_b...",[I need Google results for Wanderlog’s San Fra...,"Search Google for “wanderlog sf”, open the Wan...",11,gs://induction-labs-data-ext/action_capture/jo...,3,8,0,8,True,0
1628,RzqnS7Ywf3Vj,RzqnS7Ywf3Vj,"[type(content='wanderlog sf\n'), click(start_b...",[I need Google results for Wanderlog’s San Fra...,"Search Google for “wanderlog sf”, open the Wan...",11,gs://induction-labs-data-ext/action_capture/jo...,4,9,0,9,True,0
1629,RzqnS7Ywf3Vj,RzqnS7Ywf3Vj,"[type(content='wanderlog sf\n'), click(start_b...",[I need Google results for Wanderlog’s San Fra...,"Search Google for “wanderlog sf”, open the Wan...",11,gs://induction-labs-data-ext/action_capture/jo...,5,10,0,10,True,0


In [17]:
CORRECT_TRAJECTORIES_EXPANDED.to_json(f"{PATH}/expanded_samples.jsonl", orient="records", lines=True)
print(f"Saved {len(CORRECT_TRAJECTORIES_EXPANDED)} expanded trajectories to {PATH}/expanded_samples.jsonl")

Saved 1631 expanded trajectories to gs://induction-labs/passive_data/2025-08-13/joyce_data-05-39-48/expanded_samples.jsonl


In [None]:
shuffled = CORRECT_TRAJECTORIES_EXPANDED.sample(frac=1, random_state=248239)
test_size = 32
train_size = len(shuffled) - test_size
TRAIN_SET = shuffled.iloc[:train_size]
TEST_SET = shuffled.iloc[train_size:]
TEST_SET = pd.concat([TEST_SET], ignore_index=True)

TRAIN_SET.to_json(f"{PATH}/{PREFIX}_samples_correct_trajectories_expanded_under_{num}_train.jsonl", orient="records", lines=True)
TEST_SET.to_json(f"{PATH}/{PREFIX}_samples_correct_trajectories_expanded_under_{num}_test.jsonl", orient="records", lines=True)

In [None]:
# pd.concat([TRAIN_SET] * 10, ignore_index=True).to_json(f"{PATH}/{PREFIX}_samples_correct_trajectories_expanded_under_{num}_train_10x.jsonl", orient="records", lines=True)

In [14]:
len(TRAIN_SET)/32

46.0