In [1]:
import json
from pathlib import Path

import pandas as pd
from pyprojroot import here

In [2]:
def load_data(path: Path) -> pd.DataFrame:
    data: dict[str, list[str]] = json.loads(Path(path).read_text())
    df = []
    for domain, examples in data.items():
        for example_id in examples:
            test_config = json.loads(
                here(f"../OSWorld/evaluation_examples/examples/{domain}/{example_id}.json").read_text())
            df.append({
                "example_id": example_id,
                "domain": domain,
                "test_config": test_config,
                "instruction": test_config["instruction"],
            })
    df = pd.DataFrame(df)
    return df

In [3]:
# Full split: every example referenced by test_all.json.
test_all_path = here("../OSWorld/evaluation_examples/test_all.json")
df_all = load_data(test_all_path)
df_all

Unnamed: 0,example_id,domain,test_config,instruction
0,bb5e4c0d-f964-439c-97b6-bdb9747de3f4,chrome,"{'id': 'bb5e4c0d-f964-439c-97b6-bdb9747de3f4',...",Can you make Bing the main search thingy when ...
1,7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3,chrome,"{'id': '7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3',...",Can you help me clean up my computer by gettin...
2,06fe7178-4491-4589-810f-2e2bc9502122,chrome,"{'id': '06fe7178-4491-4589-810f-2e2bc9502122',...",Can you make my computer bring back the last t...
3,e1e75309-3ddb-4d09-92ec-de869c928143,chrome,"{'id': 'e1e75309-3ddb-4d09-92ec-de869c928143',...","Computer, can you turn the webpage I'm looking..."
4,35253b65-1c19-4304-8aa4-6884b8218fc0,chrome,"{'id': '35253b65-1c19-4304-8aa4-6884b8218fc0',...","Hey, I need a quick way back to this site. Cou..."
...,...,...,...,...
364,847a96b6-df94-4927-97e6-8cc9ea66ced7,vs_code,"{'id': '847a96b6-df94-4927-97e6-8cc9ea66ced7',...","Please help me open two workspaces ""/home/user..."
365,7aeae0e2-70ee-4705-821d-1bba5d5b2ddd,vs_code,"{'id': '7aeae0e2-70ee-4705-821d-1bba5d5b2ddd',...",Please help me visualize all numpy arrays in c...
366,dcbe20e8-647f-4f1d-8696-f1c5bbb570e3,vs_code,"{'id': 'dcbe20e8-647f-4f1d-8696-f1c5bbb570e3',...",Please help me change the background of VS Cod...
367,7c4cc09e-7a92-40dd-8338-b2286535c4ed,vs_code,"{'id': '7c4cc09e-7a92-40dd-8338-b2286535c4ed',...",Please help me change the display language of ...


In [4]:
# Small split: the examples referenced by test_small.json.
test_small_path = here("../OSWorld/evaluation_examples/test_small.json")
df_small = load_data(test_small_path)
df_small

Unnamed: 0,example_id,domain,test_config,instruction
0,bb5e4c0d-f964-439c-97b6-bdb9747de3f4,chrome,"{'id': 'bb5e4c0d-f964-439c-97b6-bdb9747de3f4',...",Can you make Bing the main search thingy when ...
1,7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3,chrome,"{'id': '7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3',...",Can you help me clean up my computer by gettin...
2,35253b65-1c19-4304-8aa4-6884b8218fc0,chrome,"{'id': '35253b65-1c19-4304-8aa4-6884b8218fc0',...","Hey, I need a quick way back to this site. Cou..."
3,a96b564e-dbe9-42c3-9ccf-b4498073438a,chrome,"{'id': 'a96b564e-dbe9-42c3-9ccf-b4498073438a',...",Find discussions of community and open one wit...
4,7a4deb26-d57d-4ea9-9a73-630f66a7b568,gimp,"{'id': '7a4deb26-d57d-4ea9-9a73-630f66a7b568',...",Could you tone down the brightness of my photo?
5,554785e9-4523-4e7a-b8e1-8016f565f56a,gimp,"{'id': '554785e9-4523-4e7a-b8e1-8016f565f56a',...",Could you assist me in enhancing the color vib...
6,357ef137-7eeb-4c80-a3bb-0951f26a8aff,libreoffice_calc,"{'id': '357ef137-7eeb-4c80-a3bb-0951f26a8aff',...",I have calculated the total work hours from th...
7,42e0a640-4f19-4b28-973d-729602b5a4a7,libreoffice_calc,"{'id': '42e0a640-4f19-4b28-973d-729602b5a4a7',...","Compute the sum of ""Revenue"" and ""Total Expens..."
8,abed40dc-063f-4598-8ba5-9fe749c0615d,libreoffice_calc,"{'id': 'abed40dc-063f-4598-8ba5-9fe749c0615d',...","Check the names in column ""Names with duplicat..."
9,5d901039-a89c-4bfb-967b-bf66f4df075e,libreoffice_impress,"{'id': '5d901039-a89c-4bfb-967b-bf66f4df075e',...",I want to turn the rectangular image of Columb...


In [5]:
 len(df_all)

369

In [6]:
# Number of examples in the small split.
df_small.shape[0]

39

In [7]:
# Sanity check: every id in the small split also appears in the full split.
set(df_small["example_id"]) <= set(df_all["example_id"])

True

In [8]:
# Fraction of the full split covered by the small split.
df_small.shape[0] / df_all.shape[0]

0.10569105691056911

In [9]:
# Build the 'train' ids: everything in the full split that is not in 'test_small'.
all_ids = set(df_all["example_id"])
small_ids = set(df_small["example_id"])
train_example_ids = all_ids - small_ids


In [10]:
# Keep only the rows of the full split whose id belongs to the train set.
is_train = df_all["example_id"].isin(train_example_ids)
df_train = df_all.loc[is_train]
df_train

Unnamed: 0,example_id,domain,test_config,instruction
2,06fe7178-4491-4589-810f-2e2bc9502122,chrome,"{'id': '06fe7178-4491-4589-810f-2e2bc9502122',...",Can you make my computer bring back the last t...
3,e1e75309-3ddb-4d09-92ec-de869c928143,chrome,"{'id': 'e1e75309-3ddb-4d09-92ec-de869c928143',...","Computer, can you turn the webpage I'm looking..."
5,2ad9387a-65d8-4e33-ad5b-7580065a27ca,chrome,"{'id': '2ad9387a-65d8-4e33-ad5b-7580065a27ca',...",Can you make a new folder for me on the bookma...
6,7a5a7856-f1b6-42a4-ade9-1ca81ca0f263,chrome,"{'id': '7a5a7856-f1b6-42a4-ade9-1ca81ca0f263',...",Can you save this webpage I'm looking at to bo...
7,44ee5668-ecd5-4366-a6ce-c1c9b8d4e938,chrome,"{'id': '44ee5668-ecd5-4366-a6ce-c1c9b8d4e938',...",I am looking for an website address I accessed...
...,...,...,...,...
364,847a96b6-df94-4927-97e6-8cc9ea66ced7,vs_code,"{'id': '847a96b6-df94-4927-97e6-8cc9ea66ced7',...","Please help me open two workspaces ""/home/user..."
365,7aeae0e2-70ee-4705-821d-1bba5d5b2ddd,vs_code,"{'id': '7aeae0e2-70ee-4705-821d-1bba5d5b2ddd',...",Please help me visualize all numpy arrays in c...
366,dcbe20e8-647f-4f1d-8696-f1c5bbb570e3,vs_code,"{'id': 'dcbe20e8-647f-4f1d-8696-f1c5bbb570e3',...",Please help me change the background of VS Cod...
367,7c4cc09e-7a92-40dd-8338-b2286535c4ed,vs_code,"{'id': '7c4cc09e-7a92-40dd-8338-b2286535c4ed',...",Please help me change the display language of ...


In [11]:
# Number of examples left for training.
df_train.shape[0]

330

In [12]:
# Serialise the train split as {domain: [example_id, ...]}, the same layout
# that load_data reads, and write it next to the other split files.
train_split = {}
for domain, ids in df_train.groupby("domain")["example_id"]:
    train_split[domain] = ids.tolist()

train_path = here("../OSWorld/evaluation_examples/train.json")
train_path.write_text(json.dumps(train_split, indent=2))

14738