In [1]:
from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
import torch
import os
from datasets import load_dataset

SLURM_PATH = '/home/yandex/MLWG2025/amitr5'
CACHE_DIR = f'{SLURM_PATH}/tmp/hf_cache'  # Changed to /tmp to avoid quota issues

os.makedirs(CACHE_DIR, exist_ok=True)

if SLURM_PATH in os.getcwd():
    os.environ["PIP_PATH"] = f"{SLURM_PATH}/BaryGNN/anaconda3/envs/conf/bin/pip"
    os.environ["TEMP_DIR"] = CACHE_DIR
    os.environ["HF_HOME"] = CACHE_DIR
    os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
    os.environ["HF_DATASETS_CACHE"] = CACHE_DIR
    os.environ["HF_HUB_CACHE"] = CACHE_DIR
    os.environ["TMPDIR"] = CACHE_DIR
    # os.environ["TOKENIZERS_PARALLELISM"] = "false"

## Mind2Web
Mind2Web is a large-scale dataset for grounding language instructions to web actions.

In [None]:
! python Mind2Web/src/candidate_generation/evaluate.py\
    --model_path osunlp/MindAct_CandidateGeneration_deberta-v3-base\
    --data_path osunlp/Multimodal-Mind2Web\
    --split_file test_website\
    --output_dir ./evaluation_results

Resolving data files: 100%|██████████████████| 27/27 [00:00<00:00, 10692.68it/s]
Resolving data files: 100%|██████████████████| 27/27 [00:00<00:00, 10692.68it/s]
Map:   4%|▉                          | 36/1019 [03:29<3:04:42, 11.27s/ examples]

In [17]:
ds = load_dataset("osunlp/Multimodal-Mind2Web", cache_dir=CACHE_DIR)

# Check available splits
print("Dataset splits:", list(ds.keys()))
# Access the train split
train_ds = ds['train']
print(f"Number of samples in train split: {len(train_ds)}")

Resolving data files:   0%|          | 0/27 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/23 [00:00<?, ?it/s]

Dataset splits: ['train', 'test_domain', 'test_task', 'test_website']
Number of samples in train split: 7775


In [26]:
test_ds = ds['test_domain']
print(f"Number of samples in test_domain split: {len(test_ds)}")
test_ds = ds['test_task']
print(f"Number of samples in test_task split: {len(test_ds)}")
test_ds = ds['test_website']
print(f"Number of samples in test_website split: {len(test_ds)}")

Number of samples in test_domain split: 4060
Number of samples in test_task split: 1339
Number of samples in test_website split: 1019


In [22]:
import pandas as pd
scores = pd.read_pickle('scores_all_data.pkl')
s = list(scores["scores"].keys())[0]
s

'490dc61c-873d-47b6-9050-369cd18e1253_f68804d6-48de-445b-b201-c63d35b8683c'

In [25]:
# Extract action_uid from the scores key
action_uid = s.split('_')[0]
print(f"Action UID: {action_uid}")

# Search for this action_uid in test_ds
test_df = test_ds.to_pandas()
matching_rows = test_df[test_df['action_uid'] == action_uid]
print(f"\nFound {len(matching_rows)} matching rows in test_ds:")
print(matching_rows)

Action UID: 490dc61c-873d-47b6-9050-369cd18e1253

Found 0 matching rows in test_ds:
Empty DataFrame
Columns: [action_uid, raw_html, cleaned_html, operation, pos_candidates, neg_candidates, website, domain, subdomain, annotation_id, confirmed_task, screenshot, action_reprs, target_action_index, target_action_reprs]
Index: []

Found 0 matching rows in test_ds:
Empty DataFrame
Columns: [action_uid, raw_html, cleaned_html, operation, pos_candidates, neg_candidates, website, domain, subdomain, annotation_id, confirmed_task, screenshot, action_reprs, target_action_index, target_action_reprs]
Index: []


In [None]:
# Extract action_uid from the scores key
action_uid = s.split('_')[1]
print(f"Action UID: {action_uid}")
matching_rows = test_df[test_df['action_uid'] == action_uid]
print(f"\nFound {len(matching_rows)} matching rows in test_ds:")
print(matching_rows)

In [3]:
import pandas as pd

# Convert the train split to a pandas DataFrame
df = train_ds.to_pandas()
df['action_id'] = range(len(df))  # Add a default integer ID column
df.head()

Unnamed: 0,action_uid,raw_html,cleaned_html,operation,pos_candidates,neg_candidates,website,domain,subdomain,annotation_id,confirmed_task,screenshot,action_reprs,target_action_index,target_action_reprs,action_id
0,6c7a7082-2897-41c7-9688-4b0f3d778cdb,"<!DOCTYPE html PUBLIC ""-//W3C//DTD HTML 4.0 Tr...","<html backend_node_id=""208"">\n <body backend_...","{""original_op"": ""CLICK"", ""value"": """", ""op"": ""C...","[{""tag"": ""li"", ""attributes"": ""{\""backend_node_...","[{""tag"": ""div"", ""attributes"": ""{\""backend_node...",united,Travel,Airlines,401c4e6f-6b0b-47b4-8157-92d7ca468bbc,"rent a car in Brooklyn - Central, NY on from A...",{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,"[[heading] CAR -> CLICK, [combobox] Enter pi...",0,[heading] CAR -> CLICK,0
1,b64c2417-c44e-46c4-bb0b-ff1775e7da29,"<!DOCTYPE html PUBLIC ""-//W3C//DTD HTML 4.0 Tr...","<html backend_node_id=""10021"">\n <body backen...","{""original_op"": ""TYPE"", ""value"": ""Brooklyn Cen...","[{""tag"": ""input"", ""attributes"": ""{\""backend_no...","[{""tag"": ""div"", ""attributes"": ""{\""backend_node...",united,Travel,Airlines,401c4e6f-6b0b-47b4-8157-92d7ca468bbc,"rent a car in Brooklyn - Central, NY on from A...",{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,"[[heading] CAR -> CLICK, [combobox] Enter pi...",1,"[combobox] Enter pick up city, airport name, ...",1
2,dad6690b-9b3e-4395-bd06-9aa065bf4027,"<!DOCTYPE html PUBLIC ""-//W3C//DTD HTML 4.0 Tr...","<html backend_node_id=""20041"">\n <body backen...","{""original_op"": ""CLICK"", ""value"": """", ""op"": ""C...","[{""tag"": ""button"", ""attributes"": ""{\""backend_n...","[{""tag"": ""div"", ""attributes"": ""{\""backend_node...",united,Travel,Airlines,401c4e6f-6b0b-47b4-8157-92d7ca468bbc,"rent a car in Brooklyn - Central, NY on from A...",{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,"[[heading] CAR -> CLICK, [combobox] Enter pi...",2,"[div] Brooklyn - Central (New York), US -> CLICK",2
3,e0fd3f28-3f04-455d-8bde-a480f0ec1b0a,"<!DOCTYPE html PUBLIC ""-//W3C//DTD HTML 4.0 Tr...","<html backend_node_id=""30061"">\n <body backen...","{""original_op"": ""CLICK"", ""value"": """", ""op"": ""C...","[{""tag"": ""input"", ""attributes"": ""{\""backend_no...","[{""tag"": ""div"", ""attributes"": ""{\""backend_node...",united,Travel,Airlines,401c4e6f-6b0b-47b4-8157-92d7ca468bbc,"rent a car in Brooklyn - Central, NY on from A...",{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,"[[heading] CAR -> CLICK, [combobox] Enter pi...",3,[textbox] Pickup -> CLICK,3
4,4762d735-9dc2-4717-ae8b-baab0b3446e5,"<!DOCTYPE html PUBLIC ""-//W3C//DTD HTML 4.0 Tr...","<html backend_node_id=""40453"">\n <body backen...","{""original_op"": ""CLICK"", ""value"": """", ""op"": ""C...","[{""tag"": ""td"", ""attributes"": ""{\""backend_node_...","[{""tag"": ""div"", ""attributes"": ""{\""backend_node...",united,Travel,Airlines,401c4e6f-6b0b-47b4-8157-92d7ca468bbc,"rent a car in Brooklyn - Central, NY on from A...",{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,"[[heading] CAR -> CLICK, [combobox] Enter pi...",4,"[button] Sunday, April 9, 2023 -> CLICK",4


In [51]:
n_steps = df.action_reprs.apply(lambda x: len(x))
print(f"max steps: {n_steps.max()}, min steps: {n_steps.min()}, mean: {n_steps.mean():.2f}, median: {n_steps.median()}")

max steps: 37, min steps: 2, mean: 10.82, median: 9.0


In [4]:
# Group by annotation_id (this creates a GroupBy object for fast access)
grouped = df.groupby('annotation_id')

# Retrieve all rows for a specific annotation_id
ann_id = train_ds[0]["annotation_id"]
task_df = grouped.get_group(ann_id).sort_values('target_action_index')

print(f"Task: {task_df.iloc[0]['confirmed_task']}")
# Iterate and display (task_df is a DataFrame)
for _, ex in task_df.iterrows():
    print(
        f"step={int(ex['target_action_index']) + 1}/{len(task_df)} | op={ex['operation']} "
        f"| target_action={ex["target_action_reprs"]} | pos_candidates={len(ex['pos_candidates'])}, action_id={ex['action_id']}"
    )
    # display(train_ds[ex["action_id"]]["screenshot"])

Task: rent a car in Brooklyn - Central, NY on from April 9 to April 15.
step=1/7 | op={"original_op": "CLICK", "value": "", "op": "CLICK"} | target_action=[heading]  CAR -> CLICK | pos_candidates=1, action_id=0
step=2/7 | op={"original_op": "TYPE", "value": "Brooklyn Central", "op": "TYPE"} | target_action=[combobox]  Enter pick up city, airport name, or airport code. -> TYPE: Brooklyn Central | pos_candidates=1, action_id=1
step=3/7 | op={"original_op": "CLICK", "value": "", "op": "CLICK"} | target_action=[div]  Brooklyn - Central (New York), US -> CLICK | pos_candidates=1, action_id=2
step=4/7 | op={"original_op": "CLICK", "value": "", "op": "CLICK"} | target_action=[textbox]  Pickup -> CLICK | pos_candidates=1, action_id=3
step=5/7 | op={"original_op": "CLICK", "value": "", "op": "CLICK"} | target_action=[button]  Sunday, April 9, 2023 -> CLICK | pos_candidates=1, action_id=4
step=6/7 | op={"original_op": "CLICK", "value": "", "op": "CLICK"} | target_action=[button]  Saturday, April

In [12]:
import torch
import sys
import os

# Add Mind2Web to path so we can import from it
sys.path.append('./Mind2Web/src')

from candidate_generation.dataloader import CandidateRankDataset, get_data_split
from candidate_generation.metric import CERerankingEvaluator
from candidate_generation.model import CrossEncoder
from torch.utils.data import DataLoader

eval_data = train_ds
batch_size = 350
max_seq_length = 512

eval_evaluator = CERerankingEvaluator(
    eval_data,
    k=50,
    max_neg=-1,
    batch_size=batch_size,
    name="train",
)

# Use the model path for the CrossEncoder (like in evaluate.py)
model_path = "osunlp/MindAct_CandidateGeneration_deberta-v3-base"
model = CrossEncoder(
    model_path,
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    num_labels=1,
    max_length=max_seq_length,
)

# Evaluate the model
print("Running evaluation...")
eval_evaluator(model, output_path="./output")
print("Evaluation completed!")
print("Results saved to: ./output")

ModuleNotFoundError: No module named 'dataloader'

In [77]:
scores = pd.read_pickle('scores_all_data.pkl')


In [None]:
len(scores["scores"].keys())

7775

In [64]:
k = list(scores["ranks"].keys())[0]
scores["scores"][k]

{'3187': 0.6205139756202698,
 '3195': 0.00034626497654244304,
 '3311': 0.000420488795498386,
 '3315': 0.00034257370862178504,
 '3317': 0.0003998225147370249,
 '3318': 0.00035015755565837026,
 '3322': 0.073184072971344,
 '3326': 0.000325483619235456,
 '3327': 0.00029365852242335677,
 '3328': 0.03268706426024437,
 '3337': 0.000284822192043066,
 '3338': 0.0006392230861820281,
 '3347': 0.00027889112243428826,
 '3348': 0.0006049860967323184,
 '3352': 0.000268565519945696,
 '3353': 0.00027551045059226453,
 '3359': 0.00028698210371658206,
 '3360': 0.0002576765255071223,
 '3361': 0.0002803734387271106,
 '3369': 0.00026953258202411234,
 '3370': 0.0007074717432260513,
 '3382': 0.00032085939892567694,
 '3383': 0.013145231641829014,
 '3398': 0.0014318458270281553,
 '3399': 0.073184072971344,
 '3406': 0.0004041744687128812,
 '3176': 0.0004632917698472738,
 '3193': 0.00036384232225827873,
 '3408': 0.00038974705967120826,
 '3412': 0.0005019463715143502,
 '3416': 0.0005030413158237934,
 '3417': 0.0003

In [74]:
task_df.iloc[0]["pos_candidates"]

array(['{"tag": "li", "attributes": "{\\"backend_node_id\\": \\"1250\\", \\"bounding_box_rect\\": \\"283.1875,220.390625,93.59375,33\\", \\"id\\": \\"bookCarTab\\", \\"role\\": \\"tab\\", \\"class\\": \\"app-components-BookFlight-bookFlight__carButton--3SSGj\\", \\"aria_label\\": \\"heading level 3 Search and reserve a car\\", \\"data_pw_testid_buckeye_candidate\\": \\"1\\"}", "is_original_target": false, "is_top_level_target": true, "backend_node_id": "1250"}'],
      dtype=object)

In [92]:
task_df.iloc[0]

action_uid                          6c7a7082-2897-41c7-9688-4b0f3d778cdb
raw_html               <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Tr...
cleaned_html           <html backend_node_id="208">\n  <body backend_...
operation              {"original_op": "CLICK", "value": "", "op": "C...
pos_candidates         [{"tag": "li", "attributes": "{\"backend_node_...
neg_candidates         [{"tag": "div", "attributes": "{\"backend_node...
website                                                           united
domain                                                            Travel
subdomain                                                       Airlines
annotation_id                       401c4e6f-6b0b-47b4-8157-92d7ca468bbc
confirmed_task         rent a car in Brooklyn - Central, NY on from A...
screenshot             {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...
action_reprs           [[heading]  CAR -> CLICK, [combobox]  Enter pi...
target_action_index                                

In [None]:
uid = task_df.iloc[0]["annotation_id"]
a_uid, a_id = list(scores["scores"].keys())[0].split('_')

In [19]:
list(scores["scores"].keys())[0]

NameError: name 'scores' is not defined

In [116]:
df[df['annotation_id'].str.startswith("15486e7c")]

Unnamed: 0,action_uid,raw_html,cleaned_html,operation,pos_candidates,neg_candidates,website,domain,subdomain,annotation_id,confirmed_task,screenshot,action_reprs,target_action_index,target_action_reprs,action_id


In [82]:
from bs4 import BeautifulSoup
html = task_df.iloc[0]["cleaned_html"]
soup = BeautifulSoup(html, "html.parser")

def get_element_html(node_id: str):
    el = soup.find(attrs={"backend_node_id": node_id})
    return str(el) if el is not None else None

# Example: show HTML for the top-1 candidate
best_id = task_df.iloc[0]["pos_candidates"][0]
print(get_element_html(best_id))

None


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Download the MindAct Candidate Generation model
print("Downloading MindAct Candidate Generation model...")
model_name = "osunlp/MindAct_CandidateGeneration_deberta-v3-base"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=CACHE_DIR)

# Load model
candidate_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    cache_dir=CACHE_DIR,
    torch_dtype=torch.float16,  # Use float16 for efficiency
    device_map="auto"  # Automatically handle device placement
)

print(f"Model {model_name} downloaded and loaded successfully!")
print(f"Model type: {type(candidate_model)}")
print(f"Number of parameters: {sum(p.numel() for p in candidate_model.parameters()):,}")