In [1]:
from run_vlm_eval import main, load_config, set_envs, log_first_batch

  import pynvml  # type: ignore[import]


INFO 12-28 13:49:19 [__init__.py:216] Automatically detected platform cuda.


In [2]:
import os
import json
import yaml
import torch, gc
import pandas as pd
from tqdm import tqdm
from torch.utils.data import DataLoader

from vqa_dataset import PromptDataset, prompt_collate, create_template
from models import load_model_adapter

In [3]:
# Read the evaluation config and split it into its three top-level sections.
cfg = load_config("../configs/test_config.yaml")
model_cfg, tasks_cfg, run_cfg = (cfg[section] for section in ("model", "tasks", "runtime"))

# Hard-coded filesystem locations: results root and the cache directory
# that is handed to set_envs() below.
output_dir = '/pasteur/u/rdcunha/code/mmbu/results'
cache_dir = "/pasteur/u/rdcunha/models"

# Model selection; `device` falls back to "auto" when the config omits it.
model_type, model_name = model_cfg["type"], model_cfg["name"]
device = model_cfg.get("device", "auto")

set_envs(cache_dir)

In [4]:
# Build the adapter for the configured model and load the model/processor pair.
adapter = load_model_adapter(model_type, model_name, device, cache_dir)
model, processor = adapter.load()

# Results go into a per-model sub-directory named after the last path
# component of the model id (e.g. "org/name" -> "name").
file_model_name = model_name.split('/')[-1]
# After the split no '/' can remain, so the directory name is the same string.
model_path = file_model_name

# Idempotency guard: this cell rebinds `output_dir`, so without the check a
# second execution would nest the directory as <results>/<model>/<model>.
if os.path.basename(os.path.normpath(output_dir)) != model_path:
    output_dir = os.path.join(output_dir, model_path)

# makedirs creates any missing parents, so one call covers the results root too.
os.makedirs(output_dir, exist_ok=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [5]:
base_path = '/pasteur/u/rdcunha/data_cache/mmbu/final_data/subsampled_mmbu_data'


def _load_existing_indices(path):
    """Return the set of "index" values already stored in the JSONL file at `path`.

    A missing file yields an empty set. Lines that are not valid JSON, are not
    JSON objects, or lack an "index" key (for example a line truncated by an
    interrupted run) are skipped so that a resume is still possible.
    """
    done = set()
    if not os.path.exists(path):
        return done
    with open(path, "r") as fh:
        for line in fh:
            try:
                done.add(json.loads(line)["index"])
            except (json.JSONDecodeError, KeyError, TypeError):
                # Only malformed records are ignored; anything else
                # (KeyboardInterrupt, I/O errors, ...) still propagates.
                continue
    return done


def _flush_records(fh, records):
    """Append every record in `records` to `fh` as one JSON line, then flush `fh`."""
    for rec in records:
        fh.write(json.dumps(rec) + "\n")
    fh.flush()


# Run every configured task, appending one JSON line per sample to a per-task
# results file. Samples whose index is already in that file are skipped, so an
# interrupted run can be resumed by re-executing this cell.
for task_cfg in tasks_cfg:
    # Double-quoted outer string: nesting the same quote type inside an
    # f-string replacement field is a SyntaxError before Python 3.12.
    print(f"Running task: {task_cfg['name']}")
    out_file = os.path.join(output_dir, f"{file_model_name.replace('/', '_')}_{task_cfg['name']}.jsonl")
    tsv_path = os.path.join(base_path, task_cfg["data_path"])
    df = pd.read_csv(tsv_path, sep='\t')

    # Tasks with "open" in their name get prompts without answer options.
    add_options = ("open" not in task_cfg["name"])
    dataset = PromptDataset(df=df, add_options=add_options)
    loader = DataLoader(
        dataset,
        batch_size=run_cfg["batch_size"],
        shuffle=False,
        collate_fn=prompt_collate,
        num_workers=2,
        persistent_workers=True,
        pin_memory=True,
        prefetch_factor=2
    )

    existing = _load_existing_indices(out_file)

    counter = 0
    saved = []
    first_batch_logged = False

    with open(out_file, "a") as f:
        try:
            for batch in tqdm(loader, desc="Inference"):
                new_batch = [x for x in batch if x["index"] not in existing]
                if not new_batch:
                    continue

                # Release cached GPU memory only before batches that will
                # actually run inference; skipped batches do not need it.
                gc.collect()
                torch.cuda.empty_cache()

                # Inputs are prepared one sample at a time and then stacked
                # by the adapter into a single batch.
                all_inputs = []
                for item in new_batch:
                    single_msg = adapter.create_template(item)
                    single_inp = adapter.prepare_inputs([single_msg], processor, model)
                    all_inputs.append(single_inp)

                batched_inputs = adapter.stack_inputs(all_inputs, model)

                outputs = adapter.infer(model, processor, batched_inputs, run_cfg["max_new_tokens"])

                # log first batch only
                if run_cfg["log_first_batch"] and not first_batch_logged:
                    log_first_batch(outputs, output_dir)
                    first_batch_logged = True

                # save results
                for it, out_text in zip(new_batch, outputs):
                    obj = {
                        "index": it["index"],
                        "question": it["question"],
                        "image_path": it["image_path"],
                        "dataset": it["dataset"],
                        "modality": it["modality"],
                        "class_label": it["class_label"],
                        "answer": out_text
                    }
                    if "options" in it and it["options"] is not None:
                        obj["options"] = it["options"]

                    saved.append(obj)
                    existing.add(it["index"])
                    counter += 1

                    # Write to disk in chunks of 50 records.
                    if counter % 50 == 0:
                        _flush_records(f, saved)
                        saved = []
        finally:
            # Persist whatever is still buffered, both on normal completion
            # and when inference raises (e.g. CUDA out of memory), so that
            # finished samples are not recomputed on the next run.
            _flush_records(f, saved)
            saved = []

print('Completed')

Running task: segmentation_grounding_closed_VQA


Inference:  95%|████████████▎| 582/613 [02:09<00:06,  4.52it/s]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Inference:  95%|████████████▎| 583/613 [02:49<06:02, 12.08s/it]A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Inference:  95%|████████████▍| 584/613 [04:04<14:58, 30.98s/it]A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Inference:  95%|████████████▍| 584/613 [04:14<00:12,  2.30it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 3.47 GiB. GPU 0 has a total capacity of 44.39 GiB of which 2.28 GiB is free. Including non-PyTorch memory, this process has 42.11 GiB memory in use. Of the allocated memory 34.78 GiB is allocated by PyTorch, and 6.83 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [11]:
import os
import pandas as pd
from PIL import Image
from tqdm.auto import tqdm
from pathlib import Path

tsv_path = Path("/pasteur/u/rdcunha/data_cache/mmbu/final_data/subsampled_mmbu_data/final_seg/final_subsampled_seg_grounding_closed_12_29.tsv")

# 1) Load TSV
df = pd.read_csv(tsv_path, sep="\t")
assert "image_path" in df.columns, f"Expected column 'image_path' in TSV. Columns: {list(df.columns)}"

# Normalise before de-duplicating: fill missing values first (astype(str)
# would otherwise turn NaN into the literal string "nan", making fillna a
# no-op), then strip whitespace so paths that differ only in padding are
# scanned once.
paths = df["image_path"].fillna("").astype(str).str.strip()
unique_paths = pd.unique(paths)

print("Rows:", len(df))
print("Unique image paths:", len(unique_paths))

rows = []    # (path, width, height, area, mode) for every readable image
errors = []  # (path, reason) for every path that could not be inspected

for p in tqdm(unique_paths, desc="Scanning image sizes"):
    # Literal "nan"/"none" strings are still treated as missing paths.
    if not p or p.lower() in {"nan", "none"}:
        errors.append((p, "empty/NaN path"))
        continue
    if not os.path.exists(p):
        errors.append((p, "path does not exist"))
        continue

    try:
        # size doesn't require decoding full image into RAM
        with Image.open(p) as img:
            w, h = img.size
            mode = img.mode
        rows.append((p, w, h, w*h, mode))
    except Exception as e:
        # Record the failure and keep scanning; the total is reported below.
        errors.append((p, repr(e)))

sizes = pd.DataFrame(rows, columns=["image_path", "width", "height", "area", "mode"])

# 2) Flags
# gt_512_any : at least one side strictly larger than 512 px
# ge_512_both: both sides at least 512 px
sizes["gt_512_any"] = (sizes["width"] > 512) | (sizes["height"] > 512)
sizes["ge_512_both"] = (sizes["width"] >= 512) & (sizes["height"] >= 512)

# Largest images first.
gt_any = sizes[sizes["gt_512_any"]].sort_values("area", ascending=False)
ge_both = sizes[sizes["ge_512_both"]].sort_values("area", ascending=False)

print(f"Original length: {len(df)}")
print(f"\nReadable images: {len(sizes)}")
print(f"Images with width>512 OR height>512: {len(gt_any)}")
print(f"Images with width>=512 AND height>=512 (your pad_to_512 keeps full-res): {len(ge_both)}")
print(f"Errors/missing: {len(errors)}")

Rows: 3064
Unique image paths: 1768


Scanning image sizes:   0%|          | 0/1768 [00:00<?, ?it/s]

Original length: 3064

Readable images: 1768
Images with width>512 OR height>512: 28
Images with width>=512 AND height>=512 (your pad_to_512 keeps full-res): 567
Errors/missing: 0


In [8]:
# --- resolve the oversized images back to the TSV rows (and indices) that use them ---
assert "index" in df.columns, f"Expected column 'index' in TSV. Columns: {list(df.columns)}"

# Paths of every image flagged as having a side larger than 512 px.
gt_paths = set(gt_any["image_path"])

# Keep only the TSV rows that point at one of those images.
df_gt = df.loc[df["image_path"].astype(str).isin(gt_paths)].copy()

# Bring the measured size columns alongside each row, then order by
# descending area with ties broken by ascending index.
df_gt = pd.merge(
    df_gt,
    gt_any[["image_path", "width", "height", "area", "mode"]],
    on="image_path",
    how="left",
)
df_gt = df_gt.sort_values(by=["area", "index"], ascending=[False, True])

# One entry per matching TSV row, in the sorted order above (duplicates kept).
bad_indices = df_gt["index"].astype(int).tolist()

print(f"\nTSV rows with width>512 OR height>512: {len(df_gt)}")
print(f"Unique indices affected: {df_gt['index'].nunique()}")

display(df_gt[["index", "image_path", "width", "height", "mode", "area"]].head(50))

# De-duplicated, ascending list of the affected indices; the cell's value is
# a preview of the first 50 together with the total count.
bad_indices_unique = sorted(set(bad_indices))
bad_indices_unique[:50], len(bad_indices_unique)


TSV rows with width>512 OR height>512: 28
Unique indices affected: 28


Unnamed: 0,index,image_path,width,height,mode,area
10,2464,/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1280,1024,RGB,1310720
11,2466,/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1280,1024,RGB,1310720
12,2467,/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1280,1024,RGB,1310720
13,2468,/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1280,1024,RGB,1310720
14,2469,/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1280,1024,RGB,1310720
15,2470,/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1280,1024,RGB,1310720
16,2471,/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1280,1024,RGB,1310720
17,2472,/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1280,1024,RGB,1310720
26,2712,/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1024,768,RGB,786432
0,2424,/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,856,606,RGB,518736


([2424,
  2425,
  2426,
  2427,
  2428,
  2429,
  2430,
  2431,
  2432,
  2433,
  2464,
  2466,
  2467,
  2468,
  2469,
  2470,
  2471,
  2472,
  2704,
  2705,
  2706,
  2707,
  2708,
  2709,
  2710,
  2711,
  2712,
  2713],
 28)