In [1]:
from run_vlm_eval import main, load_config, set_envs, log_first_batch

  import pynvml  # type: ignore[import]


INFO 12-17 16:39:32 [__init__.py:216] Automatically detected platform cuda.


In [2]:
import os
import json
import yaml
import torch
import pandas as pd
from tqdm import tqdm
from torch.utils.data import DataLoader

from vqa_dataset import PromptDataset, prompt_collate, create_template
from models import load_model_adapter

In [3]:
# --- Evaluation configuration -------------------------------------------
# Read the YAML config once and split it into the three sections the rest
# of the notebook consumes.
cfg = load_config("configs/test_config.yaml")
model_cfg, tasks_cfg, run_cfg = cfg["model"], cfg["tasks"], cfg["runtime"]

# Model identity; the device falls back to "auto" when the config omits it.
model_type, model_name = model_cfg["type"], model_cfg["name"]
device = model_cfg.get("device", "auto")

# Filesystem locations (hard-coded for this machine).
cache_dir = "/pasteur/u/rdcunha/models"
output_dir = '/pasteur/u/rdcunha/code/mmbu/results'

# NOTE(review): presumably points the model/cache environment variables at
# cache_dir -- confirm against run_vlm_eval.set_envs.
set_envs(cache_dir)

In [4]:
# Build the model-specific adapter and load the model together with its processor.
adapter = load_model_adapter(model_type, model_name, device, cache_dir)
model, processor = adapter.load()

# Short, filesystem-safe model name: the last path component of the model id.
# Stripping a trailing "/" first keeps a local path such as "/models/foo/"
# from producing an empty name.
file_model_name = model_name.rstrip('/').split('/')[-1]
# The last component can no longer contain "/", so no further sanitising is
# needed; `model_path` is kept as a separate name for any later cell using it.
model_path = file_model_name

# Results for this model live in <output_dir>/<model_path>.
# `output_dir` is derived from its own previous value, so guard the join:
# without the check, re-running this cell would nest the folder one level
# deeper every time (results/<model>/<model>/...).
# (Assumes the results root itself is not named exactly like the model.)
if os.path.basename(os.path.normpath(output_dir)) != model_path:
    output_dir = os.path.join(output_dir, model_path)

# makedirs creates missing parent directories as well, so one call suffices.
os.makedirs(output_dir, exist_ok=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [None]:
base_path = '/pasteur/u/rdcunha/data_cache/mmbu/final_data/subsampled_mmbu_data'

# Number of new records buffered in memory before they are written to disk.
FLUSH_EVERY = 50


def _load_done_indices(jsonl_path):
    """Return the set of "index" values already stored in a results file.

    Used to resume an interrupted run. Reading is best-effort: lines that are
    blank, are not valid JSON, or lack an "index" entry are skipped. A missing
    file yields an empty set.
    """
    done = set()
    if not os.path.exists(jsonl_path):
        return done
    with open(jsonl_path, "r") as fh:
        for line in fh:
            try:
                done.add(json.loads(line)["index"])
            except (ValueError, KeyError, TypeError):
                # ValueError covers json.JSONDecodeError (e.g. a truncated last
                # line); KeyError/TypeError cover records without a usable
                # "index". Unlike a bare `except`, this lets KeyboardInterrupt
                # and SystemExit propagate.
                continue
    return done


def _write_records(handle, records):
    """Write each record as one JSON line to `handle`, then flush the handle."""
    for record in records:
        handle.write(json.dumps(record) + "\n")
    handle.flush()


# Hoisted out of the task loop: does not depend on the task.
safe_model_name = file_model_name.replace('/', '_')

for task_cfg in tasks_cfg:
    task_name = task_cfg["name"]
    print(f"Running task: {task_name}")
    out_file = os.path.join(output_dir, f"{safe_model_name}_{task_name}.jsonl")
    tsv_path = os.path.join(base_path, task_cfg["data_path"])
    df = pd.read_csv(tsv_path, sep='\t')

    # Tasks whose name contains "open" are run without answer options.
    add_options = ("open" not in task_name)
    dataset = PromptDataset(df=df, add_options=add_options)
    loader = DataLoader(
        dataset,
        batch_size=run_cfg["batch_size"],
        shuffle=False,
        collate_fn=prompt_collate,
        num_workers=4,
        persistent_workers=True,
        pin_memory=True,
        prefetch_factor=4
    )

    # Indices already present in the output file are skipped (resume support).
    existing = _load_done_indices(out_file)

    pending = []  # records generated but not yet written to disk
    first_batch_logged = False

    with open(out_file, "a") as f:
        try:
            for batch in tqdm(loader, desc="Inference"):

                new_batch = [x for x in batch if x["index"] not in existing]
                if not new_batch:
                    continue

                # Inference. Errors propagate on purpose; the `finally` below
                # still persists everything generated so far.
                messages = [adapter.create_template(item) for item in new_batch]
                # model-specific input prep
                inputs = adapter.prepare_inputs(messages, processor, model)
                outputs = adapter.infer(model, processor, inputs, run_cfg["max_new_tokens"])

                # log first batch only
                if run_cfg["log_first_batch"] and not first_batch_logged:
                    log_first_batch(outputs, output_dir)
                    first_batch_logged = True

                # save results
                for it, out_text in zip(new_batch, outputs):
                    obj = {
                        "index": it["index"],
                        "question": it["question"],
                        "image_path": it["image_path"],
                        "dataset": it["dataset"],
                        "modality": it["modality"],
                        "class_label": it["class_label"],
                        "answer": out_text
                    }
                    if "options" in it and it["options"] is not None:
                        obj["options"] = it["options"]

                    pending.append(obj)
                    existing.add(it["index"])

                    # The buffer is emptied on every write, so its length is
                    # the number of records produced since the last flush.
                    if len(pending) >= FLUSH_EVERY:
                        _write_records(f, pending)
                        pending = []
        finally:
            # Save the remainder -- also when inference raised or the run was
            # interrupted, so completed generations are never thrown away.
            _write_records(f, pending)

print('Completed')

Running task: detection_grounding_open_VQA


100%|███████████| 3128/3128 [00:53<00:00, 58.22it/s]


✅ All images loaded


Inference:  65%|█▎| 204/313 [00:04<00:01, 61.04it/s]

In [5]:
from PIL import Image, ImageFile
import cv2

# Sanity check: can one specific PNG be decoded by PIL and by OpenCV?
path = "/pasteur/u/rdcunha/data_cache/mmbu/final_data/VLMEvalData_v2/LMUData/standarized-subsampled/extra_det_v2/malaria-bounding-boxes/det/images_with_bbox/8757be1e-b832-407a-8e95-62abae485b24__bbox.png"

# --- PIL -----------------------------------------------------------------
print("Testing PIL:")
try:
    # Module-level Pillow switch: it stays enabled for the rest of the kernel
    # session, not just for this cell.
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    img = Image.open(path)
    # Image.open is lazy; load() forces the pixel data to be decoded here.
    img.load()
except Exception as err:
    print("PIL error:", err)
else:
    print("PIL loaded successfully")

# --- OpenCV --------------------------------------------------------------
print("\nTesting OpenCV:")
# A None result is treated as "could not decode"; no exception is expected.
img_cv = cv2.imread(path)
print("OpenCV loaded:", not (img_cv is None))

Testing PIL:
PIL loaded successfully

Testing OpenCV:
OpenCV loaded: False


libpng error: IDAT: CRC error
