## Libraries

In [None]:
import os
import json
from tqdm import tqdm

from dataclasses import dataclass
import torch
from transformers import Qwen3VLForConditionalGeneration, AutoProcessor

## Generate Responses

In [None]:
def get_verdict(response):
    """Map a model response to a binary yes/no verdict.

    Only the first 10 characters of the stripped, lower-cased response are
    inspected. Within that prefix the first whole word that is either
    "yes" or "no" decides the verdict.

    Args:
        response: Text generated by the model.

    Returns:
        1 if the first yes/no word in the inspected prefix is "yes",
        otherwise 0 (including when neither word appears).
    """
    prefix = response.strip()[:10].lower()
    # Split on every non-letter so punctuation or markdown around the answer
    # ("**Yes**,", "yes/no") does not hide it, while words that merely
    # contain "yes" (e.g. "eyes") are not mistaken for an affirmative.
    words = "".join(ch if ch.isalpha() else " " for ch in prefix).split()
    for word in words:
        if word == "yes":
            return 1
        if word == "no":
            # A leading "no" wins over a "yes" that appears later on.
            return 0
    return 0
    
def save_dataset(dataset, file_path):
    """Write ``dataset`` to ``file_path`` as indented JSON.

    Missing parent directories are created first. The file is opened in
    write mode, so existing content is replaced.

    Args:
        dataset: JSON-serializable object to store.
        file_path: Destination path of the JSON file.
    """
    parent_dir = os.path.dirname(file_path)
    # A bare file name has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4)

def get_img_path(img_folder_path: str, img_name, dataset="phd") -> str:
    """Resolve the on-disk path of an image for the given dataset.

    For ``dataset="phd"`` the ``train2014`` and ``val2014`` subfolders of
    ``img_folder_path`` are searched, in that order, for a file named
    ``COCO_<subfolder>_<img_name>.jpg``; the first existing match is
    returned. For ``dataset="hallusion_bench"`` the result is
    ``img_folder_path`` concatenated with ``img_name`` minus its first
    character (presumably a leading "." of a relative path -- confirm
    against the dataset files); no existence check is made.

    Returns:
        The image path, or an empty string (after printing a message) when
        a PHD image cannot be found or the dataset name is not recognized.
    """
    if dataset == "hallusion_bench":
        return img_folder_path + img_name[1:]

    if dataset != "phd":
        print("Dataset not recognized.")
        return ""

    for split in ("train2014", "val2014"):
        split_dir = os.path.join(img_folder_path, split)
        if not os.path.exists(split_dir):
            continue
        candidate = os.path.join(split_dir, f"COCO_{split}_{img_name}.jpg")
        if os.path.exists(candidate):
            return candidate

    print(f"Image {img_name} not found in PHD dataset.")
    return ""

In [None]:
def get_response(messages, model, processor, max_new_tokens=64):
    """Generate the model's reply to a chat-formatted ``messages`` list.

    The processor's chat template tokenizes the messages, the inputs are
    moved to the model's device, and generation runs without gradient
    tracking. The prompt tokens are sliced off each generated sequence
    before decoding, so only the newly generated text is returned.

    Args:
        messages: Chat messages passed to ``processor.apply_chat_template``.
        model: Object exposing ``device`` and ``generate``.
        processor: Object exposing ``apply_chat_template`` and ``batch_decode``.
        max_new_tokens: Forwarded to ``model.generate``.

    Returns:
        The decoded string when exactly one sequence was decoded, otherwise
        the list of decoded strings.
    """
    model_inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        full_sequences = model.generate(**model_inputs, max_new_tokens=max_new_tokens)

    # Keep only the tokens produced after each prompt.
    completions = []
    for prompt_ids, sequence in zip(model_inputs.input_ids, full_sequences):
        completions.append(sequence[len(prompt_ids):])

    decoded = processor.batch_decode(
        completions,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    if len(decoded) == 1:
        return decoded[0]
    return decoded

def generate_answers(save_path, folder_path, file_name, img_dir_name="images", sample_size=None, save_interval=20, ids_range=None):
    """Answer every question in a dataset with Qwen3-VL-2B-Instruct.

    The dataset is read from ``folder_path/file_name``; images are looked up
    under ``folder_path/img_dir_name`` using the PHD layout. Results are
    written to ``save_path`` periodically, and entries whose ``id`` already
    appears in an existing ``save_path`` file are skipped, so an interrupted
    run can be resumed.

    Each newly processed entry gains two keys: ``qwen3_vl_2b_response`` (the
    generated text) and ``hallucinated_label`` (0 when the parsed yes/no
    verdict equals the entry's ``label``, 1 otherwise).

    Args:
        save_path: JSON file holding the accumulated results.
        folder_path: Directory containing the dataset file and image folder.
        file_name: Name of the dataset JSON file inside ``folder_path``.
        img_dir_name: Name of the image directory inside ``folder_path``.
        sample_size: If given, only the first ``sample_size`` entries are used.
        save_interval: Number of newly processed entries between checkpoints.
        ids_range: Optional ``[low, high]`` pair; only entries whose ``id``
            lies in this inclusive range are kept.

    Returns:
        The list of all results (previously saved plus newly generated).
    """
    # Load dataset
    dataset_path = os.path.join(folder_path, file_name)
    img_folder_path = os.path.join(folder_path, img_dir_name)

    with open(dataset_path, "r", encoding="utf-8") as f:
        dataset = json.load(f)
    if sample_size is not None and len(dataset) > sample_size:
        dataset = dataset[:sample_size]

    if ids_range is not None:
        low, high = ids_range[0], ids_range[1]
        dataset = [d for d in dataset if low <= d["id"] <= high]
    print("Dataset size after filtering by ids_range:", len(dataset))

    # Resume from an earlier run if a results file already exists.
    res = []
    if os.path.exists(save_path):
        with open(save_path, "r", encoding="utf-8") as f:
            res = json.load(f)
    # A set keeps the per-sample "already done?" check O(1).
    processed_ids = {data["id"] for data in res}

    # Init model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Qwen3VLForConditionalGeneration.from_pretrained(
        "Qwen/Qwen3-VL-2B-Instruct",
        dtype="auto",
        device_map=device
    )
    processor = AutoProcessor.from_pretrained(
        "Qwen/Qwen3-VL-2B-Instruct",
        max_pixels=1280 * 720
    )

    # Get responses
    batch = []

    def flush_batch():
        # Move pending entries into the results and persist them to disk.
        res.extend(batch)
        save_dataset(res, save_path)
        batch.clear()

    try:
        for data in tqdm(dataset, desc=f"Processing {file_name}:"):
            if data["id"] in processed_ids:
                continue

            img_path = get_img_path(img_folder_path, data["image_id"], dataset="phd")
            if not img_path:
                # get_img_path has already reported the missing image. Skip
                # the sample instead of handing an empty path to the model;
                # it stays unprocessed and is retried on the next run.
                continue

            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "image", "image": img_path},
                        {"type": "text", "text": data["question"] + "\nAnswer ONLY in this exact format, make sure you add the explanation: yes/no, explanation based on what you see in the image."}],
                }
            ]
            response = get_response(messages, model, processor)

            data["qwen3_vl_2b_response"] = response
            hallucinated_label = 0 if get_verdict(response) == data["label"] else 1
            data["hallucinated_label"] = hallucinated_label
            batch.append(data)

            # ">=" so a checkpoint is written every save_interval entries
            # (a strict ">" would wait for save_interval + 1).
            if len(batch) >= save_interval:
                flush_batch()
    finally:
        # Persist whatever is pending, both at normal completion and when an
        # exception interrupts the loop, so finished work is not lost.
        if batch:
            flush_batch()
    print(f"Completed. Dataset size: {len(res)}")
    return res

In [None]:
# Expected working directory layout:
#   phd/
#       phd_sampled.json
#       images/
#           train2014/
#           val2014/
folder_path = "../data/phd"
file_name = "phd_sampled.json"
save_path = "../data/phd/phd_sampled_qwen3_vl_2b.json"

# Optional arguments left at their defaults here:
#   sample_size=None      -> use the whole dataset
#   ids_range=[0, 2500]   -> example of restricting the run to an id range
res = generate_answers(
    save_path=save_path,
    folder_path=folder_path,
    file_name=file_name,
    save_interval=20,
)