In [1]:
# imports / globals
import os, math, random, time, contextlib, itertools
from dataclasses import dataclass
from typing import List, Tuple, Dict, Any

import torch
from PIL import Image
from transformers import AutoProcessor, AutoTokenizer

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Throughput: enable TF32 on RTX 30/40 or A100 class GPUs
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

random.seed(0); torch.manual_seed(0)

  from .autonotebook import tqdm as notebook_tqdm
  self.setter(val)


<torch._C.Generator at 0x167b8f25870>

In [2]:
import sys, platform
print("Python:", sys.version)
try:
    import torch, transformers
    from transformers import AutoProcessor, LlavaForConditionalGeneration
    print("PyTorch:", torch.__version__)
    print("Transformers:", transformers.__version__)
except Exception as e:
    print("Import check failed:", repr(e))

Python: 3.10.19 | packaged by Anaconda, Inc. | (main, Oct 21 2025, 16:41:31) [MSC v.1929 64 bit (AMD64)]
PyTorch: 2.9.0+cu126
Transformers: 4.57.1


In [3]:
# Config 
POPE_ROOT      = "POPE/output/coco"
COCO_IMG_ROOT  = "val2014"
HF_MODEL_ID    = "llava-hf/llava-1.5-7b-hf"
DEVICE         = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE          = torch.float16 if DEVICE == "cuda" else torch.float32
MAX_NEW_TOKENS = 8
TEMPERATURE    = 0.0
CSV_OUT        = "pope_coco_predictions.csv"

print("DEVICE:", DEVICE)
print("MODEL:", HF_MODEL_ID)
print("POPE_ROOT:", POPE_ROOT)
print("COCO_IMG_ROOT:", COCO_IMG_ROOT)

DEVICE: cuda
MODEL: llava-hf/llava-1.5-7b-hf
POPE_ROOT: POPE/output/coco
COCO_IMG_ROOT: val2014


In [4]:
import json, pathlib, os

YES_TOKENS = {"1","true","yes","y","present","a"}   # include 'a' as some datasets use A/B
NO_TOKENS  = {"0","false","no","n","absent","b"}

def _read_any(path: pathlib.Path):
    txt = path.read_text(encoding="utf-8").strip()
    try:
        obj = json.loads(txt)
        if isinstance(obj, dict) and "data" in obj:
            return obj["data"]
        if isinstance(obj, list):
            return obj
        if isinstance(obj, dict):
            return [obj]
    except json.JSONDecodeError:
        pass
    # JSONL fallback
    return [json.loads(ln) for ln in txt.splitlines() if ln.strip()]

def _canon_answer(row):
    raw = None
    for k in ("answer","label","gt","target","ans","response","y"):
        if k in row:
            raw = row[k]
            break
    if raw is None:
        return None
    # normalize types
    if isinstance(raw, bool):
        return "yes" if raw else "no"
    if isinstance(raw, (int, float)):
        return "yes" if int(raw) == 1 else "no"
    s = str(raw).strip().lower()
    if s in YES_TOKENS: return "yes"
    if s in NO_TOKENS:  return "no"
    if s in {"a","b"}:
        # heuristic: map A->yes, B->no unless 'options' says otherwise
        opts = row.get("options") or row.get("choices")
        if isinstance(opts, dict):
            inv = {v.strip().lower(): k.lower() for k,v in opts.items()}
            # if options say 'yes' maps to 'a' explicitly, honor that
            if "yes" in inv and "no" in inv:
                return "yes" if s == inv["yes"] else "no"
        return "yes" if s == "a" else "no"
    return None

def _to_val2014_filename(x):
    if isinstance(x, int):
        return f"COCO_val2014_{x:012d}.jpg"
    if isinstance(x, str):
        return os.path.basename(x)
    return None

def _canon_image(row):
    for k in ("image","image_id","img","img_id","img_path","file_name","filename","path"):
        if k in row:
            v = row[k]
            fn = _to_val2014_filename(v)
            if fn:
                return fn
    return None

def _canon_question(row):
    for k in ("question","q","text","prompt","instruction"):
        if k in row:
            v = row[k]
            if isinstance(v, str) and v.strip():
                return v.strip()
    return None

def _pick(pope_root: pathlib.Path, key_substr: str):
    cands = [p for p in pope_root.rglob("*.json")] + [p for p in pope_root.rglob("*.jsonl")]
    cands = [p for p in cands if key_substr.lower() in p.name.lower() and "checkpoint" not in p.name.lower()]
    if not cands:
        raise FileNotFoundError(f"No file containing '{key_substr}' under {pope_root}")
    cands.sort(key=lambda p: p.stat().st_size, reverse=True)
    return cands[0]

def load_pope_split_robust(pope_root: str) -> dict:
    root = pathlib.Path(pope_root)
    assert root.exists(), f"POPE_ROOT not found: {root}"

    files = {
        "random": _pick(root, "random"),
        "popular": _pick(root, "popular"),
        "adversarial": _pick(root, "adversarial"),
    }
    print("Using files:")
    for k,p in files.items():
        print(f" - {k}: {p.relative_to(root)} (size {p.stat().st_size/1024:.1f} KB)")

    out = {}
    for split, path in files.items():
        raw_rows = _read_any(path)
        norm = []
        for r in raw_rows:
            img = _canon_image(r)
            q   = _canon_question(r)
            a   = _canon_answer(r)
            if img and q and a in {"yes","no"}:
                norm.append({"image": img, "question": q, "answer": a})
        if not norm:
            # show a few raw rows to help debug
            print(f"\n[WARN] 0 normalized rows for {split}. First raw rows:")
            for sample in raw_rows[:3]:
                print(sample)
            raise RuntimeError(f"0 normalized rows for split '{split}' from {path}")
        out[split] = norm
    return out

In [5]:
pope = load_pope_split_robust(POPE_ROOT)

for split in ("random","popular","adversarial"):
    print(f"{split}: {len(pope[split])} rows")
    print("  sample:", pope[split][0])

Using files:
 - random: coco_pope_random.json (size 362.5 KB)
 - popular: coco_pope_popular.json (size 361.5 KB)
 - adversarial: coco_pope_adversarial.json (size 361.7 KB)
random: 3000 rows
  sample: {'image': 'COCO_val2014_000000310196.jpg', 'question': 'Is there a snowboard in the image?', 'answer': 'yes'}
popular: 3000 rows
  sample: {'image': 'COCO_val2014_000000310196.jpg', 'question': 'Is there a snowboard in the image?', 'answer': 'yes'}
adversarial: 3000 rows
  sample: {'image': 'COCO_val2014_000000310196.jpg', 'question': 'Is there a snowboard in the image?', 'answer': 'yes'}


In [6]:
# Cell 4: model + processor
processor = AutoProcessor.from_pretrained(HF_MODEL_ID)

model = LlavaForConditionalGeneration.from_pretrained(
    HF_MODEL_ID,
    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    low_cpu_mem_usage=True,
    device_map="auto" if DEVICE == "cuda" else None
).to(DEVICE)
model.eval()

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
`torch_dtype` is deprecated! Use `dtype` instead!

oading checkpoint shards: 100%|█████████████████████████████████████████████████████████| 3/3 [00:10<00:00,  3.51s/it]

LlavaForConditionalGeneration(
  (model): LlavaModel(
    (vision_tower): CLIPVisionModel(
      (vision_model): CLIPVisionTransformer(
        (embeddings): CLIPVisionEmbeddings(
          (patch_embedding): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14), bias=False)
          (position_embedding): Embedding(577, 1024)
        )
        (pre_layrnorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (encoder): CLIPEncoder(
          (layers): ModuleList(
            (0-23): 24 x CLIPEncoderLayer(
              (self_attn): CLIPAttention(
                (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
                (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
                (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
                (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
              )
              (layer_norm1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
       

In [29]:
import contextlib, torch
from PIL import Image

def _get_llama_layers_from_llava(model):
    """
    Return the list of decoder layers (LlamaDecoderLayer) robustly across HF LLaVA variants.
    """
    # Common: LlavaForConditionalGeneration has .language_model as LlamaForCausalLM
    if hasattr(model, "language_model"):
        lm = model.language_model
        # Case A: language_model is LlamaForCausalLM with .model (LlamaModel)
        if hasattr(lm, "model") and hasattr(lm.model, "layers"):
            return lm.model.layers
        # Case B: language_model is already LlamaModel
        if hasattr(lm, "layers"):
            return lm.layers
    # Fallbacks (rare)
    if hasattr(model, "model") and hasattr(model.model, "layers"):
        return model.model.layers
    raise AttributeError("Could not locate LLaMA decoder layers in this HF LLaVA build.")

LAYERS = _get_llama_layers_from_llava(model)
NUM_LAYERS = len(LAYERS)
print(f"Found {NUM_LAYERS} decoder layers.")
# Pick a high-ish layer (e.g., 28) safely
STEER_LAYER_IDX = min(28, NUM_LAYERS - 1)  

def _forward_with_layer_capture(inputs, layer_idx):
    """
    Run a forward pass and capture the output of the chosen decoder layer.
    Returns tensor of shape (1, hidden_size) for the **last token**.
    """
    captured = {}
    target_layer = LAYERS[layer_idx]

    def hook_fn(module, inp, out):
        # out: (batch, seq, hidden) -- we take the last token
        captured["h"] = out[:, -1, :].detach()
        return out

    with torch.no_grad(), contextlib.ExitStack() as stack:
        handle = target_layer.register_forward_hook(hook_fn)
        # Use .__enter__() and .__exit__() for handle if contextlib.ExitStack is not available
        stack.enter_context(handle)
        _ = model(**inputs, output_hidden_states=False, use_cache=False)
    return captured.get("h")  # (1, hidden)

@torch.no_grad()
def _layer_hidden_image(question, image, layer_idx):
    prompt = (
        "USER: <image>\n"
        "Answer the question strictly with 'Yes' or 'No'. Do not add any other words.\n"
        f"Question: {question}\nASSISTANT:"
    )
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(DEVICE)
    return _forward_with_layer_capture(inputs, layer_idx)

@torch.no_grad()
def _layer_hidden_langonly(question, layer_idx):
    # no <image> → pure language prior
    prompt = (
        "USER:\n"
        "Answer the question strictly with 'Yes' or 'No'. Do not add any other words.\n"
        f"Question: {question}\nASSISTANT:"
    )
    inputs = processor(text=prompt, return_tensors="pt").to(DEVICE)
    return _forward_with_layer_capture(inputs, layer_idx)

def build_midlayer_vector(pope, coco_img_root, layer_idx=STEER_LAYER_IDX, num_calib=600):
    vecs = []
    rows = pope["random"][:num_calib]
    for r in tqdm(rows, desc=f"Build v @ layer {layer_idx}", leave=False):
        p = os.path.join(coco_img_root, r["image"])
        if not os.path.exists(p): 
            continue
        img = Image.open(p).convert("RGB")
        h_img  = _layer_hidden_image(r["question"], img, layer_idx)
        h_lang = _layer_hidden_langonly(r["question"], layer_idx)
        if h_img is None or h_lang is None:
            continue
        vecs.append((h_img - h_lang).cpu())
    if not vecs:
        raise RuntimeError("No calibration vectors collected — check image paths/split names.")
    v = torch.stack(vecs).mean(0).squeeze(0)
    v = v / (v.norm(p=2) + 1e-8)
    return v

Found 32 decoder layers.


In [24]:
from tqdm import tqdm

# --- Run this cell to build the vector ---
steer_v_mid = build_midlayer_vector(pope, COCO_IMG_ROOT, layer_idx=STEER_LAYER_IDX, num_calib=600)
steer_v_mid = steer_v_mid.to(model.device, dtype=model.dtype) # Ensure it's on the right device/dtype
print("mid-layer v:", tuple(steer_v_mid.shape), "layer idx:", STEER_LAYER_IDX)

                                                                                                                       

mid-layer v: (4096,) layer idx: 28




In [25]:
# prompt + yes/no ids
def yn_prompt(question: str) -> str:
    return (
        "USER: <image>\n"
        "Answer the question strictly with 'Yes' or 'No'. Do not add any other words.\n"
        f"Question: {question}\nASSISTANT:"
    )

def get_yes_no_ids(tokenizer):
    yes_ids = [tokenizer.convert_tokens_to_ids("Yes"),
               tokenizer.convert_tokens_to_ids("yes")]
    no_ids  = [tokenizer.convert_tokens_to_ids("No"),
               tokenizer.convert_tokens_to_ids("no")]

    yes_ids = [i for i in yes_ids if i is not None and i >= 0]
    no_ids  = [i for i in no_ids  if i is not None and i >= 0]

    # Fallback: first subtoken if convert_tokens_to_ids fails
    if not yes_ids:
        tid = tokenizer("Yes", add_special_tokens=False).input_ids
        if tid: yes_ids = [tid[0]]
    if not no_ids:
        tid = tokenizer("No", add_special_tokens=False).input_ids
        if tid: no_ids = [tid[0]]

    if not yes_ids or not no_ids:
        raise RuntimeError("Could not build YES/NO token id sets.")

    # dedupe
    yes_ids = list(dict.fromkeys(yes_ids))
    no_ids  = list(dict.fromkeys(no_ids))
    return yes_ids, no_ids

YES_IDS, NO_IDS = get_yes_no_ids(processor.tokenizer)

In [26]:
import time
from dataclasses import dataclass
from typing import List

# --- From Cell 6 (Metrics + evaluator) ---

@dataclass
class Metrics:
    tp:int=0; tn:int=0; fp:int=0; fn:int=0
    def scores(self):
        acc = (self.tp + self.tn) / max(1, self.tp + self.tn + self.fp + self.fn)
        prec = self.tp / max(1, self.tp + self.fp)
        rec  = self.tp / max(1, self.tp + self.fn)
        f1   = 0.0 if (prec+rec)==0 else 2*prec*rec/(prec+rec)
        return {"Accuracy": acc, "Precision": prec, "Recall": rec, "F1": f1}

def eval_rows(rows: List[dict], coco_img_root: str, infer_fn) -> dict:
    m = Metrics()
    for r in tqdm(rows, leave=False, desc="Evaluating"):
        img_file = r["image"]
        img_path = img_file if os.path.isabs(img_file) else os.path.join(coco_img_root, img_file)
        if not os.path.exists(img_path):
            continue  # skip missing files
        
        # Use the compatibility normalizer to ensure output is 'yes' or 'no'
        pred = normalize_yesno_compat(infer_fn(img_path, r["question"]))
        gt   = r["answer"]
        
        if   pred=="yes" and gt=="yes": m.tp += 1
        elif pred=="no"  and gt=="no" : m.tn += 1
        elif pred=="yes" and gt=="no" : m.fp += 1
        elif pred=="no"  and gt=="yes": m.fn += 1
    return m.scores()

def pct(d):  # pretty-print as percentages
    return {k: round(v*100, 2) for k,v in d.items()}

# --- From Cell 8 (Full POPE run + report) ---

def run_full_pope(pope, coco_img_root, infer_fn):
    results = {}
    counts  = {}
    all_rows = []
    t0 = time.time()
    for split in ("random","popular","adversarial"):
        print(f"\nRunning split: {split}")
        s = eval_rows(pope[split], coco_img_root, infer_fn)
        results[split] = s
        all_rows.extend(pope[split])
    
    # Re-run eval on all rows combined for a true overall score
    print(f"\nRunning split: Overall")
    overall = eval_rows(all_rows, coco_img_root, infer_fn)
    t1 = time.time()

    print("\nPOPE (COCO) results:")
    for k in ("random","popular","adversarial"):
        print(f"- {k.title():12}:", pct(results[k]))
    print(f"- Overall     :", pct(overall))
    print(f"\nTotal time: {t1 - t0:.1f}s for {len(all_rows)} items")
    return results, overall

# --- Add the 'normalize_yesno_compat' function ---
# This ensures the output of infer_fn matches the evaluator's 'yes'/'no' strings
def normalize_yesno_compat(s: str) -> str:
    """Return exactly 'yes' or 'no' to match eval_rows expectations."""
    t = (s or "").strip().lower()
    # common variants
    if t.startswith("y"): return "yes"
    if t.startswith("n"): return "no"
    # fallback: search tokens
    if "yes" in t: return "yes"
    if "no"  in t: return "no"
    # absolute fallback: default to 'no'
    return "no"

print("Evaluation functions (Metrics, eval_rows, pct, run_full_pope) are defined.")

Evaluation functions (Metrics, eval_rows, pct, run_full_pope) are defined.


In [33]:
import numpy as np
from PIL import Image, ImageFilter

def _make_contrast_image(
    image: Image.Image,
    mode: str = "black",
    mask_ratio: float = 0.9,
    seed: int | None = 1337,
):
    """
    Build a contrast image to *deflate* spurious cues for VCD.

    Args:
      image: PIL.Image (RGB recommended)
      mode: one of {"black", "gaussian", "random_mask"}
        - "black": return a solid-black image of same size
        - "gaussian": return pure Gaussian-noise image (destroy content)
        - "random_mask": randomly mask out `mask_ratio` of pixels to black
      mask_ratio: for modes that use masking (0.0..1.0). 0.9 -> mask 90% pixels.
      seed: optional RNG seed for reproducibility

    Returns:
      PIL.Image (RGB), same size as input
    """
    assert 0.0 <= mask_ratio <= 1.0, "mask_ratio must be in [0, 1]"
    img = image.convert("RGB")
    W, H = img.size

    if mode == "black":
        return Image.new("RGB", (W, H), color=(0, 0, 0))

    if seed is not None:
        rng = np.random.default_rng(seed)
    else:
        rng = np.random.default_rng()

    if mode == "gaussian":
        # Pure Gaussian noise image (mean 0.5, std 0.25, clipped to [0,1])
        noise = rng.normal(loc=0.5, scale=0.25, size=(H, W, 3))
        noise = np.clip(noise, 0.0, 1.0)
        arr = (noise * 255).astype(np.uint8)
        return Image.fromarray(arr, mode="RGB")

    if mode == "random_mask":
        # Mask `mask_ratio` fraction of pixels to black; keep the rest unchanged.
        arr = np.array(img, dtype=np.uint8)
        # Bernoulli mask shape (H, W, 1): True => keep, False => mask to black
        keep_prob = 1.0 - mask_ratio
        keep = rng.random((H, W, 1)) < keep_prob
        masked = arr.copy()
        masked[~keep.repeat(3, axis=2)] = 0
        return Image.fromarray(masked, mode="RGB")

    raise ValueError(f"Unknown contrast mode: {mode!r}. "
                     "Use one of: 'black', 'gaussian', 'random_mask'.")
    
def _logits_last_token(image, prompt):
    """
    Runs the model once on an <image> + prompt and returns the logits
    at the final (last) token position.
    """
    # Tokenize with image token inserted
    inputs = processor(
        text=prompt,
        images=image,
        return_tensors="pt"
    ).to(model.device)

    # Forward pass
    out = model(**inputs, output_hidden_states=False)
    
    # Extract logits of the last token
    logits = out.logits[:, -1, :]  # shape: [1, vocab_size]
    return logits

    import torch
from contextlib import ExitStack

@torch.no_grad()
def _asd_logits(
    image,
    prompt,
    steer_layers,              # int or list[int] (0-indexed decoder block ids)
    steer_v: torch.Tensor,     # shape: [hidden_size]
    lam_pos: float = 0.2,      # kept for API compat; not used in this minimal ASD
    lam_neg: float = 0.35,     # kept for API compat; not used in this minimal ASD
    alpha: float   = 1.0,      # steering strength
):
    """
    Minimal ASD: add alpha * v at the chosen decoder layer(s), then get final-token logits.
    - steer_layers: int or list of ints (e.g., 26, or [26,27,28])
    - steer_v: steering vector (will be L2-normalized and broadcast over [B,T,H])
    Returns: logits[:, -1, :]  (shape [1, vocab_size])
    """
    # Normalize & move v
    v = steer_v.to(model.device)
    v = v / (v.norm(p=2) + 1e-9)  # L2-normalize
    v = v.view(1, 1, -1)          # for broadcasting onto [B, T, H]

    # Ensure list of layers
    if isinstance(steer_layers, int):
        layer_ids = [steer_layers]
    else:
        layer_ids = list(steer_layers)

    # Distribute alpha across layers if multiple
    per_layer_alpha = alpha / max(1, len(layer_ids))

    # Build inputs
    inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)

    # ---- Hook that injects the steering vector on the residual stream ----
    def _hook_add_v(_module, _inp, out):
        # out: [B,T,H] or tuple(..., hidden_states)
        h = out[0] if isinstance(out, tuple) else out
        return h + per_layer_alpha * v

    # Get decoder blocks; adjust this path if your model differs
    try:
        decoder_layers = model.model.layers
    except AttributeError:
        # Some variants use model.model.decoder.layers or model.model.transformer.layers
        # If you hit this path, inspect(model) to locate your decoder blocks.
        decoder_layers = model.model.decoder.layers

    # Register hooks, run forward, remove hooks
    with ExitStack() as stack:
        handles = []
        for L in layer_ids:
            handles.append(decoder_layers[L].register_forward_hook(_hook_add_v))
        out = model(**inputs, output_hidden_states=False)
        for h in handles:
            h.remove()

    return out.logits[:, -1, :]  # final-token logits

In [38]:
@torch.no_grad()
def infer_yesno_vcd_asd(
    img_path, question,
    steer_layers, steer_v,
    lam_pos=0.2, lam_neg=0.35, alpha=1.0,    # ASD params
    gamma=1.0, contrast_mode="black", mask_ratio=0.9  # VCD params
):
    # Dependencies assumed: yn_prompt, _asd_logits, _logits_last_token, _make_contrast_image,
    # YES_IDS, NO_IDS, processor/model on GPU.
    img = Image.open(img_path).convert("RGB")
    prompt = yn_prompt(question)

    # Good logits: ASD on original image
    logits_main = _asd_logits(
        image=img, prompt=prompt,
        steer_layers=steer_layers, steer_v=steer_v,
        lam_pos=lam_pos, lam_neg=lam_neg, alpha=alpha
    )

    # Bad logits: base model on contrast image (NO steering)
    cimg = _make_contrast_image(img, mode=contrast_mode, mask_ratio=mask_ratio)
    logits_con = _logits_last_token(image=cimg, prompt=prompt)

    # VCD combine
    logits = logits_main - gamma * logits_con

    # Decode yes/no
    probs = torch.softmax(logits, dim=-1)[0]
    p_yes = probs[YES_IDS].sum() if YES_IDS else torch.tensor(0.0, device=probs.device)
    p_no  = probs[NO_IDS].sum()  if NO_IDS  else torch.tensor(0.0, device=probs.device)
    return "yes" if float(p_yes) >= float(p_no) else "no"


try:
    infer_yesno_vcd_asd_FIXED
    infer_yesno_vcd_asd = infer_yesno_vcd_asd_FIXED
except NameError:
    pass


def _normalize_yesno_compat(x: str) -> str:
    x = (x or "").strip().lower()
    if x in ("y", "yes", "yeah", "yep", "true"): return "yes"
    if x in ("n", "no", "nope", "false"):         return "no"
    # fall back to majority class "no" (POPE default is often balanced; pick one deterministically)
    return "no"

try:
    normalize_yesno_compat
except NameError:
    normalize_yesno_compat = _normalize_yesno_compat


try:
    STEER_LAYERS = STEER_LAYER_IDX
except NameError:
    try:
        STEER_LAYERS = [STEER_LAYER_IDX]  # single int -> list
    except NameError:
        STEER_LAYERS = [28]  # sane default for LLaVA/Vicuna-family

In [12]:
import time
from tqdm import tqdm

@torch.no_grad()
def infer_yesno_vcd_asd_FIXED(
    img_path, question,
    steer_layers, steer_v,
    lam_pos=0.1, lam_neg=0.35, alpha=1.0,    # ASD params
    gamma=0.6, contrast_mode="black", mask_ratio=0.9 # VCD params
):
    img = Image.open(img_path).convert("RGB")
    prompt = yn_prompt(question) # Uses yn_prompt from Cell 18
    
    # --- Good Logits (Main image + ASD steering) ---
    logits_main = _asd_logits(
        image=img, prompt=prompt, steer_layers=steer_layers, steer_v=steer_v,
        lam_pos=lam_pos, lam_neg=lam_neg, alpha=alpha
    )
    
    # --- Bad Logits (Contrast image, NO steering) ---
    cimg = _make_contrast_image(img, mode=contrast_mode, mask_ratio=mask_ratio) # Uses _make_contrast_image from Cell 18
    
    # [THIS IS THE FIX]
    # We use the base model's logits, not the ASD-steered logits
    logits_con = _logits_last_token(image=cimg, prompt=prompt) # Uses _logits_last_token from Cell 18
    
    # --- Final VCD Logits ---
    logits = logits_main - gamma * logits_con
    
    # Decode yes/no
    probs  = torch.softmax(logits, dim=-1)[0]
    p_yes  = probs[YES_IDS].sum() if YES_IDS else torch.tensor(0.0, device=probs.device)
    p_no   = probs[NO_IDS].sum()  if NO_IDS  else torch.tensor(0.0, device=probs.device)
    
    return "yes" if float(p_yes) >= float(p_no) else "no"


# ===== 2) Define the inference function for the evaluator =====
def infer_fn_smoke_test_v2(p, q):
    return infer_yesno_vcd_asd_FIXED(
        img_path=p, question=q,
        steer_layers=STEER_LAYER_IDX, steer_v=steer_v_mid, # Assumes these are in memory
        lam_pos=0.1, lam_neg=0.35, alpha=1.0,
        gamma=0.6, contrast_mode="black" # New gamma baseline
    )

# ===== 3) Create a 300-sample test set =====
smoke_test_samples = (
    pope["random"][:100] + 
    pope["popular"][:100] + 
    pope["adversarial"][:100]
)
print(f"Running smoke test (v2, Corrected Logic) on {len(smoke_test_samples)} samples...")

# ===== 4) Run the evaluation =====
t0 = time.time()

# Use the 'eval_rows' function defined earlier (in Cell 10 or Cell 18)
smoke_results = eval_rows(smoke_test_samples, COCO_IMG_ROOT, infer_fn_smoke_test_v2)
t1 = time.time()

print("\n--- Smoke Test Results (v2, Corrected Logic, gamma=0.6) ---")
pretty_results = {k: round(v * 100, 2) for k, v in smoke_results.items()}
print(f"Overall (300 samples): {pretty_results}")
print(f"\nTotal time: {t1 - t0:.1f}s")

Running smoke test (v2, Corrected Logic) on 300 samples...

--- Smoke Test Results (v2, Corrected Logic, gamma=0.6) ---
Overall (300 samples): {'acc': 74.0, 'precision': 67.14, 'recall': 94.0, 'f1': 78.33}

Total time: 107.5s


In [43]:
import time
from tqdm import tqdm # Make sure tqdm is imported if not already

# ===== 1) Define the CORRECTED inference function =====
# This function contrasts ASD(img) with BaseModel(cimg)
@torch.no_grad()
def infer_yesno_vcd_asd_FIXED(
    img_path, question,
    steer_layers, steer_v,
    lam_pos=0.2, lam_neg=0.35, alpha=4.0,    # ASD params
    gamma=0, contrast_mode="black", mask_ratio=0.9 # VCD params
):
    img = Image.open(img_path).convert("RGB")
    prompt = yn_prompt(question) # Uses yn_prompt from Cell 18
    
    # --- Good Logits (Main image + ASD steering) ---
    logits_main = _asd_logits(
        image=img, prompt=prompt, steer_layers=steer_layers, steer_v=steer_v,
        lam_pos=lam_pos, lam_neg=lam_neg, alpha=alpha
    )
    
    # --- Bad Logits (Contrast image, NO steering) ---
    cimg = _make_contrast_image(img, mode=contrast_mode, mask_ratio=mask_ratio) # Uses _make_contrast_image from Cell 18
    
    # [THIS IS THE FIX]
    # We use the base model's logits, not the ASD-steered logits
    logits_con = _logits_last_token(image=cimg, prompt=prompt) # Uses _logits_last_token from Cell 18
    
    # --- Final VCD Logits ---
    logits = logits_main - gamma * logits_con
    
    # Decode yes/no
    probs  = torch.softmax(logits, dim=-1)[0]
    p_yes  = probs[YES_IDS].sum() if YES_IDS else torch.tensor(0.0, device=probs.device)
    p_no   = probs[NO_IDS].sum()  if NO_IDS  else torch.tensor(0.0, device=probs.device)
    
    return "yes" if float(p_yes) >= float(p_no) else "no"


# ===== 2) Define the inference function for the evaluator =====
def infer_fn_smoke_test_v2(p, q):
    return infer_yesno_vcd_asd_FIXED(
        img_path=p, question=q,
        steer_layers=STEER_LAYER_IDX, steer_v=steer_v_mid, # Assumes these are in memory
        lam_pos=0.2, lam_neg=0.35, alpha=1.0,
        gamma=0, contrast_mode="black" # New gamma baseline
    )

# ===== 3) Create a 300-sample test set =====
smoke_test_samples = (
    pope["random"][:100] + 
    pope["popular"][:100] + 
    pope["adversarial"][:100]
)
print(f"Running smoke test (v2, Corrected Logic) on {len(smoke_test_samples)} samples...")

# ===== 4) Run the evaluation =====
t0 = time.time()

# Use the 'eval_rows' function defined earlier (in Cell 10 or Cell 18)
smoke_results = eval_rows(smoke_test_samples, COCO_IMG_ROOT, infer_fn_smoke_test_v2)
t1 = time.time()

print("\n--- Smoke Test Results (v2, Corrected Logic) ---")
pretty_results = {k: round(v * 100, 2) for k, v in smoke_results.items()}
print(f"Overall (300 samples): {pretty_results}")
print(f"\nTotal time: {t1 - t0:.1f}s")

Running smoke test (v2, Corrected Logic) on 300 samples...


                                                                                                                       


--- Smoke Test Results (v2, Corrected Logic) ---
Overall (300 samples): {'Accuracy': 84.33, 'Precision': 89.31, 'Recall': 78.0, 'F1': 83.27}

Total time: 74.9s




In [18]:
# ===== VCD-ONLY SMOKE TEST =====
import time
from PIL import Image
import torch
from tqdm import tqdm # Make sure tqdm is imported if not already

# 1) Pure VCD yes/no inference (no ASD, no steering vectors at all)
@torch.no_grad()
def infer_yesno_vcd_only(
    img_path: str,
    question: str,
    gamma: float = 1.0,            # VCD strength
    contrast_mode: str = "black",  # e.g. "black", "gaussian", "random_mask"
    mask_ratio: float = 0.9        # used by modes that support masking
):
    """
    Implements Visual Contrastive Decoding for yes/no POPE prompts:
      logits = logits(base image) - gamma * logits(contrast image)

    Assumes availability of:
      - yn_prompt(question)            -> str
      - _logits_last_token(image,prompt) -> Tensor [1, vocab]
      - _make_contrast_image(image, mode, mask_ratio) -> PIL.Image
      - YES_IDS, NO_IDS (lists of token ids)
    """
    # Prep prompt and images
    img = Image.open(img_path).convert("RGB")
    prompt = yn_prompt(question)

    # Base (faithful) logits
    logits_base = _logits_last_token(image=img, prompt=prompt)   # [1, vocab]

    # Contrast (unfaithful) logits
    cimg = _make_contrast_image(img, mode=contrast_mode, mask_ratio=mask_ratio)
    logits_con = _logits_last_token(image=cimg, prompt=prompt)   # [1, vocab]

    # VCD combination
    logits = logits_base - gamma * logits_con

    # Decode Yes/No
    probs = torch.softmax(logits, dim=-1)[0]
    p_yes = probs[YES_IDS].sum() if YES_IDS else torch.tensor(0.0, device=probs.device)
    p_no  = probs[NO_IDS].sum()  if NO_IDS  else torch.tensor(0.0, device=probs.device)
    return "yes" if float(p_yes) >= float(p_no) else "no"


# 2) Wrap for the evaluator
def infer_fn_vcd_smoke(p, q):
    return infer_yesno_vcd_only(
        img_path=p,
        question=q,
        gamma=1.0,             # tweakable: try 0.5, 1.0, 2.0
        contrast_mode="black", # tweakable: "black", "random_mask", etc.
        mask_ratio=0.9
    )

# 3) 300-sample smoke set (100 from each POPE split)
smoke_test_samples = (
    pope["random"][:100] +
    pope["popular"][:100] +
    pope["adversarial"][:100]
)
print(f"Running VCD-only smoke test on {len(smoke_test_samples)} samples...")

# 4) Evaluate
t0 = time.time()
smoke_results = eval_rows(smoke_test_samples, COCO_IMG_ROOT, infer_fn_vcd_smoke)
t1 = time.time()

pretty = {k: round(v * 100, 2) for k, v in smoke_results.items()}
print("\n--- VCD-ONLY Smoke Test Results ---")
print(f"Overall (300 samples): {pretty}")
print(f"Total time: {t1 - t0:.1f}s")


from itertools import product
gammas = [0.5, 1.0, 2.0]
modes  = ["black", "random_mask"]   # add other modes your _make_contrast_image supports

for g, m in product(gammas, modes):
    def _infer(p, q, _g=g, _m=m):
        return infer_yesno_vcd_only(p, q, gamma=_g, contrast_mode=_m, mask_ratio=0.9)
    print(f"\n[SWEEP] VCD gamma={g}, mode='{m}'")
    t0 = time.time()
    r = eval_rows(smoke_test_samples, COCO_IMG_ROOT, _infer)
    t1 = time.time()
    r_pct = {k: round(v * 100, 2) for k, v in r.items()}
    print(f"Overall: {r_pct} | Time: {t1 - t0:.1f}s")

Running VCD-only smoke test on 300 samples...


                                                                                                                       


--- VCD-ONLY Smoke Test Results ---
Overall (300 samples): {'Accuracy': 60.67, 'Precision': 55.97, 'Recall': 100.0, 'F1': 71.77}
Total time: 72.0s

[SWEEP] VCD gamma=0.5, mode='black'


                                                                                                                       

Overall: {'Accuracy': 82.33, 'Precision': 78.03, 'Recall': 90.0, 'F1': 83.59} | Time: 72.0s

[SWEEP] VCD gamma=0.5, mode='random_mask'


                                                                                                                       

Overall: {'Accuracy': 83.33, 'Precision': 82.89, 'Recall': 84.0, 'F1': 83.44} | Time: 73.0s

[SWEEP] VCD gamma=1.0, mode='black'


                                                                                                                       

Overall: {'Accuracy': 60.67, 'Precision': 55.97, 'Recall': 100.0, 'F1': 71.77} | Time: 72.1s

[SWEEP] VCD gamma=1.0, mode='random_mask'


                                                                                                                       

Overall: {'Accuracy': 71.0, 'Precision': 66.67, 'Recall': 84.0, 'F1': 74.34} | Time: 71.9s

[SWEEP] VCD gamma=2.0, mode='black'


                                                                                                                       

Overall: {'Accuracy': 50.0, 'Precision': 50.0, 'Recall': 100.0, 'F1': 66.67} | Time: 71.8s

[SWEEP] VCD gamma=2.0, mode='random_mask'


                                                                                                                       

Overall: {'Accuracy': 49.33, 'Precision': 49.66, 'Recall': 98.0, 'F1': 65.92} | Time: 72.0s




In [19]:
gammas = [0.2, 0, -0.5]
modes  = ["black", "random_mask"]   # add other modes your _make_contrast_image supports

for g, m in product(gammas, modes):
    def _infer(p, q, _g=g, _m=m):
        return infer_yesno_vcd_only(p, q, gamma=_g, contrast_mode=_m, mask_ratio=0.9)
    print(f"\n[SWEEP] VCD gamma={g}, mode='{m}'")
    t0 = time.time()
    r = eval_rows(smoke_test_samples, COCO_IMG_ROOT, _infer)
    t1 = time.time()
    r_pct = {k: round(v * 100, 2) for k, v in r.items()}
    print(f"Overall: {r_pct} | Time: {t1 - t0:.1f}s")


[SWEEP] VCD gamma=0.2, mode='black'


                                                                                                                       

Overall: {'Accuracy': 85.67, 'Precision': 86.9, 'Recall': 84.0, 'F1': 85.42} | Time: 72.8s

[SWEEP] VCD gamma=0.2, mode='random_mask'


                                                                                                                       

Overall: {'Accuracy': 85.67, 'Precision': 88.49, 'Recall': 82.0, 'F1': 85.12} | Time: 72.7s

[SWEEP] VCD gamma=0, mode='black'


                                                                                                                       

Overall: {'Accuracy': 84.33, 'Precision': 89.31, 'Recall': 78.0, 'F1': 83.27} | Time: 72.5s

[SWEEP] VCD gamma=0, mode='random_mask'


                                                                                                                       

Overall: {'Accuracy': 84.33, 'Precision': 89.31, 'Recall': 78.0, 'F1': 83.27} | Time: 73.0s

[SWEEP] VCD gamma=-0.5, mode='black'


                                                                                                                       

Overall: {'Accuracy': 50.0, 'Precision': 50.0, 'Recall': 100.0, 'F1': 66.67} | Time: 73.5s

[SWEEP] VCD gamma=-0.5, mode='random_mask'


                                                                                                                       

Overall: {'Accuracy': 50.0, 'Precision': 50.0, 'Recall': 100.0, 'F1': 66.67} | Time: 72.6s




In [20]:
gammas = [0.15, 0.1, 0.05]
modes  = ["black", "random_mask"]   # add other modes your _make_contrast_image supports

for g, m in product(gammas, modes):
    def _infer(p, q, _g=g, _m=m):
        return infer_yesno_vcd_only(p, q, gamma=_g, contrast_mode=_m, mask_ratio=0.9)
    print(f"\n[SWEEP] VCD gamma={g}, mode='{m}'")
    t0 = time.time()
    r = eval_rows(smoke_test_samples, COCO_IMG_ROOT, _infer)
    t1 = time.time()
    r_pct = {k: round(v * 100, 2) for k, v in r.items()}
    print(f"Overall: {r_pct} | Time: {t1 - t0:.1f}s")


[SWEEP] VCD gamma=0.15, mode='black'


                                                                                                                       

Overall: {'Accuracy': 85.67, 'Precision': 86.9, 'Recall': 84.0, 'F1': 85.42} | Time: 72.2s

[SWEEP] VCD gamma=0.15, mode='random_mask'


                                                                                                                       

Overall: {'Accuracy': 85.33, 'Precision': 89.55, 'Recall': 80.0, 'F1': 84.51} | Time: 72.5s

[SWEEP] VCD gamma=0.1, mode='black'


                                                                                                                       

Overall: {'Accuracy': 85.67, 'Precision': 88.49, 'Recall': 82.0, 'F1': 85.12} | Time: 74.7s

[SWEEP] VCD gamma=0.1, mode='random_mask'


                                                                                                                       

Overall: {'Accuracy': 85.33, 'Precision': 89.55, 'Recall': 80.0, 'F1': 84.51} | Time: 74.9s

[SWEEP] VCD gamma=0.05, mode='black'


                                                                                                                       

Overall: {'Accuracy': 85.33, 'Precision': 89.55, 'Recall': 80.0, 'F1': 84.51} | Time: 76.2s

[SWEEP] VCD gamma=0.05, mode='random_mask'


                                                                                                                       

Overall: {'Accuracy': 84.33, 'Precision': 89.31, 'Recall': 78.0, 'F1': 83.27} | Time: 75.1s




In [21]:
gammas = [0.25, 0.3, 0.4]
modes  = ["black", "random_mask"]   # add other modes your _make_contrast_image supports

for g, m in product(gammas, modes):
    def _infer(p, q, _g=g, _m=m):
        return infer_yesno_vcd_only(p, q, gamma=_g, contrast_mode=_m, mask_ratio=0.9)
    print(f"\n[SWEEP] VCD gamma={g}, mode='{m}'")
    t0 = time.time()
    r = eval_rows(smoke_test_samples, COCO_IMG_ROOT, _infer)
    t1 = time.time()
    r_pct = {k: round(v * 100, 2) for k, v in r.items()}
    print(f"Overall: {r_pct} | Time: {t1 - t0:.1f}s")


[SWEEP] VCD gamma=0.25, mode='black'


                                                                                                                       

Overall: {'Accuracy': 86.0, 'Precision': 86.0, 'Recall': 86.0, 'F1': 86.0} | Time: 72.3s

[SWEEP] VCD gamma=0.25, mode='random_mask'


                                                                                                                       

Overall: {'Accuracy': 85.67, 'Precision': 88.49, 'Recall': 82.0, 'F1': 85.12} | Time: 72.5s

[SWEEP] VCD gamma=0.3, mode='black'


                                                                                                                       

Overall: {'Accuracy': 85.0, 'Precision': 84.31, 'Recall': 86.0, 'F1': 85.15} | Time: 72.6s

[SWEEP] VCD gamma=0.3, mode='random_mask'


                                                                                                                       

Overall: {'Accuracy': 84.67, 'Precision': 86.62, 'Recall': 82.0, 'F1': 84.25} | Time: 72.7s

[SWEEP] VCD gamma=0.4, mode='black'


                                                                                                                       

Overall: {'Accuracy': 84.0, 'Precision': 81.48, 'Recall': 88.0, 'F1': 84.62} | Time: 73.1s

[SWEEP] VCD gamma=0.4, mode='random_mask'


                                                                                                                       

Overall: {'Accuracy': 84.0, 'Precision': 84.0, 'Recall': 84.0, 'F1': 84.0} | Time: 74.7s




In [41]:
import torch
from PIL import Image
from contextlib import ExitStack

def _find_decoder_layers(_model):
    """
    Return (layers_modulelist, qualified_name) for the *language decoder* layers.
    We scan modules and pick the longest ModuleList named 'layers'.
    """
    import torch.nn as nn
    candidates = []
    for name, mod in _model.named_modules():
        if hasattr(mod, "layers") and isinstance(getattr(mod, "layers"), nn.ModuleList):
            L = getattr(mod, "layers")
            if len(L) >= 8:  # avoid tiny LayerNorm stacks etc.
                candidates.append((name, L))
    if not candidates:
        raise AttributeError(
            "Could not find a '.layers' ModuleList inside the model. "
            "Run: print([n for n,_ in model.named_modules() if n.endswith('layers')])"
        )
    # choose the longest one (usually the language model decoder: length 32)
    name, layers = max(candidates, key=lambda x: len(x[1]))
    return layers, name

# Cache the decoder layers once
_DECODER_LAYERS, _DECODER_LAYERS_NAME = _find_decoder_layers(model)

# 1) Minimal ASD logits (inject alpha * v at chosen layer(s))
@torch.no_grad()
def _asd_logits(
    image,
    prompt,
    steer_layers,              # int or list[int], e.g., 28 or [26,27,28]
    steer_v: torch.Tensor,     # shape [hidden_size]
    lam_pos: float = 0.2,      # kept for API compat
    lam_neg: float = 0.35,     # kept for API compat
    alpha: float   = 1.0,
):
    # normalize v and shape to broadcast
    v = steer_v.to(model.device)
    v = v / (v.norm(p=2) + 1e-9)
    v = v.view(1, 1, -1)

    # ensure list of layer ids
    if isinstance(steer_layers, int):
        layer_ids = [steer_layers]
    else:
        layer_ids = list(steer_layers)

    per_layer_alpha = alpha / max(1, len(layer_ids))

    # inputs
    inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)

    def _hook_add_v(_module, _inp, out):
        h = out[0] if isinstance(out, tuple) else out
        return h + per_layer_alpha * v

    # register hooks on the located decoder layers
    with ExitStack() as stack:
        handles = []
        for L in layer_ids:
            handles.append(_DECODER_LAYERS[L].register_forward_hook(_hook_add_v))
        out = model(**inputs, output_hidden_states=False)
        for h in handles:
            h.remove()

    return out.logits[:, -1, :]  # [1, vocab]

# 2) VCD+ASD inference with your exact function name
@torch.no_grad()
def infer_yesno_vcd_asd(
    img_path, question,
    steer_layers, steer_v,
    lam_pos=0.2, lam_neg=0.35, alpha=1.0,     # ASD
    gamma=1.0, contrast_mode="black", mask_ratio=0.9  # VCD
):
    img = Image.open(img_path).convert("RGB")
    prompt = yn_prompt(question)

    # Good path: ASD on the original
    logits_main = _asd_logits(
        image=img, prompt=prompt,
        steer_layers=steer_layers, steer_v=steer_v,
        lam_pos=lam_pos, lam_neg=lam_neg, alpha=alpha
    )

    # Contrast path: base (UNSTEERED) on contrast image
    cimg = _make_contrast_image(img, mode=contrast_mode, mask_ratio=mask_ratio)
    logits_con = _logits_last_token(image=cimg, prompt=prompt)

    # VCD combine
    logits = logits_main - gamma * logits_con

    # Decode yes/no with your existing IDs
    probs = torch.softmax(logits, dim=-1)[0]
    p_yes = probs[YES_IDS].sum() if YES_IDS else torch.tensor(0.0, device=probs.device)
    p_no  = probs[NO_IDS].sum()  if NO_IDS  else torch.tensor(0.0, device=probs.device)
    return "yes" if float(p_yes) >= float(p_no) else "no"

# 3) A smoke wrapper that uses your exact variable names (STEER_LAYER_IDX, steer_v_mid)
def infer_fn_smoke_test(p, q):
    return infer_yesno_vcd_asd(
        img_path=p, question=q,
        steer_layers=STEER_LAYER_IDX, steer_v=steer_v_mid,
        lam_pos=0.2, lam_neg=0.35, alpha=1.0,
        gamma=0.9, contrast_mode="black", mask_ratio=0.9
    )

In [42]:
# ===== FULL POPE (3000) EVAL: ASD + VCD =====
import time, csv, os
from PIL import Image
import torch

# -------------------------
# Config (edit as needed)
# -------------------------
STEER_LAYERS = STEER_LAYER_IDX   # e.g., [26, 27, 28] or a single int like 27
STEER_V      = steer_v_mid        # your mid-layer steering vector (Tensor)

# ASD params (typical good starting points)
LAM_POS = 0.20
LAM_NEG = 0.35
ALPHA   = 4.0

# VCD params
GAMMA          = 0.2             # try 0.5–2.0
CONTRAST_MODE  = "black"          # "black", "gaussian", or "random_mask"
MASK_RATIO     = 0.90             # used when mode supports masking

# -------------------------
# Inference: ASD + VCD (Fixed)
# -------------------------
@torch.no_grad()
def infer_yesno_asd_plus_vcd(
    img_path: str,
    question: str,
    steer_layers,
    steer_v: torch.Tensor,
    lam_pos: float = 0.2,
    lam_neg: float = 0.35,
    alpha: float   = 1.0,
    gamma: float   = 1.0,
    contrast_mode: str = "black",
    mask_ratio: float  = 0.9,
):
    """
    Final logits = ASD(image) - gamma * Base(contrast_image).
    Contrast branch is intentionally unsteered.
    """
    img = Image.open(img_path).convert("RGB")
    prompt = yn_prompt(question)

    # Good logits: ASD-steered on the original image
    logits_main = _asd_logits(
        image=img, prompt=prompt,
        steer_layers=steer_layers, steer_v=steer_v,
        lam_pos=lam_pos, lam_neg=lam_neg, alpha=alpha
    )  # Tensor [1, vocab]

    # Bad logits: base model on contrast image (NO steering)
    cimg = _make_contrast_image(img, mode=contrast_mode, mask_ratio=mask_ratio)
    logits_con = _logits_last_token(image=cimg, prompt=prompt)  # Tensor [1, vocab]

    # VCD combine
    logits = logits_main - gamma * logits_con

    # Decode Yes/No
    probs = torch.softmax(logits, dim=-1)[0]
    p_yes = probs[YES_IDS].sum() if YES_IDS else torch.tensor(0.0, device=probs.device)
    p_no  = probs[NO_IDS].sum()  if NO_IDS  else torch.tensor(0.0, device=probs.device)
    return "yes" if float(p_yes) >= float(p_no) else "no"


# -------------------------
# Evaluator wrapper
# -------------------------
def infer_fn_asd_vcd(p, q):
    return infer_yesno_asd_plus_vcd(
        img_path=p, question=q,
        steer_layers=STEER_LAYERS, steer_v=STEER_V,
        lam_pos=LAM_POS, lam_neg=LAM_NEG, alpha=ALPHA,
        gamma=GAMMA, contrast_mode=CONTRAST_MODE, mask_ratio=MASK_RATIO
    )


# -------------------------
# Full run on all 3 splits
# -------------------------
all_rows = []
print("Preparing POPE rows...")
for split in ("random", "popular", "adversarial"):
    all_rows.extend(pope[split])

print(f"Starting full POPE eval with ASD+VCD on {len(all_rows)} items...")
t0 = time.time()

# Evaluate per-split for clarity
results = {}
for split in ("random", "popular", "adversarial"):
    print(f"\nRunning split: {split}")
    s = eval_rows(pope[split], COCO_IMG_ROOT, infer_fn_asd_vcd)
    results[split] = {k: round(v * 100, 2) for k, v in s.items()}
    print(f"{split} => {results[split]}")

# Overall
overall = eval_rows(all_rows, COCO_IMG_ROOT, infer_fn_asd_vcd)
overall_pct = {k: round(v * 100, 2) for k, v in overall.items()}

t1 = time.time()
print("\n=== ASD + VCD (Fixed) | FULL 3000 RESULTS ===")
for split in ("random", "popular", "adversarial"):
    print(f"{split.capitalize():>12}: {results[split]}")
print(f"{'Overall':>12}: {overall_pct}")
print(f"Total time: {t1 - t0:.1f}s")

Preparing POPE rows...
Starting full POPE eval with ASD+VCD on 9000 items...

Running split: random


                                                                                                                       

random => {'Accuracy': 88.87, 'Precision': 95.69, 'Recall': 81.4, 'F1': 87.97}

Running split: popular


                                                                                                                       

popular => {'Accuracy': 86.83, 'Precision': 91.32, 'Recall': 81.4, 'F1': 86.08}

Running split: adversarial


                                                                                                                       

adversarial => {'Accuracy': 83.8, 'Precision': 85.5, 'Recall': 81.4, 'F1': 83.4}


                                                                                                                       


=== ASD + VCD (Fixed) | FULL 3000 RESULTS ===
      Random: {'Accuracy': 88.87, 'Precision': 95.69, 'Recall': 81.4, 'F1': 87.97}
     Popular: {'Accuracy': 86.83, 'Precision': 91.32, 'Recall': 81.4, 'F1': 86.08}
 Adversarial: {'Accuracy': 83.8, 'Precision': 85.5, 'Recall': 81.4, 'F1': 83.4}
     Overall: {'Accuracy': 86.5, 'Precision': 90.65, 'Recall': 81.4, 'F1': 85.77}
Total time: 4360.8s




In [51]:
# ===== FULL POPE (3000) EVAL: ASD + VCD =====
import time, csv, os
from PIL import Image
import torch

# -------------------------
# Config (edit as needed)
# -------------------------
STEER_LAYERS = STEER_LAYER_IDX   # e.g., [26, 27, 28] or a single int like 27
STEER_V      = steer_v_mid        # your mid-layer steering vector (Tensor)

# ASD params (typical good starting points)
LAM_POS = 0.20
LAM_NEG = 0.35
ALPHA   = 1.0

# VCD params
GAMMA          = 0.25             # try 0.5–2.0
CONTRAST_MODE  = "black"          # "black", "gaussian", or "random_mask"
MASK_RATIO     = 0.90             # used when mode supports masking

# -------------------------
# Inference: ASD + VCD (Fixed)
# -------------------------
@torch.no_grad()
def infer_yesno_asd_plus_vcd(
    img_path: str,
    question: str,
    steer_layers,
    steer_v: torch.Tensor,
    lam_pos: float = 0.2,
    lam_neg: float = 0.35,
    alpha: float   = 1.0,
    gamma: float   = 1.0,
    contrast_mode: str = "black",
    mask_ratio: float  = 0.9,
):
    """
    Final logits = ASD(image) - gamma * Base(contrast_image).
    Contrast branch is intentionally unsteered.
    """
    img = Image.open(img_path).convert("RGB")
    prompt = yn_prompt(question)

    # Good logits: ASD-steered on the original image
    logits_main = _asd_logits(
        image=img, prompt=prompt,
        steer_layers=steer_layers, steer_v=steer_v,
        lam_pos=lam_pos, lam_neg=lam_neg, alpha=alpha
    )  # Tensor [1, vocab]

    # Bad logits: base model on contrast image (NO steering)
    cimg = _make_contrast_image(img, mode=contrast_mode, mask_ratio=mask_ratio)
    logits_con = _logits_last_token(image=cimg, prompt=prompt)  # Tensor [1, vocab]

    # VCD combine
    logits = logits_main - gamma * logits_con

    # Decode Yes/No
    probs = torch.softmax(logits, dim=-1)[0]
    p_yes = probs[YES_IDS].sum() if YES_IDS else torch.tensor(0.0, device=probs.device)
    p_no  = probs[NO_IDS].sum()  if NO_IDS  else torch.tensor(0.0, device=probs.device)
    return "yes" if float(p_yes) >= float(p_no) else "no"


# -------------------------
# Evaluator wrapper
# -------------------------
def infer_fn_asd_vcd(p, q):
    return infer_yesno_asd_plus_vcd(
        img_path=p, question=q,
        steer_layers=STEER_LAYERS, steer_v=STEER_V,
        lam_pos=LAM_POS, lam_neg=LAM_NEG, alpha=ALPHA,
        gamma=GAMMA, contrast_mode=CONTRAST_MODE, mask_ratio=MASK_RATIO
    )


# -------------------------
# Full run on all 3 splits
# -------------------------
all_rows = []
print("Preparing POPE rows...")
for split in ("random", "popular", "adversarial"):
    all_rows.extend(pope[split])

print(f"Starting full POPE eval with ASD+VCD on {len(all_rows)} items...")
t0 = time.time()

# Evaluate per-split for clarity
results = {}
for split in ("random", "popular", "adversarial"):
    print(f"\nRunning split: {split}")
    s = eval_rows(pope[split], COCO_IMG_ROOT, infer_fn_asd_vcd)
    results[split] = {k: round(v * 100, 2) for k, v in s.items()}
    print(f"{split} => {results[split]}")

# Overall
overall = eval_rows(all_rows, COCO_IMG_ROOT, infer_fn_asd_vcd)
overall_pct = {k: round(v * 100, 2) for k, v in overall.items()}

t1 = time.time()
print("\n=== ASD + VCD (Fixed) | FULL 3000 RESULTS ===")
for split in ("random", "popular", "adversarial"):
    print(f"{split.capitalize():>12}: {results[split]}")
print(f"{'Overall':>12}: {overall_pct}")
print(f"Total time: {t1 - t0:.1f}s")

Preparing POPE rows...
Starting full POPE eval with ASD+VCD on 9000 items...

Running split: random


                                                                                                                       

random => {'Accuracy': 89.73, 'Precision': 95.02, 'Recall': 83.87, 'F1': 89.09}

Running split: popular


                                                                                                                       

popular => {'Accuracy': 87.07, 'Precision': 89.6, 'Recall': 83.87, 'F1': 86.64}

Running split: adversarial


                                                                                                                       

adversarial => {'Accuracy': 83.17, 'Precision': 82.71, 'Recall': 83.87, 'F1': 83.28}


                                                                                                                       


=== ASD + VCD (Fixed) | FULL 3000 RESULTS ===
      Random: {'Accuracy': 89.73, 'Precision': 95.02, 'Recall': 83.87, 'F1': 89.09}
     Popular: {'Accuracy': 87.07, 'Precision': 89.6, 'Recall': 83.87, 'F1': 86.64}
 Adversarial: {'Accuracy': 83.17, 'Precision': 82.71, 'Recall': 83.87, 'F1': 83.28}
     Overall: {'Accuracy': 86.66, 'Precision': 88.82, 'Recall': 83.87, 'F1': 86.27}
Total time: 4462.1s




In [50]:
import time
from tqdm import tqdm # Make sure tqdm is imported if not already

# ===== 1) Define the CORRECTED inference function =====
# This function contrasts ASD(img) with BaseModel(cimg)
@torch.no_grad()
def infer_yesno_vcd_asd_FIXED(
    img_path, question,
    steer_layers, steer_v,
    lam_pos=0.2, lam_neg=0.35, alpha=1.0,    # ASD params
    gamma=0, contrast_mode="black", mask_ratio=0.9 # VCD params
):
    img = Image.open(img_path).convert("RGB")
    prompt = yn_prompt(question) # Uses yn_prompt from Cell 18
    
    # --- Good Logits (Main image + ASD steering) ---
    logits_main = _asd_logits(
        image=img, prompt=prompt, steer_layers=steer_layers, steer_v=steer_v,
        lam_pos=lam_pos, lam_neg=lam_neg, alpha=alpha
    )
    
    # --- Bad Logits (Contrast image, NO steering) ---
    cimg = _make_contrast_image(img, mode=contrast_mode, mask_ratio=mask_ratio) # Uses _make_contrast_image from Cell 18
    
    # [THIS IS THE FIX]
    # We use the base model's logits, not the ASD-steered logits
    logits_con = _logits_last_token(image=cimg, prompt=prompt) # Uses _logits_last_token from Cell 18
    
    # --- Final VCD Logits ---
    logits = logits_main - gamma * logits_con
    
    # Decode yes/no
    probs  = torch.softmax(logits, dim=-1)[0]
    p_yes  = probs[YES_IDS].sum() if YES_IDS else torch.tensor(0.0, device=probs.device)
    p_no   = probs[NO_IDS].sum()  if NO_IDS  else torch.tensor(0.0, device=probs.device)
    
    return "yes" if float(p_yes) >= float(p_no) else "no"


# ===== 2) Define the inference function for the evaluator =====
def infer_fn_smoke_test_v2(p, q):
    return infer_yesno_vcd_asd_FIXED(
        img_path=p, question=q,
        steer_layers=STEER_LAYER_IDX, steer_v=steer_v_mid, # Assumes these are in memory
        lam_pos=0.2, lam_neg=0.35, alpha=0.5,
        gamma=0.25, contrast_mode="black" # New gamma baseline
    )

# ===== 3) Create a 300-sample test set =====
smoke_test_samples = (
    pope["random"][:100] + 
    pope["popular"][:100] + 
    pope["adversarial"][:100]
)
print(f"Running smoke test (v2, Corrected Logic) on {len(smoke_test_samples)} samples...")

# ===== 4) Run the evaluation =====
t0 = time.time()

# Use the 'eval_rows' function defined earlier (in Cell 10 or Cell 18)
smoke_results = eval_rows(smoke_test_samples, COCO_IMG_ROOT, infer_fn_smoke_test_v2)
t1 = time.time()

print("\n--- Smoke Test Results (v2, Corrected Logic) ---")
pretty_results = {k: round(v * 100, 2) for k, v in smoke_results.items()}
print(f"Overall (300 samples): {pretty_results}")
print(f"\nTotal time: {t1 - t0:.1f}s")

Running smoke test (v2, Corrected Logic) on 300 samples...


                                                                                                                       


--- Smoke Test Results (v2, Corrected Logic) ---
Overall (300 samples): {'Accuracy': 86.67, 'Precision': 87.16, 'Recall': 86.0, 'F1': 86.58}

Total time: 72.9s


