Take LLaVA-1.5-7B as an example.

### Setup

In [None]:
import argparse
import json
import os
from PIL import Image
import pickle

import numpy as np
from scipy import stats
import tqdm

import torch
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_theme(style='whitegrid')

import warnings 
warnings.filterwarnings('ignore')

from model_manager import ModelManager
from utils import setup_seeds, disable_torch_init

# Experiment configuration, expressed as command-line options so the same code
# can run as a script. Every option has a default, so no argument is required.
parser = argparse.ArgumentParser(description="Case studies on LVLMs.")
parser.add_argument("--model", default='llava-1.5', type=str, help="model")
parser.add_argument("--data-path", default="/path/to/COCO/val2014", type=str, help="data path")
parser.add_argument("--batch-size", default=1, type=int)
parser.add_argument("--beam", default=1, type=int)  # 1 for Greedy Decoding
# parser.add_argument("--sample", action="store_true")
parser.add_argument("--max-tokens", default=512, type=int)

# parse_known_args() returns (namespace, leftover_argv) and does not fail on
# unrecognised arguments; only the namespace is kept.
args, _unparsed_argv = parser.parse_known_args()

# Both helpers come from the project's ``utils`` module and are called before
# the model is built.
# NOTE(review): presumably setup_seeds() fixes the RNG seeds for reproducibility
# and disable_torch_init() skips torch's default weight initialisation to speed
# up loading — confirm against utils.
setup_seeds()
disable_torch_init()

# Load model
# ModelManager is constructed from the model name given by --model
# (default 'llava-1.5'); later cells use its tokenizer, image_processor,
# llm_model and image-token index attributes.
model_manager = ModelManager(args.model)


### VAR Distribution and Logit Contribution

In [None]:
# Initialize the CHAIR evaluator, reusing a pickled copy when one exists so the
# evaluator does not have to be rebuilt from the COCO annotations on every run.
from chair_utils import CHAIR

cache = 'chair.pkl'
if cache and os.path.exists(cache):
    # SECURITY: unpickling executes arbitrary code. Only load a cache file that
    # this notebook wrote itself; never point `cache` at an untrusted file.
    # The `with` block guarantees the file handle is closed after loading.
    with open(cache, 'rb') as cache_file:
        evaluator = pickle.load(cache_file)
    print(f"loaded evaluator from cache: {cache}")
else:
    print("cache not setted or not exist yet, building from scratch...")
    evaluator = CHAIR('/path/to/COCO/annotations')
    # Close (and therefore flush) the cache file deterministically so a later
    # run never reads a partially written pickle.
    with open(cache, 'wb') as cache_file:
        pickle.dump(evaluator, cache_file)
    print(f"cached evaluator to: {cache}")

In [None]:
from chair_utils import chair_eval
from utils import set_act_get_hooks, remove_hooks
from utils import get_only_attn_out_contribution
from utils import attnw_over_vision_layer_head_selected_text

# Load the toy query list: one JSON object per line, each providing at least
# 'image_id' and 'instruction'. The file is read inside a `with` block so the
# handle is closed, and blank lines are skipped instead of crashing json.loads.
with open('./examples/toy_img_query_list.jsonl') as query_file:
    img_query_lists = [json.loads(line) for line in query_file if line.strip()]

# Accumulators filled across all images.
real_attn_contribution_across_layers = []  # one record per real word
visual_attn_weights = []  # one attention matrix per real *or* hallucinated word
real_SVAR_5_18 = []  # attention summed over layer indices 5..18, real words
hallu_SVAR_5_18 = []  # same quantity, hallucinated words

# `tqdm` is imported as a module (`import tqdm`), so the progress-bar class
# must be referenced as `tqdm.tqdm`; calling the module itself raises TypeError.
for img_query in tqdm.tqdm(img_query_lists):
    # prepare inputs
    img_id = f"COCO_val2014_{str(img_query['image_id']).zfill(12)}.jpg"
    img_path = os.path.join(args.data_path, img_id)
    img = Image.open(img_path).convert('RGB')
    img = model_manager.image_processor(img, return_tensors="pt")
    # Adds one leading dimension to the processed pixel tensor.
    # NOTE(review): presumably the layout prepare_inputs_for_model expects — confirm.
    img['pixel_values'] = img['pixel_values'].unsqueeze(0)

    query = [img_query['instruction']]
    questions, input_ids, kwargs = model_manager.prepare_inputs_for_model(query, img)

    # use hooks to get the attention sublayers' output
    hooks = set_act_get_hooks(model_manager.llm_model.model, attn_out=True)
    try:
        with torch.inference_mode():
            outputs = model_manager.llm_model.generate(
                input_ids,
                do_sample=False,
                num_beams=args.beam,
                max_new_tokens=args.max_tokens,
                use_cache=True,
                output_scores=True,
                output_hidden_states=True,
                output_attentions=True,
                return_dict_in_generate=True,
                **kwargs,
            )
    finally:
        # Always detach the hooks, even if generation fails, so a re-run of this
        # cell does not stack a second set of hooks on the model.
        remove_hooks(hooks)

    answer = model_manager.tokenizer.batch_decode(
        outputs['sequences'], skip_special_tokens=True
    )[0].strip()
    img_info = chair_eval(evaluator, img_id, answer)

    # calc some constants
    vision_token_start = model_manager.img_start_idx
    vision_token_end = model_manager.img_end_idx
    input_token_len = (
        model_manager.llm_model.get_vision_tower().num_patches
        + len(input_ids[0]) - 1  # -1 for the <image> token
    )
    gt_words = img_info['mscoco_gt_words']
    generated_words = img_info['mscoco_generated_words']

    # Real words Calculation: words that are both generated and in the ground truth.
    for real_word in set(generated_words) & set(gt_words):
        # calculate attn sublayer contribution for each real word
        try:
            # get attn sublayer contribution
            _records = get_only_attn_out_contribution(
                model_manager.llm_model, model_manager.tokenizer,
                outputs, real_word, input_token_len,
            )
            real_attn_contribution_across_layers.append(_records)

            # get visual attention weights
            real_word_attnw_matrix, _ = attnw_over_vision_layer_head_selected_text(
                real_word, outputs, model_manager.tokenizer,
                vision_token_start, vision_token_end
            )
            visual_attn_weights.append(real_word_attnw_matrix)
            # Average over axis 1, then reverse the remaining axis.
            # NOTE(review): presumably rows are stored last-layer-first, so after
            # the reversal index i corresponds to layer i — confirm in utils.
            real_word_layer_attnw = real_word_attnw_matrix.mean(axis=1)[::-1]
            # Slice 5:19 covers indices 5..18 inclusive.
            real_SVAR_5_18.append(real_word_layer_attnw[5:19].sum())
        except Exception:
            # Best-effort: skip words the helpers cannot process. `Exception`
            # (not a bare except) lets Ctrl-C / SystemExit still propagate.
            print(f"'{real_word}' not found in the generated text.")

    if not img_info['mscoco_hallucinated_words']:
        continue

    # Hallucinated words Calculation: flatten the list of sub-lists first.
    hallucination_words = [
        item for sublist in img_info['mscoco_hallucinated_words'] for item in sublist
    ]
    for hallu_word in set(hallucination_words):
        # calculate visual attention statistics for each hallu word
        try:
            # get visual attention weights
            hallu_word_attnw_matrix, _ = attnw_over_vision_layer_head_selected_text(
                hallu_word, outputs, model_manager.tokenizer,
                vision_token_start, vision_token_end
            )
            visual_attn_weights.append(hallu_word_attnw_matrix)
            hallu_word_layer_attnw = hallu_word_attnw_matrix.mean(axis=1)[::-1]
            hallu_SVAR_5_18.append(hallu_word_layer_attnw[5:19].sum())
        except Exception:
            # Same best-effort policy as for real words.
            print(f"'{hallu_word}' not found in the generated text.")

#### attention map plot

In [None]:
# from utils import plot_VAR_heatmap
# Stack the per-word attention matrices and average over the word axis, which
# leaves one (layer, head) matrix.
avg_visual_attn_weights = np.array(visual_attn_weights).mean(axis=0)

# Within each row, order the heads from the largest to the smallest weight.
sorted_idx = np.argsort(-avg_visual_attn_weights, axis=-1)
avg_data = np.take_along_axis(avg_visual_attn_weights, sorted_idx, axis=-1)

# Heatmap whose colour scale spans exactly the range of the data.
fig, axes = plt.subplots(figsize=(5, 5))
im = axes.imshow(
    avg_data,
    cmap='Blues',
    vmin=avg_data.min(),
    vmax=avg_data.max(),
)

# Label every second row; labels run in descending order from the top row.
n_layer, n_head = avg_data.shape
y_label_list = list(map(str, range(n_layer)))
axes.set_yticks(np.arange(0, n_layer, 2))
axes.set_yticklabels(y_label_list[::-2])
axes.set_xlabel("Sorted Heads")
axes.set_ylabel("Layers")

fig.colorbar(im, ax=axes, shrink=0.4, location='bottom')
# Head positions carry no identity after sorting, so hide the x ticks.
plt.xticks([])
# plt.savefig("VAR Distribution", dpi=400)
plt.show()

#### attention sublayers contribution plot

In [None]:
# Per-layer mean and standard deviation of the attention-sublayer contribution,
# taken across all collected real-word records (dim 0).
tensor_real_attn_contri = torch.tensor(real_attn_contribution_across_layers)
prob_contribution = tensor_real_attn_contri.mean(dim=0).numpy()
std = tensor_real_attn_contri.std(dim=0).numpy()
n_points = len(prob_contribution)

_, ax = plt.subplots(figsize=(5, 4))

# Dashed zero reference line, extended one step beyond both ends of the curve.
ax.plot(
    np.arange(-1, n_points + 1),
    [0] * (n_points + 2),
    linestyle="--",
    color="#AAABA8",
    linewidth=2,
)

# Mean curve with a +/- one standard deviation band.
ax.plot(
    prob_contribution,
    marker="o",
    markersize=4,
    color='#2A7AB9',
    label="Real Words",
)
ax.fill_between(
    range(n_points),
    prob_contribution - std,
    prob_contribution + std,
    color="#66AAD2",
    alpha=0.25,
)

ax.set_title(
    f"Mean Prob. Contribution of Attn Module with {len(real_attn_contribution_across_layers)} Real Objects",
    fontsize=10
)
ax.set_xlabel("Layer", fontsize=12)
ax.set_ylabel("Logit", fontsize=12)
ax.grid(alpha=0.9, linestyle='--', color='#AAAAAA')
ax.tick_params(axis='both', which='major', labelsize=12)

plt.xlim(-1, n_points)
plt.subplots_adjust(left=0.2, right=0.85, top=0.85, bottom=0.15)
# plt.savefig("attn_contribution.pdf", dpi=300)
plt.show()

#### $\text{SVAR}_{5\text{-}18}$ comparison of real and hallucinated object tokens

In [None]:
# Box plot comparing the summed attention (layer indices 5..18) of real object
# words against hallucinated ones. Box 0 is "Real", box 1 is "Hallucinated".
median_style = dict(linestyle='--', color='white', linewidth=1.5)
mean_marker_style = dict(
    marker='o',
    markerfacecolor='#C00000',
    markeredgecolor='black',
    markersize=6,
)

plt.figure(figsize=(4, 3), dpi=300)
sns.boxplot(
    data=[real_SVAR_5_18, hallu_SVAR_5_18],
    medianprops=median_style,
    meanprops=mean_marker_style,
    showmeans=True,
    showfliers=True,
    fliersize=4,
    linewidth=2,
    saturation=0.8,
)
plt.title(f"Sumed Attn in layers 5~18 with total {len(real_SVAR_5_18)} pairs", fontsize=10)
plt.xticks([0, 1], ['Real', 'Hallucinated'])
plt.xlabel("Object token type")
plt.ylabel("Sumed attention weights")
plt.show()

#### Quantitative results of $\text{SVAR}_{5\text{-}18}$ for detecting hallucinated object tokens

In [None]:
from sklearn.metrics import roc_curve, auc

# ROC curve: the SVAR_5-18 value is used as the score, with real object words
# as the positive class (1) and hallucinated words as the negative class (0).
y_scores = np.concatenate([real_SVAR_5_18, hallu_SVAR_5_18])
y_true = np.concatenate([
    np.ones(len(real_SVAR_5_18)),
    np.zeros(len(hallu_SVAR_5_18)),
])
fpr, tpr, _ = roc_curve(y_true, y_scores)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(4, 3), dpi=300)
# estimator=None / sort=False draw the ROC points exactly as returned.
# seaborn's default (estimator='mean') would average all TPR values that share
# an FPR, i.e. the vertical ROC segments, and add an error band, so the plotted
# curve would no longer match the reported AUROC.
sns.lineplot(
    x=fpr, y=tpr,
    estimator=None, sort=False,
    color='#5C73A2', lw=2, label=f'Our (AUROC = {roc_auc:.2f})'
)
# Chance-level diagonal for reference.
plt.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')
plt.show()

### Retrieve the text token via logit lens

In [None]:
from transformers.generation.logits_process import LogitsProcessorList, TopKLogitsWarper
from utils import logitLens_of_vision_tokens_with_discrete_range

# Single hand-picked COCO image used for the logit-lens demo.
example_image_file = 'COCO_val2014_000000499775.jpg'
img_path = os.path.join(args.data_path, example_image_file)
img = Image.open(img_path).convert('RGB')

# NOTE(review): unlike the evaluation loop earlier in this file, the PIL image
# is handed over without going through model_manager.image_processor — confirm
# that prepare_inputs_for_model accepts a raw image.
query = ["Please help me describe the image in detail."]
questions, input_ids, kwargs = model_manager.prepare_inputs_for_model(query, img)

# Same decoding setup as the evaluation loop: deterministic decoding that
# returns scores, hidden states and attentions.
generation_options = dict(
    do_sample=False,
    num_beams=args.beam,
    max_new_tokens=args.max_tokens,
    use_cache=True,
    output_scores=True,
    output_hidden_states=True,
    output_attentions=True,
    return_dict_in_generate=True,
)
with torch.inference_mode():
    outputs = model_manager.llm_model.generate(
        input_ids,
        **generation_options,
        **kwargs,
    )

# (region id, [start, end]) pairs; several index ranges can share a region.
# NOTE(review): presumably indices of vision tokens relative to the image start — confirm in utils.
region_spans = [
    (1, [391, 396]),
    (1, [415, 420]),
    (2, [328, 330]),
    (2, [352, 354]),
    (2, [376, 378]),
    (3, [379, 382]),
    (3, [403, 406]),
]
discrete_range = [span for _, span in region_spans]
# range_id tells which region each entry of discrete_range belongs to.
range_id = [region for region, _ in region_spans]

# Layers at which the logit lens is evaluated.
layer_range = [0, 5, 7, 10, 12, 15, 17, 18, 19, 20, 21, 22, 24, 26, 27, 28, 30, 31]
logits_warper = TopKLogitsWarper(top_k=50, filter_value=float('-inf'))
logits_processor = LogitsProcessorList([])

logitLens_of_vision_tokens_with_discrete_range(
    model_manager.llm_model,
    model_manager.tokenizer,
    input_ids,
    outputs,
    model_manager.img_start_idx,
    discrete_range,
    layer_range,
    logits_warper,
    logits_processor,
    savefig=False,
)