In [None]:
"""

Integrated Gradients note book for VRDU models

"""

In [1]:
!pip install datasets seqeval evaluate transformers torch captum



In [2]:
from transformers import AutoProcessor, LayoutLMv3ForTokenClassification, set_seed
from PIL import Image,ImageDraw, ImageFont
from datasets import load_dataset
import torch
import pandas as pd
import evaluate
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

set_seed(0)


In [3]:
funsd = load_dataset("nielsr/funsd", trust_remote_code=True)
labels = funsd["train"].features["ner_tags"].feature.names
id2label = {v: k for v, k in enumerate(labels)}
label2id = {v: k for k, v in enumerate(labels)}
print(id2label)
print(label2id)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


funsd.py:   0%|          | 0.00/4.54k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/16.8M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

{0: 'O', 1: 'B-HEADER', 2: 'I-HEADER', 3: 'B-QUESTION', 4: 'I-QUESTION', 5: 'B-ANSWER', 6: 'I-ANSWER'}
{'O': 0, 'B-HEADER': 1, 'I-HEADER': 2, 'B-QUESTION': 3, 'I-QUESTION': 4, 'B-ANSWER': 5, 'I-ANSWER': 6}


In [15]:
model = LayoutLMv3ForTokenClassification.from_pretrained("adamadam111/layoutlmv3-finetuned-funsd")
processor = AutoProcessor.from_pretrained("adamadam111/layoutlmv3-finetuned-funsd")

TypeError: 'Embedding' object is not subscriptable

In [6]:
for name, module in model.named_modules():
    if 'embed' in name:
        print(name)

layoutlmv3.embeddings
layoutlmv3.embeddings.word_embeddings
layoutlmv3.embeddings.token_type_embeddings
layoutlmv3.embeddings.LayerNorm
layoutlmv3.embeddings.dropout
layoutlmv3.embeddings.position_embeddings
layoutlmv3.embeddings.x_position_embeddings
layoutlmv3.embeddings.y_position_embeddings
layoutlmv3.embeddings.h_position_embeddings
layoutlmv3.embeddings.w_position_embeddings
layoutlmv3.patch_embed
layoutlmv3.patch_embed.proj


In [13]:
def unnormalize_box(bbox, width, height):
     return [
         width * (bbox[0] / 1000),
         height * (bbox[1] / 1000),
         width * (bbox[2] / 1000),
         height * (bbox[3] / 1000),
     ]

In [16]:
sample = funsd["test"][0]
image = Image.open(sample["image_path"]).convert("RGB")
encode_sample = processor(image, sample["words"], boxes=sample["bboxes"], word_labels=sample['ner_tags'], padding='max_length', return_tensors="pt")
outputs = model(**encode_sample)



In [17]:
logits = outputs.logits
pred_ids = logits.argmax(-1).squeeze()
pred_labels = [id2label[id.item()] for id in pred_ids]
print(len(pred_labels))

512


In [18]:
input_ids = encode_sample.input_ids.squeeze()
tokens = processor.tokenizer.convert_ids_to_tokens(input_ids)
print(len(tokens))

512


In [19]:
highest_logit = [max(token_logits) for token_logits in logits.squeeze()]
print(len(highest_logit))

512


In [21]:
tokens_labeled_question = [(t, idx, highest_logit[idx]) for idx, (t, l) in enumerate(zip(input_ids, pred_labels)) if l == "B-QUESTION"] # (token_id, idx)
print(len(tokens_labeled_question))

17


### Extract embeddings from model

In [23]:
embedding_list = []

def get_input_embeddings_hook(module, input_, output):
    embedding_list.append(output.detach())

hook = model.layoutlmv3.embeddings.register_forward_hook(get_input_embeddings_hook)

# Single forward pass to pupulate embedding list
foo = model(
    input_ids=encode_sample.input_ids,
    attention_mask=encode_sample.attention_mask,
    bbox=encode_sample.bbox,
    pixel_values=encode_sample.pixel_values,
)

hook.remove()



### Run IG over tokens labeled 'B-QUESTION'

In [31]:
from captum.attr import IntegratedGradients

label = "B-QUESTION"
question_label_id = label2id[label]
input_embeddings = embedding_list[0].requires_grad_()

for token_id, idx, scalar_logit in tokens_labeled_question:
  def model_forward_fn(embeds):
    out = model.layoutlmv3.encoder(
        inputs=embeds,
        attention_mask=encode_sample.attention_mask,
    )
    sequence_output = out.last_hidden_state
    logits = model.layoutlmv3.classifier(sequence_output)
    return out.logits.squeeze()[idx][question_label_id]

  # Compute attributions
  ig = IntegratedGradients(model_forward_fn)
  attributions = ig.attribute(
    inputs=input_embeddings,
    n_steps=50,
    target=question_label_id
  )

TypeError: LayoutLMv3Encoder.forward() got an unexpected keyword argument 'inputs'