In [3]:
import torch
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
from captum.attr import IntegratedGradients, visualization
import numpy as np

In [19]:
tokenizer_distil = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
model_distil = DistilBertForSequenceClassification.from_pretrained("./results/distilbert/checkpoint-170")
model_distil.eval()

device = torch.device("cpu")
model_distil.to(device)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [20]:
from captum.attr import IntegratedGradients, visualization
import numpy as np

In [21]:
def forward_func(input_ids, attention_mask):
    outputs = model_distil(input_ids=input_ids, attention_mask=attention_mask)
    return torch.softmax(outputs.logits, dim=1)[:, 1]  # class 1 score

In [22]:
text = 'Max Schneider, born in 2000, is a software engineer from Brooklyn, NYC. He works at Google and earns $123K per year.'

# Tokenize
inputs = tokenizer_distil(text, return_tensors="pt", truncation=True, max_length=128)
input_ids = inputs["input_ids"].to(device)
attention_mask = inputs["attention_mask"].to(device)

In [23]:
ig = IntegratedGradients(forward_func)

# Compute attributions
attributions, delta = ig.attribute(
    inputs=input_ids,
    additional_forward_args=(attention_mask,),
    return_convergence_delta=True,
)

# Remove batch dim
attributions = attributions.squeeze(0).sum(dim=1)  # sum over embedding dim
tokens = tokenizer_distil.convert_ids_to_tokens(input_ids.squeeze(0))

RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.FloatTensor instead (while checking arguments for embedding)

In [ ]:
for token, score in zip(tokens, attributions.detach().cpu().numpy()):
    print(f"{token:15} --> {score:.4f}")