In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up before each cell runs, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys

from transformers import AutoModelForCausalLM, AutoTokenizer

# Add the directory two levels up to the import search path so the `interpreto`
# imports below can resolve (presumably from a local checkout — TODO confirm).
# NOTE(review): "../.." is a relative path, so it depends on the kernel's
# working directory being this notebook's folder.
sys.path.append("../..")
from interpreto.attributions.methods import OcclusionExplainer
from interpreto.commons.granularity import GranularityLevel
from interpreto.visualizations.attributions.classification_highlight import GenerationAttributionVisualization

In [3]:
# Load a pre-trained causal language model (GPT-2 here) and its tokenizer.
MODEL_NAME = "gpt2"
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Wrap the model and tokenizer in an occlusion explainer, using the
# ALL_TOKENS granularity level and a batch size of 4.
explainer = OcclusionExplainer(
    model=model,
    tokenizer=tokenizer,
    batch_size=4,
    granularity_level=GranularityLevel.ALL_TOKENS,
)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


In [4]:
# Explain the outputs generated from an input sentence.
# `max_new_tokens` bounds only the generated continuation. `max_length` also
# counts the prompt tokens, so `max_length=10` leaves room for just a few new
# tokens and cannot produce the 20 output tokens this notebook reports.
# NOTE(review): assumes `generation_kwargs` is forwarded to the model's
# `generate` call — confirm against the explainer implementation.
attribution_outputs = explainer.explain(
    model_inputs="Hi there, how are you?",
    generation_kwargs={"max_new_tokens": 20},
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [5]:
# Inspect the attribution tensor computed for the first sentence.
# In the recorded run its shape is 20x27: one row per output token, and for
# each of them 27 attribution values (input tokens + output tokens).
first_attributions = attribution_outputs[0].attributions
print("attribution tensor shape for the 1st sentence:", first_attributions.shape)

attribution tensor shape for the 1st sentence: torch.Size([20, 27])


In [6]:
# Build the visualization for the first sentence's attribution output and
# display it in the notebook.
first_sentence_output = attribution_outputs[0]
viz = GenerationAttributionVisualization(attribution_output=first_sentence_output)
viz.display()

In [None]:
# Optional: uncomment the line below to call `viz.save` with an HTML file name
# (presumably writes the visualization to that file — TODO confirm).
# viz.save("test_generation_attribution.html")