# Attribution Visualization Examples

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys

import torch

sys.path.append("../..")
from interpreto.attributions.base import AttributionOutput, ModelTask

## Mono Class

### Basic Test

Here is a simple example of an attribution visualization.

In [3]:
# Mono-class attributions (a single class)
sentence = ["A", "B", "C", "one", "two", "three"]

# Simulate attributions for a single-class classification task:
# one evenly spaced score per element, ranging from -10 to 30.
attributions = torch.linspace(-10, 30, steps=len(sentence))
single_class_classification_output = AttributionOutput(
    elements=sentence,
    attributions=attributions,
    model_task=ModelTask.SINGLE_CLASS_CLASSIFICATION,
)

In [4]:
# Show the repr of the simulated AttributionOutput
single_class_classification_output

AttributionOutput(attributions=tensor([-10.,  -2.,   6.,  14.,  22.,  30.]), elements=['A', 'B', 'C', 'one', 'two', 'three'], model_task='ModelTask.SINGLE_CLASS_CLASSIFICATION', classes=None)

In [5]:
# Default display: build the highlight visualization from the attribution
# output and display it without any extra option.
from interpreto.visualizations.attributions.classification_highlight import HightlightAttributionVisualization

viz = HightlightAttributionVisualization(attribution_output=single_class_classification_output)
viz.display()

In [6]:
# Same visualization, this time with the border highlighted
viz = HightlightAttributionVisualization(
    attribution_output=single_class_classification_output,
    highlight_border=True,
)
viz.display()

In [7]:
# Same visualization, with the normalization turned off
viz = HightlightAttributionVisualization(
    attribution_output=single_class_classification_output,
    normalize=False,
)
viz.display()

In [8]:
# Add more space between the words by setting the right margin to 0.85em
viz = HightlightAttributionVisualization(
    attribution_output=single_class_classification_output,
    margin_right="0.85em",
)
viz.display()

It is also possible to save the visualization as an HTML file (uncomment the line below to do so):

In [9]:
# viz.save("attributions_monoclass.html")

### Occlusion Test on BERT outputs

A complete example: attributions are computed with the Occlusion method on a BERT classifier, then visualized.

In [10]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from interpreto.attributions.methods.occlusion import Occlusion
from interpreto.attributions.perturbations.base import Granularity
from interpreto.model_wrapping.classification_inference_wrapper import ClassificationInferenceWrapper
from interpreto.visualizations.attributions.classification_highlight import HightlightAttributionVisualization

In [11]:
# Load the `textattack/bert-base-uncased-imdb` sequence-classification
# checkpoint together with its tokenizer.
model_name = "textattack/bert-base-uncased-imdb"
test_sentences = ["Best movie ever", "Worst movie ever verylongword"]
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# NOTE(review): this wrapper is built but never passed to Occlusion below
# (the `inference_wrapper` argument is commented out) — confirm it is still needed.
inference_wrapper = ClassificationInferenceWrapper(model=model, batch_size=4)
# Occlusion explainer working at word granularity
exp = Occlusion(
    model=model,
    tokenizer=tokenizer,
    # inference_wrapper=inference_wrapper,
    batch_size=4,
    granularity=Granularity.WORD,
)
# Explain every test sentence; the result is iterated sentence by sentence below
explaination = exp.explain(test_sentences)

# Print the attribution scores next to the elements they refer to
for elem in explaination:
    print(elem.attributions, elem.elements)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


tensor([2.3434, 0.3201, 0.4798], device='cuda:0') ['best', 'movie', 'ever']
tensor([4.0143, 1.1912, 0.5362, 0.5000], device='cuda:0') ['worst', 'movie', 'ever', 'verylongword']


In [12]:
# Display one highlight visualization per explained sentence
for sentence_explained in explaination:
    viz = HightlightAttributionVisualization(attribution_output=sentence_explained)
    viz.display()

In [13]:
# Same per-sentence display, with more space between the words
for sentence_explained in explaination:
    viz = HightlightAttributionVisualization(
        attribution_output=sentence_explained,
        margin_right="0.85em",
    )
    viz.display()

## Multi Class

In [14]:
# Multi-class attributions (2 classes)
nb_classes = 2
inputs_sentences = ["A", "B", "C", "one", "two", "three"]

# Simulate an attribution output for the 1st sentence
sentence = inputs_sentences
# Hand-written scores, one row per class and one column per element: (c, l).
# Random alternative: torch.rand(nb_classes, len(sentence))
attributions = torch.tensor(
    [
        [0.1, 0.2, -0.3, -0.4, 0.5, 1.0],
        [0.6, 0.5, 0.4, 0.3, 0.2, -1],
    ]
)
attribution_output = AttributionOutput(
    elements=sentence,
    attributions=attributions,
    model_task=ModelTask.MULTI_CLASS_CLASSIFICATION,
)

In [15]:
# Default display for the 1st sentence, with a name given to each class
from interpreto.visualizations.attributions.classification_highlight import HightlightAttributionVisualization

viz = HightlightAttributionVisualization(
    attribution_output=attribution_output,
    class_names=["class 1", "class 2"],
)
viz.display()

In [16]:
# Same multi-class display, with more space between the words
# (right margin set to 0.85em)
viz = HightlightAttributionVisualization(
    attribution_output=attribution_output,
    class_names=["class 1", "class 2"],
    margin_right="0.85em",
)
viz.display()

## Generation

In [17]:
from transformers import AutoModelForCausalLM, AutoTokenizer

In [18]:
# Load pre-trained GPT-2 and its tokenizer.
# Note: `model` and `tokenizer` are rebound here, replacing the BERT objects
# loaded in the classification section.
model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Occlusion explainer using the ALL_TOKENS granularity
explainer = Occlusion(
    model=model,
    tokenizer=tokenizer,
    batch_size=4,
    granularity=Granularity.ALL_TOKENS,
)

In [19]:
# Explain the outputs generated from an input sentence
attribution_outputs = explainer.explain(
    model_inputs="Hi there, how are you?",
    generation_kwargs={"max_length": 10},
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [20]:
print("attribution tensor shape for the 1st sentence:", attribution_outputs[0].attributions.shape)
# Recorded shape: 20 x 27, i.e. 20 output tokens, each with 27 attribution
# values (one per input + output token).
# NOTE(review): 20 generated tokens looks inconsistent with `max_length=10`
# in the explain call — confirm how generation_kwargs is applied, or whether
# the recorded output is stale.

attribution tensor shape for the 1st sentence: torch.Size([20, 27])


In [21]:
# Check which task the first attribution output is tagged with
attribution_outputs[0].model_task

<ModelTask.GENERATION: 'generation'>

In [22]:
# Visualize the attribution results of the first explained input,
# using the same highlight visualization as for classification.
from interpreto.visualizations.attributions.classification_highlight import HightlightAttributionVisualization

viz = HightlightAttributionVisualization(attribution_output=attribution_outputs[0])
viz.display()

In [23]:
# Same generation display, with more space between the words (1em right margin)
viz = HightlightAttributionVisualization(
    attribution_output=attribution_outputs[0],
    margin_right="1em",
)
viz.display()