# Attribution Visualization Examples

In [6]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
import sys

import torch

sys.path.append("../..")
from interpreto.attributions.base import AttributionOutput, ModelTask

## Mono Class

### Basic Test

Here is a simple example of an attribution visualization.

In [8]:
# Attributions for a single class
sentence = ["A", "B", "C", "one", "two", "three"]

# Fake attribution scores for a single-class classification task:
# one value per element, evenly spaced from -10 to 30.
attributions = torch.linspace(start=-10, end=30, steps=len(sentence))
single_class_classification_output = AttributionOutput(
    elements=sentence,
    attributions=attributions,
    model_task=ModelTask.SINGLE_CLASS_CLASSIFICATION,
    classes=[1],
)

In [9]:
# Render the single-class attributions with the default options
from interpreto import AttributionVisualization

# Mapping from class id to the name handed to the visualization
class_names = {0: "class A", 1: "class B"}
viz = AttributionVisualization(
    attribution_output=single_class_classification_output,
    class_names=class_names,
)
viz.display()

In [10]:
# Same visualization with highlight_border switched on
viz = AttributionVisualization(
    attribution_output=single_class_classification_output,
    class_names=class_names,
    highlight_border=True,
)
viz.display()

In [11]:
# Same visualization with normalization disabled (normalize=False)
viz = AttributionVisualization(
    attribution_output=single_class_classification_output,
    class_names=class_names,
    normalize=False,
)
viz.display()

In [12]:
# Widen the gap between words through margin_right
# (class_names is not passed in this example)
viz = AttributionVisualization(
    attribution_output=single_class_classification_output,
    margin_right="0.85em",
)
viz.display()

It is also possible to save the results of the visualization as an HTML file:

In [13]:
# Left commented out so that running the notebook does not create a file;
# uncomment to save the last visualization built above.
# viz.save("attributions_monoclass.html")

### Occlusion Test on BERT outputs

A complete test on attribution results computed with a BERT model.

In [14]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from interpreto.attributions.methods.occlusion import Occlusion
from interpreto.attributions.perturbations.base import Granularity
from interpreto.model_wrapping.classification_inference_wrapper import ClassificationInferenceWrapper

In [15]:
# Checkpoint to explain and the sentences to run through it
model_name = "textattack/bert-base-uncased-imdb"
test_sentences = ["Best movie ever", "Worst movie ever verylongword"]

# Load the tokenizer first, then the classification model, from the same checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# NOTE(review): this wrapper is built but never handed to Occlusion below
# (there used to be a commented-out `inference_wrapper=inference_wrapper`
# argument) -- confirm whether it is still needed.
inference_wrapper = ClassificationInferenceWrapper(model=model, batch_size=4)

# Occlusion explainer working at word granularity
exp = Occlusion(
    model=model,
    tokenizer=tokenizer,
    granularity=Granularity.WORD,
    batch_size=4,
)
explaination = exp.explain(test_sentences)

# Print the attribution values next to the elements they belong to
for elem in explaination:
    print(elem.attributions, elem.elements)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


tensor([2.3434, 0.3201, 0.4798], device='cuda:0') ['best', 'movie', 'ever']
tensor([4.0143, 1.1912, 0.5362, 0.5000], device='cuda:0') ['worst', 'movie', 'ever', 'verylongword']


In [16]:
# One visualization per explained sentence, labelled with class_names
for sentence_explained in explaination:
    viz = AttributionVisualization(
        attribution_output=sentence_explained,
        class_names=class_names,
    )
    viz.display()

In [17]:
# Same per-sentence display, with a wider gap between words
for sentence_explained in explaination:
    viz = AttributionVisualization(
        attribution_output=sentence_explained,
        margin_right="0.85em",
    )
    viz.display()

## Multi Class

In [18]:
# Attributions for two classes
nb_classes = 2
inputs_sentences = ["A", "B", "C", "one", "two", "three"]

# Simulate an attribution output for the 1st sentence
sentence = inputs_sentences
# Hand-written scores: one row per class, one column per element.
# For random values instead: torch.rand(nb_classes, len(sentence))
attributions = torch.tensor(
    [
        [0.1, 0.2, -0.3, -0.4, 0.5, 1.0],
        [0.6, 0.5, 0.4, 0.3, 0.2, -1],
    ]
)
attribution_output = AttributionOutput(
    elements=sentence,
    attributions=attributions,
    model_task=ModelTask.MULTI_CLASS_CLASSIFICATION,
    classes=[0, 1],
)

In [22]:
# Render the two-class attributions of the 1st sentence with the default options
viz = AttributionVisualization(
    attribution_output=attribution_output,
    class_names={0: "class A", 1: "class B"},
)
viz.display()

In [23]:
# Wider gap between words, this time with different class labels
viz = AttributionVisualization(
    margin_right="0.85em",
    class_names={0: "class 1", 1: "class 2"},
    attribution_output=attribution_output,
)
viz.display()

## Generation

In [24]:
from transformers import AutoModelForCausalLM, AutoTokenizer

In [25]:
# Build an occlusion explainer around a pre-trained causal language model (GPT-2).
# Note: `model` and `tokenizer` are rebound here; they no longer refer to the
# BERT objects loaded in the classification section.
model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

explainer = Occlusion(
    model=model,
    tokenizer=tokenizer,
    granularity=Granularity.ALL_TOKENS,
    batch_size=4,
)

In [26]:
# Explain the outputs generated from one input sentence.
# NOTE(review): the shape recorded in the following cell reports 20 output
# tokens, which does not obviously follow from max_length=10 -- confirm how
# generation_kwargs is applied (or whether the saved output is stale).
attribution_outputs = explainer.explain(
    model_inputs="Hi there, how are you?",
    generation_kwargs={"max_length": 10},
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [27]:
# In the recorded run the shape is 20x27:
#   - 20 rows, one per output token
#   - 27 columns, one attribution value per token (input + output)
first_attributions = attribution_outputs[0].attributions
print("attribution tensor shape for the 1st sentence:", first_attributions.shape)

attribution tensor shape for the 1st sentence: torch.Size([20, 27])


In [28]:
# Render the attributions of the first generation result.
# NOTE(review): AttributionVisualization was already imported from the package
# root (`from interpreto import AttributionVisualization`) in an earlier cell;
# presumably this is the same class -- confirm and keep a single import path.
from interpreto.visualizations.attributions.attribution_highlight import AttributionVisualization

viz = AttributionVisualization(
    attribution_output=attribution_outputs[0],
)
viz.display()

In [29]:
# Same generation visualization with a wider gap between words
viz = AttributionVisualization(
    attribution_output=attribution_outputs[0],
    margin_right="1em",
)
viz.display()