In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer

from interpreto.attributions.methods import IntegratedGradients, OcclusionExplainer, Saliency, SmoothGrad
from interpreto.commons.granularity import GranularityLevel
from interpreto.visualizations.attributions.classification_highlight import (
    GenerationAttributionVisualization,
    MultiClassAttributionVisualization,
    SingleClassAttributionVisualization,
)

  from .autonotebook import tqdm as notebook_tqdm


Inference-based methods:
- Occlusion
- LIME
- KernelSHAP
- Sobol


Gradient-based methods:
- Saliency
- Integrated Gradients
- SmoothGrad



# Classification task

In [2]:
# Checkpoint shared by every cell of the classification section: the tokenizer
# and the sequence-classification model are both loaded from the same name.
model_name = "textattack/bert-base-uncased-imdb"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

## Occlusion 

In [None]:
# Occlusion explainer working at word granularity.
explainer = OcclusionExplainer(
    model=model,
    tokenizer=tokenizer,
    batch_size=4,
    granularity_level=GranularityLevel.WORD,
)

# The input is a one-element list, so the result is indexed with [0] below.
attribution_outputs = explainer.explain(
    model_inputs=[
        "This is the best movie I have ever seen. The cinematography was uncharacteristically breathtaking."
    ],
)

# Render the attributions of the single explained review.
viz = SingleClassAttributionVisualization(
    attribution_output=attribution_outputs[0],
    margin_right="0.35em",
)
viz.display()

In [None]:
# Word-level occlusion over a batch of three reviews.
explainer = OcclusionExplainer(model=model, batch_size=4, tokenizer=tokenizer, granularity_level=GranularityLevel.WORD)

list_attribution_outputs = explainer.explain(
    model_inputs=[
        "This is the best movie I have ever seen.",
        "I hate this movie.",
        "This movie is super good. I love it.",
    ]
)

# One visualization per explained input. The loop variable is singular on
# purpose: the plural name `attribution_outputs` is used by other cells for the
# whole result of `explain`, and rebinding it here to a single element would
# leave misleading state in the kernel.
for attribution_output in list_attribution_outputs:
    viz = SingleClassAttributionVisualization(attribution_output=attribution_output, margin_right="0.35em")
    viz.display()

In [None]:
# NOTE(review): this cell uses the same explainer settings and the same three
# inputs as the preceding cell; only the call formatting differs. Consider
# removing it or changing what it demonstrates.
explainer = OcclusionExplainer(model=model, batch_size=4, tokenizer=tokenizer, granularity_level=GranularityLevel.WORD)

list_attribution_outputs = explainer.explain(
    model_inputs=[
        "This is the best movie I have ever seen.",
        "I hate this movie.",
        "This movie is super good. I love it.",
    ]
)

# Singular loop variable so the plural name `attribution_outputs` (used by
# other cells for the full result of `explain`) is not rebound to one element.
for attribution_output in list_attribution_outputs:
    viz = SingleClassAttributionVisualization(
        attribution_output=attribution_output,
        margin_right="0.35em",
    )
    viz.display()

In [21]:
# Token-level occlusion, this time with explicit targets so that two classes
# are explained for the same input.
explainer = OcclusionExplainer(
    model=model,
    tokenizer=tokenizer,
    batch_size=4,
    granularity_level=GranularityLevel.TOKEN,
)

attribution_outputs = explainer.explain(
    targets=torch.tensor([[0, 1]]),  # (n, t), n=1, t=2
    model_inputs="This is the best movie I have ever seen.",
)

# NOTE(review): class_names presumably lines up positionally with target
# indices 0 and 1 — confirm against the checkpoint's label mapping.
viz = MultiClassAttributionVisualization(
    margin_right="0.35em",
    class_names=["negative review", "positive review"],
    attribution_output=attribution_outputs[0],
)
viz.display()

## Saliency

In [23]:
# Saliency explainer; no granularity is passed, so the library default applies.
explainer = Saliency(
    model=model,
    tokenizer=tokenizer,
    batch_size=4,
)

# Single review wrapped in a list, hence the [0] when visualizing.
attribution_outputs = explainer.explain(model_inputs=["This is the best movie I have ever seen."])

viz = SingleClassAttributionVisualization(margin_right="0.35em", attribution_output=attribution_outputs[0])
viz.display()

## Integrated Gradients

In [25]:
# Integrated Gradients configured with 10 interpolation steps.
explainer = IntegratedGradients(
    model=model,
    tokenizer=tokenizer,
    batch_size=4,
    n_interpolations=10,
)

# The input is passed as a bare string here; the result is still indexed
# with [0] below.
attribution_outputs = explainer.explain(
    model_inputs="This is the best movie I have ever seen.",
)

viz = SingleClassAttributionVisualization(margin_right="0.35em", attribution_output=attribution_outputs[0])
viz.display()

## SmoothGrad

In [28]:
# SmoothGrad perturbs the input with random noise (n_interpolations=50,
# noise_level=0.01), so unseeded runs produce different attributions each time.
# Seed torch's global RNG so Restart & Run All reproduces the same figure.
# NOTE(review): assumes the explainer draws its noise from torch's global RNG —
# confirm against the interpreto implementation.
torch.manual_seed(0)

explainer = SmoothGrad(model=model, batch_size=4, tokenizer=tokenizer, n_interpolations=50, noise_level=0.01)

attribution_outputs = explainer.explain(
    model_inputs=["I love this movie"],
)

# Render the attributions of the single explained sentence.
viz = SingleClassAttributionVisualization(
    attribution_output=attribution_outputs[0],
    margin_right="0.35em",
)
viz.display()

# Generation task

In [29]:
# From here on `model` and `tokenizer` refer to GPT-2: both names are rebound,
# so re-running a classification cell after this one would use the causal LM
# instead of the sequence classifier.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

## Occlusion

In [30]:
# Word-level occlusion applied to the causal language model.
explainer = OcclusionExplainer(
    model=model,
    tokenizer=tokenizer,
    batch_size=4,
    granularity_level=GranularityLevel.WORD,
)

# No targets are given; generation options are supplied via generation_kwargs.
# NOTE(review): in Hugging Face `generate`, max_length bounds prompt + new
# tokens — confirm these kwargs are forwarded unchanged and that 10 is intended.
attribution_outputs = explainer.explain(
    model_inputs="Hi there, how are you?",
    generation_kwargs={"max_length": 10},
)

viz = GenerationAttributionVisualization(
    attribution_output=attribution_outputs[0],
    margin_right="0.35em",
    normalize=True,
    highlight_border=False,
)
viz.display()

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


In [None]:
# Occlusion at ALL_TOKENS granularity on the causal language model.
explainer = OcclusionExplainer(
    model=model,
    tokenizer=tokenizer,
    batch_size=4,
    granularity_level=GranularityLevel.ALL_TOKENS,
)

# An explicit target text is supplied alongside the prompt.
attribution_outputs = explainer.explain(
    targets="I am fine, thank you",
    model_inputs="Hi there, how are you?",
    generation_kwargs={"max_length": 10},
)

viz = GenerationAttributionVisualization(
    attribution_output=attribution_outputs[0],
    margin_right="0.35em",
    normalize=True,
    highlight_border=False,
)
viz.display()



## Integrated Gradients

In [33]:
# Integrated Gradients (10 interpolation steps) on the causal language model,
# explaining two prompts in one call.
explainer = IntegratedGradients(model=model, tokenizer=tokenizer, batch_size=4, n_interpolations=10)

list_attribution_outputs = explainer.explain(
    model_inputs=["Hi there, how are you?", "What time is it?"], generation_kwargs={"max_length": 10}
)

# One visualization per prompt. The loop variable is singular so the plural
# name `attribution_outputs` (used by other cells for the full result of
# `explain`) is not rebound to a single element.
for attribution_output in list_attribution_outputs:
    viz = GenerationAttributionVisualization(
        attribution_output=attribution_output,
        highlight_border=False,
        normalize=True,
        margin_right="0.35em",
    )
    viz.display()

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


In [None]:
# Integrated Gradients (10 interpolation steps) on the causal language model,
# with one explicit target text per prompt.
explainer = IntegratedGradients(model=model, tokenizer=tokenizer, batch_size=4, n_interpolations=10)

list_attribution_outputs = explainer.explain(
    model_inputs=["Hi there, how are you?", "What time is it?"],
    targets=["fine.", "It is 9 pm."],
    generation_kwargs={"max_length": 10},
)

# One visualization per prompt. The loop variable is singular so the plural
# name `attribution_outputs` (used by other cells for the full result of
# `explain`) is not rebound to a single element.
for attribution_output in list_attribution_outputs:
    viz = GenerationAttributionVisualization(
        attribution_output=attribution_output,
        highlight_border=False,
        normalize=True,
        margin_right="0.35em",
    )
    viz.display()