In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer

from interpreto.attributions.base import InferenceModes
from interpreto.attributions.methods import (
    IntegratedGradients,
    KernelShap,
    Lime,
    OcclusionExplainer,
    Saliency,
    SmoothGrad,
    SobolAttribution,
)
from interpreto.attributions.methods.lime import DistancesFromMask
from interpreto.attributions.methods.sobol_attribution import SobolIndicesOrders
from interpreto.commons.granularity import GranularityLevel
from interpreto.visualizations.attributions.classification_highlight import (
    GenerationAttributionVisualization,
    MultiClassAttributionVisualization,
    SingleClassAttributionVisualization,
)

  from .autonotebook import tqdm as notebook_tqdm


Inference-based methods:
- Occlusion
- LIME
- KernelSHAP
- Sobol


Gradient-based methods:
- Saliency
- Integrated Gradients
- SmoothGrad



# Classification task

In [13]:
# Checkpoint shared by every cell of the classification section.
# Model and tokenizer are loaded from the same identifier so they stay in sync.
model_name = "textattack/bert-base-uncased-imdb"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

In [None]:
# Occlusion: token-level attributions for a single review.
review = "This is the best movie I have ever seen. The cinematography was uncharacteristically breathtaking."

explainer = OcclusionExplainer(
    model=model,
    tokenizer=tokenizer,
    batch_size=4,
    granularity_level=GranularityLevel.TOKEN,
)
attribution_outputs = explainer.explain(model_inputs=[review])

# A single sentence was submitted, so only the first attribution output is shown.
viz = SingleClassAttributionVisualization(
    attribution_output=attribution_outputs[0],
    margin_right="0.35em",
)
viz.display()

In [4]:
# LIME: word-level attributions for a batch of three reviews.
#
# LIME is configured with `n_perturbations=20`; seed torch's global RNG so that
# re-running this cell yields the same explanation.
# NOTE(review): assumes the perturbations are drawn from torch's global RNG — confirm.
torch.manual_seed(0)

explainer = Lime(
    model=model,
    batch_size=4,
    tokenizer=tokenizer,
    n_perturbations=20,
    granularity_level=GranularityLevel.WORD,
    distance_function=DistancesFromMask.HAMMING,
    inference_mode=InferenceModes.SOFTMAX,
)

list_attribution_outputs = explainer.explain(
    model_inputs=[
        "This is the best movie I have ever seen.",
        "I hate this movie.",
        "This movie is super good. I love it.",
    ]
)

# One visualization per input sentence. The loop variable is singular so it
# does not clobber the `attribution_outputs` name that other cells use for the
# whole result of `explain`.
for attribution_output in list_attribution_outputs:
    viz = SingleClassAttributionVisualization(attribution_output=attribution_output, margin_right="0.35em")
    viz.display()

In [5]:
# KernelSHAP: word-level attributions for one review, explained for both classes.
#
# KernelSHAP is configured with `n_perturbations=20`; seed torch's global RNG so
# that re-running this cell yields the same explanation.
# NOTE(review): assumes the perturbations are drawn from torch's global RNG — confirm.
torch.manual_seed(0)

explainer = KernelShap(
    model=model,
    batch_size=4,
    tokenizer=tokenizer,
    n_perturbations=20,
    granularity_level=GranularityLevel.WORD,
    inference_mode=InferenceModes.SOFTMAX,
)

attribution_outputs = explainer.explain(
    model_inputs="This is the best movie I have ever seen.",
    targets=torch.tensor([[0, 1]]),  # (n, t), n=1, t=2
)

# `class_names` is listed in the same order as the target indices above (0, 1).
viz = MultiClassAttributionVisualization(
    attribution_output=attribution_outputs[0],
    class_names=["negative review", "positive review"],
    margin_right="0.35em",
)
viz.display()

In [6]:
# SmoothGrad: attributions for a batch of three reviews.
#
# SmoothGrad is configured with a non-zero `noise_level`; seed torch's global RNG
# so that re-running this cell yields the same explanation.
# NOTE(review): assumes the noise is drawn from torch's global RNG — confirm.
torch.manual_seed(0)

explainer = SmoothGrad(model=model, batch_size=4, tokenizer=tokenizer, n_interpolations=50, noise_level=0.01)

list_attribution_outputs = explainer.explain(
    model_inputs=[
        "This is the best movie I have ever seen.",
        "I hate this movie.",
        "This movie is super good. I love it.",
    ],
    # Presumably one target class index per input sentence — verify against the explainer API.
    targets=torch.tensor([1, 0, 1]),
)

# One visualization per input sentence. The loop variable is singular so it
# does not clobber the `attribution_outputs` name that other cells use for the
# whole result of `explain`.
for attribution_output in list_attribution_outputs:
    viz = SingleClassAttributionVisualization(
        attribution_output=attribution_output,
        margin_right="0.35em",
    )
    viz.display()

# Generation task

In [7]:
# Switch to a causal language model for the generation examples.
# NOTE: this rebinds `model` and `tokenizer`, replacing the classifier loaded in
# the classification section; re-run that section's loading cell before going
# back to its examples.
generation_checkpoint = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(generation_checkpoint)
model = AutoModelForCausalLM.from_pretrained(generation_checkpoint)

In [8]:
# Sobol attribution: word-level, total-order indices for a single prompt.
prompt = "Hi there, how are you?"
# NOTE(review): if these kwargs are forwarded to Hugging Face `generate`,
# `max_length` bounds prompt + generated tokens, not only the new tokens — confirm.
generation_kwargs = {"max_length": 10}

explainer = SobolAttribution(
    model=model,
    tokenizer=tokenizer,
    batch_size=4,
    granularity_level=GranularityLevel.WORD,
    n_token_perturbations=10,
    sobol_indices_order=SobolIndicesOrders.TOTAL_ORDER,
)
attribution_outputs = explainer.explain(model_inputs=prompt, generation_kwargs=generation_kwargs)

# A single prompt was submitted, so only the first attribution output is shown.
viz = GenerationAttributionVisualization(
    attribution_output=attribution_outputs[0],
    margin_right="0.35em",
    normalize=True,
    highlight_border=False,
)
viz.display()

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
  sample = self._random(n, workers=workers)


In [9]:
# Integrated Gradients: this cell feeds pre-tokenized inputs to the explainer
# instead of raw strings.
prompts = ["Hi there, how are you?", "What time is it?"]

# Each prompt is tokenized on its own, giving a list with one encoding per prompt.
tokenized_inputs = [tokenizer(prompt, return_tensors="pt") for prompt in prompts]

explainer = IntegratedGradients(model=model, tokenizer=tokenizer, batch_size=4, n_interpolations=10)
list_attribution_outputs = explainer.explain(
    model_inputs=tokenized_inputs,
    generation_kwargs={"max_length": 10},
)

# Display settings shared by every visualization in this cell.
viz_options = {"highlight_border": False, "normalize": True, "margin_right": "0.35em"}

# Each item of the list is displayed separately.
for attribution_outputs in list_attribution_outputs:
    viz = GenerationAttributionVisualization(attribution_output=attribution_outputs, **viz_options)
    viz.display()

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


In [10]:
# Saliency: attributions computed against explicitly provided target texts.
prompts = ["Hi there, how are you?", "What time is it?"]
target_texts = ["I am fine.", "It is 9 pm."]

explainer = Saliency(model=model, tokenizer=tokenizer, batch_size=4)

# NOTE(review): `generation_kwargs` is passed together with explicit `targets`;
# whether it has any effect in that case is not visible here — confirm.
list_attribution_outputs = explainer.explain(
    model_inputs=prompts,
    targets=target_texts,
    generation_kwargs={"max_length": 10},
)

# Display settings shared by every visualization in this cell.
viz_options = {"highlight_border": False, "normalize": True, "margin_right": "0.35em"}

# Each item of the list is displayed separately.
for attribution_outputs in list_attribution_outputs:
    viz = GenerationAttributionVisualization(attribution_output=attribution_outputs, **viz_options)
    viz.display()