In [None]:
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from interpreto.attributions.methods import (
    IntegratedGradients,
    KernelShap,
    Lime,
    OcclusionExplainer,
    Saliency,
    SmoothGrad,
    SobolAttribution,
)
from interpreto.commons.granularity import GranularityLevel
from interpreto.visualizations.attributions.classification_highlight import (
    MultiClassAttributionVisualization,
    SingleClassAttributionVisualization,
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Configuration and loading.
# pandas (`pd`) is imported together with the other dependencies in the
# notebook's first (imports) cell rather than here.

# Both local artefacts live in the same directory, so it is named once and the
# individual paths are derived from it (the resulting strings are unchanged).
# NOTE(review): this is an absolute, machine-specific location; point DATA_DIR
# at your own copy of the test artefacts when running elsewhere.
DATA_DIR = "/data/fanny.jourdan/interpreto_test"

dataset_path = f"{DATA_DIR}/labeled_noise_text_dataset.txt"
model_name = f"{DATA_DIR}/distilbert_trivial_classifier"
tokenizer_name = "hf-internal-testing/tiny-random-distilbert"

# The classifier is loaded from the local checkpoint, while the tokenizer is
# loaded under a separate identifier (tokenizer_name).
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

# The dataset file is parsed with read_csv's default settings.
df = pd.read_csv(dataset_path)

# Fail fast with a readable message: the explanation cells read df["text"].
assert "text" in df.columns, f"expected a 'text' column in {dataset_path}, got {list(df.columns)}"

In [3]:
# Keyword arguments that every explainer below is constructed with.
shared_kwargs = {"model": model, "batch_size": 4, "tokenizer": tokenizer}
# Extra keyword for the explainers that are configured at word granularity;
# the remaining ones do not pass granularity_level at all.
word_level = {"granularity_level": GranularityLevel.WORD}

# One instance of each attribution method, in the order they are run.
list_explainers = [
    OcclusionExplainer(**shared_kwargs, **word_level),
    IntegratedGradients(**shared_kwargs, n_interpolations=10),
    SmoothGrad(**shared_kwargs, n_interpolations=50, noise_level=0.01),
    Saliency(**shared_kwargs),
    Lime(**shared_kwargs, n_perturbations=100, **word_level),
    SobolAttribution(**shared_kwargs, n_token_perturbations=100, **word_level),
    KernelShap(**shared_kwargs, n_perturbations=100, **word_level),
]

# Index of the dataset row to explain.
k = 10

# The sample and the CSS override are the same for every explainer, so they
# are looked up once instead of on each iteration.
sample_text = df["text"][k]
word_spacing_css = ".common-word-style { margin-right: 0.3em }"

# Run each attribution method on the one sample and render its first output
# with the single-class visualization.
for explainer in list_explainers:
    print(f"Explaining with {explainer.__class__.__name__}")

    attribution_outputs = explainer.explain(model_inputs=[sample_text])
    first_output = attribution_outputs[0]

    viz = SingleClassAttributionVisualization(attribution_output=first_output, css=word_spacing_css)
    viz.display()

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Explaining with ClassificationOcclusionExplainer


Explaining with ClassificationIntegratedGradients


Explaining with ClassificationSmoothGrad


Explaining with ClassificationSaliency


Explaining with ClassificationLime




Explaining with ClassificationSobolAttribution


  sample = self._random(n, workers=workers)


Explaining with ClassificationKernelShap


In [4]:
# Index of the dataset row to explain.
k = 10

# Values shared by every iteration are named once up front.
sample_text = df["text"][k]
target_classes = [[0, 1]]
class_labels = ("A", "B")
word_spacing_css = ".common-word-style { margin-right: 0.3em }"

# Run each attribution method on the one sample, passing explicit targets,
# and render its first output with the multi-class visualization.
for explainer in list_explainers:
    print(f"Explaining with {explainer.__class__.__name__}")

    # A fresh targets tensor is built on every pass, as before.
    attribution_outputs = explainer.explain(
        model_inputs=[sample_text],
        targets=torch.tensor(target_classes),
    )
    first_output = attribution_outputs[0]

    viz = MultiClassAttributionVisualization(
        attribution_output=first_output,
        class_names=list(class_labels),
        css=word_spacing_css,
    )
    viz.display()

Explaining with ClassificationOcclusionExplainer


Explaining with ClassificationIntegratedGradients


Explaining with ClassificationSmoothGrad


Explaining with ClassificationSaliency


Explaining with ClassificationLime


Explaining with ClassificationSobolAttribution


Explaining with ClassificationKernelShap
