Notebook to test displays.

Supports any model and tokenizer from Hugging Face.

In [1]:
from transformers import AutoTokenizer, BertForSequenceClassification
from explainers.explanation_methods import SHAP, LIME
import shap
import torch


# One checkpoint id shared by the tokenizer and the classifier, so both are
# always loaded from the same place.
BERT_YELP_CHECKPOINT = "textattack/bert-base-uncased-yelp-polarity"

tokenizer = AutoTokenizer.from_pretrained(BERT_YELP_CHECKPOINT)
model = BertForSequenceClassification.from_pretrained(BERT_YELP_CHECKPOINT)
print('model loaded')

# Single example to classify and explain. Named `text` rather than `input` so
# the built-in `input()` stays usable for the rest of the kernel session.
text = 'Hello, my dog is so terribly ugly'
tokenized_input = tokenizer(text, return_tensors="pt")
# Reference label for the example, shaped (1, 1) — batch size 1.
label = torch.tensor([[0]])

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    output = model(**tokenized_input, labels=label)

# Read the fields by name instead of relying on their position in the output.
loss, logits = output.loss, output.logits
# Reduce over the class axis: a bare argmax() works on the flattened tensor and
# would return a flat index as soon as the batch holds more than one example.
predicted_class_id = logits.argmax(dim=-1).item()
predicted_class = model.config.id2label[predicted_class_id]
true_class = model.config.id2label[label.item()]

print(f'input: {text}\n logits: {logits} \n predicted class: {predicted_class} \n true class: {true_class}')


# Explain the same text with the project's SHAP wrapper and render the result
# with shap's text plot.
explainer = SHAP(model, tokenizer)
explanation = explainer.explain(text)
print('SHAP explanation: ', explanation)

shap.plots.text(explanation)


model loaded
input: Hello, my dog is so terribly ugly
 logits: tensor([[ 4.5232, -4.2648]]) 
 predicted class: LABEL_0 
 true class: LABEL_0
SHAP explanation:  .values =
array([[ 0.        ,  0.        ],
       [-0.10023548,  0.10023542],
       [ 0.011923  , -0.01192299],
       [-0.01269054,  0.01269055],
       [ 0.0038579 , -0.0038579 ],
       [ 0.02124363, -0.02124363],
       [ 0.04590302, -0.04590301],
       [ 0.02927343, -0.02927346],
       [ 0.02953256, -0.02953256],
       [ 0.        ,  0.        ]])

.base_values =
array([0.97103989, 0.02896013])

.data =
array(['', 'Hello', ', ', 'my ', 'dog ', 'is ', 'so ', 'terribly ',
       'ugly', ''], dtype=object)


In [4]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification


model_name = "distilbert-base-uncased-finetuned-sst-2-english"
# Bound to distilbert_* names so the `tokenizer` / `model` objects loaded
# earlier in the notebook are not silently replaced for cells that run after
# this one (the `tokenizer('[PAD]')` check further down relies on them).
distilbert_tokenizer = DistilBertTokenizer.from_pretrained(model_name)
distilbert_model = DistilBertForSequenceClassification.from_pretrained(model_name)
print('model loaded')

# Named `review_text` rather than `input` so the built-in `input()` is not shadowed.
review_text = "i went and saw this movie last night after being coaxed to by a few friends of mine . i ' ll admit that i was reluctant to see it because from what i knew of ashton kutcher he was only able to do comedy . i was wrong . kutcher played the character of jake fischer very well , and kevin costner played ben randall with such professionalism . the sign of a good movie is that it can toy with our emotions . this one did exactly that . the entire theater ( which was sold out ) was overcome by laughter during the"

# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt before
# 'explanation obtained' was printed — the LIME step may be slow on an input
# this long; TODO confirm and consider a shorter text or fewer samples.
explainer = LIME(distilbert_model, distilbert_tokenizer)
explanation = explainer.explain(review_text)
print('explanation obtained')
shap.plots.text(explanation)

model loaded


KeyboardInterrupt: 

In [3]:
# Sanity check: run the literal "[PAD]" string through whichever `tokenizer`
# is currently bound in the kernel. In the recorded output the middle id is 0,
# i.e. the [PAD] token maps to id `0`.
pad_probe = '[PAD]'
tokenizer(pad_probe)

{'input_ids': [101, 0, 102], 'token_type_ids': [0, 0, 0], 'attention_mask': [1, 1, 1]}