<a href="https://colab.research.google.com/github/TurkuNLP/textual-data-analysis-course/blob/main/model_explainability_qa_solved.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Model load

* Now we load the model and repeat some of the imports so it is possible to run the notebook from this point onwards

In [None]:
!pip3 install transformers 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m43.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.12.0-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m64.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.12.0 tokenizers-0.13.2 transformers-4.26.1


In [None]:
!wget http://dl.turkunlp.org/TKO_8964_2023/english-binarized-weighted.model.tgz
!tar zxvf english-binarized-weighted.model.tgz

--2023-02-12 20:37:42--  http://dl.turkunlp.org/TKO_8964_2023/english-binarized-weighted.model.tgz
Resolving dl.turkunlp.org (dl.turkunlp.org)... 195.148.30.23
Connecting to dl.turkunlp.org (dl.turkunlp.org)|195.148.30.23|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 402134026 (384M) [application/octet-stream]
Saving to: ‘english-binarized-weighted.model.tgz’


2023-02-12 20:38:03 (18.5 MB/s) - ‘english-binarized-weighted.model.tgz’ saved [402134026/402134026]

english-binarized-weighted.model/
english-binarized-weighted.model/training_args.bin
english-binarized-weighted.model/pytorch_model.bin
english-binarized-weighted.model/tokenizer.json
english-binarized-weighted.model/vocab.txt
english-binarized-weighted.model/config.json
english-binarized-weighted.model/special_tokens_map.json
english-binarized-weighted.model/tokenizer_config.json


In [None]:
# relevant stuff repeated from above so you can run it from here onwards if you happen to have the model trained

from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments
from transformers import Trainer

# Directory unpacked from the downloaded archive (weights, config and tokenizer files)
MODEL_NAME = "english-binarized-weighted.model"

# The tokenizer is loaded from the same directory as the model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [None]:
import torch
# Load the classifier and keep it on the CPU, which simplifies input placement
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to('cpu')

# Class names, looked up by the index of the highest-scoring logit
label_names = ['no_answer', 'does_answer']

def predict_qa(question,context):
    """Predict whether `context` answers `question`.

    Args:
        question: the question text.
        context: the text that may or may not contain the answer.

    Returns:
        The entry of `label_names` whose index has the highest logit.
    """
    # truncation=True keeps an over-long pair within the tokenizer's configured
    # maximum length instead of overflowing the model's position embeddings
    tokenized = tokenizer(text=question, text_pair=context, truncation=True, return_tensors='pt')
    # Pure inference: no autograd graph is needed here
    with torch.no_grad():
        pred = model(**tokenized)
    # A single pair is scored, so the argmax over the class axis is one index
    pred_idx = pred.logits.argmax(dim=-1).item()
    return label_names[pred_idx]

Let's try that out on an example question and context:

In [None]:
# Question/context pairs to classify. The keys match the parameter names of
# predict_qa, so each dict can be unpacked straight into the call with **.
# The context string (including its line breaks and indentation) is passed to
# the tokenizer exactly as written here.
example_pairs = [
    {"question":"When was University of Turku founded?",
     "context": """The University of Turku (Finnish: Turun yliopisto,
      in Swedish: Åbo universitet, shortened UTU), located in Turku in
      southwestern Finland, is the third largest university in the country
      as measured by student enrollment, after the University of Helsinki
      and Tampere University. It is a multidisciplinary university with 
      eight faculties. It was established in 1920 and also has facilities 
      at Rauma, Pori, Kevo and Seili. The university is a member of 
      the Coimbra Group and the European Campus of City-Universities (EC2U)."""
     }
]

# Print every question next to the label predicted for its pair
for pair in example_pairs:
    verdict = predict_qa(**pair)
    print(pair["question"], '->', verdict)

When was University of Turku founded? -> does_answer


# Model explainability with the captum library

* captum.ai is a nifty library with many of the explainability algorithms implemented
* we will use it here to try the techniques
* this is not the easiest code, btw: many of the libraries are bleeding-edge, so you will cut yourself here and there (the blood on the edge is yours)
* the code below is a (quite substantial) adaptation of https://captum.ai/tutorials/Bert_SQUAD_Interpret

In [None]:
!pip install captum pandas matplotlib seaborn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting captum
  Downloading captum-0.6.0-py3-none-any.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: captum
Successfully installed captum-0.6.0


In [None]:
from captum.attr import visualization as viz
from captum.attr import IntegratedGradients, LayerConductance, LayerIntegratedGradients
from captum.attr import configure_interpretable_embedding_layer, remove_interpretable_embedding_layer

In [None]:
# Switch the model to evaluation mode, then clear the gradients of its parameters
model.eval().zero_grad()

# Helper functions

In [None]:
# Plain forward pass: tensors in, classification logits out
def predict(input_ids,token_type_ids,attention_mask):
    """Run the model on already-tokenized input and return its logits.

    Args:
        input_ids: token ids of the input.
        token_type_ids: segment ids of the input.
        attention_mask: attention mask of the input.

    Returns:
        The `logits` of the model output, i.e. the output of the classification layer.
    """
    outputs=model(input_ids=input_ids,token_type_ids=token_type_ids,attention_mask=attention_mask)
    return outputs.logits



In [None]:
def construct_input_ref_pair(question,context):
    """Tokenize a (question, context) pair and build a matching blank baseline.

    The baseline keeps the question but replaces the context with as many
    padding tokens as the context has tokens, so that input and baseline line
    up position by position.

    Args:
        question: the question text.
        context: the context text.

    Returns:
        Two tuples ``(input_ids, token_type_ids, attention_mask)``: the first
        for the real input, the second for the baseline.

    Raises:
        ValueError: if the baseline does not have the same shape as the input.
    """
    inp=tokenizer(text=question,text_pair=context,return_tensors="pt")
    # How many tokens does the context take up (special tokens not counted)?
    context_tok=tokenizer(context,add_special_tokens=False)["input_ids"]
    # Use the tokenizer's own padding token rather than a hard-coded "[PAD]";
    # this assumes the tokenizer maps each occurrence back to one padding token
    blank_context=" ".join([tokenizer.pad_token]*len(context_tok))
    ref=tokenizer(text=question,text_pair=blank_context,return_tensors="pt")
    # Fail early with a clear message if input and baseline are not aligned
    if ref["input_ids"].shape != inp["input_ids"].shape:
        raise ValueError(
            "baseline shape %s does not match input shape %s"
            % (tuple(ref["input_ids"].shape), tuple(inp["input_ids"].shape))
        )
    return (inp["input_ids"], inp["token_type_ids"], inp["attention_mask"]),\
    (ref["input_ids"], ref["token_type_ids"], ref["attention_mask"])



In [None]:
# Build the input and its blank baseline for the first example,
# then check that the forward pass produces logits
inp, ref = construct_input_ref_pair(**example_pairs[0])
p = predict(*inp)
print(f"p= {p}")
print(f"p.shape {p.shape}")

p= tensor([[-1.4038,  1.4566]], grad_fn=<AddmmBackward0>)
p.shape torch.Size([1, 2])


In [None]:
# The attribution part: attribute the output of `predict` with respect to the embedding layer
lig = LayerIntegratedGradients(forward_func=predict, layer=model.bert.embeddings)

In [None]:
# inputs:    the real input triple
# baselines: the blank baseline triple
# target:    index of the output class the attribution is computed for
attrs, delta = lig.attribute(
    inputs=inp,
    baselines=ref,
    target=1,
    return_convergence_delta=True,
)
print(f"attrs shape {attrs.shape}")

attrs shape torch.Size([1, 134, 768])


In [None]:
def summarize_attributions(attributions):
    """Collapse attributions into one normalized score per position.

    The last dimension is summed away, a leading dimension of size 1 is
    squeezed out, and the result is divided by its L2 norm along dim 0.

    Args:
        attributions: tensor of attributions whose last dimension is summed.

    Returns:
        The summed attributions divided by their norm. Where the norm is
        zero the values are returned as zeros instead of NaN.
    """
    attributions = attributions.sum(dim=-1).squeeze(0)
    norm = torch.linalg.vector_norm(attributions,dim=0)
    # All-zero attributions have norm 0; divide by 1 there so the result
    # stays 0 instead of turning into NaN
    safe_norm = torch.where(norm > 0, norm, torch.ones_like(norm))
    return attributions / safe_norm

# Reduce the attributions to a single score per token
attrs_sum = summarize_attributions(attributions=attrs)
print(f"attrs_sum shape {attrs_sum.shape}")

attrs_sum shape torch.Size([134])


In [None]:
# Show the raw scores and the tokens, then one "score token" pair per line
tokens = tokenizer.convert_ids_to_tokens(inp[0][0])
print(attrs_sum)
print(tokens)

for score, token in zip(attrs_sum, tokens):
    print(float(score), token)

tensor([ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  5.1144e-02,  1.0043e-01,  1.0590e-01,  8.0828e-02,
         9.2856e-02,  1.0488e-01,  3.4153e-02,  6.0640e-02,  2.1290e-03,
         2.2661e-02,  2.8650e-02,  3.1066e-02,  2.8959e-02,  7.3983e-03,
         1.2038e-02, -1.3695e-03,  6.4360e-03,  1.1252e-02, -3.6065e-02,
         5.7909e-03,  1.5247e-02, -7.5268e-03,  2.1909e-03, -2.0554e-02,
        -9.2351e-04,  6.8407e-03,  2.5283e-03,  1.0588e-02, -1.2810e-02,
        -1.4111e-02, -1.4266e-02, -1.4134e-03,  2.4064e-02,  4.5502e-02,
         4.2195e-02,  6.4391e-02, -2.3645e-03,  2.5417e-02,  7.2869e-02,
        -5.6970e-04, -2.7238e-02,  5.2433e-03,  5.7280e-02, -1.0741e-02,
         6.3813e-02,  3.6112e-02,  2.1141e-02,  1.5774e-02,  3.3020e-02,
        -1.4722e-02,  2.1891e-02,  5.6451e-02,  4.4617e-02,  3.6945e-02,
         2.1848e-02, -9.2404e-03,  2.1187e-01,  2.9

Damn, that seems to work!

In [None]:
import captum
# Import from IPython.display: importing `display` from IPython.core.display
# has been deprecated since IPython 7.14
from IPython.display import HTML, display
# Format the tokens together with their attribution scores as an HTML snippet
x=captum.attr.visualization.format_word_importances(tokenizer.convert_ids_to_tokens(inp[0][0]),attrs_sum)
HTML(x)