<a href="https://colab.research.google.com/github/heinohen/Textual-Data-Analysis/blob/main/TDA_exercise8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Textual Data Analysis exercise 8

"Let us assume we train a model which receives a question and a text segment on its input, and predicts YES/NO whether the text segment contains the answer to the question. It should then be so that if the answer is YES, the explanation of the prediction should point out to the answer in the text."

## SETUP

In [1]:
!pip -q install transformers captum

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m48.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m89.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m83.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m53.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m39.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# dl the model

!wget http://dl.turkunlp.org/TKO_8964_2023/english-binarized-weighted.model.tgz

--2025-02-06 17:11:35--  http://dl.turkunlp.org/TKO_8964_2023/english-binarized-weighted.model.tgz
Resolving dl.turkunlp.org (dl.turkunlp.org)... 195.148.30.23
Connecting to dl.turkunlp.org (dl.turkunlp.org)|195.148.30.23|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 402134026 (384M) [application/octet-stream]
Saving to: ‘english-binarized-weighted.model.tgz’


2025-02-06 17:12:03 (13.6 MB/s) - ‘english-binarized-weighted.model.tgz’ saved [402134026/402134026]



In [3]:
# unzip the archive

!tar -xzvf english-binarized-weighted.model.tgz

english-binarized-weighted.model/
english-binarized-weighted.model/training_args.bin
english-binarized-weighted.model/pytorch_model.bin
english-binarized-weighted.model/tokenizer.json
english-binarized-weighted.model/vocab.txt
english-binarized-weighted.model/config.json
english-binarized-weighted.model/special_tokens_map.json
english-binarized-weighted.model/tokenizer_config.json


In [6]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification

from captum.attr import visualization as viz
from captum.attr import IntegratedGradients, LayerConductance, LayerIntegratedGradients
from captum.attr import configure_interpretable_embedding_layer, remove_interpretable_embedding_layer

import torch

### helper functions

In [7]:
# Plain forward pass: token tensors in, classification logits out.
def predict(inputs, token_type_ids, attention_mask):
    """Run the notebook-level `model` and return the logits of its output.

    `inputs` is handed to the model as its first positional argument, while
    `token_type_ids` and `attention_mask` are forwarded as keyword arguments.
    """
    return model(
        inputs,
        token_type_ids=token_type_ids,
        attention_mask=attention_mask,
    ).logits

In [8]:

# Build the model input for a (question, answer) pair together with a blank
# reference of the same layout in which every text token is replaced by [PAD].
def construct_input_ref_pair(question:str, answer:str) -> tuple:
    """Tokenize a question/answer pair and a matching all-[PAD] reference.

    Uses the notebook-level `tokenizer`.

    Returns a 2-tuple ``(input, reference)``; each element is itself a tuple
    ``(input_ids, token_type_ids, attention_mask)`` taken from the tokenizer
    output requested with ``return_tensors="pt"``.
    """
    def _as_triple(encoding):
        # Pick out the three fields the forward function expects, in order.
        return (encoding["input_ids"], encoding["token_type_ids"], encoding["attention_mask"])

    def _blank_like(text):
        # Count the tokens of `text` WITHOUT special tokens, then build a
        # string with that many "[PAD]" placeholders separated by spaces.
        n_tokens = len(tokenizer(text, add_special_tokens=False)["input_ids"])
        return " ".join("[PAD]" for _ in range(n_tokens))

    # Real input: the pair is encoded in one call, special tokens included.
    encoded_input = tokenizer(question, text_pair=answer, return_tensors="pt")

    # Reference: the same pair encoding applied to the two blanked-out texts.
    encoded_ref = tokenizer(_blank_like(question), text_pair=_blank_like(answer), return_tensors="pt")

    return _as_triple(encoded_input), _as_triple(encoded_ref)

## load the model

In [9]:
# load

path_to_the_model = 'english-binarized-weighted.model/' # load from this path
tokenizer = AutoTokenizer.from_pretrained(path_to_the_model) # load tokenizer
model = AutoModelForSequenceClassification.from_pretrained(path_to_the_model) # load model


In [10]:
# Let's try it!

# question
question = "When was University of Turku founded ?"
# answer
answer = "The University of Turku (Finnish: Turun yliopisto, in Swedish: Åbo universitet, shortened UTU), is located in Turku in southwestern Finland, is the third largest university in the country as measured by student enrollment, after the University of Helsinki and Tampere University. It is a multidisciplinary university with eight faculties. It was establised in 1920 and also has facilities at Rauma, Pori, Kevo and Seili. The university is a member of the Coimbra Group and the European Campus of City - Universities (EC2U)."


#input:
inp, ref = construct_input_ref_pair(question = question, answer = answer) # this is changed from the example notebook


# check that the lengths match and types of the tokens, especially that there is the [SEP] token between the q and a
print(f"INPUT with lenght of {len(inp[0][0])}") # question + answer
all_tokens = tokenizer.convert_ids_to_tokens(inp[0][0]) # ids => tokens
print(all_tokens)

print(f"REF with lenght of {len(ref[0][0])}") # reference PADs
ref_tokens = tokenizer.convert_ids_to_tokens(ref[0][0]) # ids => tokens
print(ref_tokens)

INPUT with lenght of 138
['[CLS]', 'When', 'was', 'University', 'of', 'Tu', '##rk', '##u', 'founded', '?', '[SEP]', 'The', 'University', 'of', 'Tu', '##rk', '##u', '(', 'Finnish', ':', 'Tu', '##run', 'y', '##lio', '##pis', '##to', ',', 'in', 'Swedish', ':', 'Å', '##bo', 'un', '##ivers', '##ite', '##t', ',', 'shortened', 'U', '##TU', ')', ',', 'is', 'located', 'in', 'Tu', '##rk', '##u', 'in', 'southwestern', 'Finland', ',', 'is', 'the', 'third', 'largest', 'university', 'in', 'the', 'country', 'as', 'measured', 'by', 'student', 'enrollment', ',', 'after', 'the', 'University', 'of', 'Helsinki', 'and', 'Tam', '##per', '##e', 'University', '.', 'It', 'is', 'a', 'multi', '##disciplinary', 'university', 'with', 'eight', 'faculties', '.', 'It', 'was', 'est', '##ab', '##lis', '##ed', 'in', '1920', 'and', 'also', 'has', 'facilities', 'at', 'Ra', '##uma', ',', 'Po', '##ri', ',', 'Ke', '##vo', 'and', 'Se', '##ili', '.', 'The', 'university', 'is', 'a', 'member', 'of', 'the', 'Co', '##im', '##bra',

In [11]:
p=predict(*inp) # predict based on input values "input_ids", "token_type_ids", "attention_mask"
print("p=",p)
print("p.shape",p.shape) # prediction is either negative or positive

p= tensor([[-1.1945,  1.2058]], grad_fn=<AddmmBackward0>)
p.shape torch.Size([1, 2])


In [12]:
# Yay, now we finally made it to the attribution part
lig = LayerIntegratedGradients(predict, model.bert.embeddings) #attribute the output wrt to embeddings

In [13]:

# inputs: inputs
# baselines: the blank baseline
# target: which of the two classes in the output (pos/neg) to run the prediction against?
attrs, delta = lig.attribute(inputs=inp,
                                  baselines=ref,
                                  return_convergence_delta=True,target=1) # this runs against 1 class
print("attrs shape",attrs.shape)

attrs shape torch.Size([1, 138, 768])


In [14]:
def summarize_attributions(attributions):
    """Collapse embedding-level attributions into one normalised score per token.

    The last dimension is summed away, a leading dimension of size 1 is
    squeezed out, and the result is divided by its L2 norm along dim 0.

    If that norm is zero (every attribution is zero), the scores are returned
    unscaled (all zeros) instead of the NaNs a 0/0 division would produce.
    """
    token_scores = attributions.sum(dim=-1).squeeze(0)  # sum over the last dimension
    norm = torch.linalg.vector_norm(token_scores, dim=0)
    # Substitute 1 for a zero norm so the division is a no-op there;
    # any non-zero norm is used unchanged.
    safe_norm = torch.where(norm > 0, norm, torch.ones_like(norm))
    return token_scores / safe_norm

attrs_sum = summarize_attributions(attrs) # sums
print("attrs_sum shape",attrs_sum.shape) # attributes for all tokens

attrs_sum shape torch.Size([138])


In [15]:


#print(attrs_sum)
#print(tokenizer.convert_ids_to_tokens(inp[0][0]))

#for a,t in zip(attrs_sum,tokenizer.convert_ids_to_tokens(inp[0][0])):
#    print(float(a),t)

# too much clutter

In [16]:


# Render the per-token attributions as HTML in the cell output.
# `HTML` and `display` come from the public `IPython.display` module:
# importing `display` from `IPython.core.display` has been deprecated since
# IPython 7.14.
import captum
from IPython.display import HTML, display
x=captum.attr.visualization.format_word_importances(tokenizer.convert_ids_to_tokens(inp[0][0]),attrs_sum)
HTML(x)



In [17]:

def predict_and_explain(model,question, answer):
    """Classify a (question, answer) pair and display per-token attributions.

    Prints the predicted class and the two logits, then displays the tokens
    with their Layer Integrated Gradients attributions computed towards the
    predicted class.

    Parameters:
        model: sequence-classification model exposing `.bert.embeddings` and
            returning an object with `.logits`. Both the forward pass and the
            attributed layer use THIS model.
        question: question text.
        answer: text segment that may contain the answer.

    Relies on the notebook-level `tokenizer`, `construct_input_ref_pair` and
    `summarize_attributions`.
    """
    def forward_logits(input_ids, token_type_ids, attention_mask):
        # Forward through the model passed as argument, so the forward
        # function and the attributed embedding layer always belong to the
        # same model (the global `predict` would use the global `model`).
        return model(input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask).logits

    model.zero_grad() #to be safe perhaps it's not needed
    inp, ref = construct_input_ref_pair(question, answer) # (input triple, blank reference triple)
    all_tokens = tokenizer.convert_ids_to_tokens(inp[0][0]) # token strings of the single input sequence

    # Attribute the output with respect to the embedding layer.
    lig = LayerIntegratedGradients(forward_logits, model.bert.embeddings)

    prediction = forward_logits(*inp)[0] # logits of the single example
    prediction_cls = int(torch.argmax(prediction)) # index of the larger logit
    print("Prediction:", ("negative","positive")[prediction_cls],"Weights:",prediction.tolist())

    # Attribute against the predicted class; the convergence delta is not used.
    attrs, _delta = lig.attribute(inputs=inp,
                                  baselines=ref,
                                  return_convergence_delta=True,
                                  target=prediction_cls)
    attrs_sum = summarize_attributions(attrs) # one score per token

    x = captum.attr.visualization.format_word_importances(all_tokens, attrs_sum)
    display(HTML(x))
    print()


In [18]:
predict_and_explain(model, question, answer) # just call, used to be predict_and_explain(model,text)

Prediction: positive Weights: [-1.194491982460022, 1.205803632736206]





### try for full words

In [19]:
# idea:

## _if_ the attribution values indicate the direction in which each token pushes the prediction, can the subwords AND their attributions be combined (summed) into full words?

# step 1. calculate attributions towards the predicted class (larger of the two)
# step 2.
#       a ) combine the subwords in a loop
#       b ) in SAME loop, combine the attribution values ???
# step 3. use THESE to visualize the result in the same manner as above, where the subwords were still shown separately?
#
# soo....
# let's try
# see what types are they: tensor, list
# remove the ## and add the attribution
# collect the values to lists that can be returned

In [20]:
def collate_words_and_attributions(all_tokens, attrs_sum) -> tuple:
    """Merge "##"-prefixed subword tokens into full words and sum their attributions.

    A token starting with "##" is appended (without the prefix) to the word
    being built and its attribution is added to that word's total. Any other
    token starts a new word.

    Parameters:
        all_tokens: iterable of token strings.
        attrs_sum: iterable of per-token attribution values, aligned with
            `all_tokens`; each value must be convertible with `float()`.

    Returns:
        A tuple ``(full_words, attributions_to_full)`` of two equally long
        lists: the merged words and, as Python floats, their summed
        attributions.

    Every value is converted to a Python float before accumulation, so the
    caller's `attrs_sum` is never modified. Accumulating with `+=` directly
    on elements obtained by iterating a tensor would update it in place.
    """
    subword_prefix = "##"  # marker of a continuation piece

    full_words = []            # merged words
    attributions_to_full = []  # summed attribution per merged word

    current_word = ""   # word being assembled; empty until the first token
    current_attr = 0.0  # running attribution total of `current_word`

    for token, attr in zip(all_tokens, attrs_sum):
        if token.startswith(subword_prefix):
            # Continuation piece: strip only the leading marker and extend
            # the current word and its attribution total.
            current_word += token[len(subword_prefix):]
            current_attr += float(attr)
            continue

        # A new word starts: emit the finished one (if any) first.
        if current_word:
            full_words.append(current_word)
            attributions_to_full.append(current_attr)
        current_word = token
        current_attr = float(attr)  # reset the attribution total

    # Flush the word still being assembled when the tokens run out;
    # without this the final token would be dropped.
    if current_word:
        full_words.append(current_word)
        attributions_to_full.append(current_attr)

    return (full_words, attributions_to_full)


In [27]:
def workbench(model, question, answer) -> None:
    """Classify a (question, answer) pair and display word-level attributions.

    Same pipeline as `predict_and_explain`, except that subword tokens are
    merged into full words (with summed attributions) before visualisation,
    and a few diagnostic lines are printed along the way.

    Parameters:
        model: sequence-classification model exposing `.bert.embeddings` and
            returning an object with `.logits`. Both the forward pass and the
            attributed layer use THIS model.
        question: question text.
        answer: text segment that may contain the answer.

    Relies on the notebook-level `tokenizer`, `construct_input_ref_pair`,
    `summarize_attributions` and `collate_words_and_attributions`.
    """
    def forward_logits(input_ids, token_type_ids, attention_mask):
        # Forward through the model passed as argument, so the forward
        # function and the attributed embedding layer always belong to the
        # same model (the global `predict` would use the global `model`).
        return model(input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask).logits

    model.zero_grad() #to be safe perhaps it's not needed
    inp, ref = construct_input_ref_pair(question, answer)
    all_tokens = tokenizer.convert_ids_to_tokens(inp[0][0])

    lig = LayerIntegratedGradients(forward_logits, model.bert.embeddings)
    prediction = forward_logits(*inp)[0] # logits of the single example
    prediction_cls = int(torch.argmax(prediction)) # index of the larger logit
    print("Prediction:", ("negative","positive")[prediction_cls],"Weights:",prediction.tolist())

    # Attribute against the predicted class; the convergence delta is not used.
    attrs, _delta = lig.attribute(inputs=inp,
                                  baselines=ref,
                                  return_convergence_delta=True,
                                  target=prediction_cls)
    attrs_sum = summarize_attributions(attrs)

    # Diagnostics: tokens and attributions must line up one-to-one.
    print(f'len of attrs_sum {len(attrs_sum)}')
    print(f'len of all tokens {len(all_tokens)}')
    print('type of attrs_sum', attrs_sum.__class__) # tensor
    print('type of all_tokens', all_tokens.__class__) # list

    # Merge subword pieces into full words and sum their attributions.
    full_words, full_attrs = collate_words_and_attributions(all_tokens, attrs_sum)

    # The model input has the form "[CLS] question [SEP] context [SEP]" and the
    # output has two logits: the first for the negative class (question not
    # answered), the second for the positive class (question answered).
    if prediction_cls == 0:
        print("The question was not answered correctly.")
    else:
        print("The question was answered correctly.")

    x = captum.attr.visualization.format_word_importances(full_words, full_attrs)
    display(HTML(x))
    print()






In [25]:
# start small
work_question = "When was University of Turku founded ?"
work_answer = " It was establised in 1920"

workbench(model,work_question,work_answer)

Prediction: positive Weights: [-0.2669466733932495, -0.02393500506877899]
len of attrs_sum 20
len of all tokens 20
type of attrs_sum <class 'torch.Tensor'>
type of all_tokens <class 'list'>
The question was answered correctly.





In [26]:
# now with the full question and answer text
#work_question = "When was University of Turku founded ?"
#work_answer = " It was establised in 1920"

workbench(model,question,answer)

Prediction: positive Weights: [-1.194491982460022, 1.205803632736206]
len of attrs_sum 138
len of all tokens 138
type of attrs_sum <class 'torch.Tensor'>
type of all_tokens <class 'list'>
The question was answered correctly.



