In [141]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

from transformers import BertTokenizer, DistilBertForQuestionAnswering, BertConfig, DistilBertTokenizerFast, pipeline

from captum.attr import visualization as viz
from captum.attr import LayerConductance, LayerIntegratedGradients

In [142]:
device = torch.device("cpu")

In [143]:
model = DistilBertForQuestionAnswering.from_pretrained("../models/custom/")
model.eval()
model.zero_grad()

tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

In [144]:
def predict(inputs, token_type_ids=None, position_ids=None, attention_mask=None):
    output = model(inputs)
    return output.start_logits, output.end_logits

In [145]:
def squad_pos_forward_func(inputs, token_type_ids=None, position_ids=None, attention_mask=None, position=0):
    pred = predict(inputs,
                   token_type_ids=token_type_ids,
                   position_ids=position_ids,
                   attention_mask=attention_mask)
    pred = pred[position]
    return pred.max(1).values

In [146]:
ref_token_id = tokenizer.pad_token_id # A token used for generating token reference
sep_token_id = tokenizer.sep_token_id # A token used as a separator between question and text and it is also added to the end of the text.
cls_token_id = tokenizer.cls_token_id # A token used for prepending to the concatenated question-text word sequence

In [147]:
def construct_input_ref_pair(question, text, ref_token_id, sep_token_id, cls_token_id):
    question_ids = tokenizer.encode(question, add_special_tokens=False)
    text_ids = tokenizer.encode(text, add_special_tokens=False)

    # construct input token ids
    input_ids = [cls_token_id] + question_ids + [sep_token_id] + text_ids + [sep_token_id]

    # construct reference token ids 
    ref_input_ids = [cls_token_id] + [ref_token_id] * len(question_ids) + [sep_token_id] + \
        [ref_token_id] * len(text_ids) + [sep_token_id]

    return torch.tensor([input_ids], device=device), torch.tensor([ref_input_ids], device=device), len(question_ids)

def construct_input_ref_token_type_pair(input_ids, sep_ind=0):
    seq_len = input_ids.size(1)
    token_type_ids = torch.tensor([[0 if i <= sep_ind else 1 for i in range(seq_len)]], device=device)
    ref_token_type_ids = torch.zeros_like(token_type_ids, device=device)# * -1
    return token_type_ids, ref_token_type_ids

def construct_input_ref_pos_id_pair(input_ids):
    seq_length = input_ids.size(1)
    position_ids = torch.arange(seq_length, dtype=torch.long, device=device)
    # we could potentially also use random permutation with `torch.randperm(seq_length, device=device)`
    ref_position_ids = torch.zeros(seq_length, dtype=torch.long, device=device)

    position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
    ref_position_ids = ref_position_ids.unsqueeze(0).expand_as(input_ids)
    return position_ids, ref_position_ids
    
def construct_attention_mask(input_ids):
    return torch.ones_like(input_ids)

def construct_whole_bert_embeddings(input_ids, ref_input_ids, \
                                    token_type_ids=None, ref_token_type_ids=None, \
                                    position_ids=None, ref_position_ids=None):
    input_embeddings = model.bert.embeddings(input_ids, position_ids=position_ids)
    ref_input_embeddings = model.bert.embeddings(ref_input_ids, position_ids=ref_position_ids)
    
    return input_embeddings, ref_input_embeddings

In [148]:
question = "Where is Charlie Jones from"

with open("../data/Passages/introduction_of_antagonist.txt", "r", encoding="utf8") as f:
    text = f.read()

In [149]:
input_ids, ref_input_ids, sep_id = construct_input_ref_pair(question, text, ref_token_id, sep_token_id, cls_token_id)
token_type_ids, ref_token_type_ids = construct_input_ref_token_type_pair(input_ids, sep_id)
position_ids, ref_position_ids = construct_input_ref_pos_id_pair(input_ids)
attention_mask = construct_attention_mask(input_ids)

indices = input_ids[0].detach().tolist()
all_tokens = tokenizer.convert_ids_to_tokens(indices)

In [150]:
ground_truth = 'German'

ground_truth_tokens = tokenizer.encode(ground_truth, add_special_tokens=False)
ground_truth_end_ind = indices.index(ground_truth_tokens[-1])
ground_truth_start_ind = ground_truth_end_ind - len(ground_truth_tokens) + 1

In [151]:
start_scores, end_scores = predict(input_ids, \
                                   position_ids=position_ids, \
                                   attention_mask=attention_mask)


print('Question: ', question)
print('Predicted Answer: ', ' '.join(all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores)+1]))
print(all_tokens[torch.argmax(start_scores)])

Question:  Where is Charlie Jones from
Predicted Answer:  he was a benevolent little man , highly religious , and something of a philosopher
he


In [152]:
lig = LayerIntegratedGradients(squad_pos_forward_func, model.distilbert.embeddings)

attributions_start, delta_start = lig.attribute(inputs=input_ids,
                                  baselines=ref_input_ids,
                                  additional_forward_args=(token_type_ids, position_ids, attention_mask, 0),
                                  return_convergence_delta=True)
attributions_end, delta_end = lig.attribute(inputs=input_ids, baselines=ref_input_ids,
                                additional_forward_args=(token_type_ids, position_ids, attention_mask, 1),
                                return_convergence_delta=True)

In [153]:
def summarize_attributions(attributions):
    attributions = attributions.sum(dim=-1).squeeze(0)
    attributions = attributions / torch.norm(attributions)
    return attributions

In [154]:
attributions_start_sum = summarize_attributions(attributions_start)
attributions_end_sum = summarize_attributions(attributions_end)

In [155]:
start_position_vis = viz.VisualizationDataRecord(
                        attributions_start_sum,
                        torch.max(torch.softmax(start_scores[0], dim=0)),
                        torch.argmax(start_scores),
                        torch.argmax(start_scores),
                        str(ground_truth_start_ind),
                        attributions_start_sum.sum(),       
                        all_tokens,
                        delta_start)

end_position_vis = viz.VisualizationDataRecord(
                        attributions_end_sum,
                        torch.max(torch.softmax(end_scores[0], dim=0)),
                        torch.argmax(end_scores),
                        torch.argmax(end_scores),
                        str(ground_truth_end_ind),
                        attributions_end_sum.sum(),       
                        all_tokens,
                        delta_end)

print('\033[1m', 'Visualizations For Start Position', '\033[0m')
viz.visualize_text([start_position_vis])

print('\033[1m', 'Visualizations For End Position', '\033[0m')
viz.visualize_text([end_position_vis])

[1m Visualizations For Start Position [0m


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
92.0,92 (0.02),72.0,6.69,"[CLS] where is charlie jones from [SEP] turner had gone below , grimly good - humor ##ed , to dress for dinner ; and i went aft to chat , as i often did , with the steer ##sman . on this occasion it happened to be charlie jones . jones was not his name , so far as i know . it was some in ##ord ##inate ##ly long and different german inheritance , and so , with the facility of the average crew , he had been called jones . he was a benevolent little man , highly religious , and something of a philosopher . and because i could understand german , and even essay it in a limited way , he was fond of me . “ set ##z du di ##ch , ” he said , and moved over so that i could sit on the gr ##ating on which he stood . “ the sky is fine to - night . wu ##nder ##sch ##on ! ” “ it always looks good to me , ” i observed , filling my pipe and passing my tobacco - bag to him . “ i may have my doubts now and then on land , charlie ; but here , between the sky and the sea , i ’ m a believer , right enough . ” “ ‘ in the beginning he created the heaven and the earth , ’ ” said charlie rev ##ere ##ntly . we were silent for a time . the ship rolled easily ; now and then she dipped her bows ##pr ##it with a soft sw ##ish of spray ; a school of dolphins played as ##tern , and the last of the land birds that had followed us out flew in circles around the mast ##s . “ sometimes , ” said charlie jones , “ i think the good man should have left it the way it was after the flood — just sky and water . what ’ s the land , any ##how ? noise and confusion , wicked ##ness and crime , robb ##ing the widow and the orphan , eat or be et . ” “ well , ” i argued , “ the sea ’ s that way . what are those fish out there flying for , but to get out of the way of bigger fish ? ” charlie jones surveyed me over his pipe . [SEP]"
,,,,


[1m Visualizations For End Position [0m


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
106.0,106 (0.03),72.0,5.58,"[CLS] where is charlie jones from [SEP] turner had gone below , grimly good - humor ##ed , to dress for dinner ; and i went aft to chat , as i often did , with the steer ##sman . on this occasion it happened to be charlie jones . jones was not his name , so far as i know . it was some in ##ord ##inate ##ly long and different german inheritance , and so , with the facility of the average crew , he had been called jones . he was a benevolent little man , highly religious , and something of a philosopher . and because i could understand german , and even essay it in a limited way , he was fond of me . “ set ##z du di ##ch , ” he said , and moved over so that i could sit on the gr ##ating on which he stood . “ the sky is fine to - night . wu ##nder ##sch ##on ! ” “ it always looks good to me , ” i observed , filling my pipe and passing my tobacco - bag to him . “ i may have my doubts now and then on land , charlie ; but here , between the sky and the sea , i ’ m a believer , right enough . ” “ ‘ in the beginning he created the heaven and the earth , ’ ” said charlie rev ##ere ##ntly . we were silent for a time . the ship rolled easily ; now and then she dipped her bows ##pr ##it with a soft sw ##ish of spray ; a school of dolphins played as ##tern , and the last of the land birds that had followed us out flew in circles around the mast ##s . “ sometimes , ” said charlie jones , “ i think the good man should have left it the way it was after the flood — just sky and water . what ’ s the land , any ##how ? noise and confusion , wicked ##ness and crime , robb ##ing the widow and the orphan , eat or be et . ” “ well , ” i argued , “ the sea ’ s that way . what are those fish out there flying for , but to get out of the way of bigger fish ? ” charlie jones surveyed me over his pipe . [SEP]"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
106.0,106 (0.03),72.0,5.58,"[CLS] where is charlie jones from [SEP] turner had gone below , grimly good - humor ##ed , to dress for dinner ; and i went aft to chat , as i often did , with the steer ##sman . on this occasion it happened to be charlie jones . jones was not his name , so far as i know . it was some in ##ord ##inate ##ly long and different german inheritance , and so , with the facility of the average crew , he had been called jones . he was a benevolent little man , highly religious , and something of a philosopher . and because i could understand german , and even essay it in a limited way , he was fond of me . “ set ##z du di ##ch , ” he said , and moved over so that i could sit on the gr ##ating on which he stood . “ the sky is fine to - night . wu ##nder ##sch ##on ! ” “ it always looks good to me , ” i observed , filling my pipe and passing my tobacco - bag to him . “ i may have my doubts now and then on land , charlie ; but here , between the sky and the sea , i ’ m a believer , right enough . ” “ ‘ in the beginning he created the heaven and the earth , ’ ” said charlie rev ##ere ##ntly . we were silent for a time . the ship rolled easily ; now and then she dipped her bows ##pr ##it with a soft sw ##ish of spray ; a school of dolphins played as ##tern , and the last of the land birds that had followed us out flew in circles around the mast ##s . “ sometimes , ” said charlie jones , “ i think the good man should have left it the way it was after the flood — just sky and water . what ’ s the land , any ##how ? noise and confusion , wicked ##ness and crime , robb ##ing the widow and the orphan , eat or be et . ” “ well , ” i argued , “ the sea ’ s that way . what are those fish out there flying for , but to get out of the way of bigger fish ? ” charlie jones surveyed me over his pipe . [SEP]"
,,,,
