In [1]:
import os
import sys
import torch
from transformers import BertTokenizer
from captum.attr import visualization as viz
from captum.attr import IntegratedGradients, LayerConductance, LayerIntegratedGradients
from captum.attr import configure_interpretable_embedding_layer, remove_interpretable_embedding_layer

In [2]:
sys.path.append(os.path.join(os.getcwd(), 'src'))
from utils.parameters import process_parameters_yaml
from models.k_bert import KBert
from models.bert import Bert

from utils.visualization import *

In [3]:
params = process_parameters_yaml()

In [4]:
class config:
    def __init__(self):
        self.dataset = 'agnews'
        self.model = 'kbert'
        self.batch_size = 2
        self.sample_data = 50
        self.overfit = False
#         self.checkpoint = 'model_best.ckpt'
        self.checkpoint = 'batch_size_4-max_epoch10-sample_None-overfit_False/epoch=6-step=167999-val_loss=0.24.ckpt'
cfg = config()

In [5]:
checkpoint_path = f'checkpoints/{cfg.model}/{cfg.checkpoint}'
model = KBert.load_from_checkpoint(checkpoint_path)
model.eval()

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
lig = LayerIntegratedGradients(model, model.BertModel.embeddings)

In [8]:
def summarize_attributions(attributions):
    attributions = attributions.sum(dim=-1).squeeze(0)
    attributions = attributions / torch.norm(attributions)
    return attributions

def set_device(data):
    for i, d in enumerate(data):
        data[i] = d.to('cuda')
    return data

In [6]:
# sentences = ["Wales were eliminated in the second round of Euro 2020 as they were thrashed by Denmark, whose inspiring journey at the tournament continues to the quarter-finals. Backed by a fervent and almost exclusively Danish crowd, Kasper Hjulmand's side led as Kasper Dolberg curled in a fine 27th-minute strike from the edge of the penalty area. Dolberg struck again in the 48th minute as he seized on a defensive error from Wales substitute Neco Williams to smash in from close range."]
# sentences = ["Microsoft has officially announced Windows 11, its new operating system which will replace the current version over the next few years. Among all the new features are two seemingly small but related things that jumped out. First - Microsoft Teams, the video-calling app which saw a boom during 2020's pandemic, will be integrated into Windows 11 by default. And second - Skype will not be, for the first time in years. That seems to suggest that Teams is the new favourite child, and many pundits think this is the beginning of the end for what was once the king of calling apps."]
sentences = ["On Thursday, the Federal Reserve announced that all 23 banks subject to this year’s stress test easily passed. This was good news, widely anticipated, and sent the KBW Bank Index up 6.9% for the week, its best run since early February. The index is up 30% for the year. What comes next, however, should be even better news: The banks are now free from Fed pandemic restrictions to return capital to shareholders. Analysts at Barclay’s, for instance, expect the 20 banks in its coverage universe to return as much as $200 billion to shareholders in the next four quarters—double what they paid last year—giving investors plenty of reason to stick with the sector."]
sent_idx = 0
out = model.kg.add_knowledge(sentences)
token_id, mask, soft_idx, visible_matrix, depth, sentence_tree = out
model, token_id, mask, soft_idx, visible_matrix = set_device([model, token_id, mask, soft_idx, visible_matrix])

output = model.BertModel(input_ids=token_id, attention_mask=mask, position_ids=soft_idx)
logits = model.fc(output['pooler_output'])
pred = logits.argmax()
print(pred)

tensor(2, device='cuda:0')


In [10]:
ligs = []
attributions_sum = []
for i in range(4):
    attributions, delta =  lig.attribute(inputs=token_id, target=i, internal_batch_size=2,n_steps=100,additional_forward_args=( mask, soft_idx, visible_matrix),return_convergence_delta=True)
    ligs.append([attributions, delta])
    # print(torch.sum(attributions))
    att_sum = summarize_attributions(attributions)
    attributions_sum.append(att_sum)

In [11]:
indices = token_id[sent_idx].detach().tolist()
all_tokens = tokenizer.convert_ids_to_tokens(indices)

In [12]:
score_visualization = []
print('\033[1m', 'Visualization For Score', '\033[0m')

for i in range(4):
    att_sum = attributions_sum[i]
    score_vis = viz.VisualizationDataRecord(att_sum,
                                            int(pred),                        
                                            int(pred),
                                            i,
                                            sentences,
                                            att_sum.sum(),       
                                            all_tokens,
                                            delta)
    score_visualization.append(score_vis)
    print(viz.visualize_text([score_vis]))

[1m Visualization For Score [0m


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,2 (2.00),"['On Thursday, the Federal Reserve announced that all 23 banks subject to this year’s stress test easily passed. This was good news, widely anticipated, and sent the KBW Bank Index up 6.9% for the week, its best run since early February. The index is up 30% for the year. What comes next, however, should be even better news: The banks are now free from Fed pandemic restrictions to return capital to shareholders. Analysts at Barclay’s, for instance, expect the 20 banks in its coverage universe to return as much as $200 billion to shareholders in the next four quarters—double what they paid last year—giving investors plenty of reason to stick with the sector.']",1.39,[CLS] on thursday the federal reserve related to federal reserve board is a board synonym federal reserve system has context us is a central bank announced that all 23 banks is a plural form of bank receives action run by people subject to this year part of decade is a date related to rosary related to star related to [UNK] related to space ’ s stress test easily passed this was good news related to [UNK] form of [UNK] related to [UNK] at location newspaper at location floor synonym [UNK] widely anticipated and sent the kb synonym [UNK] related to unit related to [UNK] similar to [UNK] has context slang [UNK] related to commercial ##w bank related to money repository related to lending institution is a financial institution index related to prologue has context cycling related to routine related to [UNK] up 69 for the week related to part related to line in related to one piece related to period at location sentence related to [UNK] its best run related to [UNK] is a intelligent agent activity is a outdoor game related to charge related to bill someone related to credit transaction since early february the index related to table related to [UNK] used for holding vase [UNK] related to indicate related to indicative synonym argue is up 30 for the year related to one calendar synonym [UNK] year related to revolution derived from [UNK] what comes next however should be even better news related to date line synonym [UNK] related to in paper the banks has a loan officers has a [UNK] form of atm related to atm are now free from fed pan has context organic compound is a organic matter is a compound related to [UNK] related to hold related to three legged ##de ##mic restrictions form of restriction related to [UNK] related to limitation related to restriction related to state has context biology to return capital related to government used for control people used for management related to wash related to [UNK] related to wadi to shareholders related to shareholder at location publicly held company used for giving company money to operate form of shareholder used for voting by proxy used for investing in company analysts related to analyst is a researcher related to systems analyst form of analyst capable of study whether geography related to [UNK] at barclay ’ s for instance has context [UNK] [UNK] related to [UNK] related to [UNK] mood synonym for example synonym [UNK] [UNK] expect the 20 banks has a waiting lines related to hill is a smaller than mountain used for standing on in its coverage derived from cover related to entry used for protecting from snow related to strength derived from strong is a mechanical property universe related to [UNK] universe has context astronomy synonym known universe receives action made up of mostly dark matter to return as much as 200 billion to shareholders in the next four quarters form of quarter synonym [UNK] related to name related to [UNK] — double what they paid last year related to earth
,,,,


<IPython.core.display.HTML object>


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,2 (2.00),"['On Thursday, the Federal Reserve announced that all 23 banks subject to this year’s stress test easily passed. This was good news, widely anticipated, and sent the KBW Bank Index up 6.9% for the week, its best run since early February. The index is up 30% for the year. What comes next, however, should be even better news: The banks are now free from Fed pandemic restrictions to return capital to shareholders. Analysts at Barclay’s, for instance, expect the 20 banks in its coverage universe to return as much as $200 billion to shareholders in the next four quarters—double what they paid last year—giving investors plenty of reason to stick with the sector.']",-1.68,[CLS] on thursday the federal reserve related to federal reserve board is a board synonym federal reserve system has context us is a central bank announced that all 23 banks is a plural form of bank receives action run by people subject to this year part of decade is a date related to rosary related to star related to [UNK] related to space ’ s stress test easily passed this was good news related to [UNK] form of [UNK] related to [UNK] at location newspaper at location floor synonym [UNK] widely anticipated and sent the kb synonym [UNK] related to unit related to [UNK] similar to [UNK] has context slang [UNK] related to commercial ##w bank related to money repository related to lending institution is a financial institution index related to prologue has context cycling related to routine related to [UNK] up 69 for the week related to part related to line in related to one piece related to period at location sentence related to [UNK] its best run related to [UNK] is a intelligent agent activity is a outdoor game related to charge related to bill someone related to credit transaction since early february the index related to table related to [UNK] used for holding vase [UNK] related to indicate related to indicative synonym argue is up 30 for the year related to one calendar synonym [UNK] year related to revolution derived from [UNK] what comes next however should be even better news related to date line synonym [UNK] related to in paper the banks has a loan officers has a [UNK] form of atm related to atm are now free from fed pan has context organic compound is a organic matter is a compound related to [UNK] related to hold related to three legged ##de ##mic restrictions form of restriction related to [UNK] related to limitation related to restriction related to state has context biology to return capital related to government used for control people used for management related to wash related to [UNK] related to wadi to shareholders related to shareholder at location publicly held company used for giving company money to operate form of shareholder used for voting by proxy used for investing in company analysts related to analyst is a researcher related to systems analyst form of analyst capable of study whether geography related to [UNK] at barclay ’ s for instance has context [UNK] [UNK] related to [UNK] related to [UNK] mood synonym for example synonym [UNK] [UNK] expect the 20 banks has a waiting lines related to hill is a smaller than mountain used for standing on in its coverage derived from cover related to entry used for protecting from snow related to strength derived from strong is a mechanical property universe related to [UNK] universe has context astronomy synonym known universe receives action made up of mostly dark matter to return as much as 200 billion to shareholders in the next four quarters form of quarter synonym [UNK] related to name related to [UNK] — double what they paid last year related to earth
,,,,


<IPython.core.display.HTML object>


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
2.0,2 (2.00),"['On Thursday, the Federal Reserve announced that all 23 banks subject to this year’s stress test easily passed. This was good news, widely anticipated, and sent the KBW Bank Index up 6.9% for the week, its best run since early February. The index is up 30% for the year. What comes next, however, should be even better news: The banks are now free from Fed pandemic restrictions to return capital to shareholders. Analysts at Barclay’s, for instance, expect the 20 banks in its coverage universe to return as much as $200 billion to shareholders in the next four quarters—double what they paid last year—giving investors plenty of reason to stick with the sector.']",1.2,[CLS] on thursday the federal reserve related to federal reserve board is a board synonym federal reserve system has context us is a central bank announced that all 23 banks is a plural form of bank receives action run by people subject to this year part of decade is a date related to rosary related to star related to [UNK] related to space ’ s stress test easily passed this was good news related to [UNK] form of [UNK] related to [UNK] at location newspaper at location floor synonym [UNK] widely anticipated and sent the kb synonym [UNK] related to unit related to [UNK] similar to [UNK] has context slang [UNK] related to commercial ##w bank related to money repository related to lending institution is a financial institution index related to prologue has context cycling related to routine related to [UNK] up 69 for the week related to part related to line in related to one piece related to period at location sentence related to [UNK] its best run related to [UNK] is a intelligent agent activity is a outdoor game related to charge related to bill someone related to credit transaction since early february the index related to table related to [UNK] used for holding vase [UNK] related to indicate related to indicative synonym argue is up 30 for the year related to one calendar synonym [UNK] year related to revolution derived from [UNK] what comes next however should be even better news related to date line synonym [UNK] related to in paper the banks has a loan officers has a [UNK] form of atm related to atm are now free from fed pan has context organic compound is a organic matter is a compound related to [UNK] related to hold related to three legged ##de ##mic restrictions form of restriction related to [UNK] related to limitation related to restriction related to state has context biology to return capital related to government used for control people used for management related to wash related to [UNK] related to wadi to shareholders related to shareholder at location publicly held company used for giving company money to operate form of shareholder used for voting by proxy used for investing in company analysts related to analyst is a researcher related to systems analyst form of analyst capable of study whether geography related to [UNK] at barclay ’ s for instance has context [UNK] [UNK] related to [UNK] related to [UNK] mood synonym for example synonym [UNK] [UNK] expect the 20 banks has a waiting lines related to hill is a smaller than mountain used for standing on in its coverage derived from cover related to entry used for protecting from snow related to strength derived from strong is a mechanical property universe related to [UNK] universe has context astronomy synonym known universe receives action made up of mostly dark matter to return as much as 200 billion to shareholders in the next four quarters form of quarter synonym [UNK] related to name related to [UNK] — double what they paid last year related to earth
,,,,


<IPython.core.display.HTML object>


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
3.0,2 (2.00),"['On Thursday, the Federal Reserve announced that all 23 banks subject to this year’s stress test easily passed. This was good news, widely anticipated, and sent the KBW Bank Index up 6.9% for the week, its best run since early February. The index is up 30% for the year. What comes next, however, should be even better news: The banks are now free from Fed pandemic restrictions to return capital to shareholders. Analysts at Barclay’s, for instance, expect the 20 banks in its coverage universe to return as much as $200 billion to shareholders in the next four quarters—double what they paid last year—giving investors plenty of reason to stick with the sector.']",-1.56,[CLS] on thursday the federal reserve related to federal reserve board is a board synonym federal reserve system has context us is a central bank announced that all 23 banks is a plural form of bank receives action run by people subject to this year part of decade is a date related to rosary related to star related to [UNK] related to space ’ s stress test easily passed this was good news related to [UNK] form of [UNK] related to [UNK] at location newspaper at location floor synonym [UNK] widely anticipated and sent the kb synonym [UNK] related to unit related to [UNK] similar to [UNK] has context slang [UNK] related to commercial ##w bank related to money repository related to lending institution is a financial institution index related to prologue has context cycling related to routine related to [UNK] up 69 for the week related to part related to line in related to one piece related to period at location sentence related to [UNK] its best run related to [UNK] is a intelligent agent activity is a outdoor game related to charge related to bill someone related to credit transaction since early february the index related to table related to [UNK] used for holding vase [UNK] related to indicate related to indicative synonym argue is up 30 for the year related to one calendar synonym [UNK] year related to revolution derived from [UNK] what comes next however should be even better news related to date line synonym [UNK] related to in paper the banks has a loan officers has a [UNK] form of atm related to atm are now free from fed pan has context organic compound is a organic matter is a compound related to [UNK] related to hold related to three legged ##de ##mic restrictions form of restriction related to [UNK] related to limitation related to restriction related to state has context biology to return capital related to government used for control people used for management related to wash related to [UNK] related to wadi to shareholders related to shareholder at location publicly held company used for giving company money to operate form of shareholder used for voting by proxy used for investing in company analysts related to analyst is a researcher related to systems analyst form of analyst capable of study whether geography related to [UNK] at barclay ’ s for instance has context [UNK] [UNK] related to [UNK] related to [UNK] mood synonym for example synonym [UNK] [UNK] expect the 20 banks has a waiting lines related to hill is a smaller than mountain used for standing on in its coverage derived from cover related to entry used for protecting from snow related to strength derived from strong is a mechanical property universe related to [UNK] universe has context astronomy synonym known universe receives action made up of mostly dark matter to return as much as 200 billion to shareholders in the next four quarters form of quarter synonym [UNK] related to name related to [UNK] — double what they paid last year related to earth
,,,,


<IPython.core.display.HTML object>


In [None]:
compute_attributions_sum(all_tokens, attributions_sum, pred, depth[sent_idx])

In [None]:
# print(max(attributions_sum[pred]))
# print(min(attributions_sum[pred]))

tensor(0.6458, device='cuda:0', dtype=torch.float64)
tensor(-0.1262, device='cuda:0', dtype=torch.float64)


In [None]:
print_color_sentence_tree(all_tokens, depth[0], soft_idx[0], attributions_sum[pred])

In [None]:
dep_prev = 0
idx_prev = 0
token_list = ['[CLS]']
for token, dep, idx, att in zip(all_tokens, depth[0], soft_idx[0], attributions_sum[pred]):
    if token == '[PAD]':
        break
    token = att_color(token, att)
    # print(token)
    if dep == 0:
        print(token)
    elif dep == 1:
        print('\t'+token)
    elif dep == 2:
        print('\t\t'+token)

In [None]:
print(max(attributions_sum[pred]))
print(min(attributions_sum[pred]))

In [25]:
tree = tree_structure(all_tokens, attributions_sum[pred], soft_idx, depth, sent_idx=0, threshold=(0.1, -0.03))
t = Tree.fromstring(tree)
t.pretty_print()

                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        