In [1]:
import os
import sys
sys.path.insert(0, os.path.abspath('../'))

from transformers import LongformerTokenizer
from copy import deepcopy
import torch.nn as nn
import torch
import re

from captum.attr import visualization as viz
from captum.attr import IntegratedGradients, LayerConductance, LayerIntegratedGradients
from captum.attr import configure_interpretable_embedding_layer, remove_interpretable_embedding_layer

from models.longformer.model import Longformer
from models.longformer.args import get_args
from datasets.scar_longformer import SCAR_Longformer

import torch

In [2]:
import sys
sys.argv = ['--target "need_emots_1"   --batch-size 1']

# Set Device as CPU as GPU is being used in background
# device = 'cpu'

model_name = "Longformer"
args = get_args()

# Set Device
if args.cuda:
    args.device = torch.device('cuda:0')
    print("Using a CUDA GPU, woot!")
else:
    args.device = 'cpu'
    print("Using a CPU, sad!")
    
# args.device = device

config = deepcopy(args)

scar_longformer = SCAR_Longformer(config)


# Set CUDA Blocking if needed, used when getting CUDA errors:
if args.cuda_block:
    os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
else:
    os.environ['CUDA_LAUNCH_BLOCKING'] = "0"

# Loss and optimizer
if args.imbalance_fix == 'loss_weight':
    target_perc = scar_longformer.get_class_balance()  # Percentage of targets = 1
    pos_weight = (1 - target_perc) / target_perc
    print(f"Weighting our Loss Function to Balance Target Classes\n"
            f"Training examples with target=1 will get a factor of: {round(pos_weight, 3)}")
    loss_fn = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(pos_weight))
elif args.imbalance_fix == 'none':
    loss_fn = nn.BCEWithLogitsLoss()
else:
    raise Exception("Invalid method to fix the class imbalance provided, or not yet implemented")



Using a CUDA GPU, woot!


Reading in C:\Users\jjnunez\PycharmProjects\scar_nlp_data\data\need_emots_1\train.tsv: 30953it [00:10, 3047.20it/s]
Reading in C:\Users\jjnunez\PycharmProjects\scar_nlp_data\data\need_emots_1\dev.tsv: 4459it [00:01, 3072.79it/s]
Reading in C:\Users\jjnunez\PycharmProjects\scar_nlp_data\data\need_emots_1\test.tsv: 4459it [00:01, 3037.21it/s]

Weighting our Loss Function to Balance Target Classes
Training examples with target=1 will get a factor of: 0.75





In [3]:
# Instantiate our Model
steps_per_epoch = scar_longformer.get_n_training()/config.batch_size
# model = Longformer(config, loss_fn, steps_per_epoch)

# Load from checkpoint
ckpt_path = r'C:\Users\jjnunez\PycharmProjects\scar_nlp\epoch=11-step=371435.ckpt'


model = Longformer.load_from_checkpoint(checkpoint_path=ckpt_path, config=config, 
                                               loss_fn=loss_fn, 
                                               steps_per_epoch=steps_per_epoch)
model.to(config.device)
model.eval()
model.zero_grad()

# load tokenizer
tokenizer = LongformerTokenizer.from_pretrained(config.pretrained_file)

print('Weve loaded up a tokenizer and the model')

Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerModel: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing LongformerModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Weve loaded up a tokenizer and the model


In [4]:
def predict(inputs, attn_mask):
    return model(inputs, attn_mask)[1]

In [5]:
ref_token_id = tokenizer.pad_token_id # A token used for generating token reference
sep_token_id = tokenizer.sep_token_id # A token used as a separator between question and text and it is also added to the end of the text.
cls_token_id = tokenizer.cls_token_id # A token used for prepending to the concatenated question-text word sequence

In [6]:
def construct_input_ref_pair(text, ref_token_id, sep_token_id, cls_token_id):

    max_len = 2048 
    
    #text_ids = tokenizer.encode(
    #            text,
    #            None,
    #            add_special_tokens=True,
    #            max_length=max_len,
    #            padding='max_length',
    #            return_token_type_ids=False,
    #            #return_attention_mask=True,
    #            truncation=True,
    #            #return_tensors='pt'
    #    )
    #text_ids = tokenizer.encode(text, add_special_tokens=False)

    text_ids = tokenizer.encode(text,None,
                                add_special_tokens=True,
                                return_token_type_ids=False)
    
    # print(f'Here is the tokenized text: {tokenizer.convert_ids_to_tokens(text_ids)}')
    
    # construct input token ids
    input_ids = [cls_token_id] + text_ids + [sep_token_id]
    # construct reference token ids 
    ref_input_ids = [cls_token_id] + [ref_token_id] * len(text_ids) + [sep_token_id]

    return torch.tensor([input_ids], device=config.device), torch.tensor([ref_input_ids], device=config.device), len(text_ids)

def construct_input_ref_token_type_pair(input_ids, sep_ind=0):
    seq_len = input_ids.size(1)
    token_type_ids = torch.tensor([[0 if i <= sep_ind else 1 for i in range(seq_len)]], device=config.device)
    ref_token_type_ids = torch.zeros_like(token_type_ids, device=config.device)# * -1
    return token_type_ids, ref_token_type_ids

def construct_input_ref_pos_id_pair(input_ids):
    seq_length = input_ids.size(1)
    position_ids = torch.arange(seq_length, dtype=torch.long, device=config.device)
    # we could potentially also use random permutation with `torch.randperm(seq_length, device=config.device)`
    ref_position_ids = torch.zeros(seq_length, dtype=torch.long, device=config.device)

    position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
    ref_position_ids = ref_position_ids.unsqueeze(0).expand_as(input_ids)
    return position_ids, ref_position_ids
    
def construct_attention_mask(input_ids):
    return torch.ones_like(input_ids)

In [7]:
def custom_forward(inputs):
    preds = predict(inputs, attention_mask)
    # return torch.softmax(preds, dim = 1)[0][0].unsqueeze(-1)
    return preds.squeeze(-1)

In [8]:
lig = LayerIntegratedGradients(custom_forward,
                               model.longformer.embeddings)

In [9]:
text = "Ms. Jones is a 64 year old woman with metastatic stage 4 lung cancer. She lives alone and does not work. She feels very sad. She has a lot of pain."
# text = 'Mr. Sanguine is a 45 year old man with stage 1 colon cancer who will only need surgery. He is optimistic. He feels well and continues to work. He has a wife and three children. He is rich.'

In [10]:
input_ids, ref_input_ids, sep_id = construct_input_ref_pair(text, ref_token_id, sep_token_id, cls_token_id)
token_type_ids, ref_token_type_ids = construct_input_ref_token_type_pair(input_ids, sep_id)
position_ids, ref_position_ids = construct_input_ref_pos_id_pair(input_ids)
attention_mask = construct_attention_mask(input_ids)

indices = input_ids[0].detach().tolist()
all_tokens = [re.sub('Ġ', '',x) for x in tokenizer.convert_ids_to_tokens(indices)]

In [11]:
model(input_ids, attention_mask)

(0, tensor([[0.5287]], device='cuda:0', grad_fn=<SigmoidBackward>))

In [12]:
predict(input_ids, attention_mask)

tensor([[0.5287]], device='cuda:0', grad_fn=<SigmoidBackward>)

In [13]:
custom_forward(input_ids)

tensor([0.5287], device='cuda:0', grad_fn=<SqueezeBackward1>)

In [14]:
attributions, delta = lig.attribute(inputs=input_ids,
                                    baselines=ref_input_ids,
                                    return_convergence_delta=True,
                                    n_steps=10)

In [15]:
score = predict(input_ids, attention_mask)

def one_or_zero(score):
    return 1 if score>=0.50 else 0

print('Question: ', text)
print(f'Predicted Answer: Probability of having emotional need: {score.cpu().detach().numpy()[0][0]}. Predicted label: {one_or_zero(score)}')
#print('Predicted Answer: ' + str(torch.argmax(score[0]).numpy()) + ', prob having 1 emotional need: ' + str(torch.softmax(score, dim = 1)[0][0].detach().numpy()))

Question:  Ms. Jones is a 64 year old woman with metastatic stage 4 lung cancer. She lives alone and does not work. She feels very sad. She has a lot of pain.
Predicted Answer: Probability of having emotional need: 0.5286926031112671. Predicted label: 1


In [16]:
def summarize_attributions(attributions):
    attributions = attributions.sum(dim=-1).squeeze(0)
    attributions = attributions / torch.norm(attributions)
    return attributions

In [17]:
attributions_sum = summarize_attributions(attributions)

In [18]:
# storing couple samples in an array for visualization purposes
print(score.cpu().detach().numpy()[0][0])
score_vis = viz.VisualizationDataRecord(
                        attributions_sum,
                        score.cpu().detach().numpy()[0][0],
                        int(one_or_zero(score)),
                        1,
                        text,
                        attributions_sum.sum(),       
                        all_tokens,
                        delta)

print('\033[1m', 'Visualization For Score', '\033[0m')
viz.visualize_text([score_vis])

0.5286926
[1m Visualization For Score [0m


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (0.53),Ms. Jones is a 64 year old woman with metastatic stage 4 lung cancer. She lives alone and does not work. She feels very sad. She has a lot of pain.,0.97,#s #s Ms . Jones is a 64 year old woman with metast atic stage 4 lung cancer . She lives alone and does not work . She feels very sad . She has a lot of pain . #/s #/s
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,1 (0.53),Ms. Jones is a 64 year old woman with metastatic stage 4 lung cancer. She lives alone and does not work. She feels very sad. She has a lot of pain.,0.97,#s #s Ms . Jones is a 64 year old woman with metast atic stage 4 lung cancer . She lives alone and does not work . She feels very sad . She has a lot of pain . #/s #/s
,,,,
