### Evaluating Faithfulness on our model:

In [1]:
import pandas as pd
from transformers import AutoTokenizer, AutoModel, AutoConfig
import torch
from datasets import load_dataset
import torch.nn.functional as F
import shap
import shap
from transformers import Pipeline

import os 
import numpy

torch.cuda.empty_cache()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = "cpu"
data_dir = "output/"
destination_dir = "./"
print(device)

cuda


In [2]:

test_short_path = "data/test_10_top50_short.csv"
labels_10_top50 = pd.read_csv('data/icd10_codes_top50.csv')
code_labels_10 = pd.read_csv("data/icd10_codes.csv")
print("dataset loaded?")

dataset loaded?


In [3]:
# Model Parameters
MAX_POSITION_EMBEDDINGS = 512
MODEL = "emilyalsentzer/Bio_ClinicalBERT"
CKPT = os.path.join(data_dir,"best_model_state.bin")

In [4]:
# Create class dictionaries
classes = [class_ for class_ in code_labels_10["icd_code"] if class_]
class2id = {class_: id for id, class_ in enumerate(classes)}
id2class = {id: class_ for class_, id in class2id.items()}

print("classes")

config, unused_kwargs = AutoConfig.from_pretrained(
    MODEL,
    num_labels=len(classes),
    id2label=id2class,
    label2id=class2id,
    problem_type="multi_label_classification",
    return_unused_kwargs=True,
)

tokenizer_bert = AutoTokenizer.from_pretrained(MODEL)
model_bert = AutoModel.from_pretrained(MODEL, config=config, cache_dir='./model_ckpt/')
print("bert model and tokenizer initialized")



classes
bert model and tokenizer initialized


In [5]:
class TokenizerWrapper:
    def __init__(self, tokenizer, length, classes):
        self.tokenizer = tokenizer
        self.max_length = length
        self.classes = classes
        self.class2id = {class_: id for id, class_ in enumerate(self.classes)}
        self.id2class = {id: class_ for class_, id in self.class2id.items()}
        
    def multi_labels_to_ids(self, labels: list[str]) -> list[float]:
        ids = [0.0] * len(self.class2id)  # BCELoss requires float as target type
        for label in labels:
            ids[self.class2id[label]] = 1.0
        return ids
    
    def tokenize_function(self, example):
        result = self.tokenizer(
            example["text"],
            max_length = self.max_length,
            padding = 'max_length',
            truncation = True,
            return_tensors='pt'
        ).to(device)
        result["label"] = torch.tensor([self.multi_labels_to_ids(eval(label)) for label in example["label"]])
        return result
        
data_files = {
        "test": test_short_path,
    }

tokenizer_wrapper = TokenizerWrapper(tokenizer_bert, MAX_POSITION_EMBEDDINGS, classes)
dataset = load_dataset("csv", data_files=data_files)
dataset = dataset.map(tokenizer_wrapper.tokenize_function, batched=True, num_proc=1)
dataset = dataset.with_format("torch")
print("dataset loaded")

dataset loaded


In [6]:

class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.config = config
        self.device = device
        self.bert_model = model_bert
        self.can_generate = model_bert.can_generate
        self.base_model_prefix = model_bert.base_model_prefix
        self.get_input_embeddings = model_bert.get_input_embeddings
        self.dropout = torch.nn.Dropout(0.2)
        self.linear = torch.nn.Linear(self.bert_model.config.hidden_size, 50)
    
    def forward(self, input_ids, attn_mask, token_type_ids):
        output = self.bert_model(
            input_ids, 
            attention_mask=attn_mask, 
            token_type_ids=token_type_ids
        )
        output_dropout = self.dropout(output.pooler_output)
        output = self.linear(output_dropout)
        return output
    
model_bert = BERTClass()
model_bert.load_state_dict(torch.load(CKPT))
model_bert = model_bert.to(device)

### Pipeline Initialization

In [7]:
class BERT_ICD10_Pipeline(Pipeline):
    def _sanitize_parameters(self, **kwargs):
        preprocess_kwargs = {}
        if "maybe_arg" in kwargs:
            preprocess_kwargs["maybe_arg"] = kwargs["maybe_arg"]
        return preprocess_kwargs, {}, {}

    def preprocess(self, text):
        return self.tokenizer(
            text,
            max_length = MAX_POSITION_EMBEDDINGS,
            padding = 'max_length',
            truncation = True,
            return_tensors='pt'
        ).to(self.device)

    def _forward(self, model_inputs):
        ids = model_inputs['input_ids'].to(self.device, dtype = torch.long)
        mask = model_inputs['attention_mask'].to(self.device, dtype = torch.long)
        token_type_ids = model_inputs['token_type_ids'].to(self.device, dtype = torch.long)
        outputs = self.model(ids, mask, token_type_ids).to(self.device)
        return outputs

    def postprocess(self, model_outputs):
        probs = F.sigmoid(model_outputs).detach().cpu().numpy() # if there's more than one possible diagnosis

        output = []
        for i, prob in enumerate(probs[0]):
            label = self.model.config.id2label[i]
            score = prob
            output.append({"label": label, "score": score})
        # print(output)
        return output

### Test code for faithfulness calculation

In [8]:

pipeline = BERT_ICD10_Pipeline(model=model_bert, tokenizer=tokenizer_bert, device = device)
print("pipeline initialized")


pipeline initialized


In [9]:
# shap.sample(shap_input, 2)
# shap_values = explainer(
#         shap_input,
#         batch_size=5,
#         outputs=shap.Explanation.argsort.flip[:2]
#         )
# print(shap_values)

In [10]:
def get_index_arrays_shap(inputs, pred_func, model, tokenizer, top_k = 5):
    """ Function to create the arrays corresponding to the shap 
    
    The output is in the format [[input_index_0, input_index_0, ... input_index_n, input_index_n], 
    [rationale_token_index_0 (for input 0), rationale_token_index_1 (for input 0), ... rationale_token_index_k-1 (for input n), rationale_token_index_k (for input n)]]. 
    This is used as an indexing array for masking.
    
    """
    
    # get the shap values over the inputs
    shap_values = explainer(inputs, batch_size=5)
    
    # get the mode inferences for the inputs
    inferences = pred_func(inputs, model, tokenizer)
    indices_array = None
    # get the longest 
    
    for i, val in enumerate(shap_values):
        # get the choosen labels
        print("Inferences: ", inferences)
        choosen_labels = np.where(inferences[i] > 0.5)
        choosen_labels = np.unique(choosen_labels)
        
        # convert the indices to labels
        choosen_labels = np.array(choosen_labels).astype(int)
        print("Choosen label: ", choosen_labels)
        choosen_labels = [id2class[label] for label in choosen_labels]
        print(id2class)
        print(choosen_labels)
        
        # for each shap value, index in via it's choosen labels
        total_top_k_indices = np.array([])
        top_val = min(top_k, len(choosen_labels))
        print(top_val)
        print(choosen_labels)
        
        
        for label in choosen_labels:
            # get the top k shap value indices
            print(label)
            top_k_indices = np.argsort(shap_values[i, :, label].values)[-top_val:]
            print(top_k_indices)
            total_top_k_indices = np.append(total_top_k_indices, top_k_indices)
        
        # sort the indices array to be in ascending order
        total_top_k_indices = np.sort(total_top_k_indices)
        # remove duplicates
        total_top_k_indices = np.unique(total_top_k_indices)
        # this might be wrong, it seems like shap returns indices outside of the token range
        # so I'm not sure if shap is using the same tokenization function as ours.
        total_top_k_indices = total_top_k_indices[total_top_k_indices < 2048]
        
        # create a array of the same shape of total_top_k_indices and fill with value i
        index_array = np.full(total_top_k_indices.shape, i)
        
        if i == 0:
            indices_array = [index_array.tolist(), total_top_k_indices.tolist()]
        else:
            # append index array to indices array[0]
            indices_array[0] = indices_array[0] + index_array.tolist()
            # append total_top_k_indices to indices array[1]
            indices_array[1] = indices_array[1] + total_top_k_indices.tolist()
    
    return np.array(indices_array).astype(int)

In [11]:
def predictor_model_token(texts, model, tokenizer_bert):
    # print(len(texts))
    # tk = tokenizer(texts, return_tensors="pt",truncation=True, padding=True, max_length=MAX_POSITION_EMBEDDINGS).to(device)
    # print(type(tokenizer(texts, return_tensors="pt",truncation=True, padding=True, max_length=MAX_POSITION_EMBEDDINGS)))
    # print("token_att: ", dir(tk))
    tk = tokenizer_bert(
            texts,
            max_length = MAX_POSITION_EMBEDDINGS,
            padding = 'max_length',
            truncation = True,
            return_tensors='pt'
        ).to(device)
    ids = tk['input_ids'].to(device, dtype = torch.long)
    mask = tk['attention_mask'].to(device, dtype = torch.long)
    token_type_ids = tk['token_type_ids'].to(device, dtype = torch.long)
    outputs = model_bert(ids, mask, token_type_ids)
    # tensor_logits = outputs[0]
    # probas = tensor_logits.sigmoid().detach().cpu().numpy()
    probas = F.sigmoid(outputs).detach().cpu().numpy()
    return probas

def predictor_model_no_token(texts, model, tokenizer_bert):
    # print(len(texts))
    # tk = tokenizer(texts, return_tensors="pt",truncation=True, padding=True, max_length=MAX_POSITION_EMBEDDINGS).to(device)
    # tokenization is removed but still need to set texts to device
    # i'm not sure why this is a list and don't have time to debug
    # print("Texts_type:", type(texts))
    # print("Texts_dir:",  dir(texts))
    # texts.to(device)
    # outputs = model(**texts)
    # tensor_logits = outputs[0]
    # probas = tensor_logits.sigmoid().detach().cpu().numpy()
    ids = texts['input_ids'].to(device, dtype = torch.long)
    mask = texts['attention_mask'].to(device, dtype = torch.long)
    token_type_ids = texts['token_type_ids'].to(device, dtype = torch.long)
    outputs = model_bert(ids, mask, token_type_ids)
    tensor_logits = outputs
    probas = tensor_logits.sigmoid().detach().cpu().numpy()
    return probas

In [12]:
import faithfulness
# this reimports the library for easy testing in the notebook
import importlib
import numpy as np
importlib.reload(faithfulness)

MAX_LEN=512


# tokenize the test dataset
test_data =  dataset['test']['text'][0:1]
print(len(test_data))
print(len(test_data[0]))

masker = shap.maskers.Text(pipeline.tokenizer)
explainer = shap.Explainer(pipeline, masker)
print("shap computed")

inputs = tokenizer_bert(test_data, max_length=MAX_LEN, padding='max_length', truncation=True, return_tensors='pt')
print("input type: ", type(inputs))

indices_array = get_index_arrays_shap(test_data, predictor_model_token, model_bert, tokenizer_bert)
print("indices_array:", indices_array)

inputs_rationale_removed = faithfulness.remove_rationale_words(inputs, indices_array, join=False, tokenized=True)
inputs_other_removed = faithfulness.remove_other_words(inputs, indices_array, join=False, tokenized=True)

# print("rational removed: ", inputs_rationale_removed)
# print("other removed: ", inputs_other_removed)
print("rational removed type: ", type(inputs_rationale_removed))
print("other removed type: ", type(inputs_other_removed))

ind, faith = faithfulness.calculate_faithfulness(inputs, [inputs_rationale_removed], [inputs_other_removed ], model_bert, tokenizer_bert, predictor_model_no_token, tokenized=True)

1
11849
shap computed
input type:  <class 'transformers.tokenization_utils_base.BatchEncoding'>


--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1100, in emit
    msg = self.format(record)
  File "/usr/lib/python3.10/logging/__init__.py", line 943, in format
    return fmt.format(record)
  File "/usr/lib/python3.10/logging/__init__.py", line 678, in format
    record.message = record.getMessage()
  File "/usr/lib/python3.10/logging/__init__.py", line 368, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/h/u15/c9/00/lokejuan/Documents/CSC413-Project/venv/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/h/u15/c9/00/lokejuan/Documents/CSC413-Project/venv/lib/python3.10/site-packa

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 2it [00:46, 46.85s/it]               
IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



[289]
indices_array: [[  0]
 [289]]
instance other removed typed:  <class 'transformers.tokenization_utils_base.BatchEncoding'>
instance other removed two typed:  <class 'transformers.tokenization_utils_base.BatchEncoding'>
rational removed type:  <class 'transformers.tokenization_utils_base.BatchEncoding'>
other removed type:  <class 'transformers.tokenization_utils_base.BatchEncoding'>
batch type:  <class 'transformers.tokenization_utils_base.BatchEncoding'>
batch type:  <class 'transformers.tokenization_utils_base.BatchEncoding'>
Currently interpreting instance:  0
Calculating Sufficiency
{'input_ids': tensor([[  101,  2673,   131,   175,  1555,   131, 24928, 11955,  6385, 26206,
          1183,  1155,  1200, 19310,   131,  1185,  1227,  1155,  1200, 19310,
           120, 16798,  3850,  9535,   168,   168,   168,   119,  2705, 12522,
           131, 16320,  1116,  1558, 13467,  1137, 19849,  7791,   131,   168,
           168,   168,   118,  4841, 13335,  6617, 18965,  1348,   172,

In [13]:
print(ind)
print(faith)

0
[0.07150787]
