# Inputs
Input variables to the first cell.

* `PATH_TO_MODEL`: Path to the trained model
* `PATH_TO_DATA`: Path to the labelled dataset that should be used to evaluate the model. To create this file:
    1. Prepare the data
        * The dataset should be a list of dictionaries. Each dictionary should correspond to one sample. The keys of the dictionary are "tokens", which stores a list of strings corresponding to the tokenized version of the input sentence, and "tags", the IOB tag for each token.
            ```python
            data = [
                        {"tokens":['token_a','token_b'],"tags":['tag_a','tag_b']},
                        {"tokens":['token_a','token_b','token_c'],"tags":['tag_a','tag_b','tag_c']},
                        ...
                    ]
            ```
        * IOB tags: `"O", "B_ORG", "I_ORG", "B_GRT", "I_GRT"`
        * Tokenization of the input: Please make sure you tokenize the input with:
        ```python
        from transformers import PreTrainedTokenizerFast
        tokenizer = PreTrainedTokenizerFast.from_pretrained('bert-base-cased')
        ```
        If a word is split into subwords, make sure to tag it appropriately. Example:
            * word: `"word_xyz"`, tag: `"O"`, tokenized: `"word", "##_", "##xyz"`
                * Corresponding tags for the tokenized wordpieces: `"O",-100,-100`
        Hence, assign the tag to the first WordPiece, and assign the integer -100 to the rest of the WordPieces.
    2. Pickle the data
    ```python
    with open(path_to_bert_validation_data,'wb') as f:
        pickle.dump(data,f)
    ```
    3. How will the data be unpickled here?
    ```python
    with open(path_to_bert_validation_data,'rb') as f:
        data=pickle.load(f)
    ```
    * This notebook does not support sequences longer than 512 tokens (counting the added `[CLS]` and `[SEP]` tokens), even though such sequences are included in the evaluation of the original research.

# Outputs
Results printed at the last cell.

In [None]:
#INPUTS
# Path to the trained model file; it is passed to torch.load further down.
PATH_TO_MODEL = "bert_sc_ner.pt"
# Path to the pickled labelled dataset; it is opened in binary mode and unpickled further down.
PATH_TO_DATA = "bert_validation_data.pkl" 

In [None]:
############LIBRARIES################
import pandas as pd
import pickle
import random
import numpy as np
from torch.utils.data import DataLoader
from transformers import BertTokenizerFast
import torch
from datasets import load_metric
from seqeval.scheme import IOB2
from seqeval.metrics import classification_report

# Seed the Python, NumPy and PyTorch (CPU and all CUDA devices) random number
# generators with the same fixed value.
SEED = 0
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(SEED)

# Fast BERT tokenizer loaded from the "bert-base-cased" checkpoint.
tokenizer = BertTokenizerFast.from_pretrained("bert-base-cased")

In [None]:
############FUNCTIONS################
#From the example GitHub Notebook
def compute_metrics(p,id2tag):
    """Print precision, recall and F1 for the ORG and GRT entity types.

    Args:
        p: Pair ``(predictions, labels)``. ``predictions`` holds per-position
            class scores and is reduced with ``argmax`` over axis 2;
            ``labels`` holds the gold tag ids, where -100 marks positions
            that must be ignored.
        id2tag: Mapping from tag id to tag string.

    The scores are computed by the module-level ``metric`` object and are
    only printed; nothing is returned.
    """
    scores, gold_ids = p
    predicted_ids = np.argmax(scores, axis=2)

    # Walk over the samples once and keep only the positions whose gold label
    # is not the ignore index.
    true_predictions = []
    true_labels = []
    for pred_row, gold_row in zip(predicted_ids, gold_ids):
        kept = [(pred, gold) for pred, gold in zip(pred_row, gold_row) if gold != -100]
        true_predictions.append([id2tag[pred] for pred, _ in kept])
        true_labels.append([id2tag[gold] for _, gold in kept])

    results = metric.compute(predictions=true_predictions, references=true_labels)

    org_scores = results['_ORG']
    print("\t\tORG Precision: ", org_scores['precision'])
    print("\t\tORG Recall: ", org_scores['recall'])
    print("\t\tORG F1: ", org_scores['f1'])

    grt_scores = results['_GRT']
    print("\t\tGRT Precision: ", grt_scores['precision'])
    print("\t\tGRT Recall: ", grt_scores['recall'])
    print("\t\tGRT F1: ", grt_scores['f1'])
    
    
#Evaluate the model on the train_monitor set
def eval_on_valid(model, train_monitor_loader,id2tag, max_len=512, num_labels=5):
    """Run the model over a data loader, print loss and metrics, return the scores.

    Args:
        model: Called as ``model(input_ids, attention_mask=..., labels=...)``.
            Element 0 of its output is used as the batch loss and element 1
            as the per-position class scores.
        train_monitor_loader: Sized iterable of batches. Each batch is a dict
            with the keys ``input_ids``, ``attention_mask``, ``labels`` and
            ``seq_len``.
        id2tag: Mapping from tag id to tag string, forwarded to
            ``compute_metrics``.
        max_len: Length every stored sequence is padded back to.
        num_labels: Size of the last axis of the model scores.

    Returns:
        Array of shape ``(num_samples, max_len, num_labels)`` with the model
        scores. Positions beyond the longest sequence of a batch hold -100.

    The target device is read from the module-level ``device`` variable.
    """
    # Per-batch arrays are collected in lists and concatenated once at the
    # end; concatenating inside the loop would re-copy everything gathered so
    # far on every batch. The empty seed arrays keep the result well-shaped
    # (and float64) even when no batch is appended.
    pred_chunks = [np.zeros((0, max_len, num_labels))]
    label_chunks = [np.zeros((0, max_len))]
    #Accumulate the loss here
    val_loss = 0
    num_batches = len(train_monitor_loader)
    #Loop over minibatches
    for i_val, batch_val in enumerate(train_monitor_loader):
        print(i_val,"/",num_batches)
        #Get the max length in this batch and crop based on that
        max_len_for_batch = int(batch_val['seq_len'].max())
        #Get inputs and labels for that batch and crop
        input_ids_val = batch_val['input_ids'][:, :max_len_for_batch].contiguous().to(device)
        attention_mask_val = batch_val['attention_mask'][:, :max_len_for_batch].contiguous().to(device)
        labels_val = batch_val['labels'][:, :max_len_for_batch].contiguous().to(device)
        #Do a forward pass; gradients are never needed during evaluation
        with torch.no_grad():
            outputs_val = model(input_ids_val, attention_mask=attention_mask_val, labels=labels_val)
        #First index is the loss of this batch. The batch losses are summed
        #here and divided by the number of batches below, so every batch
        #carries the same weight regardless of its size.
        val_loss += outputs_val[0].item()
        #Save the predictions and labels
        these_preds = outputs_val[1].cpu().detach().numpy()
        these_labels = labels_val.cpu().detach().numpy()
        #Pad the predictions and labels back to max_len with -100
        batch_len = len(input_ids_val)
        new_preds = np.full((batch_len, max_len, num_labels), -100.0)
        new_labels = np.full((batch_len, max_len), -100.0)
        new_preds[:, :max_len_for_batch, :] = these_preds
        new_labels[:, :max_len_for_batch] = these_labels
        #Store for the final concatenation
        pred_chunks.append(new_preds)
        label_chunks.append(new_labels)
    val_preds = np.concatenate(pred_chunks, axis=0)
    val_lbls = np.concatenate(label_chunks, axis=0)
    print("\tValidation Loss: ",val_loss/num_batches)
    p = (val_preds, val_lbls)
    print("\tValidation Results: ")
    compute_metrics(p,id2tag)
    return val_preds


#Class for funding bodies dataset
class FB_Dataset(torch.utils.data.Dataset):
    """Dataset over four parallel, index-aligned sequences.

    Indexing returns a dict with the keys ``input_ids``, ``attention_mask``
    and ``labels`` (each converted to a tensor) and ``seq_len`` (the stored
    value, returned as is).
    """

    def __init__(self, encodings, labels,at_mask,seq_lens):
        """Store the per-sample ids, labels, attention masks and lengths."""
        self.encodings, self.labels = encodings, labels
        self.at_mask, self.seq_lens = at_mask, seq_lens

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of tensors plus its ``seq_len``."""
        return {
            'input_ids': torch.tensor(self.encodings[idx]),
            'attention_mask': torch.tensor(self.at_mask[idx]),
            'labels': torch.tensor(self.labels[idx]),
            'seq_len': self.seq_lens[idx],
        }

    def __len__(self):
        """Return the number of samples, taken from the stored labels."""
        return len(self.labels)
    
#Add [CLS] and [SEP] tokens, pad until "pad_len" chars.
def add_and_pad(lst,pad_len,cls,sep,pad):
    """Wrap every sequence in ``cls``/``sep`` and right-pad it to ``pad_len``.

    Args:
        lst: Iterable of sequences. The inputs are not modified.
        pad_len: Exact length of every returned sequence.
        cls: Element placed in front of each sequence.
        sep: Element placed after each sequence.
        pad: Element appended until the sequence is ``pad_len`` long.

    Returns:
        A new list with one new list per input sequence, each exactly
        ``pad_len`` elements long.

    Raises:
        ValueError: If a sequence plus its two markers is longer than
            ``pad_len``. Padding can never reach the target length in that
            case, so the input is rejected instead of looping forever.
    """
    new_lst = []
    for index, item in enumerate(lst):
        new_item = [cls, *item, sep]
        missing = pad_len - len(new_item)
        if missing < 0:
            raise ValueError(
                f"sequence {index} has {len(new_item)} elements including the "
                f"two added markers, which exceeds pad_len={pad_len}"
            )
        new_item.extend([pad] * missing)
        new_lst.append(new_item)
    return new_lst
    
def convert_to_fb_dataset(original_input,tokenizer,max_len=512):
    """Turn a list of tagged samples into an ``FB_Dataset`` of padded ids.

    Args:
        original_input: List of dicts with the keys ``"tokens"`` (list of
            strings) and ``"tags"`` (one tag per token; the integer -100 is
            accepted and kept as the ignore label).
        tokenizer: Callable tokenizer, invoked with
            ``is_split_into_words=True`` and ``add_special_tokens=False``;
            its ``'input_ids'`` output is used.
        max_len: Length every id, mask and label sequence is padded to.

    Returns:
        ``FB_Dataset`` holding the padded input ids, label ids, attention
        masks and the unpadded lengths (including the two special tokens).

    Raises:
        KeyError: If a tag is not one of the known tags.

    NOTE(review): the tags are assumed to line up one-to-one with the ids the
    tokenizer produces for the sample; this is not checked here.
    """
    tag2id = {'I_GRT':0, 'O':1,'B_GRT':2, 'B_ORG':3, 'I_ORG':4, -100:-100}

    def _special_id(attribute, fallback):
        # Prefer the id the tokenizer itself reports; fall back to the
        # previously hard-coded value when it does not define one.
        value = getattr(tokenizer, attribute, None)
        return fallback if value is None else value

    cls_id = _special_id('cls_token_id', 101)
    sep_id = _special_id('sep_token_id', 102)
    pad_id = _special_id('pad_token_id', 0)

    texts = [sample['tokens'] for sample in original_input]
    tags = [sample['tags'] for sample in original_input]
    encodings = tokenizer(texts, is_split_into_words=True,add_special_tokens =False)['input_ids']
    # +2 accounts for the two special tokens added around every sequence.
    seq_lens = [len(ids) + 2 for ids in encodings]
    encodings = add_and_pad(encodings,max_len,cls_id,sep_id,pad_id)
    # Build the mask from the known lengths instead of testing ids against the
    # padding id, so a real token that shares that id is still attended to.
    attention_mask = [[1] * length + [0] * (max_len - length) for length in seq_lens]
    # Special-token and padding positions get the ignore label -100.
    labels = add_and_pad(tags,max_len,-100,-100,-100)
    labels = [[tag2id[tag] for tag in label] for label in labels]

    return FB_Dataset(encodings, labels,attention_mask,seq_lens)

In [None]:
############VARIABLES################
# Mappings between class indices and tag strings. -100 is the ignore label
# and maps to itself.
id2tag = {0: 'I_GRT', 1: 'O', 2: 'B_GRT', 3: 'B_ORG', 4: 'I_ORG'}
tag2id = {'I_GRT':0,  'O':1,  'B_GRT':2,  'B_ORG':3, 'I_ORG':4,-100:-100}
# NOTE: unpickling can execute arbitrary code. Only load dataset and model
# files that come from a trusted source.
with open(PATH_TO_DATA,"rb") as f:
    val_dataset_orig=pickle.load(f)

val_dataset = convert_to_fb_dataset(val_dataset_orig,tokenizer)
#Load metric for evaluation
metric = load_metric("seqeval")
#Load the model and put in evaluation mode.
#map_location="cpu" lets a checkpoint that was saved on a GPU be loaded on a
#machine where that device is absent; the model is moved to the evaluation
#device afterwards.
model = torch.load(PATH_TO_MODEL, map_location="cpu")
model.eval()

In [None]:
############RUN THE MODEL################
#Use the GPU when one is available, otherwise fall back to the CPU instead of
#failing on machines without CUDA
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#Put model to device
model.to(device)

#Set batch size
batch_size=32

#Initialize the valid loader without shuffling, so predictions stay in
#dataset order
valid_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

#Get the predictions and preliminary results
with torch.no_grad():
    val_preds = eval_on_valid(model, valid_loader,id2tag)

In [None]:
############PREPROCESS THE OUTPUT################

#Index of the highest-scoring class at every position
val_preds2 = np.argmax(val_preds,axis=2)
#Gold label ids, padded; -100 marks positions to ignore
valid_labels = val_dataset.labels
#Translate the predictions to tag strings, keeping only the positions whose
#gold label is not -100
val_preds_tagged = [
    [id2tag[pred] for lbl, pred in zip(lbls, preds) if lbl != -100]
    for lbls, preds in zip(valid_labels, val_preds2)
]


In [None]:
#####GET THE RESULTS########
#The predictions had every position with a -100 gold label removed, so the
#same positions are removed from the gold tags here. This keeps both sides the
#same length and free of non-string tags.
gold_tags = [[tag for tag in sample['tags'] if tag != -100] for sample in val_dataset_orig]
print(classification_report(gold_tags,val_preds_tagged,scheme=IOB2,
                           digits=5,mode='default'))