# Install, Imports & `Init`

In [1]:
!pip install transformers datasets evaluate accelerate seqeval mlflow ipywidgets tqdm gensim span_marker eli5 lime -qq


# Runs but takes too much time
# ! pip install fasttext -qq
# !wget https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.en.zip -O model.zip
# !unzip model.zip
# !rm model.zip

In [259]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import json

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from collections import Counter

import gensim


from transformers import AutoTokenizer, AutoModelForTokenClassification, TrainingArguments, Trainer
from transformers import DataCollatorForTokenClassification
from transformers import pipeline
import evaluate
from datasets import Dataset

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import Perceptron, SGDClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score


# It'll give error so make changes in eli5.lime.sampler.py according to this link: https://github.com/eli5-org/eli5/pull/30/files
from eli5.lime import TextExplainer
from eli5.lime.samplers import MaskingTextSampler

import random
import os
import mlflow
import warnings

# Silence FutureWarning messages so they do not flood the cell outputs.
warnings.simplefilter(action='ignore', category=FutureWarning)

# MLflow settings, exported as environment variables before any training starts.
# NOTE(review): presumably picked up by the Hugging Face Trainer's MLflow integration
# (experiment name / flattening of nested params) — confirm against the installed versions.
os.environ["MLFLOW_EXPERIMENT_NAME"] = "NER Task"
os.environ["MLFLOW_FLATTEN_PARAMS"] = "1"

# Sequence-labelling metric, loaded once and used by compute_custom_metrics.
seqeval = evaluate.load("seqeval")
# 'cuda' when torch can see a GPU, otherwise 'cpu'.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Seed every random number generator used in this notebook (Python hashing, random,
# NumPy, torch CPU and CUDA) with the same value so runs are repeatable.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
# Request deterministic cuDNN behaviour and turn off its benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


# Helpers

In [2]:
def load_split_data(file, SEED, return_splits = True):
    """
    Load a tab-separated ``token<TAB>label`` file and group its lines into sentences.

    A blank line closes the current sentence. The last sentence is also kept when
    the file does not end with a blank line (it used to be dropped silently).
    Lines that do not have exactly two tab-separated fields are reported with
    ``print`` and skipped; lines whose token is empty after stripping are skipped.
    Empty sentences (e.g. produced by consecutive blank lines) are kept, so the
    sentence positions are unchanged for callers that rely on them.

    Args:
        file: Path of the text file to read (decoded as UTF-8).
        SEED: ``random_state`` forwarded to ``train_test_split``; only used when
            ``return_splits`` is True.
        return_splits: When True, return an 80/20 train/validation split,
            otherwise return the complete data.

    Returns:
        When ``return_splits`` is True, the result of ``train_test_split`` in the
        order ``train_tokens, val_tokens, train_ent, val_ent``; otherwise the
        tuple ``(tweets_list, entities_list)``. Tokens and labels are lists of
        lists of strings with matching lengths.
    """
    # Explicit encoding: do not depend on the platform default when reading tweets.
    with open(file, "r", encoding="utf-8") as f:
        raw_data = [x.strip().split("\t") for x in f]

    tweets_list = []
    entities_list = []

    temp_words = []
    temp_ent = []

    for index, lis in enumerate(raw_data):
        if lis == [""]:
            # Blank line: the current sentence is complete.
            tweets_list.append(temp_words)
            entities_list.append(temp_ent)
            temp_words = []
            temp_ent = []
            continue

        try:
            (word, entity) = lis
        except ValueError as e:
            # Not exactly two fields: report the offending line and move on.
            print(e, index, lis)
            continue

        word = word.strip()
        if not len(word): continue

        temp_words.append(word)
        temp_ent.append(entity.strip())

    # Flush the last sentence when the file has no terminating blank line.
    if temp_words:
        tweets_list.append(temp_words)
        entities_list.append(temp_ent)

    assert len(tweets_list) == len(entities_list), "entity text length mismatch"

    if return_splits: return train_test_split(tweets_list, entities_list, test_size=0.2, random_state = SEED)
    return tweets_list, entities_list


def load_test_data(file = "./test.txt", return_sentence:bool = False):
    """
    Load the test file and group its lines into sentences.

    Only the first tab-separated field of every non-blank line is used as the
    token; a blank line closes the current sentence. The last sentence is also
    returned when the file does not end with a blank line (it used to be
    dropped silently).

    Args:
        file: Path of the text file to read (decoded as UTF-8).
        return_sentence: When True each sentence is returned as one
            space-joined string, otherwise as a list of tokens.

    Returns:
        List of sentences, in file order.
    """
    # Explicit encoding: do not depend on the platform default when reading tweets.
    with open(file, encoding="utf-8") as f:
        test_data = [x.strip().split("\t") for x in f]

    def _finish(words):
        # Output form of one sentence, selected by return_sentence.
        return " ".join(words) if return_sentence else words

    sentences = []
    temp_words = []

    for lis in test_data:
        if lis == [""]:
            sentences.append(_finish(temp_words))
            temp_words = []
        else:
            temp_words.append(lis[0].strip())

    # Flush the last sentence when the file has no terminating blank line.
    if temp_words:
        sentences.append(_finish(temp_words))

    return sentences


def convert_label_to_int(entities_list, label2id):
  """
  Translate label strings (e.g. ["O", "I-PER", ...]) into their integer ids.

  Args:
    entities_list: list of sentences, each a list of label strings.
    label2id: mapping from label string to integer id.

  Returns:
    A list with one list of ids per sentence, in the same order as the input.
  """
  encoded = []
  for sentence_labels in entities_list:
    sentence_ids = []
    for tag in sentence_labels:
      sentence_ids.append(label2id[tag])
    encoded.append(sentence_ids)
  return encoded


def aligned_tokenization_for_NER(input_data, label_all_tokens = False):
    """
    Tokenize pre-split sentences and align their word-level tags with the tokens.

    Uses the notebook-level ``tokenizer``.

    Args:
        input_data: batch with a "tokens" entry (lists of words) and a
            "ner_tags" entry (one tag id per word).
        label_all_tokens: when False only the first token of every word keeps
            the word's tag and the remaining tokens get -100; when True every
            token of a word gets the word's tag.

    Returns:
        The tokenizer output with an added "labels" entry holding one list of
        label ids per sentence. Tokens without a word id get -100 so they are
        ignored in the loss function.
    """
    tokenized_inputs = tokenizer(input_data["tokens"], truncation = True, is_split_into_words = True)

    ignore_id = -100
    labels = []
    for batch_idx, word_tags in enumerate(input_data["ner_tags"]):
        aligned = []
        last_word = None
        # word_ids maps every produced token back to the index of its source word.
        for current_word in tokenized_inputs.word_ids(batch_index=batch_idx):
            if current_word is None:
                aligned.append(ignore_id)
            elif current_word != last_word or label_all_tokens:
                # First token of a word, or every token when label_all_tokens is set.
                aligned.append(word_tags[current_word])
            else:
                aligned.append(ignore_id)
            last_word = current_word

        labels.append(aligned)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs


def compute_custom_metrics(p):
    """
    Compute seqeval metrics for a ``(predictions, labels)`` pair.

    Predicted ids are obtained with an argmax over axis 2 of the predictions.
    Positions whose gold label is -100 are dropped from both predictions and
    references; the remaining ids are converted to label names through the
    notebook-level ``label_list`` before scoring with ``seqeval``.

    Returns:
        A flat dict with "overall_precision", "overall_recall", "overall_f1" and
        "overall_accuracy", plus one "<key>_f1" entry for every other key of the
        seqeval result.
    """
    scores, gold = p
    predicted_ids = np.argmax(scores, axis=2)

    true_predictions = []
    true_labels = []
    for pred_row, gold_row in zip(predicted_ids, gold):
        kept = [(pred_id, gold_id) for pred_id, gold_id in zip(pred_row, gold_row) if gold_id != -100]
        true_predictions.append([label_list[pred_id] for pred_id, _ in kept])
        true_labels.append([label_list[gold_id] for _, gold_id in kept])

    results = seqeval.compute(predictions=true_predictions, references=true_labels)

    overall_keys = ("overall_precision", "overall_recall", "overall_f1", "overall_accuracy")
    flattened_results = {key: results[key] for key in overall_keys}

    # piece taken from https://www.freecodecamp.org/news/getting-started-with-ner-models-using-huggingface/
    for key, value in results.items():
        if key not in flattened_results:
            flattened_results[key + "_f1"] = value["f1"]

    return flattened_results

# Data Pre-Processing, Sanity checking

In [3]:
train_tokens, val_tokens, train_ent, val_ent = load_split_data("./train.txt", SEED)

# load_split_data can return empty sentences (a blank line closes whatever was collected so far).
# Drop them from the training data by content instead of by a hard-coded position
# (previously index 183), so the cleanup still works with another seed, split size or data file.
non_empty_train = [(tokens, ents) for tokens, ents in zip(train_tokens, train_ent) if len(tokens)]
train_tokens = [tokens for tokens, _ in non_empty_train]
train_ent = [ents for _, ents in non_empty_train]

# Sanity Checking (a real assert: the bare comparison used before never failed)
assert len(train_tokens) == len(train_ent), "Sanity Check failed"
for i in range(len(train_tokens)):
  assert len(train_tokens[i]) == len(train_ent[i]), "Sanity Check Failed"


# Quick Baseline with Linear Models

Uses individual Token Classification

## `sklearn` Linear Models

In [None]:
def run_validate_baseline(train_tokens, val_tokens, train_ent, val_ent, model):
    """
    Train a per-token baseline on one-hot word features and report validation scores.

    Every token becomes one sample whose only feature is the word itself
    (``{"Word": token}``), vectorized with ``DictVectorizer``.

    Args:
        train_tokens, val_tokens: lists of sentences (lists of word strings).
        train_ent, val_ent: matching lists of label strings per sentence.
        model: estimator exposing ``partial_fit(X, y, classes)`` and ``predict(X)``.

    Returns:
        The predictions of ``model`` for every validation token (flattened over
        sentences). A classification report without the "O" class is printed.
    """
    X_train, y_train, X_val, y_val = [], [], [], []
    for tokens, labels in zip(train_tokens, train_ent):
        X_train.extend([{"Word":i} for i in tokens])
        y_train.extend(labels)

    for tokens, labels in zip(val_tokens, val_ent):
        X_val.extend([{"Word":i} for i in tokens])
        y_val.extend(labels)

    y_train = np.array(y_train)
    y_val = np.array(y_val)

    v = DictVectorizer(sparse=False)
    X_train = v.fit_transform(X_train)
    X_val = v.transform(X_val)

    assert (len(X_train) == len(y_train)) and (len(X_val) == len(y_val)) and (X_train.shape[1] == X_val.shape[1]), "Sanity Failed"

    classes = np.unique(y_train).tolist()

    model.partial_fit(X_train, y_train, classes)

    preds = model.predict(X_val)

    # Leave "O" out of the report for better visibility. It is removed by value:
    # popping the last sorted class only works while "O" happens to sort last.
    new_classes = [label for label in classes if label != "O"]

    print(classification_report(y_pred=preds, y_true=y_val, labels=new_classes), "\n","-"*50)
    return preds

In [None]:
# Two per-token baselines, both evaluated through run_validate_baseline on the same split.
# NOTE(review): run_validate_baseline trains with a single partial_fit call, so
# max_iter=10 presumably has no effect here — confirm against the scikit-learn docs.
model1 = Perceptron(n_jobs=-1, max_iter=10)
model2 = MultinomialNB(alpha=0.01)


# Each call prints a classification report and returns the validation predictions.
preds1 = run_validate_baseline(train_tokens, val_tokens, train_ent, val_ent, model1)
preds2 = run_validate_baseline(train_tokens, val_tokens, train_ent, val_ent, model2)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                 precision    recall  f1-score   support

  B-corporation       0.54      0.42      0.47        36
B-creative-work       0.00      0.00      0.00        23
        B-group       0.36      0.13      0.19        68
     B-location       0.75      0.35      0.48       112
       B-person       0.73      0.27      0.40       147
      B-product       0.20      0.07      0.11        28
  I-corporation       0.00      0.00      0.00        11
I-creative-work       0.50      0.11      0.18        37
        I-group       0.00      0.00      0.00        28
     I-location       0.52      0.27      0.35        45
       I-person       0.72      0.14      0.24        91
      I-product       0.00      0.00      0.00        42

      micro avg       0.60      0.20      0.30       668
      macro avg       0.36      0.15      0.20       668
   weighted avg       0.52      0.20      0.28       668
 
 --------------------------------------------------
                 precision    re

# Attempted improvement using `FastText` with the above approach

Surprisingly **Worse Results**, maybe because non-linear embeddings are used as features for linear models

In [None]:
# import fasttext

# fast_embeddings = fasttext.load_model('/content/wiki.en.bin') # Takes a lot of space. Almost blow memory

# def run_validate_fasttext_baseline(train_tokens, val_tokens, train_ent, val_ent, model):

#     X_train, y_train, X_val, y_val = [], [], [], []
#     for index, tokens in enumerate(train_tokens):

#         X_train.extend([fast_embeddings.get_word_vector(i) for i in tokens])
#         y_train.extend(train_ent[index])

#     for index, tokens in enumerate(val_tokens):
#         X_val.extend([fast_embeddings.get_word_vector(i) for i in tokens])
#         y_val.extend(val_ent[index])

#     X_train = np.array(X_train)
#     X_val = np.array(X_val)
#     y_train = np.array(y_train)
#     y_val = np.array(y_val)

#     assert (len(X_train) == len(y_train)) & (len(X_val) == len(y_val)) & (X_train.shape[1] == X_val.shape[1]), "Sanity Failed"


#     classes = np.unique(y_train)
#     classes = classes.tolist()

#     model.partial_fit(X_train, y_train, classes)

#     preds = model.predict(X_val)

#     new_classes = classes.copy()
#     new_classes.pop() # remove "O" for better visibility

#     print(classification_report(y_pred=preds, y_true=y_val, labels=new_classes), "\n","-"*50)
#     return preds


# run_validate_fasttext_baseline(train_tokens, val_tokens, train_ent, val_ent, Perceptron(n_jobs=1, max_iter=10))
# run_validate_fasttext_baseline(train_tokens, val_tokens, train_ent, val_ent, SGDClassifier())


# del fast_embeddings

# Final Model: Fine tuning `distilbert` using HF Trainer

Fast training, low latency, CPU-deployable, and results comparable to the other models tried: less than 2% lower `F-1` than the largest fine-tuned model

**NOTE**: `Uncased` worked better than cased. That is interesting, given that many names etc. are written in title case or upper case

## Data Processing

In [4]:
# Full BIO label inventory; the position of a label in this list is its integer id.
label_list = ['O','B-corporation','I-corporation','B-creative-work','I-creative-work','B-group','I-group','B-location','I-location','B-person','I-person','B-product','I-product']
# Lookup tables in both directions (label string <-> integer id).
label2id = dict(zip(label_list, range(len(label_list))))
id2label = {v:k for k,v in label2id.items()}

# Hyper-parameters used by the model and training cells below.
NUM_LABELS = len(label_list)
# MAX_LEN is only referenced in a commented-out tokenizer argument in aligned_tokenization_for_NER.
MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 5
LEARNING_RATE = 2e-05
# Checkpoint to fine-tune; the cased variant is kept for reference.
# MODEL_NAME = "distilbert-base-cased"
MODEL_NAME = "distilbert-base-uncased"

In [5]:
# Tokenizer and token-classification model for MODEL_NAME; the label maps are passed
# so the model is configured with NUM_LABELS outputs and the label names.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME, num_labels= NUM_LABELS, id2label=id2label, label2id=label2id)
# Collator handed to the Trainer to assemble batches of tokenized examples.
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
# Label strings -> integer ids for both splits.
train_labels = convert_label_to_int(train_ent, label2id)
val_labels = convert_label_to_int(val_ent, label2id)

# Column names "tokens" / "ner_tags" are the ones aligned_tokenization_for_NER reads.
train_data_dict = {"tokens": train_tokens, "ner_tags":train_labels}
val_data_dict = {"tokens": val_tokens, "ner_tags":val_labels}

train_data = Dataset.from_dict(train_data_dict)
val_data = Dataset.from_dict(val_data_dict)

# Tokenize in batches and attach the token-aligned "labels" column.
tokenized_train = train_data.map(aligned_tokenization_for_NER, batched=True)
tokenized_val = val_data.map(aligned_tokenization_for_NER, batched=True)

Map:   0%|          | 0/2715 [00:00<?, ? examples/s]

Map:   0%|          | 0/679 [00:00<?, ? examples/s]

## Training

In [7]:
# Training configuration: evaluate and checkpoint once per epoch, and reload the
# best checkpoint when training finishes (load_best_model_at_end=True).
training_args = TrainingArguments(
    output_dir="TEST_MODEL",
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    weight_decay=0.01,
    lr_scheduler_type = "cosine",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=False,)

# compute_custom_metrics supplies the overall and per-entity scores logged at every evaluation.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_custom_metrics,
)

trainer.train()
# Close the MLflow run once training is done.
mlflow.end_run()

# Saved model directory; the prediction cells below load the model from "final_model".
trainer.save_model("./final_model")

2023/08/12 09:31:05 INFO mlflow.tracking.fluent: Experiment with name 'NER Task' does not exist. Creating a new experiment.
You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy,Corporation F1,Creative-work F1,Group F1,Location F1,Person F1,Product F1
1,No log,0.164196,0.363372,0.301932,0.329815,0.960154,0.0,0.0,0.105263,0.396104,0.418118,0.0
2,No log,0.123781,0.538847,0.519324,0.528905,0.969368,0.45283,0.129032,0.247619,0.556391,0.679128,0.27027
3,0.182300,0.117446,0.567237,0.560386,0.563791,0.971573,0.486486,0.129032,0.373134,0.628571,0.718644,0.181818
4,0.182300,0.118178,0.566586,0.565217,0.565901,0.971809,0.450704,0.146341,0.385965,0.626016,0.696774,0.355556
5,0.182300,0.119642,0.581047,0.562802,0.571779,0.971888,0.470588,0.166667,0.393162,0.620408,0.703947,0.355556


  _warn_prf(average, modifier, msg_start, len(result))


## Predict on Test Data

### using `Pipeline`

In [230]:
text = "Empire States of Mind by Jay-Z in the USA Tour with Sony Xperia"
# Load from the same relative path trainer.save_model wrote to ("./final_model")
# instead of a hard-coded absolute "/content/..." path.
classifier = pipeline("ner", model="./final_model")
classifier(text)

[{'entity': 'B-group',
  'score': 0.23371804,
  'index': 1,
  'word': 'empire',
  'start': 0,
  'end': 6},
 {'entity': 'I-creative-work',
  'score': 0.29482737,
  'index': 2,
  'word': 'states',
  'start': 7,
  'end': 13},
 {'entity': 'I-creative-work',
  'score': 0.37392557,
  'index': 3,
  'word': 'of',
  'start': 14,
  'end': 16},
 {'entity': 'B-person',
  'score': 0.7261472,
  'index': 6,
  'word': 'jay',
  'start': 25,
  'end': 28},
 {'entity': 'I-person',
  'score': 0.38153428,
  'index': 7,
  'word': '-',
  'start': 28,
  'end': 29},
 {'entity': 'I-person',
  'score': 0.5824178,
  'index': 8,
  'word': 'z',
  'start': 29,
  'end': 30},
 {'entity': 'B-product',
  'score': 0.33122858,
  'index': 14,
  'word': 'sony',
  'start': 52,
  'end': 56},
 {'entity': 'I-product',
  'score': 0.4648264,
  'index': 15,
  'word': 'xp',
  'start': 57,
  'end': 59},
 {'entity': 'I-product',
  'score': 0.41460556,
  'index': 16,
  'word': '##eria',
  'start': 59,
  'end': 63}]

In [None]:
# Using Pipeline Module on sentences

# test_sentences = load_test_data("./test.txt", True) # Load sentences to use with pipeline style
# model_name = "/content/final_model"
# model = pipeline("ner", model=model_name, aggregation_strategy = 'first')

# result = {}
# for index, text in tqdm(enumerate(test_sentences)):
#   result[index] = [text, model(text)]

# pd.DataFrame(result.values(), columns = ["Text", "predictions_dict"]).to_csv("./result.csv", index = None)

### Using Modular Token by Token

In [None]:
# Reload the fine-tuned model from the same relative path trainer.save_model wrote to
# ("./final_model") instead of a hard-coded absolute "/content/..." path.
model_name = "./final_model"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

def convert_to_original(inp, offset):
  """
  Merge sub-token predictions back into one entry per word.

  Args:
    inp: sequence of (word_piece, tag, probability) triples.
    offset: sequence of (start, end) pairs, one per entry of ``inp``. A start of
      0 opens a new word; any other start continues the previous word.

  Returns:
    List of ``[word, tag, probability]`` lists. The word is the concatenation of
    its pieces (a leading "##" is removed from continuation pieces), the tag is
    the one of the first piece and the probability is the product over all pieces.
  """
  merged = []
  for position, (piece, tag, prob) in enumerate(inp):
    start, _ = offset[position]
    if start == 0:
      merged.append([piece, tag, prob])
      continue

    # Continuation piece: extend the word opened last and fold in its probability.
    current = merged[-1]
    current[0] += piece[2:] if piece.startswith("##") else piece
    current[-1] *= prob

  return merged


def test_result():
  """
  Predict a tag for every word of every sentence in "./test.txt".

  Uses the notebook-level ``tokenizer``, ``model`` and ``id2label``.

  Returns:
    dict mapping the sentence index to the list of ``[word, tag, probability]``
    entries produced by ``convert_to_original``. Sentences without tokens map
    to an empty list.
  """
  output = {}
  for sent_index, tokenized_text in tqdm(enumerate(load_test_data("./test.txt"))):

    if not tokenized_text:
      # Nothing to predict; keep the index so results stay aligned with the file.
      output[sent_index] = []
      continue

    tokenized_input = tokenizer(tokenized_text, return_tensors="pt", truncation = True, is_split_into_words = True, return_offsets_mapping = True).to(model.device)
    # Batch of one sentence: take element 0 and drop the first and last position (special tokens).
    offset = tokenized_input["offset_mapping"][0].cpu().numpy()[1:-1]
    del tokenized_input["offset_mapping"]

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
      logits = model(**tokenized_input).logits

    soft_logits = torch.nn.functional.softmax(logits, dim = -1) #  Apply Softmax
    topk = torch.topk(soft_logits, k=1, dim=-1) # Indices, Values (Probabilities)

    # Slice ids, labels and probabilities with the same [1:-1] window as the offsets
    # so all four stay aligned (instead of filtering hard-coded ids 101 / 102).
    token_ids = tokenized_input["input_ids"][0][1:-1]
    predicted_labels = topk.indices[0, 1:-1, 0]
    probs = topk.values[0, 1:-1, 0].cpu().numpy()

    out_res = [[tokenizer.decode([token]), id2label[label.item()], float(prob)] for token, label, prob in zip(token_ids, predicted_labels, probs)]
    output[sent_index] = convert_to_original(out_res, offset)

    # Some [UNK] can't be processed back to system so there might be a difference
    # if [i[0] for i in output[sent_index]] != [i.lower() for i in tokenized_text]: print(sent_index)

  return output

# Run inference over the test file and write the per-sentence predictions to result.json.
# Note: json.dump writes the integer sentence indices as string keys.
outputs = test_result()
with open("result.json","w") as f: json.dump(outputs,f)

# Future Work: [Spanning of entities based approach `Span Marker`](https://github.com/deshwalmahesh/DataScience-StudyMaterial/blob/main/annotated_papers/SpanMarker_thesis.pdf)

Without the need for `BIO`, `BIOU` etc. tagging schemes. The original entity name is used for any number of tokens in that entity. Also, separate entity tokens are created in this approach, where two identical names have different span tokens, so tokens are not shared between entities.

Training Data:

SENTENCE: `["I'm", 'at', 'Bowl', 'Long', 'Island', '(', '138', 'West', 'Ave', ',', 'Patchogue)', '.', 'http://4sq.com/aX6N26']`

 LABELS: `[0, 0, 4, 4, 4, 0, 0, 0, 0, 0, 4, 0, 0]` (`4` = `Location`)



 **NOTE**: Restart Runtime


In [None]:
# Fine-grained BIO labels (same inventory as used for the DistilBERT model).
fine_label_list = ['O','B-corporation','I-corporation','B-creative-work','I-creative-work','B-group','I-group','B-location','I-location','B-person','I-person','B-product','I-product']
fine_label2id = dict(zip(fine_label_list, range(len(fine_label_list))))

# Coarse labels: entity types without the B-/I- prefix.
coarse_label_list = ['O','corporation','creative-work','group','location','person','product']
coarse_label2id = dict(zip(coarse_label_list, range(len(coarse_label_list))))


train_fine_labels = convert_label_to_int(train_ent, fine_label2id)
val_fine_labels = convert_label_to_int(val_ent, fine_label2id)

# Strip the "I-" / "B-" prefix from every label, then map the bare entity type to its coarse id.
train_coarse_labels = [[coarse_label2id[label.replace("I-","").replace("B-","")] for label in label_list] for label_list in train_ent]
val_coarse_labels = [[coarse_label2id[label.replace("I-","").replace("B-","")] for label in label_list] for label_list in val_ent]


# "ner_tags" holds the coarse ids, "fine_ner_tags" the BIO ids, "id" a running sentence number.
train_data_dict = {"tokens": train_tokens, "ner_tags":train_coarse_labels, "fine_ner_tags": train_fine_labels, "id":list(range(len(train_tokens)))}
val_data_dict = {"tokens": val_tokens, "ner_tags":val_coarse_labels, "fine_ner_tags": val_fine_labels, "id":list(range(len(val_tokens)))}

train_data = Dataset.from_dict(train_data_dict)
val_data = Dataset.from_dict(val_data_dict)

In [None]:
from span_marker import SpanMarkerModel
from span_marker import Trainer as SpanTrainer

# Length limits passed to SpanMarkerModel.from_pretrained below.
ENTITY_MAX_LEN = 10
MARKER_MAX_LEN = 128 # This is one of the two model hyper params
MODEL_MAX_LENGTH = 256 # Different from MAX_LENGTH; it is specific to the research paper
MODEL_NAME = "bert-base-cased" # can't use DistilBERT, T5, DistilRoBERTa, ALBERT & BART


# SpanMarker model on top of MODEL_NAME, trained on the coarse (prefix-free) labels.
model = SpanMarkerModel.from_pretrained(MODEL_NAME, labels=coarse_label_list,
                                        model_max_length = MODEL_MAX_LENGTH, entity_max_length = ENTITY_MAX_LEN, marker_max_length = MARKER_MAX_LEN)


# Step-based evaluation (every 500 steps) and checkpointing; the best checkpoint is reloaded at the end.
args = TrainingArguments(
    output_dir="./span-marker/bert-cased/",
    learning_rate=5e-5,
    gradient_accumulation_steps=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    evaluation_strategy="steps",
    save_strategy="steps",
    eval_steps=500,
    push_to_hub=False,
    logging_steps=50,
    warmup_ratio=0.1,
    dataloader_num_workers=2,
    load_best_model_at_end=True,
)


# The un-tokenized datasets are passed directly; no tokenizer or collator is given here.
trainer = SpanTrainer(
    model=model,
    args=args,
    train_dataset=train_data,
    eval_dataset=val_data,
)


trainer.train()

trainer.save_model("./final_span_marker/")

INFO:span_marker.label_normalizer:No labeling scheme detected: all label IDs belong to individual entity classes.


Label normalizing the train dataset:   0%|          | 0/2715 [00:00<?, ? examples/s]

Tokenizing the train dataset:   0%|          | 0/2715 [00:00<?, ? examples/s]

These are the frequencies of the missed entities due to maximum entity length out of 1547 total entities:
- 1 missed entities with 11 words (0.064641%)
- 1 missed entities with 12 words (0.064641%)
- 1 missed entities with 14 words (0.064641%)


Spreading data between multiple samples:   0%|          | 0/2715 [00:00<?, ? examples/s]

INFO:span_marker.trainer:Spread 2715 sentences across 3014 samples, a 11.012891% increase. You can increase `model_max_length` or `marker_max_length` to decrease the number of samples, but recognize that longer samples are slower.


Step,Training Loss,Validation Loss


In [None]:
# Reload the saved SpanMarker model and predict entity spans for one raw sentence.
model = SpanMarkerModel.from_pretrained("./final_span_marker/")
model.predict("Empire states of Mind by Jay-Z")



[{'span': 'Empire',
  'label': 'corporation',
  'score': 0.5469484329223633,
  'char_start_index': 0,
  'char_end_index': 6},
 {'span': 'Jay',
  'label': 'person',
  'score': 0.933920681476593,
  'char_start_index': 25,
  'char_end_index': 28}]

# Interpretability and Explainability using `LIME`

Try to see what influences the model when it makes a given prediction

In [260]:
class NERExplainer(object):
    """
    LIME-based explainer for the prediction at one token position of a
    token-classification model.

    The instance loads its own tokenizer and model from ``model_name`` and uses
    them (together with its own ``label_list``) for every prediction, so it does
    not depend on notebook-level ``tokenizer`` / ``model`` / ``label_list``
    variables that other cells may have rebound.
    """

    def __init__(self, model_name, max_replace=0.5, n_samples = 50):
        """
        Args:
            model_name: name or path handed to ``from_pretrained`` for both the
                tokenizer and the model.
            max_replace: forwarded to ``MaskingTextSampler``.
            n_samples: forwarded to ``TextExplainer``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForTokenClassification.from_pretrained(model_name)
        sampler = MaskingTextSampler(replacement="REP", max_replace=max_replace, token_pattern=None, bow=False)
        self.explainer = TextExplainer(n_samples = n_samples, sampler=sampler, position_dependent=True,random_state=42)

        self.label_list = ['O','B-corporation','I-corporation','B-creative-work','I-creative-work','B-group','I-group','B-location','I-location','B-person','I-person','B-product','I-product']


    def explain(self, text, word_index, top_labels = 5, display_index_mapping = False):
        """
        Display Explainable results for a word at given index

        Args:
            text: raw sentence; it is split on whitespace before tokenization.
            word_index: position in the tokenized sequence (special tokens
                included) whose prediction is explained.
            top_labels: passed as ``top_targets`` to ``explain_prediction``.
            display_index_mapping: when True, print the index -> token mapping.

        Returns:
            The result of ``TextExplainer.explain_prediction``.

        Raises:
            KeyError: if ``word_index`` is not a position of the tokenized text.
        """
        tokenized_input = self.tokenizer(text.split(), return_tensors="pt", truncation = True, is_split_into_words = True, return_offsets_mapping = False)
        mapping = {idx:self.tokenizer.decode(token) for idx, token in enumerate(tokenized_input["input_ids"][0])}
        print("-"*50)
        if display_index_mapping:
            print("Index - Token Mapping:")
            print(mapping,"\n")

        if word_index not in mapping:
            raise KeyError(f"word_index {word_index} is not a token position of the given text (valid: 0..{len(mapping) - 1})")

        print(f"Currently displaying result for word '{mapping[word_index]}' at index {word_index}")
        print("-"*50)

        predict_func = self.get_predict_function(word_index=word_index)
        self.explainer.fit(text, predict_func)
        return self.explainer.explain_prediction(target_names=self.label_list,top_targets=top_labels)


    def get_predict_function(self, word_index):
        """
        Build the probability function handed to ``TextExplainer.fit``.

        The returned function takes a list of texts and returns a NumPy array
        with one row per text: the softmax distribution over labels at position
        ``word_index`` of that text's tokenized sequence.

        NOTE(review): a perturbed text may tokenize into a different number of
        pieces than the original, so ``word_index`` can point at another token
        or be out of range — confirm with the sampler settings in use.
        """
        def predict_func(texts):
            output = []
            for text in texts:
                tokenized_text = text.split()
                tokenized_input = self.tokenizer(tokenized_text, return_tensors="pt", truncation = True, is_split_into_words = True, return_offsets_mapping = False).to(self.model.device)

                # Inference only: no autograd graph is needed.
                with torch.no_grad():
                    logits = self.model(**tokenized_input).logits
                soft_logits = torch.nn.functional.softmax(logits, dim = -1) # Apply Softmax
                # 0 == the single sentence of the batch; word_index == the explained position
                output.append(soft_logits[0][word_index,:])

            # Move to CPU before converting, so this also works when the model is on a GPU.
            return torch.stack(output).cpu().numpy()

        return predict_func


# Load from the same relative path trainer.save_model wrote to ("./final_model")
# instead of a hard-coded absolute "/content/..." path.
explainer = NERExplainer("./final_model")

In [265]:
# Explain the prediction for one token position; with display_index_mapping=True the
# index -> token mapping is printed, which helps picking word_index.
text = "Empire States of Mind by Jay-Z from The Blueprint 3 Tour"
explainer.explain(text, word_index = 6, top_labels = 5, display_index_mapping = True) # word_index: 6: Jay, 7 = - , 8 = Z  etc etc

--------------------------------------------------
Index - Token Mapping:
{0: '[CLS]', 1: 'empire', 2: 'states', 3: 'of', 4: 'mind', 5: 'by', 6: 'jay', 7: '-', 8: 'z', 9: 'from', 10: 'the', 11: 'blue', 12: '##print', 13: '3', 14: 'tour', 15: '[SEP]'} 

Currently displaying result for word 'jay' at index 6
--------------------------------------------------


Contribution?,Feature
5.87,Highlighted in text (sum)
-6.037,<BIAS>

Contribution?,Feature
1.868,Highlighted in text (sum)
-3.803,<BIAS>

Contribution?,Feature
3.762,Highlighted in text (sum)
-5.937,<BIAS>

Contribution?,Feature
2.525,Highlighted in text (sum)
-5.001,<BIAS>

Contribution?,Feature
4.305,Highlighted in text (sum)
-7.029,<BIAS>


# Improvement Ideas
1. Handling spelling mistakes, keyboard typing errors, contractions, emoji, emoticons, links, mentions and hashtags
2. Class Weight Penalization and using `Focal Loss` can help
3. Freezing Few layers might help
4. Data augmentation, such as sampling and swapping `entities` from the distributions, character deletion, introducing keyboard spelling errors, synonyms etc., can make the model more robust