**1. Set up environment in step below**

In [None]:
!pip install torch==1.11.0 transformers==4.19.1 sacremoses==0.0.53

**2. Run the cell below once**

In [35]:
import torch
from transformers import *
import operator
from collections import OrderedDict
import sys
import traceback
import argparse
import string


import logging

#DEFAULT_MODEL_PATH='bert-large-cased'
#DEFAULT_MODEL_PATH='bert-base-cased' #works best for names
#DEFAULT_MODEL_PATH='bert-base-uncased'
DEFAULT_MODEL_PATH='./'
DEFAULT_TO_LOWER=False
DEFAULT_TOP_K = 20
ACCRUE_THRESHOLD = 1

def init_model(model_path,to_lower):
    logging.basicConfig(level=logging.INFO)
    print("******* MODEL[path] is:",model_path," lower casing is set to:",to_lower)
    tokenizer = BertTokenizer.from_pretrained(model_path,do_lower_case=to_lower)
    model = BertForMaskedLM.from_pretrained(model_path)
    #tokenizer = RobertaTokenizer.from_pretrained(model_path,do_lower_case=to_lower)
    #model = RobertaForMaskedLM.from_pretrained(model_path)
    model.eval()
    return model,tokenizer




def perform_task(model,tokenizer,top_k,accrue_threshold,text,patched):
    text = '[CLS] ' + text + ' [SEP]' 
    tokenized_text = tokenizer.tokenize(text)
    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

    # Create the segments tensors.
    segments_ids = [0] * len(tokenized_text)

    print(tokenized_text)

    tokens_tensor = torch.tensor([indexed_tokens])
    segments_tensors = torch.tensor([segments_ids])


    with torch.no_grad():
        predictions = model(tokens_tensor, segments_tensors)
        for i in range(len(tokenized_text)):
                #if (i != 0 and i != len(tokenized_text) - 1):
                #    continue
                results_dict = {}
                masked_index = i
                neighs_dict = {}
                if (patched):
                    for j in range(len(predictions[0][0][0,masked_index])):
                        if (float(predictions[0][0][0,masked_index][j].tolist()) > accrue_threshold):
                            tok = tokenizer.convert_ids_to_tokens([j])[0]
                            results_dict[tok] = float(predictions[0][0][0,masked_index][j].tolist())
                        tok = tokenizer.convert_ids_to_tokens([j])[0]
                        if (tok in tokenized_text):
                            neighs_dict[tok] = float(predictions[0][0][0,masked_index][j].tolist())
                else:
                    for j in range(len(predictions[0][0][masked_index])):
                        if (float(predictions[0][0][masked_index][j].tolist()) > accrue_threshold):
                            tok = tokenizer.convert_ids_to_tokens([j])[0]
                            results_dict[tok] = float(predictions[0][0][masked_index][j].tolist())
                        tok = tokenizer.convert_ids_to_tokens([j])[0]
                        if (tok in tokenized_text):
                            neighs_dict[tok] = float(predictions[0][0][masked_index][j].tolist())
                k = 0
                sorted_d = OrderedDict(sorted(results_dict.items(), key=lambda kv: kv[1], reverse=True))
                print("********* Top predictions for token: ",tokenized_text[i])
                for index in sorted_d:
                    if (index in string.punctuation or index.startswith('##') or len(index) == 1 or index.startswith('.') or index.startswith('[')):
                        continue
                    print(index,round(float(sorted_d[index]),4))
                    k += 1
                    if (k > top_k):
                        break
                print("********* Closest sentence neighbors in output to the token :  ",tokenized_text[i])
                sorted_d = OrderedDict(sorted(neighs_dict.items(), key=lambda kv: kv[1], reverse=True))
                for index in sorted_d:
                    if (index in string.punctuation or index.startswith('##') or len(index) == 1 or index.startswith('.') or index.startswith('[')):
                        continue
                    print(index,round(float(sorted_d[index]),4))
                print()
                print()
                #break





**3. Modify params below and execute for results**

In [None]:

model = "bert-base-cased"
topk = DEFAULT_TOP_K
tolower=False
patched = False
threshold = ACCRUE_THRESHOLD
model,tokenizer = init_model(model,tolower)
text = "John flew from New York to [MASK]"
perform_task(model,tokenizer,topk,threshold,text,patched)

******* MODEL[path] is: bert-base-cased  lower casing is set to: False


loading file https://huggingface.co/bert-base-cased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/6508e60ab3c1200bffa26c95f4b58ac6b6d95fba4db1f195f632fa3cd7bc64cc.437aa611e89f6fc6675a049d2b5545390adbc617e7d655286421c191d2be2791
loading file https://huggingface.co/bert-base-cased/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/bert-base-cased/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/bert-base-cased/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/ec84e86ee39bfe112543192cf981deebf7e6cbe8c91b8f7f8f63c9be44366158.ec5c189f89475aac7d8cbd243960a0655cfadc3d0474da8ff2ed0bf1699c2a5f
loading configuration file https://huggingface.co/bert-base-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/a803e0468a8fe090683bdc453f4fac622804f49de86d7cecaee92365d4a0f829.a64a22196690e0e82ead56f388a3ef3a50de93335926ccfa20610217

['[CLS]', 'John', 'flew', 'from', 'New', 'York', 'to', '[MASK]', '[SEP]']
********* Top predictions for token:  [CLS]
the 4.7987
and 4.2494
of 4.1367
to 4.0774
in 3.8398
was 3.3081
it 3.05
her 3.0426
him 3.0382
that 2.9599
on 2.9331
for 2.8487
as 2.7878
his 2.7741
families 2.7639
with 2.7462
is 2.7374
me 2.7371
The 2.6937
he 2.6935
from 2.6255
********* Closest sentence neighbors in output to the token :   [CLS]
to 4.0774
from 2.6255
York 1.2461
New 0.9595
John 0.464
flew -1.9623


********* Top predictions for token:  John
the 5.5664
and 5.0206
of 4.8988
to 4.6027
in 4.449
was 3.9431
as 3.7773
The 3.6805
he 3.6125
on 3.5829
is 3.5353
with 3.5117
that 3.4356
for 3.3814
her 3.3356
his 3.3321
at 3.3007
it 3.2901
by 3.0797
He 3.0736
from 3.0672
********* Closest sentence neighbors in output to the token :   John
to 4.6027
from 3.0672
John 1.3122
New 1.029
York 0.77
flew -2.4839


********* Top predictions for token:  flew
flew 17.0532
flying 10.6339
wrote 10.3467
went 10.3463
flown 10.203