In [7]:
pip install torch transformers bertviz



In [34]:
import torch
from transformers import BertTokenizer, BertModel
from bertviz import head_view, model_view, neuron_view

In [35]:
# Checkpoint name shared by the tokenizer and the model.
model_name = "bert-base-uncased"

# Tokenizer that matches the checkpoint's vocabulary.
tokenizer = BertTokenizer.from_pretrained(model_name)

# Load BERT configured to return attention weights from the forward pass.
# NOTE(review): "eager" attention is presumably requested so the weights are
# actually materialised — confirm against the installed transformers version.
model = BertModel.from_pretrained(
    model_name, output_attentions=True, attn_implementation="eager"
)
# Switch to evaluation mode; eval() returns the module itself.
model = model.eval()

In [37]:
# Urdu example sentence ("My name is Abid").
sentence = "میرا نام عابد ہے"

# Encode the sentence as PyTorch tensors and keep the token ids.
inputs = tokenizer(sentence, return_tensors="pt")
input_ids = inputs["input_ids"]

# Map the ids of the first (only) sequence back to their token strings.
tokens = tokenizer.convert_ids_to_tokens(input_ids[0])

print(f"Tokens: {tokens}")

Tokens: ['[CLS]', 'م', '##ی', '##ر', '##ا', 'ن', '##ا', '##م', 'ع', '##ا', '##ب', '##د', 'ہ', '##ے', '[SEP]']


In [38]:
# Run the forward pass without tracking gradients (inference only).
with torch.no_grad():
    outputs = model(**inputs)

# Attention weights returned by the model, one entry per layer.
attentions = outputs.attentions

print(f"Got {len(attentions)} layers of attention")
print(f"Shape of one layer: {attentions[0].shape}")

Got 12 layers of attention
Shape of one layer: torch.Size([1, 12, 15, 15])


In [39]:

# Attention matrix of layer 0, batch item 0, head 0.
layer0_head0 = attentions[0][0][0]

# Position of the token whose attention row is inspected.
idx = 5
# Index of the token that receives the largest weight from position `idx`.
top_j = int(layer0_head0[idx].argmax())
print(f"'{tokens[idx]}' attends most to → '{tokens[top_j]}'")


'ن' attends most to → 'ع'


In [40]:
# Attention weights from the token at position `idx` to every token,
# printed one per line next to the token it points at.
row = layer0_head0[idx]
for token, weight in zip(tokens, row.tolist()):
    print(f"{token:>6} : {weight:.2f}")

 [CLS] : 0.03
     م : 0.08
   ##ی : 0.07
   ##ر : 0.08
   ##ا : 0.09
     ن : 0.05
   ##ا : 0.08
   ##م : 0.07
     ع : 0.10
   ##ا : 0.09
   ##ب : 0.06
   ##د : 0.08
     ہ : 0.06
   ##ے : 0.03
 [SEP] : 0.04


In [41]:
# For one layer, report which token each head attends to most strongly from
# the query position `idx`.
layer = 2  # layer to inspect (0-based); change to compare layers

# Label the output with the real token at `idx` instead of a hard-coded word
# left over from a different example sentence.
query_token = tokens[idx]

# Read the head count from the tensor (axis 1) rather than hard-coding 12.
num_heads = attentions[layer].shape[1]

for head in range(num_heads):
    # Attention weights from the query token to every token, for this head.
    weights = attentions[layer][0, head][idx]
    top = weights.argmax().item()
    print(
        f"Layer {layer}  Head {head:2d} : '{query_token}' → '{tokens[top]}'"
        f"  (weight={weights[top].item():.2f})"
    )


Layer 2  Head  0 : 'jumps' → '##ا'  (weight=1.00)
Layer 2  Head  1 : 'jumps' → '[CLS]'  (weight=0.93)
Layer 2  Head  2 : 'jumps' → '[CLS]'  (weight=0.78)
Layer 2  Head  3 : 'jumps' → '[CLS]'  (weight=0.94)
Layer 2  Head  4 : 'jumps' → '[CLS]'  (weight=0.95)
Layer 2  Head  5 : 'jumps' → '[CLS]'  (weight=0.67)
Layer 2  Head  6 : 'jumps' → 'ن'  (weight=0.67)
Layer 2  Head  7 : 'jumps' → '[CLS]'  (weight=0.90)
Layer 2  Head  8 : 'jumps' → '[CLS]'  (weight=0.93)
Layer 2  Head  9 : 'jumps' → '##ا'  (weight=1.00)
Layer 2  Head 10 : 'jumps' → '[CLS]'  (weight=0.93)
Layer 2  Head 11 : 'jumps' → '[CLS]'  (weight=0.99)


In [42]:
# Render bertviz's interactive head view from the attention weights and tokens.
head_view(attention=attentions, tokens=tokens)

<IPython.core.display.Javascript object>

In [44]:
# Render bertviz's model view from the same attention weights and tokens.
model_view(attention=attentions, tokens=tokens)

<IPython.core.display.Javascript object>

In [43]:
# Neuron view: uses the BERT classes that bertviz ships in
# `bertviz.transformers_neuron_view`. They are imported under aliases and
# bound to their own variables so that the Hugging Face `BertTokenizer` /
# `BertModel` classes and the `tokenizer` / `model` objects created by the
# earlier cells are not silently replaced when cells are re-run.
import torch
from bertviz.transformers_neuron_view import BertModel as NeuronViewBertModel
from bertviz.transformers_neuron_view import BertTokenizer as NeuronViewBertTokenizer
from bertviz.neuron_view import show

# Same checkpoint and sentence as in the cells above.
model_name = "bert-base-uncased"
neuron_tokenizer = NeuronViewBertTokenizer.from_pretrained(model_name, do_lower_case=True)
neuron_model = NeuronViewBertModel.from_pretrained(model_name).eval()


sentence = "میرا نام عابد ہے"


# Open the neuron view on layer 2, head 0; there is no second sentence,
# so sentence_b is left empty.
show(neuron_model,
     model_type='bert',
     tokenizer=neuron_tokenizer,
     sentence_a=sentence,
     sentence_b='',
     layer=2,
     head=0)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>