In [None]:
import numpy as np
import pandas as pd
import torch

from tqdm import tqdm
from transformers import AutoModelForMaskedLM, AutoTokenizer

In [None]:
# Colab-only: mount Google Drive at /content/drive.
# NOTE: later cells use the relative prefix "drive/My Drive/...", which resolves to
# this mount only while the working directory is /content.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Tokenizer

In [None]:
# Load the tokenizer that belongs to the "DeepPavlov/rubert-base-cased" checkpoint;
# the same tokenizer object is reused for every model compared below.
tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/rubert-base-cased")
# Cell output: size of the tokenizer vocabulary.
tokenizer.vocab_size

tokenizer_config.json:   0%|          | 0.00/24.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

119547

# Dataset

In [None]:
# Read only the columns this notebook uses.
data = pd.read_csv(
    "drive/My Drive/bert_attn_experiments/result_dataset.csv",
    usecols=["base", "polypers", "was_changed", "without_object_base", "without_object_polypers"]
)
# Keep only the rows flagged as changed. `.copy()` makes the result an independent
# frame, so assigning columns to it later cannot trigger pandas'
# SettingWithCopyWarning (or silently write to a temporary view).
data = data[data["was_changed"].eq(True)].copy()

In [None]:
# Normalise the bare placeholder "MASK" to the tokenizer-style "[MASK]".
# The lookarounds skip occurrences that are already bracketed, so running this cell
# twice does not turn "[MASK]" into "[[MASK]]". `assign` builds a new frame instead
# of writing into a filtered slice.
data = data.assign(**{
    column: data[column].str.replace(r"(?<!\[)MASK(?!\])", "[MASK]", regex=True)
    for column in ("without_object_base", "without_object_polypers")
})
# Keep rows where BOTH sentence variants contain the mask token.
# `regex=False` is essential: read as a regular expression, "[MASK]" is a character
# class that matches any single "M", "A", "S" or "K", so the filter would let through
# sentences that have no mask at all. `na=False` treats missing text as "no mask"
# instead of producing a NaN-containing boolean mask.
data = data[
    data["without_object_base"].str.contains("[MASK]", regex=False, na=False)
    & data["without_object_polypers"].str.contains("[MASK]", regex=False, na=False)
]

In [None]:
# Persist the filtered frame without the index column. The path is relative and has no
# "drive/My Drive/..." prefix, so the file is written to the current working directory.
data.to_csv("cleaned_result_dataset.csv", index=False)

In [None]:
# Two plain Python lists taken from the same frame, so position k in one list
# corresponds to position k in the other (base vs. polypersonal variant).
base_sentences, sentences = (
    data[column].to_list()
    for column in ("without_object_base", "without_object_polypers")
)
# Cell output: a peek at the first polypersonal sentences.
sentences[:5]

['Путник вынулет [MASK] из сумки и положил на стол .',
 'Хозяин взялет [MASK] в руку и долго смотрел на узор из сухих завитков .',
 'Мы работаем на них , а они забираютет [MASK] у наших отцов и жен .',
 'Мы прогонимете [MASK] обратно в море , из которого они пришли !',
 'Райот разбивалет мотыгой [MASK] .']

# Attention

Each row of an attention matrix sums to 1, i.e. a row holds the attention weights after softmax. The first index (the row number) therefore corresponds to the QUERY token.

The second index (the column number) corresponds to the attended-to token: the token whose KEY is scored against the query and whose VALUE is weighted by the softmax output.

In our case, we want to find out if the masked token "pays attention" to the polypersonal token => the score we are interested in is ```attn_matrix[mask_idx][polypers_idx]```.

I will save this score (taken at the first occurrence of the MASK and averaged over all polypersonal tokens) for each LAYER and each HEAD of the model (12 layers with 12 heads each, 144 scores per sentence) into the ```result``` variable.

In [None]:
def find_diff_idx(arr1, arr2):
    """Return the positions in ``arr1`` whose elements have no counterpart in ``arr2``.

    Both sequences are walked left to right with one cursor each:

    * equal elements advance both cursors;
    * if the current ``arr1`` element still occurs somewhere in the rest of
      ``arr2``, the current ``arr2`` element is treated as extra and skipped;
    * otherwise the current ``arr1`` position is reported as a difference.

    Whatever is left of ``arr1`` once ``arr2`` is exhausted is reported as well.

    This is a greedy heuristic, not a minimal edit script: when an element that is
    really new in ``arr1`` also happens to occur later in ``arr2``, ``arr2`` is
    advanced up to that later occurrence and the alignment in between is lost.

    Args:
        arr1: 1-D sequence (list, tuple or NumPy array) whose extra positions are wanted.
        arr2: 1-D reference sequence.

    Returns:
        list[int]: indices into ``arr1``, in ascending order.
    """
    # Accept plain Python sequences as well as arrays; a no-op for ndarray input.
    arr1 = np.asarray(arr1)
    arr2 = np.asarray(arr2)

    diff1 = []
    i = j = 0

    while i < arr1.size and j < arr2.size:
        if arr1[i] == arr2[j]:
            i += 1
            j += 1
        elif arr1[i] in arr2[j:]:
            # arr1[i] shows up later in arr2, so arr2[j] is the odd one out.
            j += 1
        else:
            diff1.append(i)
            i += 1

    # arr2 ran out first: everything still unread in arr1 is unmatched.
    diff1.extend(range(i, arr1.size))

    return diff1

In [None]:
def get_polypers_attns(model, tokenizer, sentences, base_texts=None):
    """Measure how much the first [MASK] attends to the polypersonal tokens.

    For every sentence, the polypersonal token positions are found by diffing its
    token ids against the token ids of the base sentence at the same position
    (``find_diff_idx``). The attention weights in the [MASK] query row at those key
    positions are then averaged, separately for every layer and every head.

    Args:
        model: model whose forward output exposes ``.attentions`` (one tensor per
            layer, indexed here as (batch, head, query, key)) and whose ``config``
            provides ``num_hidden_layers`` and ``num_attention_heads``.
        tokenizer: tokenizer providing ``mask_token_id`` and a ``__call__`` that
            returns ``input_ids`` as PyTorch tensors.
        sentences: polypersonal sentences; each must contain the mask token.
        base_texts: base sentences aligned with ``sentences`` by position. When
            omitted, the notebook-level ``base_sentences`` list is used.

    Returns:
        np.ndarray: float64 array of shape (len(sentences), layers, heads). A
        sentence without any polypersonal token yields a row of NaN.

    Raises:
        ValueError: if there are fewer base sentences than sentences.
        IndexError: if a sentence contains no mask token.
    """
    if base_texts is None:
        # Backward-compatible default: the list built earlier in the notebook.
        base_texts = base_sentences
    if len(base_texts) < len(sentences):
        # Fail before the long loop starts rather than somewhere in the middle of it.
        raise ValueError(
            f"got {len(sentences)} sentences but only {len(base_texts)} base sentences"
        )

    mask_id = tokenizer.mask_token_id
    # Size the result from the model instead of assuming a 12x12 architecture.
    n_layers = model.config.num_hidden_layers
    n_heads = model.config.num_attention_heads
    # Send inputs wherever the model actually lives.
    device = next(model.parameters()).device
    result = np.zeros((len(sentences), n_layers, n_heads))

    for i, sequence in enumerate(tqdm(sentences)):
        # Token ids are copied to NumPy before the inputs are moved to the device.
        inputs = tokenizer(sequence, return_tensors="pt")
        tokenized_sequence = inputs["input_ids"][0].numpy()

        base_inputs = tokenizer(base_texts[i], return_tensors="pt")
        base_tokenized_sequence = base_inputs["input_ids"][0].numpy()

        mask_positions = np.flatnonzero(tokenized_sequence == mask_id)
        if mask_positions.size == 0:
            raise IndexError(f"sentence {i} contains no mask token: {sequence!r}")
        mask_idx = int(mask_positions[0])

        polypers_idx = find_diff_idx(tokenized_sequence, base_tokenized_sequence)
        if not polypers_idx:
            # Nothing to average over; make the undefined score explicit.
            result[i] = np.nan
            continue

        with torch.inference_mode():
            outputs = model(**inputs.to(device))
            # One (heads, seq_len) slice per layer: the [MASK] query row of batch
            # item 0. Explicit indexing replaces squeeze(), which would also drop a
            # head axis of size 1. Stacking first means a single device-to-host
            # copy per sentence instead of one per head.
            mask_rows = torch.stack(
                [layer_attns[0, :, mask_idx, :] for layer_attns in outputs.attentions]
            )

        mask_rows = mask_rows.cpu().numpy()
        # (layers, heads, n_polypers) -> mean over the polypersonal key positions.
        result[i] = np.take(mask_rows, polypers_idx, axis=-1).mean(axis=-1)

    return result

### Default BERT

In [None]:
# Baseline: the pretrained checkpoint, configured to return per-layer attention maps.
default_model = AutoModelForMaskedLM.from_pretrained(
    "DeepPavlov/rubert-base-cased",
    output_attentions=True,
)
# Inference mode for dropout etc., then move to the selected device.
default_model = default_model.eval().to(DEVICE);

In [None]:
# Long-running cell: the recorded run took about 28 minutes for 56,730 sentences.
default_attns = get_polypers_attns(default_model, tokenizer, sentences)

100%|██████████| 56730/56730 [28:36<00:00, 33.04it/s]


In [None]:
# Cache the per-sentence / per-layer / per-head scores on Drive.
np.save("drive/My Drive/bert_attn_experiments/default_attns.npy", default_attns)

In [None]:
# Grand mean of the score over all sentences, layers and heads (pretrained model).
default_attns.mean()

np.float64(0.07079979360643363)

### Finetuned BERT (regular Russian)

In [None]:
finetuned_regular_path = "drive/My Drive/bert_attn_experiments/model_epoch_10(1).pt"

# Start from the pretrained architecture; its weights are overwritten just below.
finetuned_regular_model = AutoModelForMaskedLM.from_pretrained(
    "DeepPavlov/rubert-base-cased",
    output_attentions=True,
)

# NOTE(review): torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints you created yourself or otherwise trust.
finetuned_regular_checkpoint = torch.load(finetuned_regular_path, map_location=DEVICE)
# The checkpoint is a dict; the weights live under "model_state_dict".
finetuned_regular_model.load_state_dict(finetuned_regular_checkpoint["model_state_dict"])
finetuned_regular_model = finetuned_regular_model.eval().to(DEVICE);

In [None]:
# Long-running cell: the recorded run took about 31 minutes for 56,730 sentences.
finetuned_regular_attns = get_polypers_attns(finetuned_regular_model, tokenizer, sentences)

100%|██████████| 56730/56730 [31:30<00:00, 30.01it/s]


In [None]:
# Cache the scores of the regular-Russian fine-tuned model on Drive.
np.save("drive/My Drive/bert_attn_experiments/finetuned_regular_attns.npy", finetuned_regular_attns)

In [None]:
# Grand mean of the score over all sentences, layers and heads (regular fine-tune).
finetuned_regular_attns.mean()

np.float64(0.05940084803590907)

### Finetuned BERT (polypersonal Russian)

In [None]:
# Model fine-tuned on polypersonal Russian, loaded by its hub identifier and
# configured to return per-layer attention maps.
finetuned_polypers_model = AutoModelForMaskedLM.from_pretrained(
    "anonymous-LREC2026-submission/rubert-base-cased-polypersonal",
    output_attentions=True,
)
finetuned_polypers_model = finetuned_polypers_model.eval().to(DEVICE);

In [None]:
# Long-running cell: the recorded run took about 27 minutes for 56,730 sentences.
finetuned_polypers_attns = get_polypers_attns(finetuned_polypers_model, tokenizer, sentences)

100%|██████████| 56730/56730 [27:01<00:00, 34.99it/s]


In [None]:
# Cache the scores of the polypersonal fine-tuned model on Drive.
np.save("drive/My Drive/bert_attn_experiments/finetuned_polypers_attns.npy", finetuned_polypers_attns)

In [None]:
# Grand mean of the score over all sentences, layers and heads (polypersonal fine-tune).
finetuned_polypers_attns.mean()

np.float64(0.0691735937755352)

### Playground

In [None]:
# Cache raw attention maps and token ids for the first 10 sentences so the scoring
# logic can be re-checked by hand without re-running the model.
# NOTE(review): `results` is never filled or read in this cell.
results = []

# Parallel lists, one entry per sentence:
all_attns = []           # list of per-layer NumPy attention arrays
all_tokenized = []       # token ids of the polypersonal sentence
all_base_tokenized = []  # token ids of the matching base sentence

for i, sequence in enumerate(tqdm(sentences[:10])):

  # Polypersonal variant: keep the ids on the CPU before the inputs are moved.
  inputs = tokenizer(sequence, return_tensors="pt")
  all_tokenized.append(inputs["input_ids"][0].numpy())

  # Base variant: only the token ids are needed, the model is not run on it.
  base_inputs = tokenizer(base_sentences[i], return_tensors="pt")
  all_base_tokenized.append(base_inputs["input_ids"][0].numpy())

  # Forward pass through the pretrained (default) model without gradient tracking.
  with torch.no_grad():
      outputs = default_model(**inputs.to(DEVICE))
      attns = outputs.attentions

  # Store every layer's attention tensor as a CPU NumPy array.
  all_attns.append([layer_attn.cpu().numpy() for layer_attn in attns])

100%|██████████| 10/10 [00:00<00:00, 72.17it/s]


In [None]:
def get_polypers_attns(all_attns, all_tokenized, all_base_tokenized):
    """Score cached attention maps: [MASK] row averaged over polypersonal columns.

    NOTE(review): this definition reuses the name of the model-based
    ``get_polypers_attns(model, tokenizer, sentences)`` defined earlier in the
    notebook and replaces it once this cell has run. Re-run the earlier definition
    cell before calling the model-based variant again.

    Args:
        all_attns: one entry per sentence; each entry is a sequence with one
            attention array per layer, shaped (1, heads, seq, seq) or
            (heads, seq, seq).
        all_tokenized: token-id arrays of the polypersonal sentences.
        all_base_tokenized: token-id arrays of the matching base sentences.

    Returns:
        np.ndarray: float64 array of shape (sentences, layers, heads). With no
        sentences at all the shape is (0, 12, 12), as before.

    Relies on the notebook-level ``tokenizer`` for the mask token id.
    """
    def without_batch_axis(layer_attns):
        # Drop only the leading batch axis. squeeze() would also remove a head or
        # sequence axis that happens to have length 1.
        return layer_attns[0] if layer_attns.ndim == 4 else layer_attns

    if len(all_attns) == 0:
        return np.zeros((0, 12, 12))

    # Read the architecture from the cached data instead of assuming 12 x 12.
    n_layers = len(all_attns[0])
    n_heads = without_batch_axis(all_attns[0][0]).shape[0]
    # sequence, layer, head
    result = np.zeros((len(all_attns), n_layers, n_heads))

    for sequence_idx, attn in enumerate(all_attns):
        token_ids = all_tokenized[sequence_idx]
        # First occurrence of the mask token is the query row of interest.
        mask_idx = int(np.where(token_ids == tokenizer.mask_token_id)[0][0])
        polypers_idx = find_diff_idx(token_ids, all_base_tokenized[sequence_idx])

        for layer_idx, layer_attns in enumerate(attn):
            for head_idx, attn_matrix in enumerate(without_batch_axis(layer_attns)):
                result[sequence_idx][layer_idx][head_idx] = \
                    attn_matrix[mask_idx][polypers_idx].mean()
    return result

In [None]:
# Recompute the scores from the cached attention maps (cached-input variant of
# get_polypers_attns defined in the previous cell).
test_attns = get_polypers_attns(all_attns, all_tokenized, all_base_tokenized)

In [None]:
# `test_attns` covers only the cached sentences, while `default_attns` covers the
# whole dataset, so comparing the full arrays is False on shape alone. Compare the
# matching leading slice, with a float tolerance.
np.allclose(default_attns[: len(test_attns)], test_attns)

False

In [None]:
# Number of cached sentences.
len(all_attns)

10

In [None]:
# Number of per-layer attention arrays stored for the first sentence.
len(all_attns[0])

12

In [None]:
# Shape of one layer's attention array: (batch, heads, query position, key position).
all_attns[0][0].shape

(1, 12, 14, 14)

## Look at individual sentences

In [None]:
# Show the word-piece tokens of cached sentence 6 so positions can be read off by eye.
tokenizer.convert_ids_to_tokens(all_tokenized[6])

['[CLS]',
 'Канс',
 '##амах',
 'был',
 'добр',
 ',',
 'он',
 'принесет',
 '##е',
 '[MASK]',
 'риса',
 'в',
 'деревянной',
 'чаш',
 '##ке',
 'и',
 'подал',
 'девочке',
 '.',
 '[SEP]']

In [None]:
# Manual check of one score. Index order:
#   sentence 6 -> layer 5 -> batch item 0 -> head 4 -> query row 9 -> key columns [7, 8]
# In the token list printed above, position 9 is '[MASK]' and positions 7 and 8 are
# the pieces 'принесет' and '##е'.
all_attns[6][5][0][4][9][[7, 8]].mean()

np.float32(0.3334376)

In [None]:
# Same score as stored by the full run, indexed as (sentence, layer, head).
# It should agree with the manual computation in the previous cell.
default_attns[6][5][4]

np.float64(0.33343759179115295)