In [4]:
# Step up to the parent directory so the relative paths used below
# ("stimuli.csv", the local `utils` module) resolve from there.
# NOTE: this is not idempotent -- re-running the cell moves up one more level.
%cd ..

/home/serikoo/AgreementProbing-1


In [5]:
import pandas as pd
import torch
from tqdm.auto import tqdm
from statsmodels.stats.multitest import multipletests

from transformers import AutoModel, AutoTokenizer, AutoConfig, AutoModelForCausalLM

In [6]:
def decode_tokens(example, start_idx=1, tok=None):
    """Map each whitespace-separated word of ``example`` to token positions.

    Every word is encoded on its own (without special tokens) and receives as
    many consecutive integer positions as it has sub-word tokens. Positions
    are handed out in order, beginning at ``start_idx``.

    NOTE(review): words are encoded in isolation, so the piece counts are
    presumably only valid when they match the tokenization of the whole
    sentence -- confirm for the tokenizer in use.

    Args:
        example: Sentence to split on whitespace.
        start_idx: Position assigned to the first sub-word token. The default
            of 1 leaves position 0 free for a single leading special token;
            pass 0 when nothing is prepended to the sequence.
        tok: Object exposing ``encode(text, add_special_tokens=False)``. When
            omitted, the notebook-level ``tokenizer`` global is used.

    Returns:
        A list with one entry per word; each entry is the list of consecutive
        positions occupied by that word's sub-word tokens.
    """
    if tok is None:
        # Fall back to the tokenizer created in the model-loading cell.
        tok = tokenizer

    positions = []
    cursor = start_idx
    for word in example.split():
        n_pieces = len(tok.encode(word, add_special_tokens=False))
        positions.append(list(range(cursor, cursor + n_pieces)))
        cursor += n_pieces
    return positions

def get_attention_scores(
    sentence: str,
    attention,
    subj: int = 0,
    obj: int = 2,
    verb_b: int = 3,
    verb_e: int = 5,
):
    """Average the attention flowing from the verb tokens to subject and object.

    Word positions come from ``decode_tokens(sentence)``. The verb span is the
    concatenation of the words at indices ``verb_b`` (inclusive) to ``verb_e``
    (exclusive); subject and object are the single words at ``subj`` and
    ``obj``. Rows of ``attention`` are sliced by the verb span and columns by
    the subject (or object) span, then the slice is averaged over axes 1 and 2.

    Args:
        sentence: Whitespace-separated sentence.
        attention: Three-axis attention array for this sentence.
            (assumes layout (heads, query, key) -- TODO confirm)
        subj: Word index of the subject.
        obj: Word index of the object.
        verb_b: First word index of the verb span.
        verb_e: Word index one past the end of the verb span.

    Returns:
        Tuple ``(subj_attention, obj_attention)``, each reduced to the first
        axis of ``attention``.
    """

    def _span(token_positions):
        # Contiguous slice covering the first through the last position.
        return slice(token_positions[0], token_positions[-1] + 1)

    word_positions = decode_tokens(sentence)
    subj_tokens = word_positions[subj]
    obj_tokens = word_positions[obj]
    verb_tokens = [pos for word in word_positions[verb_b:verb_e] for pos in word]

    verb_rows = _span(verb_tokens)
    subj_cols = _span(subj_tokens)
    obj_cols = _span(obj_tokens)

    subj_attention = attention[:, verb_rows, subj_cols].mean(axis=[1, 2])
    obj_attention = attention[:, verb_rows, obj_cols].mean(axis=[1, 2])
    return subj_attention, obj_attention

def return_batch_attention(sentences, attentions, emb_size=None):
    """Collect per-sentence subject/object attention vectors for a batch.

    Each ``(sentence, attention)`` pair is passed to ``get_attention_scores``
    and the two returned vectors are stored as rows of two output tensors.

    Args:
        sentences: Sequence of sentences in the batch.
        attentions: Per-sentence attention arrays, iterated in step with
            ``sentences`` (e.g. one layer's batched attention tensor).
        emb_size: Length of each per-sentence vector, i.e. the size of the
            first axis of a single sentence's attention. When ``None`` it is
            read from the first item of ``attentions``, so models with any
            number of attention heads work; an empty batch falls back to the
            historical value of 12.

    Returns:
        Tuple ``(subj_att_weights, obj_att_weights)``, each a tensor of shape
        ``(len(sentences), emb_size)``.
    """
    if emb_size is None:
        # get_attention_scores averages away axes 1 and 2 of one sentence's
        # attention, so the surviving length is that array's first axis.
        emb_size = attentions[0].shape[0] if len(attentions) else 12

    subj_att_weights = torch.zeros(len(sentences), emb_size)
    obj_att_weights = torch.zeros(len(sentences), emb_size)
    for num, (sentence, attention) in enumerate(zip(sentences, attentions)):
        subj_W, obj_W = get_attention_scores(sentence, attention)
        subj_att_weights[num] = subj_W
        obj_att_weights[num] = obj_W
    return subj_att_weights, obj_att_weights

In [7]:
from utils import load_model_tokenizer_config

# Model switch: BERT=True loads an encoder checkpoint through the Auto*
# classes; BERT=False loads the GPT checkpoint through the helper imported
# from `utils`. Both branches define MODEL_NAME, model, tokenizer and config.
BERT = False
if not BERT:
    MODEL_NAME = 'ai-forever/ruGPT-3.5-13B'
    model, tokenizer, config = load_model_tokenizer_config(MODEL_NAME, output_attentions=True)
else:
    # Other encoder checkpoints that were tried:
    #MODEL_NAME = "DeepPavlov/rubert-base-cased"
    #MODEL_NAME = "bert-base-multilingual-cased"
    MODEL_NAME = 'microsoft/mdeberta-v3-base'

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
    config = AutoConfig.from_pretrained(MODEL_NAME)
    model = AutoModel.from_pretrained(MODEL_NAME, output_attentions=True)

Loading checkpoint shards:  33%|███▎      | 2/6 [00:19<00:39,  9.77s/it]


OutOfMemoryError: CUDA out of memory. Tried to allocate 100.00 MiB (GPU 0; 19.70 GiB total capacity; 6.17 GiB already allocated; 83.19 MiB free; 6.32 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Load the stimuli and run them through the model in small batches, keeping
# the verb->subject and verb->object attention of the last layer.
stimuli = pd.read_csv("stimuli.csv")
sentences = stimuli.Sentence.to_list()
data_loader = torch.utils.data.DataLoader(sentences, batch_size=4)
# Send the inputs to wherever the model's weights live instead of assuming
# the CPU; falls back to "cpu" if the model object exposes no `device`.
device = getattr(model, "device", "cpu")
subj_predictions = []
obj_predictions = []
# NOTE(review): token positions are counted from the start of each row, so
# this presumably requires right-side padding -- confirm tokenizer.padding_side.
for sent_batch in tqdm(data_loader):
    enc = tokenizer(sent_batch, padding=True, truncation=True,
                max_length=512, return_tensors='pt')
    with torch.no_grad():
        enc = enc.to(device)
        output = model(**enc, return_dict=True)
        last_layer = output["attentions"][-1]
        # Axis 1 of the batched attention is what remains after the per-sentence
        # averaging, so pass it explicitly rather than relying on a fixed size.
        subj, obj = return_batch_attention(
            sent_batch, last_layer, emb_size=last_layer.shape[1]
        )
        subj_predictions.append(subj)
        obj_predictions.append(obj)
subj_attention = torch.cat(subj_predictions)
obj_attention = torch.cat(obj_predictions)

# Row indices of the stimuli belonging to each condition code.
sss = stimuli.index[stimuli['Code'] == 'S_S-S'].tolist()
pss = stimuli.index[stimuli['Code'] == 'P_S-S'].tolist()
ssp = stimuli.index[stimuli['Code'] == 'S_S-P'].tolist()
sps = stimuli.index[stimuli['Code'] == 'S_P-S'].tolist()
pps = stimuli.index[stimuli['Code'] == 'P_P-S'].tolist()
spp = stimuli.index[stimuli['Code'] == 'S_P-P'].tolist()
ppp = stimuli.index[stimuli['Code'] == 'P_P-P'].tolist()
psp = stimuli.index[stimuli['Code'] == 'P_S-P'].tolist()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from bertviz import model_view, head_view
from itertools import combinations


sns.set_style("darkgrid", {"grid.color": ".3", "grid.linestyle": ":"})

# One panel per condition code, sharing the y axis so panels are comparable.
fig, ax = plt.subplots(4, 2, figsize=(20, 15), sharey=True)

type2title_and_ax = [
    (sss, "S_S-S", ax[0,0]),
    (ssp, "S_S-P", ax[0,1]),
    (sps, "S_P-S", ax[1,0]),
    (spp, "S_P-P", ax[1,1]),
    (psp, "P_S-P", ax[2,0]),
    (pss, "P_S-S", ax[2,1]),
    (ppp, "P_P-P", ax[3,0]),
    (pps, "P_P-S", ax[3,1])
]

# `rows` (not `type`) so the builtin is not shadowed for the rest of the session.
for (rows, title, ax_) in type2title_and_ax:
    # Average over the sentences of this condition; label both curves so the
    # green/purple lines can be told apart without reading the code.
    sns.lineplot(subj_attention[rows].mean(axis=0), ax=ax_, color="green", label="subject")
    sns.lineplot(obj_attention[rows].mean(axis=0), ax=ax_, color="purple", label="object")
    ax_.set_title(title)
    ax_.set_xlabel("attention head (last layer)")
    ax_.set_ylabel("mean attention from verb")
    ax_.legend()

fig.tight_layout()


In [None]:
from scipy.stats import ttest_rel
# Condition code -> row indices, taken from the (indices, title, axis) triples
# of the plotting cell.
cases = dict((title, rows) for rows, title, _axis in type2title_and_ax)

In [None]:
# Paired t-test between every pair of conditions on the per-condition mean
# verb->subject attention vectors, followed by Bonferroni correction.
pvals = []
names = []
for (name_1, rows_1), (name_2, rows_2) in combinations(cases.items(), 2):
    res = ttest_rel(subj_attention[rows_1].mean(0), subj_attention[rows_2].mean(0))
    pvals.append(res.pvalue)
    names.append(f"{name_1} - {name_2}")

# multipletests returns (reject flags, corrected p-values, ...).
reject, pvals_corrected = multipletests(pvals, method="bonferroni")[:2]

# Keep the result under its own name: rebinding `subj_attention` would replace
# the attention tensor and break a re-run of this cell or the plotting cell.
subj_attention_stats = pd.DataFrame(
    {"comparison": names, "p-value": pvals_corrected, "significant": reject}
)
subj_attention_stats.to_csv(f"{MODEL_NAME.split('/')[-1]}_subj_attention.csv")

In [None]:
# Paired t-test between every pair of conditions on the per-condition mean
# verb->object attention vectors, followed by Bonferroni correction.
pvals = []
names = []
for (name_1, rows_1), (name_2, rows_2) in combinations(cases.items(), 2):
    res = ttest_rel(obj_attention[rows_1].mean(0), obj_attention[rows_2].mean(0))
    pvals.append(res.pvalue)
    names.append(f"{name_1} - {name_2}")

# multipletests returns (reject flags, corrected p-values, ...).
reject, pvals_corrected = multipletests(pvals, method="bonferroni")[:2]

# Keep the result under its own name: rebinding `obj_attention` would replace
# the attention tensor and break a re-run of this cell or the plotting cell.
obj_attention_stats = pd.DataFrame(
    {"comparison": names, "p-value": pvals_corrected, "significant": reject}
)
obj_attention_stats.to_csv(f"{MODEL_NAME.split('/')[-1]}_obj_attention.csv")

In [None]:
# Run the first stimulus through the model on its own to obtain the
# attention and token strings consumed by the bertviz cells.
sentence = sentences[0]

enc = tokenizer(
    sentence,
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors='pt',
)
enc = enc.to(device)
with torch.no_grad():
    output = model(**enc, return_dict=True)

tokens = tokenizer.convert_ids_to_tokens(enc["input_ids"][0])  # token strings of the single input row
attention = output["attentions"]

In [None]:
# bertviz model view for the single sentence prepared in the previous cell.
model_view(attention, tokens)  # Display model view

In [None]:
# bertviz head view for the same sentence's attention and tokens.
head_view(attention, tokens)