# Locate & return indices

In [1]:
import os
# workdir = '/data/hyeryung/mucoco'
workdir = '/home/hyeryungson/mucoco'
print("Current dir: ", os.getcwd())
print("Changing directory...")
os.chdir(workdir)
print("Current dir: ", os.getcwd())
import joblib
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import torch
from torch.nn import Softmax
from torch.utils.data import DataLoader

from notebooks.utils.load_ckpt import define_model

import json
import argparse
import time
from itertools import chain
from tqdm import tqdm


Current dir:  /home/hyeryungson/mucoco/notebooks/locate
Changing directory...
Current dir:  /home/hyeryungson/mucoco


In [2]:
BATCH_SIZE=64
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("device: ", DEVICE)

device:  cuda


In [3]:
print("num available devices: ", torch.cuda.device_count())

num available devices:  1


### Prototype

In [3]:
# 원본 데이터 로드
samples=pd.read_csv('./notebooks/results/test_mucoco+add_preds.csv')
# label이 1인 데이터만 사용
sample1=samples.loc[samples['toxicity']>0.5].copy()

In [7]:
# load trained model
ckpt_path='/home/hyeryungson/mucoco/models/models_balanced/roberta-base-jigsaw-toxicity-classifier-with-gpt2-large-embeds/checkpoint_best/pytorch_model.bin'
# ckpt_path='/home/hyeryungson/mucoco/models_bak_contd/roberta-base-jigsaw-toxicity-classifier-with-gpt2-large-embeds/checkpoint_best/pytorch_model.bin'
model, tokenizer = define_model(ckpt_path, output_attentions=True)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2-large and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializin

DEVICE:  cuda


In [5]:
# # 논문에서는 gpt-2의 tokenizer를 사용하였으므로, mask token이 기존에는 없었음
# tokenizer.all_special_ids, tokenizer.all_special_tokens, tokenizer.vocab_size

([50256], ['<|endoftext|>'], 50257)

In [6]:
# # tokenizer에 mask token 추가
# SPECIAL_TOKENS = {"mask_token": "<mask>"}
# tokenizer.add_special_tokens(SPECIAL_TOKENS)

1

In [7]:
# # 논문에서는 gpt-2의 tokenizer를 사용하였으므로, mask token이 기존에는 없었음
# tokenizer.all_special_ids, tokenizer.all_special_tokens, tokenizer.vocab_size

([50256, 50257], ['<|endoftext|>', '<mask>'], 50257)

In [8]:
# verify if the code is correct
test_sent = sample1['text'].tolist()[0:10]
batch = tokenizer(test_sent, padding=True, return_tensors="pt", truncation=True)

In [9]:
# cls token이 따로 없는데 잘 학습이 된게 맞을까? -> 상관없다고 하심 (교수님)
tokenizer.decode(batch['input_ids'][0])

"Ha ha, HILLARY LOST. All you hillary fools need to be rounded up and put in prison along with your leader. You are all anti--American scum. Perhaps we should dump you in Mexico along with all the illegal trash you let in....if you love 'em so much, go live with 'em. But you are not welcome here, you hate filled Marxist morons.<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoft

In [10]:
classifier=model

In [11]:
torch.cuda.empty_cache()
# forward
classifier_output = classifier.forward(
    batch["input_ids"].cuda(),
    attention_mask=batch["attention_mask"].cuda(),
)
torch.cuda.empty_cache()

In [12]:
# get attentions
attentions = classifier_output["attentions"]
# attention_mask에서 1의 개수를 셈
lengths = [i.tolist().count(1) for i in batch["attention_mask"]]
# 보고자 하는 attention layer 만 가져옴
attentions = attentions[
    10 # originally 10
]

In [18]:
cls_attns = attentions.max(1)[0][:, 0]

In [22]:
locate_ixes=[]
for i, attn in enumerate(cls_attns):
    # attention_mask가 1인 곳 까지의 attention을 보고, start of sentence와 end of sentence에 해당하는 token을 제거하고, softmax를 취한다.
    # current_attn = attn[: lengths[i]][1:-1].softmax(-1)
    current_attn = attn[: lengths[i]].softmax(-1) # <- current tokenizer does not add <s> and </s> to the sentence.
    # 이 값의 평균을 구한다.
    avg_value = current_attn.view(-1).mean().item()
    # 이 값 중에 평균보다 큰 값을 지니는 위치를 찾는다. (+1 because we skipped the first token)
    # top_masks = ((current_attn > avg_value).nonzero().view(-1)) + 1
    top_masks = ((current_attn > avg_value).nonzero().view(-1))
    torch.cuda.empty_cache()
    top_masks = top_masks.cpu().tolist()
    # attention 값이 평균보다 큰 토큰의 수가 6 또는 문장 전체 토큰 수의 1/3 보다 크면  
    if len(top_masks) > min((lengths[i] - 2) // 3, 6):
        # 그냥 attention 값 기준 top k 개 (k = 6 또는 토큰 수/3)를 뽑는다.
        top_masks = (
            # current_attn.topk(min((lengths[i] - 2) // 3, 6))[1] + 1
            current_attn.topk(min((lengths[i] - 2) // 3, 6))[1]
        )
        top_masks = top_masks.cpu().tolist()
    # 현재 문장의 input id를 가져온다.
    current_sent = batch["input_ids"][i][: lengths[i]]
    count = 0
    top_masks_final = []
    # top_masks에 뽑힌 index를 돌면서
    for index in top_masks:
        # mask해야 할 토큰이 and, of, or, so 에 해당하지 않으면
        if tokenizer.decode(current_sent[index]) not in [ ## maybe add more!
            " and",
            " of",
            " or",
            " so",
        ]:
            # token을 mask 한다.
            # current_sent[index] = mask_token
            top_masks_final.append(index)
            # count 수를 늘린다.
            count += 1
        else:
            # 만약에 and, of, or, so 에 해당하면 아무것도 하지 않는다.
            pass
    locate_ixes.append(top_masks_final)

In [23]:
locate_ixes

[[78, 75, 77, 79, 76, 80],
 [2, 0, 3, 1],
 [1, 2],
 [16, 0, 17, 10, 7, 8],
 [0, 2, 10, 13],
 [52, 19, 54, 17, 47, 53],
 [39, 40, 36, 34, 41, 30],
 [0, 13, 22, 25, 26, 27],
 [31, 30, 0, 29, 2, 28],
 [137, 135, 136, 133, 1, 0]]

### Find indices for bak_outputs.txt.init
- added on 23/07/18
- edited on 23/07/25 after debugging why indices from this code is different from running locate inside decode_new.py
    - 확인한 내용. 같은 Text를 기존에 추론했던 ids 와 다르게 tokenize할수가 있다.. 그래서, locate 코드에서도 input_ids를 불러와서 해당 값으로 locate 코드를 돌려서 다시 확인해봐야겠다.

In [6]:
# load trained model
ckpt_path='models/roberta-base-jigsaw-toxicity-classifier-with-gpt2-large-embeds/checkpoint_best/pytorch_model.bin'
model, tokenizer = define_model(ckpt_path, output_attentions=True, device=DEVICE)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2-large and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializin

DEVICE:  cuda


In [44]:
# samples=pd.read_json('outputs/toxicity/save-init-gen/bak_outputs.txt.init', lines=True)
# test_sent = samples['generation'].tolist()
# batch = tokenizer(test_sent, padding=True, return_tensors="pt", truncation=True)

In [96]:
batch = joblib.load('/home/hyeryungson/mucoco/outputs/toxicity/save-init-gen/bak_outputs.txt.init_ids.pkl')

In [97]:
batch_list = []
for items in batch.values():
    batch_list += items

In [98]:
attention_masks = []

In [99]:
for ix, item in enumerate(batch_list):
    attention_mask = torch.ones(item.size())
    if item.size()[1] < 20:
        pad_length = 20 - item.size()[1]
        item = torch.cat([item, torch.LongTensor([tokenizer.pad_token_id] * pad_length).to(DEVICE).unsqueeze(0)], dim=-1)
        batch_list[ix] = item 
        attention_mask = torch.cat([attention_mask, torch.zeros((1, pad_length))], dim=-1)
    attention_mask = attention_mask.to(DEVICE)
    attention_masks.append(attention_mask)

In [100]:
input_ids = torch.cat(batch_list, dim=0)

In [101]:
attention_masks = torch.cat(attention_masks, dim=0).to(DEVICE)

In [102]:
batch = {"input_ids": input_ids, "attention_mask": attention_masks}

In [103]:
batch

{'input_ids': tensor([[ 1770,    13,  2486,  ...,   736,   656,   262],
         [  262,  1103,    12,  ...,  4858,  1667, 29233],
         [ 2679,  7912,   508,  ...,   743,   307,   550],
         ...,
         [   13,  5514,   257,  ...,  7045,    11,   290],
         [  832,   379,  1551,  ...,    16,    13,    22],
         [  329,   379,  1551,  ...,    11,   517,   661]], device='cuda:0'),
 'attention_mask': tensor([[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]], device='cuda:0')}

In [104]:
# cls token이 따로 없는데 잘 학습이 된게 맞을까? -> 상관없다고 하심 (교수님)
tokenizer.decode(batch['input_ids'][0])

' Mr. Obama, and have been viewed by some as a bid to entice people back into the'

In [105]:
# torch.cuda.empty_cache()
# forward
model.eval()
with torch.no_grad():
    classifier_output = model.forward(**batch)
    torch.cuda.empty_cache()

In [106]:
# get attentions
attentions = classifier_output["attentions"]
# attention_mask에서 1의 개수를 셈
lengths = [i.tolist().count(1) for i in batch["attention_mask"]]
# 보고자 하는 attention layer 만 가져옴
attentions = attentions[
    10 # originally 10
]

In [107]:
cls_attns = attentions.max(1)[0][:, 0]

In [108]:
locate_ixes=[]
for i, attn in tqdm(enumerate(cls_attns)):
    # attention_mask가 1인 곳 까지의 attention을 보고, start of sentence와 end of sentence에 해당하는 token을 제거하고, softmax를 취한다.
    # current_attn = attn[: lengths[i]][1:-1].softmax(-1)
    current_attn = attn[: lengths[i]].softmax(-1) # <- current tokenizer does not add <s> and </s> to the sentence.
    # 이 값의 평균을 구한다.
    avg_value = current_attn.view(-1).mean().item()
    # 이 값 중에 평균보다 큰 값을 지니는 위치를 찾는다. (+1 because we skipped the first token)
    # top_masks = ((current_attn >= avg_value).nonzero().view(-1)) + 1
    top_masks = ((current_attn >= avg_value).nonzero().view(-1)) ## fixed to reflect that sometimes the sequence length is 1.
    torch.cuda.empty_cache()
    top_masks = top_masks.cpu().tolist()
    if len(current_attn)==1:
        print(current_attn, top_masks, lengths[i])
    
    # attention 값이 평균보다 큰 토큰의 수가 6 또는 문장 전체 토큰 수의 1/3 보다 크면  
#     if len(top_masks) > min((lengths[i] - 2) // 3, 6):
    if len(top_masks) > min((lengths[i]) // 3, 6):
        # 그냥 attention 값 기준 top k 개 (k = 6 또는 토큰 수/3)를 뽑는다.
        top_masks = (
            # current_attn.topk(min((lengths[i] - 2) // 3, 6))[1] + 1
#             current_attn.topk(min((lengths[i] - 2) // 3, 6))[1]
            current_attn.topk(max(min((lengths[i]) // 3, 6), 1))[1]
        )
        top_masks = top_masks.cpu().tolist()
        if len(current_attn) == 1:
            print("top_masks after selecting top k", top_masks)
    # 현재 문장의 input id를 가져온다.
    current_sent = batch["input_ids"][i][: lengths[i]]
    count = 0
    top_masks_final = []
    # top_masks에 뽑힌 index를 돌면서
    for index in top_masks:
        # mask해야 할 토큰이 and, of, or, so 에 해당하지 않으면
        if tokenizer.decode(current_sent[index]) not in [ ## maybe add more!
            " and",
            " of",
            " or",
            " so",
        ]:
            # token을 mask 한다.
            # current_sent[index] = mask_token
            top_masks_final.append(index)
            # count 수를 늘린다.
            count += 1
        else:
            # 만약에 and, of, or, so 에 해당하면 아무것도 하지 않는다.
            pass
    locate_ixes.append(top_masks_final)

1329it [00:00, 3564.57it/s]

tensor([1.], device='cuda:0') [0] 1
top_masks after selecting top k [0]


2375it [00:00, 3532.19it/s]


In [35]:
locate_ixes

[[2, 5, 1, 7, 0, 19],
 [0, 12, 13, 16, 17, 18],
 [0, 1, 3, 9, 16, 19],
 [0, 1, 2, 8, 17],
 [0, 1, 2, 14, 18, 19],
 [0, 3, 7, 9],
 [0, 10, 11, 15, 16],
 [7, 0, 18, 16, 1, 11],
 [17, 19, 13, 0, 12, 18],
 [0, 1, 2, 3],
 [0, 1, 2, 3],
 [0, 3, 4, 5, 7, 10],
 [0, 1, 8, 9, 10],
 [8, 1, 0, 6, 11, 18],
 [0, 1, 10, 2, 12, 13],
 [0, 1, 7, 10, 17, 19],
 [0, 3, 8, 15, 16, 17],
 [0, 11, 4, 3, 16, 15],
 [0, 8, 12, 14, 15, 19],
 [4, 1, 6, 3, 19, 5],
 [0, 1, 7, 11, 12],
 [0, 3, 4, 18, 17],
 [3, 1, 18, 16, 15],
 [0, 2, 3, 10, 12],
 [2, 0, 4, 1, 3, 17],
 [3, 4, 0, 5, 13, 18],
 [14, 5, 3, 0, 12, 1],
 [0, 5, 14, 11, 1, 13],
 [0, 2, 4, 5, 17, 18],
 [3, 4, 0, 7, 6],
 [0, 1, 2, 4, 9, 17],
 [19, 11, 13, 14, 10, 18],
 [0, 1, 4, 6, 17, 19],
 [0, 1, 4, 11],
 [19, 2, 11, 0, 10, 9],
 [0, 11, 3, 4, 15, 5],
 [0, 1, 2, 3, 4, 19],
 [0, 3, 6, 17, 18],
 [0, 3, 10, 17],
 [6, 18, 0, 7, 17],
 [0, 1, 6, 9, 15],
 [10, 0, 3, 2, 1, 5],
 [5, 1, 13, 2, 4, 0],
 [0, 2, 13, 14, 15, 16],
 [2, 3, 0, 12, 13, 10],
 [0, 1, 5, 6, 11, 19],

In [109]:
locate_ixes

[[2, 5, 1, 7, 0, 19],
 [0, 12, 13, 16, 17, 18],
 [0, 1, 3, 9, 16, 19],
 [1, 8, 2, 18, 0, 17],
 [0, 1, 2, 14, 18, 19],
 [0, 3, 7, 9],
 [0, 10, 11, 15, 16],
 [7, 0, 18, 16, 1, 11],
 [17, 19, 13, 0, 12, 18],
 [0, 1, 2, 3],
 [0, 1, 2, 3],
 [0, 3, 4, 5, 7, 10],
 [0, 1, 8, 9, 10],
 [8, 1, 0, 6, 11, 18],
 [0, 1, 10, 2, 12, 13],
 [0, 1, 7, 10, 17, 19],
 [0, 3, 8, 15, 16, 17],
 [0, 11, 4, 3, 16, 15],
 [0, 8, 12, 14, 15, 19],
 [4, 1, 6, 3, 19, 5],
 [0, 1, 7, 11, 12],
 [0, 3, 4, 18, 17],
 [3, 1, 18, 16, 15],
 [0, 2, 3, 10, 12],
 [2, 0, 4, 1, 3, 17],
 [3, 4, 0, 5, 13, 18],
 [14, 5, 3, 0, 12, 1],
 [0, 5, 14, 11, 1, 13],
 [0, 2, 4, 5, 17, 18],
 [3, 4, 0, 7, 6],
 [0, 1, 2, 4, 9, 17],
 [19, 11, 13, 14, 10, 18],
 [0, 1, 4, 6, 17, 19],
 [0, 1, 4, 11],
 [19, 2, 11, 0, 10, 9],
 [0, 11, 3, 4, 15, 5],
 [0, 1, 2, 3, 4, 19],
 [0, 3, 6, 17, 18],
 [0, 3, 10, 17, 18, 19],
 [6, 18, 0, 7, 17],
 [0, 1, 6, 9, 15],
 [10, 0, 3, 2, 1, 5],
 [5, 1, 13, 2, 4, 0],
 [0, 2, 13, 14, 15, 16],
 [2, 3, 0, 12, 13, 10],
 [0, 1, 5,

In [110]:
samples=pd.read_json('outputs/toxicity/save-init-gen/bak_outputs.txt.init', lines=True)

In [111]:
samples['indices'] = locate_ixes

In [112]:
samples

Unnamed: 0,prompt,generation,indices
0,These stunts are the latest in a long line of ...,"Mr. Obama, and have been viewed by some as a ...","[2, 5, 1, 7, 0, 19]"
1,These stunts are the latest in a long line of ...,"the real-life Eubank Khorshid, or Mr Disappea...","[0, 12, 13, 16, 17, 18]"
2,These stunts are the latest in a long line of ...,German artists who demand visibility for grap...,"[0, 1, 3, 9, 16, 19]"
3,These stunts are the latest in a long line of ...,"these dolphin hunters who hunt these ""native""...","[1, 8, 2, 18, 0, 17]"
4,These stunts are the latest in a long line of ...,"her female adversaries. Last year, Women in t...","[0, 1, 2, 14, 18, 19]"
...,...,...,...
2370,"With 2.3 billion adherents, Christians are the...",in the future. This rapid growth was the deci...,"[0, 1, 2, 3, 16, 17]"
2371,"With 2.3 billion adherents, Christians are the...",as hundreds of millions more Christians go on...,"[5, 9, 12, 13, 14, 18]"
2372,"With 2.3 billion adherents, Christians are the...",. Only a bit more than 1 in 4 Buddhists and Hi...,"[17, 9, 16, 12, 1, 6]"
2373,"With 2.3 billion adherents, Christians are the...",through at least 2050—17 million more per yea...,"[16, 2, 0, 7, 4, 3]"


In [113]:
# samples.to_json('outputs/toxicity/save-init-gen/bak_outputs.txt.init.widx', orient="records", lines=True)
samples.to_json('outputs/toxicity/save-init-gen/bak_outputs.txt.init.widx.debugged', orient="records", lines=True)

In [114]:
## check if it matches the indices extracted when locate code was part of decode_new.py

indices_decode_new = pd.read_json('/home/hyeryungson/mucoco/outputs/toxicity/locate-edit3/outputs.txt.intermediate.indices',
                                 lines=True)

In [117]:
indices_decode_new['indices'] = indices_decode_new['indices'].apply(lambda x: x[0])

In [126]:
indices_decode_new['indices'] = indices_decode_new['indices'].apply(lambda x: set(x))

In [127]:
indices_decode_new

Unnamed: 0,prompt,sample_id,original_text,original_loss0,original_loss1,indices
0,These stunts are the latest in a long line of ...,0,"Mr. Obama, and have been viewed by some as a ...",42.759918,-4.273815,"{0, 1, 2, 5, 7, 19}"
1,These stunts are the latest in a long line of ...,1,"the real-life Eubank Khorshid, or Mr Disappea...",42.759918,-4.273815,"{0, 12, 13, 16, 17, 18}"
2,These stunts are the latest in a long line of ...,2,German artists who demand visibility for grap...,42.759918,-4.273815,"{0, 1, 3, 9, 16, 19}"
3,These stunts are the latest in a long line of ...,3,"these dolphin hunters who hunt these ""native""...",42.759918,-4.273815,"{0, 1, 2, 8, 17, 18}"
4,These stunts are the latest in a long line of ...,4,"her female adversaries. Last year, Women in t...",42.759918,-4.273815,"{0, 1, 2, 14, 18, 19}"
...,...,...,...,...,...,...
2370,"With 2.3 billion adherents, Christians are the...",20,in the future. This rapid growth was the deci...,79.884628,1.240509,"{0, 1, 2, 3, 16, 17}"
2371,"With 2.3 billion adherents, Christians are the...",21,as hundreds of millions more Christians go on...,79.884628,1.240509,"{5, 9, 12, 13, 14, 18}"
2372,"With 2.3 billion adherents, Christians are the...",22,. Only a bit more than 1 in 4 Buddhists and Hi...,79.884628,1.240509,"{1, 6, 9, 12, 16, 17}"
2373,"With 2.3 billion adherents, Christians are the...",23,through at least 2050—17 million more per yea...,79.884628,1.240509,"{0, 2, 3, 4, 7, 16}"


In [128]:
samples['indices'] = samples['indices'].apply(set)

In [130]:
comparison = indices_decode_new.merge(samples, left_on=["prompt", "original_text"], right_on=["prompt", "generation"],
                                     how="left")

In [131]:
comparison['indices_comparison'] = comparison['indices_x'] == comparison['indices_y']

In [132]:
comparison.loc[comparison['indices_comparison']!=True]

Unnamed: 0,prompt,sample_id,original_text,original_loss0,original_loss1,indices_x,generation,indices_y,indices_comparison
90,"“Only in America can a first-generation, India...",15,show and make it look like he understands who...,57.054005,-1.441327,"{0, 6, 13, 15, 17}",show and make it look like he understands who...,"{0, 17, 13, 15}",False
264,“And the second is to make it easy to,14,"use! Please, try it out!!““(The last part of ...",56.384895,-2.944067,"{0, 1, 2, 3, 7, 11}","use! Please, try it out!!““(The last part of ...","{0, 1, 3, 6, 7, 11}",False
293,“The one that I guess got my attention the mos...,18,very strongly about the community.\n\n\n5. Wh...,67.32621,-3.749603,"{0, 3, 8, 13, 14, 17}",very strongly about the community.\n\n\n5. Wh...,"{0, 8, 9, 13, 14, 17}",False
333,Reitzell created an atmospheric and unusual sc...,8,"a fast-paced, engaging TV series with an into...",94.367851,-1.195642,"{0, 4, 7, 10, 11, 19}","a fast-paced, engaging TV series with an into...","{0, 4, 6, 7, 10, 19}",False
412,"On Tuesday, curious to know who the kindly man",12,leading the Vancouver Police Oversight Review...,85.855728,-2.423911,"{0, 14, 15, 16, 17, 18}",leading the Vancouver Police Oversight Review...,"{0, 11, 14, 15, 17, 18}",False
423,"On Tuesday, curious to know who the kindly man",23,who was telling them not to listen to their s...,85.855728,-2.423911,"{1, 2, 6, 8, 11, 12}",who was telling them not to listen to their s...,"{0, 1, 2, 6, 11, 12}",False
729,They also repeated that the takeover is a full...,4,"pipe industry, and said the State wants to en...",104.619522,-3.510099,"{0, 4, 6}","pipe industry, and said the State wants to en...","{0, 4, 12, 6}",False
798,Malaysian authorities announced yesterday that...,23,of people for whom they had no idea who had c...,77.792786,-3.431863,"{16, 12, 13, 6}",of people for whom they had no idea who had c...,"{16, 17, 12, 6}",False
875,One of the people who hit me said:,0,"'You might lose the f**k of it, what are you ...",104.310158,-3.060915,"{0, 1, 2, 5, 6, 9}","'You might lose the f**k of it, what are you ...","{0, 1, 2, 5, 6}",False
939,"Because, of course I want to spend Labor Day w...",14,doll.\n\nOnce I've blended the first layers i...,145.073959,-1.569925,"{0, 1, 2, 4, 14, 17}",doll.\n\nOnce I've blended the first layers i...,"{0, 1, 2, 4, 17}",False


In [134]:
cls_attns[90]

tensor([0.2294, 0.0851, 0.0333, 0.0638, 0.0632, 0.0210, 0.1336, 0.0483, 0.0145,
        0.0643, 0.1032, 0.1283, 0.1025, 0.4143, 0.0835, 0.4900, 0.1232, 0.1428,
        0.1023, 0.0759], device='cuda:0')

In [139]:
tokenizer.decode(batch['input_ids'][90])

' show and make it look like he understands who he is; his same-sex parents are worthy people'

In [140]:
attn = cls_attns[90]

In [141]:
current_attn = attn[: lengths[i]].softmax(-1) # <- current tokenizer does not add <s> and </s> to the sentence.
# 이 값의 평균을 구한다.
avg_value = current_attn.view(-1).mean().item()
# 이 값 중에 평균보다 큰 값을 지니는 위치를 찾는다. (+1 because we skipped the first token)
# top_masks = ((current_attn >= avg_value).nonzero().view(-1)) + 1
top_masks = ((current_attn >= avg_value).nonzero().view(-1)) ## fixed to reflect that sometimes the sequence length is 1.
torch.cuda.empty_cache()
top_masks = top_masks.cpu().tolist()


In [142]:
top_masks

[0, 13, 15, 17]

In [143]:
if len(current_attn)==1:
    print(current_attn, top_masks, lengths[i])

# attention 값이 평균보다 큰 토큰의 수가 6 또는 문장 전체 토큰 수의 1/3 보다 크면  
#     if len(top_masks) > min((lengths[i] - 2) // 3, 6):
if len(top_masks) > min((lengths[i]) // 3, 6):
    # 그냥 attention 값 기준 top k 개 (k = 6 또는 토큰 수/3)를 뽑는다.
    top_masks = (
        # current_attn.topk(min((lengths[i] - 2) // 3, 6))[1] + 1
#             current_attn.topk(min((lengths[i] - 2) // 3, 6))[1]
        current_attn.topk(max(min((lengths[i]) // 3, 6), 1))[1]
    )
    top_masks = top_masks.cpu().tolist()
    if len(current_attn) == 1:
        print("top_masks after selecting top k", top_masks)


In [144]:
top_masks

[0, 13, 15, 17]

In [None]:
# 현재 문장의 input id를 가져온다.
current_sent = batch["input_ids"][i][: lengths[i]]
count = 0
top_masks_final = []
# top_masks에 뽑힌 index를 돌면서
for index in top_masks:
    # mask해야 할 토큰이 and, of, or, so 에 해당하지 않으면
    if tokenizer.decode(current_sent[index]) not in [ ## maybe add more!
        " and",
        " of",
        " or",
        " so",
    ]:
        # token을 mask 한다.
        # current_sent[index] = mask_token
        top_masks_final.append(index)
        # count 수를 늘린다.
        count += 1
    else:
        # 만약에 and, of, or, so 에 해당하면 아무것도 하지 않는다.
        pass

In [146]:
locate_func_result = locate(model, tokenizer, batch)

1570it [00:00, 3895.90it/s]

tensor([1.], device='cuda:0') [0] 1
top_masks after selecting top k [0]


2375it [00:00, 3886.61it/s]


In [149]:
locate_func_result[90]

[0, 13, 15, 17]

In [153]:
batch['attention_mask'][90]

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1.], device='cuda:0')

In [156]:
input_ids = batch['input_ids'][90]

In [161]:
tmp_batch = {"input_ids": input_ids.unsqueeze(0)}

In [166]:
locate(model, tokenizer, tmp_batch)

1it [00:00, 1096.55it/s]


[[0, 13, 15, 17]]

In [18]:
ids_indices = pd.read_json('/home/hyeryungson/mucoco/outputs/toxicity/locate-edit4/outputs.txt.intermediate', lines=True)

In [25]:
ids_indices.loc[412,:]

prompt               On Tuesday, curious to know who the kindly man
sample_id                                                        12
original_text      leading the Vancouver Police Oversight Review...
original_loss0                                            85.857346
original_loss1                                            -2.423151
input_ids         [[3756, 262, 10930, 4287, 35968, 6602, 5926, 3...
indices                                   [[18, 0, 14, 17, 15, 16]]
Name: 412, dtype: object

In [20]:
ids_indices_3 = pd.read_json('/home/hyeryungson/mucoco/outputs/toxicity/locate-edit3/outputs.txt.intermediate.indices', lines=True)

In [24]:
ids_indices_3.loc[412, :]

prompt               On Tuesday, curious to know who the kindly man
sample_id                                                        12
original_text      leading the Vancouver Police Oversight Review...
original_loss0                                            85.855728
original_loss1                                            -2.423911
indices                                   [[18, 0, 14, 17, 15, 16]]
Name: 412, dtype: object

### 함수화
- added on 23/07/28

In [165]:
# model 의 forward 함수에서 정의를 output_attentions=True를 넘길 수 있게 되어 있다.
def locate(model, tokenizer, batch, num_layer=10):
    # torch.cuda.empty_cache()
    # forward
    model.eval()
    with torch.no_grad():
        classifier_output = model.forward(**batch, output_attentions=True)
        torch.cuda.empty_cache()
        
    # get attentions
    attentions = classifier_output["attentions"]
    # attention_mask에서 1의 개수를 셈
#     lengths = [i.tolist().count(1) for i in batch["attention_mask"]]
    lengths = [len(i) for i in batch["input_ids"]]
    # 보고자 하는 attention layer 만 가져옴
    attentions = attentions[
        num_layer # originally 10
    ]
    cls_attns = attentions.max(1)[0][:, 0]
    
    locate_ixes=[]
    for i, attn in tqdm(enumerate(cls_attns)):
        # attention_mask가 1인 곳 까지의 attention을 보고, start of sentence와 end of sentence에 해당하는 token을 제거하고, softmax를 취한다.
        # current_attn = attn[: lengths[i]][1:-1].softmax(-1)
        current_attn = attn[: lengths[i]].softmax(-1) # <- current tokenizer does not add <s> and </s> to the sentence.
        # 이 값의 평균을 구한다.
        avg_value = current_attn.view(-1).mean().item()
        # 이 값 중에 평균보다 큰 값을 지니는 위치를 찾는다. (+1 because we skipped the first token)
        # top_masks = ((current_attn >= avg_value).nonzero().view(-1)) + 1
        top_masks = ((current_attn >= avg_value).nonzero().view(-1)) ## fixed to reflect that sometimes the sequence length is 1.
        torch.cuda.empty_cache()
        top_masks = top_masks.cpu().tolist()
        if len(current_attn)==1:
            print(current_attn, top_masks, lengths[i])
        
        # attention 값이 평균보다 큰 토큰의 수가 6 또는 문장 전체 토큰 수의 1/3 보다 크면  
    #     if len(top_masks) > min((lengths[i] - 2) // 3, 6):
        if len(top_masks) > min((lengths[i]) // 3, 6):
            # 그냥 attention 값 기준 top k 개 (k = 6 또는 토큰 수/3)를 뽑는다.
            top_masks = (
                # current_attn.topk(min((lengths[i] - 2) // 3, 6))[1] + 1
    #             current_attn.topk(min((lengths[i] - 2) // 3, 6))[1]
                current_attn.topk(max(min((lengths[i]) // 3, 6), 1))[1]
            )
            top_masks = top_masks.cpu().tolist()
            if len(current_attn) == 1:
                print("top_masks after selecting top k", top_masks)
        # 현재 문장의 input id를 가져온다.
        current_sent = batch["input_ids"][i][: lengths[i]]
        count = 0
        top_masks_final = []
        # top_masks에 뽑힌 index를 돌면서
        for index in top_masks:
            # mask해야 할 토큰이 and, of, or, so 에 해당하지 않으면
            if tokenizer.decode(current_sent[index]) not in [ ## maybe add more!
                " and",
                " of",
                " or",
                " so",
            ]:
                # token을 mask 한다.
                # current_sent[index] = mask_token
                top_masks_final.append(index)
                # count 수를 늘린다.
                count += 1
            else:
                # 만약에 and, of, or, so 에 해당하면 아무것도 하지 않는다.
                pass
        locate_ixes.append(top_masks_final)
    return locate_ixes