In [1]:
import os
print(os.getcwd())
os.chdir('/data/hyeryung/mucoco/notebooks')
import joblib
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import torch
from torch.nn import Softmax
from torch.utils.data import DataLoader

from utils.load_ckpt import define_model

import json
import argparse
import time
from itertools import chain
from tqdm import tqdm


/data/hyeryung/mucoco/notebooks/XAI


In [2]:
os.chdir('/data/hyeryung/mucoco')

In [3]:
# set gpu (template code)
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"  # Arrange GPU devices starting from 0
os.environ["CUDA_VISIBLE_DEVICES"]= "0"  # Set the GPU 0 to use

In [4]:
BATCH_SIZE=64
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Which cpu/cuda?: {DEVICE}")
print(f"Which device?: {torch.cuda.current_device()}")

Which cpu/cuda?: cuda
Which device?: 0


In [5]:
# 원본 데이터 로드
samples=pd.read_csv('./notebooks/results/test_mucoco+add_preds2.csv')
# label이 1인 데이터만 사용
sample1=samples.loc[samples['toxicity']>0.5].copy()

In [6]:
samples.shape

(4505, 7)

In [7]:
samples.head()

Unnamed: 0,text,toxicity,pred_binarize,pred_balanced,pred_add,pred_addmore,pred_binary
0,"Ha ha, HILLARY LOST. All you hillary fools nee...",0.854839,0.99752,0.862362,0.999752,0.884619,0.999752
1,Two dumbs makes one dead. Happens all too fre...,0.8,0.930683,0.839179,0.99006,0.441051,0.99006
2,Trash site. Just like Hillary Clinton.,0.742424,0.340262,0.148426,0.999478,0.39454,0.999478
3,"Chomp, chomp ... SLURP! Pigs at the trough. Mo...",0.509804,0.448426,0.622463,0.999693,0.4829,0.999693
4,how about capital for the buyers. they are as ...,0.95,0.997821,0.932114,0.999753,0.909587,0.999753


In [8]:
# define dataset, dataloader
from datasets import load_dataset
toxicity_data = load_dataset("csv", data_files="./notebooks/results/test_mucoco+add_preds2.csv")

Found cached dataset csv (/data/.cache/huggingface/datasets/csv/default-d03b5312eea11c79/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d)


  0%|          | 0/1 [00:00<?, ?it/s]

In [9]:
toxicity_data

DatasetDict({
    train: Dataset({
        features: ['text', 'toxicity', 'pred_binarize', 'pred_balanced', 'pred_add', 'pred_addmore', 'pred_binary'],
        num_rows: 4505
    })
})

In [10]:
# load trained model
checkpoint='./models_bak_contd/roberta-base-jigsaw-toxicity-classifier-with-gpt2-large-embeds/checkpoint_best/pytorch_model.bin'
model, config, tokenizer = define_model(checkpoint, output_attentions=True)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2-large and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
S

DEVICE:  cuda


In [11]:
# 논문에서는 gpt-2의 tokenizer를 사용하였으므로, mask token이 기존에는 없었음
print(tokenizer.all_special_ids, tokenizer.all_special_tokens, tokenizer.vocab_size)

# tokenizer에 mask token 추가
print('add mask token')
SPECIAL_TOKENS = {"mask_token": "<mask>"}
tokenizer.add_special_tokens(SPECIAL_TOKENS)

print(tokenizer.all_special_ids, tokenizer.all_special_tokens, tokenizer.vocab_size)

[50256] ['<|endoftext|>'] 50257
add mask token
[50256, 50257] ['<|endoftext|>', '<mask>'] 50257


In [12]:
def tokenize_function(example):
    return tokenizer(example["text"], truncation=True)

tokenized_dataset=toxicity_data.map(tokenize_function, batched=True)

Map:   0%|          | 0/4505 [00:00<?, ? examples/s]

In [13]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'toxicity', 'pred_binarize', 'pred_balanced', 'pred_add', 'pred_addmore', 'pred_binary', 'input_ids', 'attention_mask'],
        num_rows: 4505
    })
})

In [15]:
# remove unnecessary columns
tokenized_dataset = tokenized_dataset.remove_columns(["text", 'pred_binarize', 'pred_balanced', 'pred_add', 'pred_addmore', 'pred_binary'])
tokenized_dataset = tokenized_dataset.rename_column("toxicity", "labels")
tokenized_dataset.set_format("torch")
tokenized_dataset["train"].column_names

['labels', 'input_ids', 'attention_mask']

In [18]:
# build data loader
from torch.utils.data import DataLoader
from transformers import DataCollatorWithPadding
batch_size = 32
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
dataloader = DataLoader(
    tokenized_dataset['train'], shuffle=True, batch_size=batch_size, collate_fn=data_collator
)

In [19]:
for batch in dataloader:
    break

You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [24]:
model.eval()
for batch in dataloader:
    batch = {k: v.to(DEVICE) for k, v in batch.items()} # move training data to gpu
    with torch.no_grad():
        outputs = model.forward(
            batch["input_ids"],
            attention_mask=batch["attention_mask"],
        )
    attentions = outputs["attentions"][10]
    cls_attns = attentions.max(1)[0][:, 0]
    print(attentions)
    break

tensor([[[[0.0134, 0.0134, 0.0133,  ..., 0.0000, 0.0000, 0.0000],
          [0.0134, 0.0134, 0.0133,  ..., 0.0000, 0.0000, 0.0000],
          [0.0134, 0.0134, 0.0133,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.0134, 0.0134, 0.0133,  ..., 0.0000, 0.0000, 0.0000],
          [0.0134, 0.0134, 0.0133,  ..., 0.0000, 0.0000, 0.0000],
          [0.0134, 0.0134, 0.0133,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.0127, 0.0136, 0.0137,  ..., 0.0000, 0.0000, 0.0000],
          [0.0127, 0.0136, 0.0137,  ..., 0.0000, 0.0000, 0.0000],
          [0.0127, 0.0136, 0.0137,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.0127, 0.0136, 0.0137,  ..., 0.0000, 0.0000, 0.0000],
          [0.0127, 0.0136, 0.0137,  ..., 0.0000, 0.0000, 0.0000],
          [0.0127, 0.0136, 0.0137,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.0128, 0.0135, 0.0135,  ..., 0.0000, 0.0000, 0.0000],
          [0.0128, 0.0135, 0.0135,  ..., 0.0000, 0.0000, 0.0000],
          [0.0128, 0.0135, 0.0135,  ..., 0

In [25]:
attentions.shape

torch.Size([32, 12, 241, 241])

In [26]:
attentions

tensor([[[[0.0042, 0.0042, 0.0042,  ..., 0.0041, 0.0041, 0.0041],
          [0.0042, 0.0042, 0.0042,  ..., 0.0041, 0.0041, 0.0041],
          [0.0042, 0.0042, 0.0042,  ..., 0.0041, 0.0041, 0.0041],
          ...,
          [0.0042, 0.0042, 0.0042,  ..., 0.0041, 0.0041, 0.0041],
          [0.0042, 0.0042, 0.0042,  ..., 0.0041, 0.0041, 0.0041],
          [0.0042, 0.0042, 0.0042,  ..., 0.0041, 0.0041, 0.0041]],

         [[0.0042, 0.0042, 0.0042,  ..., 0.0041, 0.0041, 0.0041],
          [0.0042, 0.0042, 0.0042,  ..., 0.0041, 0.0041, 0.0041],
          [0.0042, 0.0042, 0.0042,  ..., 0.0041, 0.0041, 0.0041],
          ...,
          [0.0042, 0.0042, 0.0042,  ..., 0.0041, 0.0041, 0.0041],
          [0.0042, 0.0042, 0.0042,  ..., 0.0041, 0.0041, 0.0041],
          [0.0042, 0.0042, 0.0042,  ..., 0.0041, 0.0041, 0.0041]],

         [[0.0042, 0.0042, 0.0042,  ..., 0.0041, 0.0041, 0.0041],
          [0.0042, 0.0042, 0.0042,  ..., 0.0041, 0.0041, 0.0041],
          [0.0042, 0.0042, 0.0042,  ..., 0

In [36]:
# mask_token에 해당하는 id 설정
# mask_token = tokenizer.encode("<mask>", add_special_tokens=False)[0]
output_sents=dict()
print("generating masks")
print("Extracting SLOT tokens")
# mask 대상 텍스트 로드
file = sample1['text'].tolist()
# 레이어 별로 attention 값을 기준으로 mask 해보기 위해서 outer for-loop 추가
for layer_num in range(0, 12):
    print(layer_num)
    output_sents[layer_num] = []
    # 텍스트를 32개씩 batch로 처리할 예정임
    for i in tqdm(range(0, len(file), 32)):
        # get batch
        input_lines = file[i : i + 32]
        # tokenize
        batch = tokenizer(
            input_lines, padding=True, return_tensors="pt", truncation=True
        )
        torch.cuda.empty_cache()
        # forward
        classifier_output = classifier.forward(
            batch["input_ids"].cuda(),
            attention_mask=batch["attention_mask"].cuda(),
        )
        torch.cuda.empty_cache()
        # get attentions
        attentions = classifier_output["attentions"]
        # attention_mask에서 1의 개수를 셈
        lengths = [i.tolist().count(1) for i in batch["attention_mask"]]
        # 보고자 하는 attention layer 만 가져옴
        attentions = attentions[
            layer_num # originally 10
        ]  # 10 is chosen because it is the magical layer number of the grand elves
        # attentions.max(1)[0]: axis 1 에 대해서 max 값을 가져온다. (sequence에 대해서 여러개의 attention heads 중에서 가장 큰 값을 가져온다.) -> [:, 0] 그리고 나서 cls token의 attention을 가져온다.
        cls_attns = attentions.max(1)[0][:, 0]
        # batch에 있는 각 example에 대해서
        for i, attn in enumerate(cls_attns):
            # attention_mask가 1인 곳 까지의 attention을 보고, start of sentence와 end of sentence에 해당하는 token을 제거하고, softmax를 취한다.
            # current_attn = attn[: lengths[i]][1:-1].softmax(-1)
            current_attn = attn[: lengths[i]].softmax(-1) # <- current tokenizer does not add <s> and </s> to the sentence.
            # 이 값의 평균을 구한다.
            avg_value = current_attn.view(-1).mean().item()
            # 이 값 중에 평균보다 큰 값을 지니는 위치를 찾는다. (+1 because we skipped the first token)
            # top_masks = ((current_attn > avg_value).nonzero().view(-1)) + 1
            top_masks = ((current_attn > avg_value).nonzero().view(-1))
            torch.cuda.empty_cache()
            top_masks = top_masks.cpu().tolist()
            # attention 값이 평균보다 큰 토큰의 수가 6 또는 문장 전체 토큰 수의 1/3 보다 크면  
            if len(top_masks) > min((lengths[i] - 2) // 3, 6):
                # 그냥 attention 값 기준 top k 개 (k = 6 또는 토큰 수/3)를 뽑는다.
                top_masks = (
                    # current_attn.topk(min((lengths[i] - 2) // 3, 6))[1] + 1
                    current_attn.topk(min((lengths[i] - 2) // 3, 6))[1]
                )
                top_masks = top_masks.cpu().tolist()
            # 현재 문장의 input id를 가져온다.
            current_sent = batch["input_ids"][i][: lengths[i]]
            count = 0
            # top_masks에 뽑힌 index를 돌면서
            for index in top_masks:
                # mask해야 할 토큰이 and, of, or, so 에 해당하지 않으면
                if tokenizer.decode(current_sent[index]) not in [ ## maybe add more!
                    " and",
                    " of",
                    " or",
                    " so",
                ]:
                    # token을 mask 한다.
                    current_sent[index] = mask_token
                    # count 수를 늘린다.
                    count += 1
                else:
                    # 만약에 and, of, or, so 에 해당하면 아무것도 하지 않는다.
                    pass
            # sent = (
            #     tokenizer.decode(current_sent)[3:-4]
            #     .replace("<mask>", " <mask>")
            #     .strip()
            # )
            sent = (
                # masking을 마친 input id를 decode 한다.
                tokenizer.decode(current_sent)
                .replace("<mask>", " <mask>")
                .strip()
            )
            # mask 된 곳이 없으면, 마지막에 추가한다.
            if "<mask>" not in sent:
                sent = sent + " <mask>."
            
            # 처리된 문장을 output_sents에 추가한다.
            output_sents[layer_num].append(sent)

# with open(args.out_file + ".intermediate_sentences.txt", "w") as f:
#     f.write("\n".join(output_sents))

generating masks
Extracting SLOT tokens
0


  0%|                                                                                                                                          | 0/45 [00:00<?, ?it/s]


NameError: name 'classifier' is not defined

In [57]:
# sample1['attn_L11'] = output_sents
# sample1['attn_L12'] = output_sents

In [24]:
for i in range(0, 12):
    sample1[f"attn_L{i+1}"] = output_sents[i]

In [11]:
import joblib
# joblib.dump(sample1, "notebooks/results/attn_based_locate_balanced_230524.pkl")
joblib.dump(sample1, "notebooks/results/attn_based_locate_binary_230524.pkl")

['notebooks/results/attn_based_locate_binary_230524.pkl']

In [12]:
sample1.sort_values(by='toxicity', ascending=False).iloc[1, ]['text'], sample1.sort_values(by='toxicity', ascending=False).iloc[1, ]['attn_L11']

KeyError: 'attn_L11'

In [None]:
for i,j in zip(sample1['text'].tolist()[:10], output_sents[10][:10]):
    print("[original]")
    print(i)
    print("[masked]")
    print(j)

[original]
Ha ha, HILLARY LOST. All you hillary fools need to be rounded up and put in prison along with your leader. You are all anti--American scum. Perhaps we should dump you in Mexico along with all the illegal trash you let in....if you love 'em so much, go live with 'em. But you are not welcome here, you hate filled Marxist morons.
[masked]
Ha ha, HILLARY LOST. All you hillary fools need to be rounded up and put in prison along with your leader. You are all anti--American scum. Perhaps we should dump you in Mexico along with all the illegal trash you let in....if you love 'em so much, go live with 'em. But you are not welcome here, <mask> <mask> <mask> <mask> <mask> <mask>.
[original]
Two dumbs makes one dead.  Happens all too frequently.
[masked]
<mask> <mask> <mask> <mask> one dead.  Happens all too frequently.
[original]
Trash site.  Just like Hillary Clinton.
[masked]
Tr <mask> <mask>.  Just like Hillary Clinton.
[original]
Chomp, chomp ... SLURP! Pigs at the trough. More slo