<a href="https://colab.research.google.com/github/gabie0208/chinese-ltp/blob/main/classifier_loader_false_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.12.5-py3-none-any.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 7.4 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 51.7 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 54.4 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.1.2-py3-none-any.whl (59 kB)
[K     |████████████████████████████████| 59 kB 7.7 MB/s 
[?25hCollecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 52.8 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attem

In [2]:
#!/usr/bin/env python3

import re
import os
import time
import random
import datetime
from tqdm import tqdm
from pprint import pprint

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import torch
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler

import transformers
from transformers import BertTokenizer, BertForSequenceClassification, BertConfig, get_linear_schedule_with_warmup, AdamW

from tensorflow.keras.preprocessing.sequence import pad_sequences

# Preparing Dataset

In [5]:
f = open('false_data_1128-2.txt', 'r')
X_test_data = f.readlines()

In [6]:
for x in X_test_data[:20]:
    print(f"Sentence: {x}\n")

Sentence: 包龙星：将军！


Sentence: 爷爷：恩恩……


Sentence: 有为：十三叔！


Sentence: 来，很好吃呀！


Sentence: 师爷：嘿嘿……


Sentence: 包龙星：契爷呀


Sentence: 我来教教你吧。


Sentence: 应该这样念呀！


Sentence: 方唐镜：判了！


Sentence: 林员外：告辞！


Sentence: 包龙星：走吧。


Sentence: 众人：扔死他！


Sentence: 众人：走呀……


Sentence: 有为：拿武器。


Sentence: 有为：厉害呀。


Sentence: 戚老：包大人。


Sentence: 这是什么东西？


Sentence: 包龙星：什么？


Sentence: 戚老：多谢……


Sentence: 如花：活该呀！




In [7]:
if torch.cuda.is_available():    
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")
    print('No GPU available, using the CPU instead.')

There are 1 GPU(s) available.
We will use the GPU: Tesla P100-PCIE-16GB


In [8]:
with open('test_tokenizer.txt', 'w', encoding='utf-8') as f:
    for line in X_test_data:
        print(line, file=f)

# Preprocessing

In [9]:
# Load Tokenizer
tokenizer = BertTokenizer.from_pretrained("hfl/chinese-roberta-wwm-ext", output_attentions=True)

# Tokenizing
tokenized_texts = [tokenizer.tokenize(s) for s in X_test_data]

Downloading:   0%|          | 0.00/107k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/19.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/263k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/689 [00:00<?, ?B/s]

In [10]:
pad_idx = tokenizer.convert_tokens_to_ids("[PAD]")
sos_idx = tokenizer.convert_tokens_to_ids("[SOS]")
eos_idx = tokenizer.convert_tokens_to_ids("[EOS]")

print(pad_idx)
print(sos_idx)
print(eos_idx)

0
100
100


In [11]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

MAX_LEN = 128
input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]
input_ids = pad_sequences(input_ids, maxlen=MAX_LEN, dtype='long', truncating='post', padding='post')

In [12]:
# 데이터 개수 확인

print(f"# of sentences:\t\t{len(X_test_data)} 개")
print(f"# of encoded data:\t{len(input_ids)} 개")

# of sentences:		17534 개
# of encoded data:	17534 개


In [13]:
# 토크나이저 초기 훈련 결과 및 후처리 결과 확인

print(f"original text:\n{X_test_data[0]}")
print("")
print(f"tokenized text:\n{tokenized_texts[0]}")
print("")
print(f"ID:\n{input_ids[0]}")
print("")
print(f"decoded data:\n {tokenizer.decode(input_ids[0])}")

original text:
包龙星：将军！


tokenized text:
['包', '龙', '星', '：', '将', '军', '！']

ID:
[1259 7987 3215 8038 2199 1092 8013    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0]

decoded data:
 包 龙 星 ： 将 军 ！ [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PA

In [14]:
#어텐션 마스크
attention_masks = []

for seq in tqdm(input_ids):
    seq_mask = [float(i > 0) for i in seq]
    attention_masks.append(seq_mask)
    
print(attention_masks[0])

100%|██████████| 17534/17534 [00:01<00:00, 10750.16it/s]

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]





In [15]:
X_test = input_ids
y_test = [0] * len(X_test)
mask_test = attention_masks

print(len(X_test))
print(len(y_test))

17534
17534


In [16]:
X_test = torch.LongTensor(X_test)
y_test = torch.LongTensor(y_test)
mask_test = torch.tensor(mask_test)

In [17]:
BATCH_SIZE = 64

test_data = TensorDataset(X_test, mask_test, y_test)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=BATCH_SIZE, num_workers=16)

  cpuset_checked))


# 저장된 모델 불러오기

In [19]:
model = torch.load("/content/output_attention_1122_model.pt")
model.to(device)

model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768, padding_idx=1)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [20]:
# 정확도 계산 함수
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

# 시간 표시 함수
def format_time(elapsed):
    # 반올림
    elapsed_rounded = int(round((elapsed)))
    # hh:mm:ss으로 형태 변경
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [21]:
#시작 시간 설정
result_x = []
result_pred = []
t0 = time.time()

# 평가모드로 변경
model.eval()

# 변수 초기화
eval_loss, eval_accuracy = 0, 0
nb_eval_steps, nb_eval_examples = 0, 0

# 데이터로더에서 배치만큼 반복하여 가져옴
for step, batch in enumerate(test_dataloader):
    # 경과 정보 표시
    if step % 100 == 0 and not step == 0:
        elapsed = format_time(time.time() - t0)
        print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(test_dataloader), elapsed))

    # 배치를 GPU에 넣음
    batch = tuple(t.to(device) for t in batch)
    
    # 배치에서 데이터 추출
    b_input_ids, b_input_mask, b_labels = batch
    
    # 그래디언트 계산 안함
    with torch.no_grad():     
        # Forward 수행
        outputs = model(b_input_ids, 
                        token_type_ids=None, 
                        attention_mask=b_input_mask,
                        return_dict=False)
    
    # 로스 구함
    logits = outputs[0]

    # CPU로 데이터 이동
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()
    
    # 출력 로짓과 라벨을 비교하여 정확도 계산
    tmp_eval_accuracy = flat_accuracy(logits, label_ids)
    eval_accuracy += tmp_eval_accuracy
    nb_eval_steps += 1

    #print("="*50)
    #print("Prediction:\t{}".format(np.argmax(logits, axis=1).tolist()))
    #print("Ground Truth:\t{}".format(b_labels.tolist()))
    
    result_x.extend(b_input_ids)
    result_pred.extend(np.argmax(logits, axis=1).tolist())

print("")
#print("Accuracy: {0:.4f}".format(eval_accuracy/nb_eval_steps))
print("Test took: {:}".format(format_time(time.time() - t0)))

  cpuset_checked))


  Batch   100  of    274.    Elapsed: 0:00:25.
  Batch   200  of    274.    Elapsed: 0:00:49.

Test took: 0:01:07


In [22]:
result_sent = []

for digit in tqdm(result_x):
    
    sent_splitted = tokenizer.decode(digit.tolist())
    sent_joint = "".join(sent_splitted)
    
    sent_joint = re.sub(r" ", "", sent_joint)
    sent_joint = re.sub(r"\[SOS\]", "", sent_joint)
    sent_joint = re.sub(r"\[EOS\]", "", sent_joint)
    sent_joint = re.sub(r"\[PAD\]", "", sent_joint)
    
    result_sent.append(sent_joint)

100%|██████████| 17534/17534 [00:09<00:00, 1805.57it/s]


In [23]:
result_df = pd.DataFrame([x for x in zip(result_sent, result_pred)], columns = ["sentence", "prediction"])
result_df

Unnamed: 0,sentence,prediction
0,包龙星：将军！,0
1,爷爷：恩恩[UNK][UNK],0
2,有为：十三叔！,0
3,来，很好吃呀！,0
4,师爷：嘿嘿[UNK][UNK],0
...,...,...
17529,据了解，电影《艋舺》自2月初在台湾上映以来，以势如破竹之势席卷岛内各地影市。,1
17530,主持人：通过刚才台湾东森电视台镜头，可以说我们看到了《艋舺》电影热映的情况。,0
17531,主持人：好的，在开始讨论之前，我们首先通过一个短片来了解一下相关的新闻背景。,0
17532,主持人：也就是提醒大陆的游客，虽然台湾的风光很好，很多的土特产也非常吸引人。,0


In [24]:
result_df.to_csv("classification_result_test_1122.csv", encoding="utf-8-sig", index=False)

# 시각화

In [25]:
!pip install captum

Collecting captum
  Downloading captum-0.4.1-py3-none-any.whl (1.4 MB)
[?25l[K     |▎                               | 10 kB 22.7 MB/s eta 0:00:01[K     |▌                               | 20 kB 27.8 MB/s eta 0:00:01[K     |▊                               | 30 kB 23.3 MB/s eta 0:00:01[K     |█                               | 40 kB 18.4 MB/s eta 0:00:01[K     |█▏                              | 51 kB 8.3 MB/s eta 0:00:01[K     |█▍                              | 61 kB 8.4 MB/s eta 0:00:01[K     |█▋                              | 71 kB 8.0 MB/s eta 0:00:01[K     |█▉                              | 81 kB 8.8 MB/s eta 0:00:01[K     |██▏                             | 92 kB 7.3 MB/s eta 0:00:01[K     |██▍                             | 102 kB 8.0 MB/s eta 0:00:01[K     |██▋                             | 112 kB 8.0 MB/s eta 0:00:01[K     |██▉                             | 122 kB 8.0 MB/s eta 0:00:01[K     |███                             | 133 kB 8.0 MB/s eta 0:00:01[K 

In [26]:
from captum.attr import visualization as viz
from captum.attr import LayerConductance, LayerIntegratedGradients

## Defining Funtions

In [27]:
def predict(inputs, token_type_ids=None, position_ids=None, attention_mask=None):
    
    """
    model을 불러와서 prediction하는 부분.
    직접 사용할 일은 없고, 아래의 forward_func에 물려서 사용됨.
    """
    
    output = model(inputs, token_type_ids=token_type_ids,
                 position_ids=position_ids, attention_mask=attention_mask, )
    
    return output

In [28]:
def forward_func(inputs, token_type_ids=None, position_ids=None, attention_mask=None, position=0):
    
    """
    특정 sentence 1개에 대하여 prediction을 진행한 후,
    예측값을 반환하는 함수.
    """
    
    pred = predict(inputs,
                   token_type_ids=token_type_ids,
                   position_ids=position_ids,
                   attention_mask=attention_mask)
    
    pred = pred[position]
    
    return pred.max(1).values

In [29]:
def construct_input_ref_pair(sentence, ref_token_id):
    
    """
    sentence의 길이와 맞는 reference tensor를 생성.
    sentence의 tensor, reference의 tensor, 이 둘의 길이를 반환함.
    """
    
    sentence_ids = tokenizer.encode(sentence, add_special_tokens=False)

    #input_ids = [cls_token_id] + sentence_ids + [sep_token_id]
    #ref_input_ids = [cls_token_id] + [ref_token_id] * len(sentence_ids) + [sep_token_id]
    
    input_ids = sentence_ids
    ref_input_ids = [ref_token_id] * len(sentence_ids)
    
    return torch.tensor([input_ids], device=device), torch.tensor([ref_input_ids], device=device), len(sentence_ids)

def construct_input_ref_token_type_pair(input_ids, sep_ind=0):
    
    """
    아직 용도를 모르겠어요??? 어디에 쓰는 거지
    """
    
    seq_len = input_ids.size(1)
    token_type_ids = torch.tensor([[0 if i <= sep_ind else 1 for i in range(seq_len)]], device=device)
    ref_token_type_ids = torch.zeros_like(token_type_ids, device=device)
    return token_type_ids, ref_token_type_ids

def construct_input_ref_pos_id_pair(input_ids):
    
    """
    아직 용도를 모르겠어요??? 어디에 쓰는 거지
    """
    
    seq_length = input_ids.size(1)
    position_ids = torch.arange(seq_length, dtype=torch.long, device=device)
    # we could potentially also use random permutation with `torch.randperm(seq_length, device=device)`
    ref_position_ids = torch.zeros(seq_length, dtype=torch.long, device=device)

    position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
    ref_position_ids = ref_position_ids.unsqueeze(0).expand_as(input_ids)
    return position_ids, ref_position_ids
    
def construct_attention_mask(input_ids):
    
    """
    attention mask를 만드는 함수. prediction을 한 문장마다 진행하기 때문에
    [PAD] token이 필요없으므로, 그냥 다음과 같이 간단하게 구현하면 된다.
    """
    
    return torch.ones_like(input_ids)

def construct_whole_bert_embeddings(input_ids, ref_input_ids, \
                                    token_type_ids=None, ref_token_type_ids=None, \
                                    position_ids=None, ref_position_ids=None):
    
    """
    sentence와 reference를 model의 embedding에 대입하는 함수.
    """
    
    input_embeddings = model.bert.embeddings(input_ids, token_type_ids=token_type_ids, position_ids=position_ids)
    ref_input_embeddings = model.bert.embeddings(ref_input_ids, token_type_ids=ref_token_type_ids, position_ids=ref_position_ids)
    
    return input_embeddings, ref_input_embeddings

## Prediction

In [30]:
# sentence, label = "现在亲丁们正在梳蓖草原，雪驹肯定比我更加危险", 1
sentence, label = "玉梅爹叫王宝全，外号万宝全，年轻时候给刘老五家当过长工，在那时候学会了赶骡子，学会了种园", 0

In [31]:
ref_token_id = tokenizer.pad_token_id

input_ids, ref_input_ids, sep_id = construct_input_ref_pair(sentence, ref_token_id)
token_type_ids, ref_token_type_ids = construct_input_ref_token_type_pair(input_ids, sep_id)
position_ids, ref_position_ids = construct_input_ref_pos_id_pair(input_ids)
attention_mask = construct_attention_mask(input_ids)

indices = input_ids[0].detach().tolist()
all_tokens = tokenizer.convert_ids_to_tokens(indices)

In [32]:
pred = predict(input_ids, \
               token_type_ids=token_type_ids, \
               position_ids=position_ids, \
               attention_mask=attention_mask)

pred_ind = torch.argmax(torch.sigmoid(pred.logits)).item()

print("Sentence: {}".format(sentence))
print('Prediction: {}'.format(pred_ind))

Sentence: 玉梅爹叫王宝全，外号万宝全，年轻时候给刘老五家当过长工，在那时候学会了赶骡子，学会了种园
Prediction: 0


In [33]:
lig = LayerIntegratedGradients(forward_func, model.bert.embeddings)

attributions, delta = lig.attribute(inputs=input_ids,
                                  baselines=ref_input_ids,
                                  additional_forward_args=(token_type_ids, position_ids, attention_mask, 0),
                                  return_convergence_delta=True)

In [34]:
def summarize_attributions(attributions):
    
    attributions = attributions.sum(dim=-1).squeeze(0)
    attributions = attributions / torch.norm(attributions)
    
    return attributions

In [35]:
attributions_sum = summarize_attributions(attributions)

In [36]:
label_vocab  = {0: "False", 1: "True"}

vis = viz.VisualizationDataRecord(
                        attributions_sum,
                        torch.max(torch.softmax(pred[0], dim=0)),
                        label_vocab[pred_ind],
                        label_vocab[label],
                        label_vocab[1],
                        attributions_sum.sum(),       
                        all_tokens,
                        delta)

print('\033[1m', 'Visualizations For Classification', '\033[0m')
viz.visualize_text([vis]);

[1m Visualizations For Classification [0m


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
False,False (1.00),True,2.49,玉 梅 爹 叫 王 宝 全 ， 外 号 万 宝 全 ， 年 轻 时 候 给 刘 老 五 家 当 过 长 工 ， 在 那 时 候 学 会 了 赶 骡 子 ， 学 会 了 种 园
,,,,


In [37]:
print("Predction: ", label_vocab[pred_ind], '(', '%.2f'%pred_ind, ')')
print("Ground Truth: ", label_vocab[label])

Predction:  False ( 0.00 )
Ground Truth:  False


## Prediction for several sentences

In [38]:
result_df

Unnamed: 0,sentence,prediction
0,包龙星：将军！,0
1,爷爷：恩恩[UNK][UNK],0
2,有为：十三叔！,0
3,来，很好吃呀！,0
4,师爷：嘿嘿[UNK][UNK],0
...,...,...
17529,据了解，电影《艋舺》自2月初在台湾上映以来，以势如破竹之势席卷岛内各地影市。,1
17530,主持人：通过刚才台湾东森电视台镜头，可以说我们看到了《艋舺》电影热映的情况。,0
17531,主持人：好的，在开始讨论之前，我们首先通过一个短片来了解一下相关的新闻背景。,0
17532,主持人：也就是提醒大陆的游客，虽然台湾的风光很好，很多的土特产也非常吸引人。,0


In [39]:
sentence_list = result_df["sentence"].values.tolist()
prediction_list = result_df["prediction"].values.tolist()

sentence_list = sentence_list[:100]
prediction_list = prediction_list[:100]

In [40]:
label_vocab  = {0: "False", 1: "True"}
vis_list = []

for sentence, label in tqdm(zip(sentence_list, prediction_list), total=len(sentence_list)):
    
    input_ids, ref_input_ids, sep_id = construct_input_ref_pair(sentence, ref_token_id)
    token_type_ids, ref_token_type_ids = construct_input_ref_token_type_pair(input_ids, sep_id)
    position_ids, ref_position_ids = construct_input_ref_pos_id_pair(input_ids)
    attention_mask = construct_attention_mask(input_ids)

    indices = input_ids[0].detach().tolist()
    all_tokens = tokenizer.convert_ids_to_tokens(indices)
    
    pred = predict(input_ids, token_type_ids=token_type_ids, position_ids=position_ids, attention_mask=attention_mask)

    pred_ind = torch.argmax(torch.sigmoid(pred.logits)).item()
    
    lig = LayerIntegratedGradients(forward_func, model.bert.embeddings)

    attributions, delta = lig.attribute(inputs=input_ids,
                                        baselines=ref_input_ids,
                                        additional_forward_args=(token_type_ids, position_ids, attention_mask, 0),
                                        return_convergence_delta=True)
    
    attributions_sum = summarize_attributions(attributions)
    
    vis = viz.VisualizationDataRecord(
                            attributions_sum,
                            torch.max(torch.softmax(pred[0], dim=0)),
                            label_vocab[pred_ind],
                            label_vocab[label],
                            None,
                            attributions_sum.sum(),       
                            all_tokens,
                            delta)
    
    vis_list.append(vis)

print('\033[1m', 'Visualizations For Classification', '\033[0m')    
viz.visualize_text(vis_list); 

100%|██████████| 100/100 [00:11<00:00,  8.83it/s]


[1m Visualizations For Classification [0m


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
False,False (1.00),,2.3,包 龙 星 ： 将 军 ！
,,,,
False,False (1.00),,2.1,爷 爷 ： 恩 恩 [UNK] [UNK]
,,,,
False,False (1.00),,1.67,有 为 ： 十 三 叔 ！
,,,,
False,False (1.00),,1.23,来 ， 很 好 吃 呀 ！
,,,,
False,False (1.00),,2.23,师 爷 ： 嘿 嘿 [UNK] [UNK]
,,,,


In [41]:
lig2 = LayerIntegratedGradients(forward_func, \
                                [model.bert.embeddings.word_embeddings, \
                                 model.bert.embeddings.token_type_embeddings, \
                                 model.bert.embeddings.position_embeddings])

attributions = lig2.attribute(inputs=(input_ids, token_type_ids, position_ids),
                                  baselines=(ref_input_ids, ref_token_type_ids, ref_position_ids),
                                  additional_forward_args=(attention_mask, 0))

attributions_word = summarize_attributions(attributions[0])

attributions_token_type = summarize_attributions(attributions[1])

attributions_position = summarize_attributions(attributions[2])

  "Multiple layers provided. Please ensure that each layer is"


In [42]:
def get_topk_attributed_tokens(attrs, k=5):
    
    values, indices = torch.topk(attrs, k)
    top_tokens = [all_tokens[idx] for idx in indices]
    
    return top_tokens, values, indices

In [43]:
top_words, top_words_val, top_word_ind = get_topk_attributed_tokens(attributions_word)

top_token_type, top_token_type_val, top_token_type_ind = get_topk_attributed_tokens(attributions_token_type)

top_pos, top_pos_val, pos_ind = get_topk_attributed_tokens(attributions_position)

df_delve = pd.DataFrame({'Word(Index), Attribution': ["{} ({}), {}".format(word, pos, round(val.item(),2)) for word, pos, val in zip(top_words, top_word_ind, top_words_val)],
                   'Token Type(Index), Attribution': ["{} ({}), {}".format(ttype, pos, round(val.item(),2)) for ttype, pos, val in zip(top_token_type, top_token_type_ind, top_words_val)],
                   'Position(Index), Attribution': ["{} ({}), {}".format(position, pos, round(val.item(),2)) for position, pos, val in zip(top_pos, pos_ind, top_pos_val)]})
df_delve.style.apply(['cell_ids: False'])

['{}({})'.format(token, str(i)) for i, token in enumerate(all_tokens)]

['老(0)', '板(1)', '娘(2)', '：(3)', '我(4)', '[UNK](5)', '[UNK](6)']

In [44]:
df_delve

Unnamed: 0,"Word(Index), Attribution","Token Type(Index), Attribution","Position(Index), Attribution"
0,"[UNK] (5), 0.5","板 (1), 0.5","[UNK] (6), 1.0"
1,"： (3), 0.47","老 (0), 0.47","我 (4), 0.07"
2,"老 (0), 0.34","娘 (2), 0.34","娘 (2), 0.04"
3,"[UNK] (6), 0.02","我 (4), 0.02","： (3), 0.03"
4,"娘 (2), -0.3","： (3), -0.3","[UNK] (5), 0.03"


해석방법
* Word Embedding: 
* Token Embedding:
* Position Embedding: