- 저장된 RNN, CNN, BERT 모델을 load 합니다.
- nsmc/ratings_test.txt 파일을 읽고, 해당 파일 중 문장 길이가 5 이상인 문장에 대해서 모델별 prediction을 진행합니다.
- 실제 label과 비교해서 어떤 모델이 가장 좋은 성능을 나타내는지 classification report(scikit-learn 참고)를 출력합니다.

In [1]:
# Base directory that later cells prefix to the saved model checkpoint
# and NSMC data file names.
path = '/content/drive/My Drive/Colab Notebooks/2020-PoscoICT/Data/'

## BERT

In [2]:
!pip install transformers



In [3]:
import torch
from transformers import BertTokenizer
from transformers import BertForSequenceClassification, AdamW, BertConfig

In [4]:
# Build a BERT sequence classifier with a 2-label output head, starting from
# the "bert-base-multilingual-cased" pretrained checkpoint.
bert_model = BertForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels=2)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model ch

In [5]:
# Restore the saved weights from bert-model.pt into the classifier.
# map_location='cpu' lets a checkpoint that was saved from a GPU session load
# on a CPU-only runtime as well; the model is moved to the evaluation device
# separately before inference.
bert_model.load_state_dict(torch.load(path + 'bert-model.pt', map_location='cpu'))

<All keys matched successfully>

In [6]:
# Load the BERT tokenizer used to split sentences into tokens.
# do_lower_case=False is passed so the input text is not lowercased.
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased', do_lower_case=False)

In [7]:
import pandas as pd
import numpy as np
from keras.preprocessing.sequence import pad_sequences
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

# Read the tab-separated NSMC test split and drop rows with any missing value.
# NOTE(review): the task description asks for sentences of length >= 5 only;
# no such filter is applied here -- confirm whether it is intended.
test = pd.read_csv(path+"nsmc/ratings_test.txt", sep='\t')
test.dropna(how='any', inplace=True)

# Wrap every review with the [CLS] / [SEP] marker tokens.
sentences = test['document']
sentences = ["[CLS] " + str(sentence) + " [SEP]" for sentence in sentences]
labels = test['label'].values

# Split each sentence into tokens with the BERT tokenizer.
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased', do_lower_case=False)
tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]

# Maximum input sequence length, in tokens.
MAX_LEN = 128
# Convert tokens to their integer vocabulary indices.
input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]
# Cut every sequence to MAX_LEN and fill the shortfall with trailing 0 padding.
# NOTE(review): truncating="post" also cuts off the trailing [SEP] of any
# sentence longer than MAX_LEN tokens; left as-is here.
input_ids = pad_sequences(input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")

# Attention mask: 1.0 where the id is non-padding (> 0), 0.0 on padding.
# Computed in one vectorized comparison instead of a per-element Python loop;
# float32 matches the dtype torch.tensor() infers from a list of Python floats.
attention_masks = (input_ids > 0).astype(np.float32)

# Convert everything to PyTorch tensors.
test_inputs = torch.tensor(input_ids)
test_labels = torch.tensor(labels)
test_masks = torch.tensor(attention_masks)

# Evaluation iterates in dataset order: shuffling adds nothing at test time,
# and a fixed order keeps batches reproducible and predictions alignable
# with the rows of `test`.
test_data = TensorDataset(test_inputs, test_masks, test_labels)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=8)

In [8]:
# Accuracy helper
def flat_accuracy(preds, labels):
    """Return the fraction of predictions that match the labels.

    Args:
        preds: 2-D array of per-class scores; the predicted class of each
            row is the index of its largest value along axis 1.
        labels: array of true class ids; it is flattened before comparison.

    Returns:
        Number of matching positions divided by the number of labels.
    """
    predicted = np.argmax(preds, axis=1).flatten()
    expected = labels.flatten()

    n_correct = np.sum(predicted == expected)
    return n_correct / len(expected)

In [9]:
# Elapsed-time formatting helper
def format_time(elapsed):
    """Format a duration given in seconds as an ``h:mm:ss`` string.

    Args:
        elapsed: duration in seconds; it is rounded to the nearest whole
            second before formatting.

    Returns:
        The string form of the corresponding ``datetime.timedelta``.
    """
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

In [11]:
import time
import datetime

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Record the start time.
t0 = time.time()
bert_model.to(device)
# Switch the model to evaluation mode.
bert_model.eval()

# Running totals: correctly classified examples, batches seen, examples seen.
nb_correct = 0
nb_eval_steps, nb_eval_examples = 0, 0

num_batches = len(test_dataloader)

# Iterate over the test set one batch at a time.
for step, batch in enumerate(test_dataloader):
    # Progress report every 100 batches (skipping the very first one).
    if step % 100 == 0 and step != 0:
        elapsed = format_time(time.time() - t0)
        print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, num_batches, elapsed))

    # Move the batch tensors to the selected device and unpack them.
    b_input_ids, b_input_mask, b_labels = (t.to(device) for t in batch)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = bert_model(b_input_ids,
                             token_type_ids=None,
                             attention_mask=b_input_mask)

    # No labels are passed to the model, so the first output holds the logits.
    logits = outputs[0]

    # Bring logits and labels back to the CPU as NumPy arrays.
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()

    # Accumulate per-example results. Weighting each batch's accuracy by its
    # size keeps a short final batch from being over-counted, which a plain
    # mean of per-batch accuracies would do.
    batch_size = len(label_ids)
    nb_correct += flat_accuracy(logits, label_ids) * batch_size
    nb_eval_examples += batch_size
    nb_eval_steps += 1

# Overall accuracy across all evaluated examples.
eval_accuracy = nb_correct / nb_eval_examples

print("")
print("Accuracy: {0:.2f}".format(eval_accuracy))
print("Test took: {:}".format(format_time(time.time() - t0)))

  Batch   100  of  6,250.    Elapsed: 0:00:06.
  Batch   200  of  6,250.    Elapsed: 0:00:12.
  Batch   300  of  6,250.    Elapsed: 0:00:18.
  Batch   400  of  6,250.    Elapsed: 0:00:24.
  Batch   500  of  6,250.    Elapsed: 0:00:31.
  Batch   600  of  6,250.    Elapsed: 0:00:37.
  Batch   700  of  6,250.    Elapsed: 0:00:43.
  Batch   800  of  6,250.    Elapsed: 0:00:50.
  Batch   900  of  6,250.    Elapsed: 0:00:56.
  Batch 1,000  of  6,250.    Elapsed: 0:01:03.
  Batch 1,100  of  6,250.    Elapsed: 0:01:10.
  Batch 1,200  of  6,250.    Elapsed: 0:01:17.
  Batch 1,300  of  6,250.    Elapsed: 0:01:24.
  Batch 1,400  of  6,250.    Elapsed: 0:01:30.
  Batch 1,500  of  6,250.    Elapsed: 0:01:37.
  Batch 1,600  of  6,250.    Elapsed: 0:01:43.
  Batch 1,700  of  6,250.    Elapsed: 0:01:50.
  Batch 1,800  of  6,250.    Elapsed: 0:01:57.
  Batch 1,900  of  6,250.    Elapsed: 0:02:03.
  Batch 2,000  of  6,250.    Elapsed: 0:02:10.
  Batch 2,100  of  6,250.    Elapsed: 0:02:16.
  Batch 2,200