In [1]:
import sys
import glob
import os
import itertools
import random
import collections
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn

import tensorflow as tf

from transformers import BertTokenizer
from transformers import BertForSequenceClassification, AdamW, BertConfig
from transformers import get_linear_schedule_with_warmup
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from tensorflow.keras.preprocessing.sequence import pad_sequences as pad_sequences_keras
from torch.nn.utils.rnn import pad_sequence as pad_sequences_torch
from sklearn.model_selection import train_test_split

import random
import time
import datetime

In [2]:
train = pd.read_csv("./nsmc/ratings_train.txt", sep='\t')
test = pd.read_csv("./nsmc/ratings_test.txt", sep='\t')

# Train Data

In [3]:
# 리뷰 문장 추출
sentences = train['document']

# 라벨 추출
labels = train['label'].values

In [4]:
# BERT의 토크나이저로 문장을 토큰으로 분리
tokenizer = BertTokenizer.from_pretrained(
    'bert-base-multilingual-cased', do_lower_case=False)

In [14]:
%%time
# BERT의 입력 형식에 맞게 변환
sentences_ = ["[CLS] " + str(sentence) + " [SEP]" for sentence in sentences]
# BERT의 토크나이저로 문장을 토큰으로 분리
tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences_]
# 토큰을 숫자 인덱스로 변환
input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]

CPU times: user 21.5 s, sys: 279 ms, total: 21.8 s
Wall time: 21.8 s


In [15]:
%%time
# 문장을 MAX_LEN 길이에 맞게 자르고, 모자란 부분을 패딩 0으로 채움
# keras version
input_ids_1 = pad_sequences_keras(
    input_ids, maxlen=128, dtype="long", truncating="post", padding="post")

CPU times: user 681 ms, sys: 59.6 ms, total: 740 ms
Wall time: 738 ms


In [16]:
%%time
# 문장을 MAX_LEN 길이에 맞게 자르고, 모자란 부분을 패딩 0으로 채움
# torch version
input_ids_2 = pad_sequences_torch(
    list(map(torch.LongTensor, input_ids))).T[:, :128]

CPU times: user 3.29 s, sys: 200 ms, total: 3.49 s
Wall time: 1.93 s


In [37]:
input_ids = input_ids_1
del input_ids_1, input_ids_2

In [76]:
print(input_ids)

[[  101  8911   100 ...     0     0     0]
 [  101   144 11490 ...     0     0     0]
 [  101  9303 21711 ...     0     0     0]
 ...
 [  101  8924 67527 ...     0     0     0]
 [  101  9666 14423 ...     0     0     0]
 [  101  9246 32537 ...     0     0     0]]


In [78]:
input_ids.shape

(50000, 128)

In [44]:
# 어텐션 마스크 초기화
# attention_masks = []
# 어텐션 마스크를 패딩이 아니면 1, 패딩이면 0으로 설정
# 패딩 부분은 BERT 모델에서 어텐션을 수행하지 않아 속도 향상
# for seq in input_ids:
#     seq_mask = [float(i>0) for i in seq]
#     attention_masks.append(seq_mask)
attention_masks = (input_ids != 0).astype(np.int)

In [46]:
# 훈련셋과 검증셋으로 분리
train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(
    input_ids,
    labels, 
    random_state=2018, 
    test_size=0.1
)

# 어텐션 마스크를 훈련셋과 검증셋으로 분리
train_masks, validation_masks, _, _ = train_test_split(
    attention_masks, 
    input_ids,
    random_state=2018, 
    test_size=0.1
)

# 데이터를 파이토치의 텐서로 변환
train_inputs = torch.LongTensor(train_inputs)
train_labels = torch.LongTensor(train_labels)
train_masks = torch.LongTensor(train_masks)
validation_inputs = torch.LongTensor(validation_inputs)
validation_labels = torch.LongTensor(validation_labels)
validation_masks = torch.LongTensor(validation_masks)

In [47]:
# 배치 사이즈
batch_size = 32

# 파이토치의 DataLoader로 입력, 마스크, 라벨을 묶어 데이터 설정
# 학습시 배치 사이즈 만큼 데이터를 가져옴
train_data = TensorDataset(
    train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(
    train_data, sampler=train_sampler, batch_size=batch_size)

validation_data = TensorDataset(
    validation_inputs, validation_masks, validation_labels)
validation_sampler = SequentialSampler(
    validation_data)
validation_dataloader = DataLoader(
    validation_data, sampler=validation_sampler, batch_size=batch_size)

# Test Data

In [48]:
# 리뷰 문장 추출
sentences = test['document']

# 라벨 추출
labels = test['label'].values

In [50]:
%%time

# BERT의 입력 형식에 맞게 변환
sentences_ = ["[CLS] " + str(sentence) + " [SEP]" for sentence in sentences]
# BERT의 토크나이저로 문장을 토큰으로 분리
tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences_]
# 토큰을 숫자 인덱스로 변환
input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]
# 문장을 MAX_LEN 길이에 맞게 자르고, 모자란 부분을 패딩 0으로 채움
input_ids = pad_sequences_keras(
    input_ids, maxlen=128, dtype="long", truncating="post", padding="post")

# 어텐션 마스크 초기화
# attention_masks = []
# 어텐션 마스크를 패딩이 아니면 1, 패딩이면 0으로 설정
# 패딩 부분은 BERT 모델에서 어텐션을 수행하지 않아 속도 향상
# for seq in input_ids:
#     seq_mask = [float(i>0) for i in seq]
#     attention_masks.append(seq_mask)
attention_masks = (input_ids != 0).astype(np.int)

CPU times: user 7.21 s, sys: 132 ms, total: 7.34 s
Wall time: 7.34 s


In [51]:
# 데이터를 파이토치의 텐서로 변환
test_inputs = torch.LongTensor(input_ids)
test_labels = torch.LongTensor(labels)
test_masks = torch.LongTensor(attention_masks)

In [65]:
# 배치 사이즈
batch_size = 32

# 파이토치의 DataLoader로 입력, 마스크, 라벨을 묶어 데이터 설정
# 학습시 배치 사이즈 만큼 데이터를 가져옴
test_data = TensorDataset(test_inputs, test_masks, test_labels)
test_sampler = RandomSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)

# Generate Model

In [52]:
# GPU 디바이스 이름 구함
device_name = tf.test.gpu_device_name()

# GPU 디바이스 이름 검사
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

SystemError: GPU device not found

In [1]:
# 디바이스 설정
if torch.cuda.is_available():    
    device = torch.device("cuda")
    print
    ('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")
    print('No GPU available, using the CPU instead.')

NameError: name 'torch' is not defined

In [2]:
# GPU 할당 변경하기
GPU_NUM = 0 # 원하는 GPU 번호 입력
device = torch.device(f'cuda:{GPU_NUM}' if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(device) # change allocation of current GPU
print ('Current cuda device ', torch.cuda.current_device()) # check

# Additional Infos
if device.type == 'cuda':
    print(torch.cuda.get_device_name(GPU_NUM))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(GPU_NUM)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_cached(GPU_NUM)/1024**3,1), 'GB')

NameError: name 'torch' is not defined

In [56]:
# 분류를 위한 BERT 모델 생성
model = BertForSequenceClassification.from_pretrained(
    "bert-base-multilingual-cased", num_labels=2)
model.cuda()

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=625.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=714314041.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model ch

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [61]:
# 옵티마이저 설정
optimizer = AdamW(
    model.parameters(),
    lr=2e-5, # 학습률
    eps=1e-8 # 0으로 나누는 것을 방지하기 위한 epsilon 값
)

# 에폭수
epochs = 4

# 총 훈련 스텝 : 배치반복 횟수 * 에폭
total_steps = len(train_dataloader) * epochs

# 학습률을 조금씩 감소시키는 스케줄러 생성
scheduler = get_linear_schedule_with_warmup(
    optimizer, 
    num_warmup_steps=0,
    num_training_steps=total_steps
)

# Train Model

In [62]:
# 정확도 계산 함수
def flat_accuracy(preds, labels):
    
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()

    return np.sum(pred_flat == labels_flat) / len(labels_flat)


# 시간 표시 함수
def format_time(elapsed):

    # 반올림
    elapsed_rounded = int(round((elapsed)))
    
    # hh:mm:ss으로 형태 변경
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [63]:
# 재현을 위해 랜덤시드 고정
seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# 그래디언트 초기화
model.zero_grad()

# 에폭만큼 반복
for epoch_i in range(0, epochs):
    
    # ========================================
    #               Training
    # ========================================
    
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # 시작 시간 설정
    t0 = time.time()

    # 로스 초기화
    total_loss = 0

    # 훈련모드로 변경
    model.train()
        
    # 데이터로더에서 배치만큼 반복하여 가져옴
    for step, batch in enumerate(train_dataloader):
        # 경과 정보 표시
        if step % 500 == 0 and not step == 0:
            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

        # 배치를 GPU에 넣음
        batch = tuple(t.to(device) for t in batch)
        
        # 배치에서 데이터 추출
        b_input_ids, b_input_mask, b_labels = batch

        # Forward 수행                
        outputs = model(b_input_ids, 
                        token_type_ids=None, 
                        attention_mask=b_input_mask, 
                        labels=b_labels)
        
        # 로스 구함
        loss = outputs[0]

        # 총 로스 계산
        total_loss += loss.item()

        # Backward 수행으로 그래디언트 계산
        loss.backward()

        # 그래디언트 클리핑
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # 그래디언트를 통해 가중치 파라미터 업데이트
        optimizer.step()

        # 스케줄러로 학습률 감소
        scheduler.step()

        # 그래디언트 초기화
        model.zero_grad()

    # 평균 로스 계산
    avg_train_loss = total_loss / len(train_dataloader)            

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epcoh took: {:}".format(format_time(time.time() - t0)))
        
    # ========================================
    #               Validation
    # ========================================

    print("")
    print("Running Validation...")

    #시작 시간 설정
    t0 = time.time()

    # 평가모드로 변경
    model.eval()

    # 변수 초기화
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0

    # 데이터로더에서 배치만큼 반복하여 가져옴
    for batch in validation_dataloader:
        # 배치를 GPU에 넣음
        batch = tuple(t.to(device) for t in batch)
        
        # 배치에서 데이터 추출
        b_input_ids, b_input_mask, b_labels = batch
        
        # 그래디언트 계산 안함
        with torch.no_grad():     
            # Forward 수행
            outputs = model(b_input_ids, 
                            token_type_ids=None, 
                            attention_mask=b_input_mask)
        
        # 로스 구함
        logits = outputs[0]

        # CPU로 데이터 이동
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        
        # 출력 로짓과 라벨을 비교하여 정확도 계산
        tmp_eval_accuracy = flat_accuracy(logits, label_ids)
        eval_accuracy += tmp_eval_accuracy
        nb_eval_steps += 1

    print("  Accuracy: {0:.2f}".format(eval_accuracy/nb_eval_steps))
    print("  Validation took: {:}".format(format_time(time.time() - t0)))

print("")
print("Training complete!")


Training...
  Batch   500  of  4,219.    Elapsed: 0:02:04.
  Batch 1,000  of  4,219.    Elapsed: 0:04:08.
  Batch 1,500  of  4,219.    Elapsed: 0:06:12.
  Batch 2,000  of  4,219.    Elapsed: 0:08:16.
  Batch 2,500  of  4,219.    Elapsed: 0:10:20.
  Batch 3,000  of  4,219.    Elapsed: 0:12:24.
  Batch 3,500  of  4,219.    Elapsed: 0:14:28.
  Batch 4,000  of  4,219.    Elapsed: 0:16:32.

  Average training loss: 0.38
  Training epcoh took: 0:17:26

Running Validation...
  Accuracy: 0.85
  Validation took: 0:00:34

Training...
  Batch   500  of  4,219.    Elapsed: 0:02:04.
  Batch 1,000  of  4,219.    Elapsed: 0:04:08.
  Batch 1,500  of  4,219.    Elapsed: 0:06:12.
  Batch 2,000  of  4,219.    Elapsed: 0:08:16.
  Batch 2,500  of  4,219.    Elapsed: 0:10:20.
  Batch 3,000  of  4,219.    Elapsed: 0:12:24.
  Batch 3,500  of  4,219.    Elapsed: 0:14:29.
  Batch 4,000  of  4,219.    Elapsed: 0:16:33.

  Average training loss: 0.29
  Training epcoh took: 0:17:27

Running Validation...
  Accura

In [66]:
#시작 시간 설정
t0 = time.time()

# 평가모드로 변경
model.eval()

# 변수 초기화
eval_loss, eval_accuracy = 0, 0
nb_eval_steps, nb_eval_examples = 0, 0

# 데이터로더에서 배치만큼 반복하여 가져옴
for step, batch in enumerate(test_dataloader):
    # 경과 정보 표시
    if step % 100 == 0 and not step == 0:
        elapsed = format_time(time.time() - t0)
        print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(test_dataloader), elapsed))

    # 배치를 GPU에 넣음
    batch = tuple(t.to(device) for t in batch)
    
    # 배치에서 데이터 추출
    b_input_ids, b_input_mask, b_labels = batch
    
    # 그래디언트 계산 안함
    with torch.no_grad():     
        # Forward 수행
        outputs = model(b_input_ids, 
                        token_type_ids=None, 
                        attention_mask=b_input_mask)
    
    # 로스 구함
    logits = outputs[0]

    # CPU로 데이터 이동
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()
    
    # 출력 로짓과 라벨을 비교하여 정확도 계산
    tmp_eval_accuracy = flat_accuracy(logits, label_ids)
    eval_accuracy += tmp_eval_accuracy
    nb_eval_steps += 1

print("")
print("Accuracy: {0:.2f}".format(eval_accuracy/nb_eval_steps))
print("Test took: {:}".format(format_time(time.time() - t0)))

  Batch   100  of  1,563.    Elapsed: 0:00:07.
  Batch   200  of  1,563.    Elapsed: 0:00:14.
  Batch   300  of  1,563.    Elapsed: 0:00:22.
  Batch   400  of  1,563.    Elapsed: 0:00:29.
  Batch   500  of  1,563.    Elapsed: 0:00:36.
  Batch   600  of  1,563.    Elapsed: 0:00:43.
  Batch   700  of  1,563.    Elapsed: 0:00:51.
  Batch   800  of  1,563.    Elapsed: 0:00:58.
  Batch   900  of  1,563.    Elapsed: 0:01:05.
  Batch 1,000  of  1,563.    Elapsed: 0:01:12.
  Batch 1,100  of  1,563.    Elapsed: 0:01:20.
  Batch 1,200  of  1,563.    Elapsed: 0:01:27.
  Batch 1,300  of  1,563.    Elapsed: 0:01:34.
  Batch 1,400  of  1,563.    Elapsed: 0:01:42.
  Batch 1,500  of  1,563.    Elapsed: 0:01:49.

Accuracy: 0.87
Test took: 0:01:53


# 새로운 문장 평가

In [70]:
# 입력 데이터 변환
def convert_input_data(sentences):

    # BERT의 토크나이저로 문장을 토큰으로 분리
    tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]

    # 입력 토큰의 최대 시퀀스 길이
    MAX_LEN = 128

    # 토큰을 숫자 인덱스로 변환
    input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]
    
    # 문장을 MAX_LEN 길이에 맞게 자르고, 모자란 부분을 패딩 0으로 채움
    input_ids = pad_sequences_keras(input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")

    # 어텐션 마스크 초기화
    attention_masks = []

    # 어텐션 마스크를 패딩이 아니면 1, 패딩이면 0으로 설정
    # 패딩 부분은 BERT 모델에서 어텐션을 수행하지 않아 속도 향상
    for seq in input_ids:
        seq_mask = [float(i>0) for i in seq]
        attention_masks.append(seq_mask)

    # 데이터를 파이토치의 텐서로 변환
    inputs = torch.LongTensor(input_ids)
    masks = torch.LongTensor(attention_masks)

    return inputs, masks

In [71]:
# 문장 테스트
def test_sentences(sentences):

    # 평가모드로 변경
    model.eval()

    # 문장을 입력 데이터로 변환
    inputs, masks = convert_input_data(sentences)

    # 데이터를 GPU에 넣음
    b_input_ids = inputs.to(device)
    b_input_mask = masks.to(device)
            
    # 그래디언트 계산 안함
    with torch.no_grad():     
        # Forward 수행
        outputs = model(b_input_ids, 
                        token_type_ids=None, 
                        attention_mask=b_input_mask)

    # 로스 구함
    logits = outputs[0]

    # CPU로 데이터 이동
    logits = logits.detach().cpu().numpy()

    return logits

In [74]:
text = '연기는 조금 별로였지만 재미는 있었다!!'
logits = test_sentences([text])

print(logits)
print(np.argmax(logits))

[[-2.9139855  3.0741386]]
1


In [73]:
logits = test_sentences(['주연배우가 아깝다. 총체적 난국...'])

print(logits)
print(np.argmax(logits))

[[ 3.19841   -3.2974944]]
0


In [79]:
def predict(text):
    logits = test_sentences([text])
    label = np.argmax(logits)
    return '긍정' if label == 1 else '부정'

In [85]:
predict('이 상품 괜찮네요!')

'긍정'

In [87]:
torch.save(model.state_dict(), 'bert200724.pt')

In [92]:
import pickle

with open('bertconfig200724.pkl', 'wb') as f:
    pickle.dump(model.config, f, protocol=pickle.HIGHEST_PROTOCOL)

In [93]:
with open('bertconfig200724.pkl', 'rb') as f:
    config = pickle.load(f)

In [95]:
model_ = BertForSequenceClassification(config)

In [97]:
model_.load_state_dict(torch.load('bert200724.pt'))
model_.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [99]:
# 문장 테스트
def test_sentences(sentences, model):

    # 평가모드로 변경
    model.eval()

    # 문장을 입력 데이터로 변환
    inputs, masks = convert_input_data(sentences)

    # 데이터를 GPU에 넣음
    b_input_ids = inputs.to(device)
    b_input_mask = masks.to(device)
            
    # 그래디언트 계산 안함
    with torch.no_grad():     
        # Forward 수행
        outputs = model(b_input_ids, 
                        token_type_ids=None, 
                        attention_mask=b_input_mask)

    # 로스 구함
    logits = outputs[0]

    # CPU로 데이터 이동
    logits = logits.detach().cpu().numpy()

    return logits

def predict(text, model):
    logits = test_sentences([text], model)
    label = np.argmax(logits)
    return '긍정' if label == 1 else '부정'

In [101]:
model_.cuda()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [102]:
predict('오 이 상품 괜찮네요', model_)

'긍정'

In [104]:
torch.__version__

'1.5.1+cu92'