In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp
import numpy as np
from tqdm import tqdm, tqdm_notebook

In [2]:
from kobert.utils import get_tokenizer
from kobert.pytorch_kobert import get_pytorch_kobert_model

  return torch._C._cuda_getDeviceCount() > 0


In [3]:
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

> KoBERT 모델 및 vocab(어휘 사전) 불러오기

In [4]:
# Fetch the KoBERT PyTorch model and its vocabulary object. `vocab` is handed to
# the tokenizer wrapper and `bertmodel` to BERTClassifier in later cells.
bertmodel, vocab = get_pytorch_kobert_model()

using cached model
using cached model


> KoBERT의 tokenizer 불러오기

In [5]:
# Get the KoBERT tokenizer resource and wrap it together with `vocab`, so that
# calling `tok(text)` returns the list of subword pieces for `text`.
tokenizer = get_tokenizer()
# NOTE(review): lower=False presumably disables lower-casing of the input - confirm
# against the gluonnlp BERTSPTokenizer documentation.
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

using cached model


> 문장을 토큰화(tokenizing)하고 각 token을 id로 변환하기

In [6]:
# Hyperparameters. Only `max_len` is consumed by the cells visible here (it is
# passed as max_seq_length to BERTSentenceTransform); the remaining values are
# presumably read by training code further down - TODO confirm.
max_len = 64            # sequence length every sample is padded to
batch_size = 64
warmup_ratio = 0.1      # presumably the fraction of steps used for LR warmup - verify
num_epochs = 5
max_grad_norm = 1       # presumably the gradient-clipping threshold - verify
log_interval = 200      # presumably the number of batches between log lines - verify
learning_rate =  5e-5

In [7]:
# Sentence -> model-input converter built on the `tok` tokenizer. Each input is
# treated as a single sentence (pair=False) and padded to `max_len` (pad=True).
transform = nlp.data.BERTSentenceTransform(
    tok,
    max_seq_length=max_len,
    pad=True,
    pair=False,
)

In [8]:
# Sanity check on one sentence: show the subword pieces, then the transformed input.
sample = "오늘은 날씨가 좋습니다"
print(tok(sample))
# The transform receives a one-element list because pair=False (single sentence).
# The result is indexed by later cells as [0] token ids, [1] valid length and
# [2] segment ids.
sentences = transform([sample])
print(sentences)

['▁오늘', '은', '▁날씨', '가', '▁좋', '습니다']
(array([   2, 3419, 7086, 1408, 5330, 4204, 6701,    3,    1,    1,    1,
          1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
          1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
          1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
          1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
          1,    1,    1,    1,    1,    1,    1,    1,    1]), array(8), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))


> KoBERT를 위한 모델 구축

In [52]:
class BERTClassifier(nn.Module):
    """Wrapper around a BERT encoder that derives the attention mask from valid lengths.

    Despite its name, this class defines no classification head: ``forward``
    returns the first two outputs of the wrapped encoder unchanged.
    """

    def __init__(self, bert):
        """Store the encoder.

        Args:
            bert: Module or callable invoked with the keyword arguments
                ``input_ids``, ``token_type_ids`` and ``attention_mask``.
        """
        super(BERTClassifier, self).__init__()
        self.bert = bert

    def gen_attention_mask(self, token_ids, valid_length):
        """Build a float mask that is 1.0 on the first ``valid_length[i]`` positions of row ``i``.

        Args:
            token_ids: 2-D tensor ``(batch, seq_len)``; only its shape and device
                are used.
            valid_length: One length per row of ``token_ids``. May be a tensor,
                a NumPy array or a plain sequence of ints.

        Returns:
            float32 tensor of shape ``(batch, seq_len)`` on ``token_ids.device``.
        """
        device = token_ids.device
        # Column vector of lengths, so the comparison broadcasts across positions.
        lengths = torch.as_tensor(valid_length, device=device).reshape(-1, 1)
        positions = torch.arange(token_ids.size(1), device=device).unsqueeze(0)
        # One vectorised comparison instead of a Python loop over the batch.
        return (positions < lengths).float()

    def forward(self, token_ids, valid_length, segment_ids):
        """Run the encoder and return its first two outputs.

        Args:
            token_ids: 2-D tensor of token ids, ``(batch, seq_len)``.
            valid_length: Unpadded length of each row (see ``gen_attention_mask``).
            segment_ids: Tensor of segment ids; cast to ``long`` before use.

        Returns:
            Tuple ``(outputs[0], outputs[1])`` of the encoder result.
        """
        # The mask is already float32 and on token_ids.device, so no extra cast or move.
        attention_mask = self.gen_attention_mask(token_ids, valid_length)

        outputs = self.bert(
            input_ids=token_ids,
            token_type_ids=segment_ids.long(),
            attention_mask=attention_mask,
        )

        # Index instead of tuple-unpacking: a dict-like ModelOutput iterates over
        # its *keys*, so `a, b = outputs` would yield strings rather than tensors.
        # Integer indexing works for both plain tuples and ModelOutput.
        return (outputs[0], outputs[1])

In [53]:
# Wrap the loaded KoBERT model in the BERTClassifier module defined above.
model = BERTClassifier(bertmodel)

In [54]:
# Turn the single transformed sample into a batch of one and run it through the model.
token_ids = torch.LongTensor(sentences[0]).unsqueeze(0)     # (1, seq_len) token ids
valid_length = sentences[1].reshape(-1)                     # 1-element array with the unpadded length
segment_ids = torch.LongTensor(sentences[2]).unsqueeze(0)   # (1, seq_len) segment ids
output = model(
    token_ids=token_ids,
    valid_length=valid_length,
    segment_ids=segment_ids,
)

In [55]:
# Show the first of the two values returned by the model's forward pass.
print(output[0])

tensor([[[ 0.3828,  0.0767,  0.1623,  ..., -0.0174,  0.3067,  0.1588],
         [ 0.4204,  0.2880,  0.3486,  ...,  0.1764, -0.2831,  0.1037],
         [ 0.4132,  0.3160,  0.0253,  ..., -0.0364, -0.4628, -0.0692],
         ...,
         [ 0.2153, -0.2118,  0.5839,  ..., -0.3280, -0.0882, -0.2605],
         [ 0.2153, -0.2118,  0.5839,  ..., -0.3280, -0.0882, -0.2605],
         [ 0.2153, -0.2118,  0.5839,  ..., -0.3280, -0.0882, -0.2605]]],
       grad_fn=<NativeLayerNormBackward>)


In [56]:
# Shape of the first returned value; the recorded run printed torch.Size([1, 64, 768]).
print(output[0].shape)

torch.Size([1, 64, 768])
