In [213]:
import torch
from transformers import PreTrainedTokenizerFast, GPT2LMHeadModel
tokenizer = PreTrainedTokenizerFast.from_pretrained("skt/kogpt2-base-v2",
                                                    bost_token='</s>', eos_token='</s>', unk_token='<unk>',
                                                    pad_token='<pad>', mask_token='<mask>')
model = GPT2LMHeadModel.from_pretrained('skt/kogpt2-base-v2')

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'GPT2Tokenizer'. 
The class this function is called from is 'PreTrainedTokenizerFast'.


In [None]:
inputs = tokenizer('안녕하세요')
input_ids = torch.LongTensor(inputs['input_ids'])
model(input_ids=input_ids)[0]

tensor([[-5.9339, -5.3134, -4.9179,  ..., -1.1562, -3.8732, -2.3281],
        [-4.3314, -3.9102, -5.0052,  ...,  0.2313, -3.9817, -2.9163],
        [-4.5099, -3.4394, -4.6740,  ..., -1.8925, -4.0105, -5.9007],
        [-5.0462, -4.4081, -5.4466,  ..., -2.0748, -5.0898, -4.6430]],
       grad_fn=<MmBackward0>)

In [None]:
text = '근육이 커지기 위해서는'
inputs_ids = tokenizer.encode(text, return_tensors='pt')
gen_ids = model.generate(inputs_ids, max_length=100, repetition_penalty=2.0,
              pad_token_id=tokenizer.pad_token_id,
              eos_token_id=tokenizer.eos_token_id,
              bos_token_id=tokenizer.bos_token_id,
              use_cache=True)
tokenizer.decode(gen_ids[0])


'근육이 커지기 위해서는 무엇보다 규칙적인 생활습관이 중요하다.\n특히, 아침식사는 단백질과 비타민이 풍부한 과일과 채소를 많이 섭취하는 것이 좋다.\n또한 하루 30분 이상 충분한 수면을 취하는 것도 도움이 된다.\n아침 식사를 거르지 않고 규칙적으로 운동을 하면 혈액순환에 도움을 줄 뿐만 아니라 신진대사를 촉진해 체내 노폐물을 배출하고 혈압을 낮춰준다.\n운동은 하루에 10분 정도만 하는 게 좋으며 운동 후에는 반드시 스트레칭을 통해 근육량을 늘리고 유연성을 높여야 한다.\n운동'

In [214]:
import yaml

with open("./config/config.yaml") as file:
    config = yaml.safe_load(file)

train_path = config['path']['train']
maxlen = config['params']['max_length']
batch_size = config['params']['batch_size']

In [215]:
import numpy as np
import pandas as pd
import tensorflow as tf

df = pd.read_csv(train_path)

In [248]:
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

train_dataset = np.array([tokenizer.encode(text, max_length=maxlen, padding='max_length', truncation=True, return_tensors='np') for text in df['text']])
label = to_categorical(LabelEncoder().fit_transform(df['label']))

In [226]:
vocab = [0] * 51200
for item in list(tokenizer.vocab.items()):
    vocab[item[1]] = item[0]

In [227]:
embedding = model.transformer.wte.weight.detach().numpy()

In [228]:
model_cls = tf.keras.Sequential([
    tf.keras.layers.Embedding(len(vocab), 768, weights=[embedding], input_length=maxlen, trainable=False),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(8, activation='softmax')
])

model_cls.compile(loss='categorical_crossentropy',
                 optimizer='adam',
                 metrics=['accuracy'])
model_cls.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, 512, 768)          39321600  
_________________________________________________________________
dense_10 (Dense)             (None, 512, 128)          98432     
_________________________________________________________________
dense_11 (Dense)             (None, 512, 8)            1032      
Total params: 39,421,064
Trainable params: 99,464
Non-trainable params: 39,321,600
_________________________________________________________________


In [242]:
len(train_dataset), len(label)

(2196, 2196)

In [249]:
model_cls.fit(train_dataset, label, epochs=100, batch_size=4)

Epoch 1/100


ValueError: in user code:

    /home/jone/.conda/envs/nlp/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /home/jone/.conda/envs/nlp/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/jone/.conda/envs/nlp/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/jone/.conda/envs/nlp/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/jone/.conda/envs/nlp/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/jone/.conda/envs/nlp/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
        outputs = model.train_step(data)
    /home/jone/.conda/envs/nlp/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:755 train_step
        loss = self.compiled_loss(
    /home/jone/.conda/envs/nlp/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /home/jone/.conda/envs/nlp/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:152 __call__
        losses = call_fn(y_true, y_pred)
    /home/jone/.conda/envs/nlp/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:256 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /home/jone/.conda/envs/nlp/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /home/jone/.conda/envs/nlp/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:1537 categorical_crossentropy
        return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    /home/jone/.conda/envs/nlp/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /home/jone/.conda/envs/nlp/lib/python3.8/site-packages/tensorflow/python/keras/backend.py:4833 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    /home/jone/.conda/envs/nlp/lib/python3.8/site-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (4, 8) and (4, 1, 512, 8) are incompatible
