In [1]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem so that files stored there
# (project code, saved checkpoint) can be reached under /content/drive.
drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import sys

# Make the project's helper modules on Google Drive importable.
# The path is absolute and uses the same `/content/drive/MyDrive` root as the
# checkpoint path defined later in the notebook, so it does not depend on the
# kernel's current working directory.
PROJECT_DIR = '/content/drive/MyDrive/Colab Notebooks/KoGPT_Wellness/'

# Guard against duplicate entries when the cell is re-run.
if PROJECT_DIR not in sys.path:
  sys.path.append(PROJECT_DIR)

In [3]:
!pip install kogpt2-transformers==0.3.0
!pip install transformers==3.0.2
!pip install torch
!pip install tokenizers==0.8.1rc1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [4]:
import os
import numpy as np
import torch
import torch.nn as nn
from kogpt2_transformers import get_kogpt2_tokenizer, get_kogpt2_model

In [5]:
# Location of the fine-tuned checkpoint on the mounted Google Drive.
save_path = "/content/drive/MyDrive/Colab Notebooks/KoGPT_Wellness/kogpt-wellness-autoregressive.pth"

# Pick the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
  ctx = "cuda"
else:
  ctx = "cpu"
device = torch.device(ctx)

# Load the saved checkpoint, remapping its tensors onto the selected device.
checkpoint = torch.load(save_path, map_location=device)

# 1. Greedy Search

In [6]:
class KoGPT2Dialogue(nn.Module):
  """Thin wrapper around the KoGPT2 language model.

  ``generate`` only forwards ``max_length``; every other decoding option is
  left at the wrapped model's defaults (the notebook uses this variant for its
  greedy-search section).
  """

  def __init__(self):
    super().__init__()
    # The attribute name becomes the prefix of the state-dict keys.
    self.kogpt2 = get_kogpt2_model()

  def generate(self, input_ids, max_length=50):
    """Generate a continuation of ``input_ids``, capped at ``max_length`` tokens."""
    return self.kogpt2.generate(input_ids, max_length=max_length)

  def forward(self, input, labels=None):
    """Run the wrapped model, passing ``labels`` through only when supplied."""
    if labels is None:
      return self.kogpt2(input)
    return self.kogpt2(input, labels=labels)

In [7]:
# Tokenizer shared by the interactive loop below.
tokenizer = get_kogpt2_tokenizer()

# Build the wrapper, restore the fine-tuned weights from the checkpoint, and
# switch the module to evaluation mode (eval() returns the module itself).
greedy = KoGPT2Dialogue()
greedy.load_state_dict(checkpoint['model_state_dict'])
greedy = greedy.eval()

In [8]:
# Interactive question/answer loop for the `greedy` model.
# The loop ends as soon as the typed text contains the keyword '종료'.
while True:
  sent = input('Question: ')

  # Check the exit keyword before doing any tokenization work.
  if '종료' in sent:
    break

  tokenized_indexs = tokenizer.encode(sent)

  # Prompt layout: <bos> + question tokens + <eos>, shaped as a batch of one.
  input_ids = torch.tensor([tokenizer.bos_token_id] + tokenized_indexs + [tokenizer.eos_token_id]).unsqueeze(0)
  sample_output = greedy.generate(input_ids=input_ids)

  # Strip the full prompt by its real length (including the trailing <eos>)
  # instead of relying on skip_special_tokens to hide a leftover prompt token.
  answer_ids = sample_output[0].tolist()[input_ids.shape[-1]:]

  print("Answer: " + tokenizer.decode(answer_ids, skip_special_tokens=True))
  print(100 * '-')

Question: 요즘 너무 우울해...




Answer: 우울할 때는 칭찬타임! 오늘의 잘한 일을 말해봐요.당신이 행복하다면 나도 기뻐요. 당신은 행복하죠. 당신에게 행복한 일이 더 많이 있었으면 좋겠어요
----------------------------------------------------------------------------------------------------
Question: 종료


# 2. Beam Search

In [10]:
class KoGPT2Dialogue(nn.Module):
  """Wrapper around the KoGPT2 language model whose ``generate`` uses beam search."""

  def __init__(self):
    super(KoGPT2Dialogue, self).__init__()
    # The attribute name becomes the prefix of the state-dict keys.
    self.kogpt2 = get_kogpt2_model()

  def generate(self,
               input_ids,
               num_beams = 5,
               max_length= 50,
               early_stopping=True
               ):
    """Generate a continuation of ``input_ids`` with beam search.

    Args:
      input_ids: prompt token ids handed to the wrapped model's ``generate``.
      num_beams: number of beams to keep during the search.
      max_length: maximum length of the generated sequence.
      early_stopping: forwarded to the wrapped ``generate``; previously this
        argument was accepted but silently dropped.

    Returns:
      Whatever the wrapped model's ``generate`` returns.
    """
    return self.kogpt2.generate(input_ids,
               num_beams = num_beams,
               max_length=max_length,
               early_stopping=early_stopping
              )

  def forward(self, input, labels = None):
    """Run the wrapped model, passing ``labels`` through only when supplied."""
    if labels is not None:
      outputs = self.kogpt2(input, labels=labels)
    else:
      outputs = self.kogpt2(input)

    return outputs

In [11]:
# Tokenizer shared by the interactive loop below.
tokenizer = get_kogpt2_tokenizer()

# Build the wrapper, restore the fine-tuned weights from the checkpoint, and
# switch the module to evaluation mode (eval() returns the module itself).
beam = KoGPT2Dialogue()
beam.load_state_dict(checkpoint['model_state_dict'])
beam = beam.eval()

In [12]:
# Interactive question/answer loop for the `beam` model.
# The loop ends as soon as the typed text contains the keyword '종료'.
while True:
  sent = input('Question: ')

  # Check the exit keyword before doing any tokenization work.
  if '종료' in sent:
    break

  tokenized_indexs = tokenizer.encode(sent)

  # Prompt layout: <bos> + question tokens + <eos>, shaped as a batch of one.
  input_ids = torch.tensor([tokenizer.bos_token_id] + tokenized_indexs + [tokenizer.eos_token_id]).unsqueeze(0)
  sample_output = beam.generate(input_ids=input_ids)

  # Strip the full prompt by its real length (including the trailing <eos>)
  # instead of relying on skip_special_tokens to hide a leftover prompt token.
  answer_ids = sample_output[0].tolist()[input_ids.shape[-1]:]

  print("Answer: " + tokenizer.decode(answer_ids, skip_special_tokens=True))
  print(100 * '-')

Question: 요즘 너무 우울해...




Answer: 우울함은 저절로 없어지기도 하지만 그렇지 않을 때도 있어요. 그럴 때는 전문가에게 도움을 요청하는 것도 좋은 방법이에요. 전문가와 상담을 받아보신 적이 있나요? 힘이 들 땐 도움을 받는 게 좋아요.
----------------------------------------------------------------------------------------------------
Question: 종료


# 3. Sampling 방식

In [16]:
class KoGPT2Dialogue(nn.Module):
  """Wrapper around the KoGPT2 language model whose ``generate`` samples tokens."""

  def __init__(self):
    super().__init__()
    # The attribute name becomes the prefix of the state-dict keys.
    self.kogpt2 = get_kogpt2_model()

  def generate(self,
               input_ids,
               do_sample=True,
               max_length= 50,
               top_p=0.96,
               top_k=50,
               temperature= 0.8,
               early_stopping= True,
               ):
    """Generate a continuation of ``input_ids`` using the given sampling options.

    Every keyword argument is forwarded unchanged to the wrapped model's
    ``generate`` method, and its result is returned as-is.
    """
    sampling_options = dict(
      do_sample=do_sample,
      max_length=max_length,
      top_p=top_p,
      top_k=top_k,
      temperature=temperature,
      early_stopping=early_stopping,
    )
    return self.kogpt2.generate(input_ids, **sampling_options)

  def forward(self, input, labels = None):
    """Run the wrapped model, passing ``labels`` through only when supplied."""
    if labels is None:
      return self.kogpt2(input)
    return self.kogpt2(input, labels=labels)

In [17]:
# Tokenizer shared by the interactive loop below.
tokenizer = get_kogpt2_tokenizer()

# Build the wrapper, restore the fine-tuned weights from the checkpoint, and
# switch the module to evaluation mode (eval() returns the module itself).
sample = KoGPT2Dialogue()
sample.load_state_dict(checkpoint['model_state_dict'])
sample = sample.eval()

In [18]:
# Interactive question/answer loop for the `sample` model.
# The loop ends as soon as the typed text contains the keyword '종료'.
while True:
  sent = input('Question: ')

  # Check the exit keyword before doing any tokenization work.
  if '종료' in sent:
    break

  tokenized_indexs = tokenizer.encode(sent)

  # Prompt layout: <bos> + question tokens + <eos>, shaped as a batch of one.
  input_ids = torch.tensor([tokenizer.bos_token_id] + tokenized_indexs + [tokenizer.eos_token_id]).unsqueeze(0)
  sample_output = sample.generate(input_ids=input_ids)

  # Strip the full prompt by its real length (including the trailing <eos>)
  # instead of relying on skip_special_tokens to hide a leftover prompt token.
  answer_ids = sample_output[0].tolist()[input_ids.shape[-1]:]

  print("Answer: " + tokenizer.decode(answer_ids, skip_special_tokens=True))
  print(100 * '-')

Question: 요즘 너무 우울해...




Answer: 이해해요. 아무 이유 없이 우울할 때가 있죠. 우울할 때는 칭찬타임! 오늘의 잘한 일을 말해봐요. 우울할 때는 칭찬타임! 오늘의 잘한 일을 말해봐요
----------------------------------------------------------------------------------------------------
Question: 종료
