In [None]:
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import random
import json
import os

In [None]:
from __future__ import unicode_literals, print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

cpu


In [None]:
from torch.utils.data import TensorDataset # 텐서데이터셋
from torch.utils.data import DataLoader # 데이터로더

In [None]:
from google.colab import drive
# Mount Google Drive into the Colab runtime; the data paths used later in this
# notebook point below this mount.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def plot_graphs(history, string):
  """Plot a metric and its validation counterpart against the epoch index.

  Args:
    history: object whose ``history`` attribute maps metric names to
      per-epoch value sequences (looks like a Keras ``History`` -- nothing in
      this notebook produces one, so confirm before use).
    string: metric name; ``'val_' + string`` must also be present.
  """
  validation_key = 'val_' + string
  metrics = history.history

  plt.plot(metrics[string])
  plt.plot(metrics[validation_key], '')
  plt.xlabel('Epoch')
  plt.ylabel(string)
  plt.legend([string, validation_key])
  plt.show()

In [None]:
# Directories (relative to the working directory) for inputs and outputs.
PATH = "drive/MyDrive/Implementation/Attention/"
OUTPUT_PATH = "drive/MyDrive/Implementation/Attention/"

# File names of the index arrays and of the vocabulary/config JSON.
TRAIN_INPUT, TRAIN_OUTPUT, TRAIN_TARGET = (
  'train_input.npy',
  'train_output.npy',
  'train_target.npy',
)
DATA_CONFIGS = 'data_configs.json'

In [None]:
# Load the three index arrays. Passing a path lets np.load open and close the
# file itself instead of leaking a handle from a bare open().
idx_input = np.load(PATH + TRAIN_INPUT)
idx_output = np.load(PATH + TRAIN_OUTPUT)
idx_target = np.load(PATH + TRAIN_TARGET)

# Vocabulary / config metadata. JSON is UTF-8 by specification, so read it
# explicitly as UTF-8 rather than with the locale default, and close the file.
with open(PATH + DATA_CONFIGS, 'r', encoding='utf-8') as config_file:
  config = json.load(config_file)

In [None]:
# Seed every random number generator this notebook draws from:
#   - `random`  : the teacher-forcing coin flip in seq2seq.forward
#   - NumPy     : kept in sync for any NumPy-based sampling
#   - torch CPU : weight initialisation and DataLoader shuffling
#   - torch CUDA: GPU-side generators (a no-op when CUDA is unavailable)
SEED_NUM = 777
random.seed(SEED_NUM)
np.random.seed(SEED_NUM)
torch.manual_seed(SEED_NUM)
torch.cuda.manual_seed(SEED_NUM)

In [None]:
# Sanity check: the three arrays should contain the same number of examples.
print(*map(len, (idx_input, idx_output, idx_target)))

11823 11823 11823


In [None]:
class Encoder(nn.Module):
  """Embed source token ids and run them through a stacked, batch-first GRU."""

  def __init__ (self, input_dim, hidden_dim, embed_dim, num_layers):
    """
    Args:
      input_dim: number of rows in the embedding table.
      hidden_dim: GRU hidden size.
      embed_dim: embedding width (also the GRU input size).
      num_layers: number of stacked GRU layers.
    """
    super().__init__()
    self.input_dim, self.embed_dim = input_dim, embed_dim
    self.hidden_dim, self.num_layers = hidden_dim, num_layers

    self.embedding = nn.Embedding(num_embeddings=input_dim, embedding_dim=embed_dim)
    self.gru = nn.GRU(
      input_size=embed_dim,
      hidden_size=hidden_dim,
      num_layers=num_layers,
      batch_first=True,
    )

  def forward(self, src):
    """Return the GRU's ``(output, hidden)`` pair for the embedded ``src`` ids."""
    return self.gru(self.embedding(src))

In [None]:
class Decoder(nn.Module):
  """Single-step GRU decoder that emits unnormalised vocabulary scores."""

  def __init__(self, output_dim, hidden_dim, embed_dim, num_layers):
    """
    Args:
      output_dim: size of the embedding table and of the output projection.
      hidden_dim: GRU hidden size.
      embed_dim: embedding width (also the GRU input size).
      num_layers: number of stacked GRU layers.
    """
    super().__init__()

    self.output_dim, self.num_layers = output_dim, num_layers
    self.embed_dim, self.hidden_dim = embed_dim, hidden_dim

    self.embedding = nn.Embedding(output_dim, embed_dim)
    self.gru = nn.GRU(embed_dim, hidden_dim, num_layers=num_layers, batch_first=True)
    # No softmax layer here: forward returns the raw output of the linear layer.
    self.out = nn.Linear(hidden_dim, output_dim)

  def forward(self, input, hidden):
    """Decode one time step.

    Args:
      input: token ids for the current step; flattened to one id per row.
      hidden: GRU hidden state carried over from the previous step.

    Returns:
      ``(prediction, hidden)``: the linear layer's scores for the step and the
      updated hidden state.
    """
    # One token per row with a length-1 sequence axis (for a 1-D input this is
    # the same as unsqueeze(1)).
    step_tokens = input.view(-1, 1)
    gru_out, hidden = self.gru(self.embedding(step_tokens), hidden)
    return self.out(gru_out), hidden

In [None]:
class seq2seq(nn.Module):
  """Encoder-decoder wrapper that decodes step by step with teacher forcing."""

  def __init__(self, encoder, decoder, device):
    """
    Args:
      encoder: module called as ``encoder(src)`` returning ``(output, hidden)``.
      decoder: module called as ``decoder(tokens, hidden)`` returning
        ``(scores, hidden)``; must expose ``output_dim``.
      device: device on which the output buffer is allocated.
    """
    super().__init__()

    self.encoder = encoder
    self.decoder = decoder
    self.device = device

  def forward(self, input_data, output_data, taget_data, teacher_forcing_ratio=0.5):
    """Decode one score vector per target position.

    Args:
      input_data: source token ids, indexed as (batch, source position).
      output_data: decoder-side token ids; only column 0 is read, as the first
        decoder input.
      taget_data: target token ids (parameter name kept, typo included, for
        backward compatibility); column ``t`` is the next decoder input when
        teacher forcing is active at step ``t``.
      teacher_forcing_ratio: probability, drawn once per time step for the
        whole batch, of feeding the target token instead of the model's own
        prediction.

    Returns:
      Tensor of shape (batch, target length, decoder.output_dim) holding the
      decoder scores for every step.
    """
    batch_size = input_data.size(0)
    # The number of decoding steps is defined by the target, not the source;
    # using the source length breaks whenever the two lengths differ.
    target_length = taget_data.size(1)
    voca_size = self.decoder.output_dim

    # Allocate directly on the device instead of creating on CPU and copying.
    outputs = torch.zeros(batch_size, target_length, voca_size, device=self.device)

    # Only the encoder's final hidden state is used; it seeds the decoder.
    _, decoder_hidden = self.encoder(input_data)

    decoder_input = output_data[:, 0]

    # Gradients reach the decoder through `outputs` and the hidden state.
    # argmax yields integer indices, so nothing flows through a fed-back token.
    for t in range(target_length):
      decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
      outputs[:, t, :] = decoder_output.squeeze(1)
      teacher_force = random.random() < teacher_forcing_ratio
      top_1 = decoder_output.argmax(2)
      decoder_input = taget_data[:, t] if teacher_force else top_1[:, 0]

    return outputs

In [None]:
def Model(model, input_tensor, output_tensor, target_tensor, model_optimizer, criterion):
  """Run one optimisation step on a single batch and return its loss.

  Args:
    model: callable as ``model(input_tensor, output_tensor, target_tensor)``
      returning scores whose last axis is the class axis.
    input_tensor: source token ids for the batch.
    output_tensor: decoder-input token ids for the batch.
    target_tensor: integer class ids, one per leading position of the scores.
    model_optimizer: optimizer over the model's parameters.
    criterion: loss called as ``criterion(scores, one_hot_targets)`` with both
      arguments flattened to (positions, classes).

  Returns:
    The batch loss as a Python float.
  """
  model_optimizer.zero_grad()

  output = model(input_tensor, output_tensor, target_tensor)

  # Take the class count and device from the model output itself so this
  # function does not depend on notebook-level `voca_size` / `device` globals.
  num_classes = output.shape[-1]
  output_ = output.reshape(-1, num_classes)

  # One-hot float targets, built on the logits' device (no CPU round trip).
  target_ = F.one_hot(target_tensor, num_classes=num_classes)
  target_ = target_.reshape(-1, num_classes).to(device=output_.device, dtype=torch.float32)

  loss = criterion(output_, target_)

  loss.backward()
  model_optimizer.step()

  return loss.item()

In [None]:
def TrainModel(model, train_data, epoch=1000, lr=0.001):
  """Train ``model`` with Adam and cross-entropy, printing the mean loss per epoch.

  Args:
    model: module to train; put into training mode and updated in place.
    train_data: sized iterable yielding ``(input, output, target)`` batches.
    epoch: number of passes over ``train_data``.
    lr: Adam learning rate (defaults to the previously hard-coded 0.001).

  Returns:
    The same ``model`` object after training.
  """
  optimizer = optim.Adam(model.parameters(), lr=lr)
  criterion = nn.CrossEntropyLoss()

  model.train()

  num_batches = len(train_data)

  for epoch_idx in range(epoch):
    total_loss = 0
    for batch_input, batch_output, batch_target in tqdm(train_data):
      total_loss += Model(model, batch_input, batch_output, batch_target, optimizer, criterion)

    # An empty loader has no mean loss; report NaN instead of dividing by zero.
    mean_loss = total_loss / num_batches if num_batches else float('nan')
    print('iteration :%d\ntrain_loss : %.4f' % (epoch_idx, mean_loss))

  return model

In [None]:
# Training hyper-parameters.
EPOCH, BATCH_SIZE = 50, 64
UNITS, EMBEDDING_DIM = 128, 128  # GRU hidden size / embedding width
NUM_LAYERS = 2
# Settings that are currently disabled:
# MODEL_NAME = 'seq2seq_kr'
# MAX_SEQUENCE = 20
# VALIDATION_SPLIT = 0.2

# Vocabulary metadata read from the loaded config.
word2idx, idx2word = config['word2idx'], config['idx2word']
std_idx, end_idx = config['std_symbol'], config['end_symbol']
voca_size = config['voca_size']

In [None]:
# The first NUM_TEST_EXAMPLES rows are held out for testing; the rest train.
NUM_TEST_EXAMPLES = 1823


def _as_long_on_device(array):
  """Convert an index array to a LongTensor on the selected device."""
  return torch.LongTensor(array).to(device)


test_input, test_output, test_target = (
  _as_long_on_device(array[:NUM_TEST_EXAMPLES])
  for array in (idx_input, idx_output, idx_target)
)
train_input, train_output, train_target = (
  _as_long_on_device(array[NUM_TEST_EXAMPLES:])
  for array in (idx_input, idx_output, idx_target)
)

In [None]:
# Bundle the aligned (input, output, target) tensors and serve them in
# shuffled mini-batches.
dataset = TensorDataset(train_input, train_output, train_target)
train = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
# Place the sub-modules on the device selected at the top of the notebook.
# A hard-coded .cuda() raises on a CPU-only runtime.
enc = Encoder(input_dim=voca_size, hidden_dim=UNITS, embed_dim=EMBEDDING_DIM, num_layers=NUM_LAYERS).to(device)
dec = Decoder(output_dim=voca_size, hidden_dim=UNITS, embed_dim=EMBEDDING_DIM, num_layers=NUM_LAYERS).to(device)

In [None]:
# Build the full model on the selected device (works on CPU and GPU alike,
# unlike a hard-coded .cuda()) and show both sub-modules.
model = seq2seq(enc, dec, device).to(device)
print(enc)
print(dec)

In [None]:
# TrainModel returns the model it was given. Bind the result to `model` rather
# than `Model`, which is the per-batch step function TrainModel calls;
# overwriting it would make this cell fail on a second run.
model = TrainModel(model, train, epoch=EPOCH)

In [None]:
def evaluation(model, input_data, output_data, target_data, end_token=2):
  """Greedy-decode a batch and return the predicted token ids.

  The model is switched to eval mode (and left there) and called without
  teacher forcing and without gradient tracking.

  Args:
    model: callable as ``model(input, output, target, teacher_forcing_ratio=0)``
      returning scores indexed as (batch, position, class).
    input_data: source token ids.
    output_data: decoder-input token ids.
    target_data: target token ids, forwarded to the model unchanged.
    end_token: id that terminates a sentence (default 2, the value that was
      previously hard-coded).

  Returns:
    A flat list of ints: for each sample in batch order, its predicted tokens
    up to and including the first ``end_token`` (or all positions if it never
    appears).
  """
  model.eval()
  with torch.no_grad():
    output = model(input_data, output_data, target_data, teacher_forcing_ratio=0)
    # Best class per (sample, position), as nested Python lists.
    predicted = output.argmax(dim=2).tolist()

  decoder_words = []
  for sentence in predicted:
    # Read each sample's own row; indexing across the whole batch fails as soon
    # as the batch holds more than one sample.
    for token in sentence:
      decoder_words.append(token)
      if token == end_token:
        break

  return decoder_words

In [None]:
def idxtoword(sentence):
  """Join the words for a sequence of scalar tensors into one string.

  Each element's ``.item()`` value is looked up in ``idx2word`` under its
  string form; the words are separated by single spaces.
  """
  return " ".join(idx2word[str(token.item())] for token in sentence)

In [None]:
def pre_idxtoword(sentence):
  """Join the words for a sequence of plain indices into one string.

  Each element is looked up in ``idx2word`` under its ``str()`` form; the
  words are separated by single spaces.
  """
  return " ".join(idx2word[str(index)] for index in sentence)

In [None]:
def evaluationRandomly(model, input_data, output_data, target_data):
  """Print each example's input, decoder input, target and model prediction.

  Args:
    model: model forwarded to ``evaluation``.
    input_data: batch of source token ids, iterated example by example.
    output_data: batch of decoder-input token ids, aligned with ``input_data``.
    target_data: batch of target token ids, aligned with ``input_data``.
  """
  for i, j, k in zip(input_data, output_data, target_data):
    print('input : {}'.format(idxtoword(i)))
    print('output : {}'.format(idxtoword(j)))
    print('target : {}'.format(idxtoword(k)))
    # Decode only the current example (with its batch axis restored), not the
    # whole batch again on every iteration.
    output = evaluation(model, i.unsqueeze(0), j.unsqueeze(0), k.unsqueeze(0))
    print('predicted : {}'.format(pre_idxtoword(output)))

In [None]:
# Training example: the data must have shape [1, 20] (original author's note),
# so take a length-1 slice, which keeps the leading batch axis.
idx = 123
train_input_, train_output_, train_target_ = (
  tensor[idx:idx+1] for tensor in (train_input, train_output, train_target)
)

In [None]:
# Test example: the data must have shape [1, 20] (original author's note),
# so take a length-1 slice, which keeps the leading batch axis.
ind = 321
test_input_, test_output_, test_target_ = (
  tensor[ind:ind+1] for tensor in (test_input, test_output, test_target)
)

In [None]:
# Check the result on the training example.
evaluationRandomly(
  model,
  input_data=train_input_,
  output_data=train_output_,
  target_data=train_target_,
)

In [None]:
# Check the result on the held-out example.
# Observation: the model gives odd answers to questions it has not seen before.
evaluationRandomly(
  model,
  input_data=test_input_,
  output_data=test_output_,
  target_data=test_target_,
)