In [6]:
import torch 
import torch.nn as nn 
import torch.optim as optim
import numpy as np 
import random

import matplotlib.pyplot as plt 

%matplotlib inline

# Seed every RNG this notebook draws from so that a fresh "Restart & Run All"
# reproduces the same run: torch drives the weight initialisation, and the
# stdlib `random` module drives the random.choice() sampling of training pairs.
SEED = 42
torch.manual_seed(SEED)
random.seed(SEED)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Toy parallel corpus: each entry is "<source sentence>\t<target sentence>".
# The separator is written as an explicit "\t" escape because preprocessing()
# splits every line on a tab; with plain spaces the split yields a single
# element and the target side of the pair is lost.
raw = [
    'I feel hungry.\t나는 배가 고프다',
    'Pytorch is very easy.\t파이토치는 매우 쉽다.',
    'Pytorch is a framework for depp learning.\t파이토치는 딥러닝을 위한 프레임워크다.',
    'Pytorch is very clear to use.\t파이토치는 사용하기 매우 직관적이다.']

# SOS : start of sentence
SOS_token = 0
# EOS : end of sentence
EOS_token = 1

In [None]:
def preprocessing(corpus, source_max_length, target_max_length):
    """Turn raw tab-separated lines into sentence pairs plus two vocabularies.

    Args:
        corpus: iterable of strings, each "<source>\\t<target>".
        source_max_length: forwarded to filter_pair() as the source limit.
        target_max_length: forwarded to filter_pair() as the target limit.

    Returns:
        (pairs, source_vocab, target_vocab) where ``pairs`` is the list of
        [source, target] lists that filter_pair() accepted, and the two
        vocabularies have been fed pair[0] / pair[1] of every kept pair.

    Note:
        filter_pair and Vocab are not defined in this cell; they are expected
        to be defined elsewhere in the notebook before this function is called.
    """
    print('reading corpus....')

    # Strip surrounding whitespace, lower-case, then split on the tab separator.
    pairs = [line.strip().lower().split('\t') for line in corpus]

    # Filter once, after all lines are read, rather than on every iteration.
    pairs = [pair for pair in pairs if filter_pair(pair, source_max_length, target_max_length)]

    # Build the vocabularies outside the reading loop so they exist even when
    # the corpus is empty.
    source_vocab = Vocab()
    target_vocab = Vocab()

    for pair in pairs:
        source_vocab.add_vocab(pair[0])
        target_vocab.add_vocab(pair[1])

    return pairs, source_vocab, target_vocab

In [9]:
class Encoder(nn.Module):
    """Embedding + single GRU that consumes the source sentence one token per call.

    Args:
        input_size: number of rows in the embedding table (source vocabulary size).
        hidden_size: width of both the embedding vectors and the GRU state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        # Embedding width equals the GRU input width, so the embedded token
        # can be fed to the GRU without a projection.
        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=hidden_size)
        self.gru = nn.GRU(input_size=hidden_size, hidden_size=hidden_size)

    def forward(self, x, hidden):
        """Embed ``x``, run one GRU step and return ``(output, new_hidden)``."""
        # Reshape to (1, 1, -1): a sequence of length 1 with batch size 1.
        embedded = self.embedding(x).view(1, 1, -1)
        output, next_hidden = self.gru(embedded, hidden)
        return output, next_hidden


class Decoder(nn.Module):
    """Embedding + single GRU + linear projection that emits one token per call.

    Args:
        hidden_size: width of the embedding vectors and of the GRU state.
        output_size: number of target-vocabulary entries to score.
    """

    def __init__(self, hidden_size, output_size):
        super(Decoder, self).__init__()

        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        # The class is nn.GRU; there is no lower-case nn.gru.
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        # Register the log-softmax layer as an attribute so forward() can call it.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x, hidden):
        """Run one decoding step.

        Returns:
            (log_probs, new_hidden): ``log_probs`` has one row of
            ``output_size`` log-probabilities; ``new_hidden`` is the updated
            GRU state.
        """
        # Reshape to (1, 1, -1): a sequence of length 1 with batch size 1.
        x = self.embedding(x).view(1, 1, -1)
        x, hidden = self.gru(x, hidden)
        # x[0] drops the sequence dimension before the projection.
        x = self.softmax(self.out(x[0]))

        return x, hidden

def tensorize(vocab, sentence):
    """Convert a sentence into a column tensor of vocabulary indices.

    The sentence is split on single spaces, each word is looked up in
    ``vocab.vocab2index``, and the index of ``'<EOS>'`` is appended.

    Args:
        vocab: object exposing a ``vocab2index`` mapping from word to index.
        sentence: space-separated string whose words are all in the vocabulary.

    Returns:
        An int64 tensor of shape (number_of_words + 1, 1) on the notebook's
        ``device``.

    Raises:
        KeyError: if a word (or ``'<EOS>'``) is missing from ``vocab2index``.
    """
    indexes = [vocab.vocab2index[word] for word in sentence.split(' ')]
    indexes.append(vocab.vocab2index['<EOS>'])
    # Build the int64 tensor directly on the target device; going through
    # torch.Tensor(...) first would store the indices as float32.
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

def train(pairs, source_vocab, target_vocab, encoder, decoder, n_iter, print_every=1000, learning_rate=0.01):
    """Train the encoder/decoder with SGD on randomly sampled sentence pairs.

    Each iteration processes one pair: the source sentence is fed to the
    encoder token by token, the final encoder hidden state initialises the
    decoder, and the decoder is trained with teacher forcing (the ground-truth
    target token is always fed as the next input).

    Args:
        pairs: sequence of [source_sentence, target_sentence] items.
        source_vocab: vocabulary passed to tensorize() for the source side.
        target_vocab: vocabulary passed to tensorize() for the target side.
        encoder: module called as ``encoder(token, hidden)``; must expose
            ``hidden_size``.
        decoder: module called as ``decoder(token, hidden)`` returning
            log-probabilities and the new hidden state.
        n_iter: number of training iterations (one sampled pair each).
        print_every: report the average per-token loss every this many
            iterations.
        learning_rate: SGD learning rate for both optimizers.

    Returns:
        None. Progress is printed; the modules are updated in place.
    """
    loss_total = 0

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    # Sample (with replacement) the pair used at each iteration up front.
    training_batch = [random.choice(pairs) for _ in range(n_iter)]
    training_source = [tensorize(source_vocab, pair[0]) for pair in training_batch]
    training_target = [tensorize(target_vocab, pair[1]) for pair in training_batch]

    # Negative log-likelihood loss; the decoder already emits log-probabilities.
    criterion = nn.NLLLoss()

    for i in range(1, n_iter + 1):
        source_tensor = training_source[i - 1]
        target_tensor = training_target[i - 1]

        # There is no hidden state before the first token, so start from zeros.
        encoder_hidden = torch.zeros(1, 1, encoder.hidden_size, device=device)

        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        source_length = source_tensor.size(0)
        target_length = target_tensor.size(0)

        loss = 0

        # Only the final hidden state is kept; per-step outputs are discarded.
        for enc_input in range(source_length):
            _, encoder_hidden = encoder(source_tensor[enc_input], encoder_hidden)

        decoder_input = torch.tensor([[SOS_token]], dtype=torch.long, device=device)
        decoder_hidden = encoder_hidden

        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            loss += criterion(decoder_output, target_tensor[di])
            # Teacher forcing: feed the ground-truth token as the next input.
            decoder_input = target_tensor[di]

        loss.backward()

        encoder_optimizer.step()
        decoder_optimizer.step()

        # Normalise by target length so sentences of different length compare.
        loss_iter = loss.item() / target_length
        loss_total += loss_iter

        if i % print_every == 0:
            loss_avg = loss_total / print_every
            loss_total = 0
            print('[{} - {}%] loss = {:05.4f}'.format(i, i/n_iter * 100, loss_avg ))

In [None]:

# Length limits handed to preprocessing() for the source and target sides.
source_max_length, target_max_length = 10, 12

# Build the filtered sentence pairs and one vocabulary per language.
(load_pairs,
 load_source_vocab,
 load_target_vocab) = preprocessing(raw, source_max_length, target_max_length)

# Show one randomly chosen pair as a quick sanity check.
print(random.choice(load_pairs))

In [None]:
# Encoder and decoder share one hidden size because train() hands the final
# encoder hidden state to the decoder as its initial state.
enc_hidden_size = dec_hidden_size = 16

enc = Encoder(input_size=load_source_vocab.n_vocab, hidden_size=enc_hidden_size).to(device)
dec = Decoder(hidden_size=dec_hidden_size, output_size=load_target_vocab.n_vocab).to(device)

train(
    load_pairs,
    load_source_vocab,
    load_target_vocab,
    enc,
    dec,
    n_iter=5000,
    print_every=1000,
)
# evaluate(load_pairs, load_source_vocab, load_target_vocab, enc, dec, target_max_length)