In [88]:
# main reference
# https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html

import random
import torch
import torch.nn as nn
import torch.optim as optim

from torch.autograd import Variable
import torch.nn.functional as F

torch.manual_seed(0)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

raw = ["I feel hungry.	나는 배가 고프다.",
       "Pytorch is very easy.	파이토치는 매우 쉽다.",
       "Pytorch is a framework for deep learning.	파이토치는 딥러닝을 위한 프레임워크이다.",
       "Pytorch is very clear to use.	파이토치는 사용하기 매우 직관적이다."]

SOS_token = 0
EOS_token = 1

MAX_LENGTH = 50

raw

['I feel hungry.\t나는 배가 고프다.',
 'Pytorch is very easy.\t파이토치는 매우 쉽다.',
 'Pytorch is a framework for deep learning.\t파이토치는 딥러닝을 위한 프레임워크이다.',
 'Pytorch is very clear to use.\t파이토치는 사용하기 매우 직관적이다.']

#### 단어사전 Class

In [89]:
class Vocab:
    def __init__(self):                                                                                             # 'hello seq to seq model '
        self.vocab2index = {"<SOS>": SOS_token, "<EOS>": EOS_token}     # {'<SOS>': 0, '<EOS>': 1, 'hello': 2, 'seq': 3, 'to': 4, 'model': 5, '': 6}
        self.index2vocab = {SOS_token: "<SOS>", EOS_token: "<EOS>"}     # {0: '<SOS>', 1: '<EOS>', 2: 'hello', 3: 'seq', 4: 'to', 5: 'model', 6: ''}
        self.vocab_count = {}                                                                                  # {'hello': 1, 'seq': 2, 'to': 1, 'model': 1, '': 1}
        self.n_vocab = len(self.vocab2index)                                                       # 7

    def add_vocab(self, sentence):
        for word in sentence.split(" "):
            if word not in self.vocab2index:
                self.vocab2index[word] = self.n_vocab
                self.vocab_count[word] = 1
                self.index2vocab[self.n_vocab] = word
                self.n_vocab += 1
            else:
                self.vocab_count[word] += 1

##### 단어사전 test

In [90]:
test_sentence = 'hello seq to seq model '

test_vocab = Vocab()
test_vocab.add_vocab(test_sentence)
print("vocab2index: \t", test_vocab.vocab2index)
print("index2vocab: \t", test_vocab.index2vocab)
print("vocab_count: \t", test_vocab.vocab_count)
print("n_vocab: \t\t", test_vocab.n_vocab)

vocab2index: 	 {'<SOS>': 0, '<EOS>': 1, 'hello': 2, 'seq': 3, 'to': 4, 'model': 5, '': 6}
index2vocab: 	 {0: '<SOS>', 1: '<EOS>', 2: 'hello', 3: 'seq', 4: 'to', 5: 'model', 6: ''}
vocab_count: 	 {'hello': 1, 'seq': 2, 'to': 1, 'model': 1, '': 1}
n_vocab: 		 7


#### filter_pair : pair(src, tgt)가 모두 Max Length 이하인지 여부

In [91]:
def filter_pair(pair, source_max_length, target_max_length):
    return len(pair[0].split(" ")) < source_max_length and len(pair[1].split(" ")) < target_max_length

In [92]:
pairs = []
for line in raw:
    pairs.append([s for s in line.strip().lower().split("\t")])
print("Read {} sentence pairs".format(len(pairs)))
print(pairs[0])

print(filter_pair(pairs[0], 10, 12))
print(filter_pair(pairs[0], 3, 3))

Read 4 sentence pairs
['i feel hungry.', '나는 배가 고프다.']
True
False


### 전처리 

In [93]:
def preprocess(corpus, source_max_length, target_max_length):

    # 문장별로 pair 간단히 만들어줌
    print("[preprocess] reading corpus...")
    pairs = []
    for line in corpus:
        pairs.append([s for s in line.strip().lower().split("\t")])
    print("[preprocess] Read {} sentence pairs".format(len(pairs)))

    # (src, tgt) 둘다 max length 이하인 경우만 통과
    pairs = [pair for pair in pairs if filter_pair(pair, source_max_length, target_max_length)]
    print("[preprocess] Trimmed to {} sentence pairs".format(len(pairs)))

    # vocab 2개 생성
    source_vocab = Vocab() 
    target_vocab = Vocab()

    # src들 → src_vocab에 추가, tgt들 → tgt_vocab에 추가
    print("[preprocess] Counting words...")
    for pair in pairs:
        source_vocab.add_vocab(pair[0])
        target_vocab.add_vocab(pair[1])
    print("[preprocess] source vocab size =", source_vocab.n_vocab)
    print("[preprocess] target vocab size =", target_vocab.n_vocab)

    # 리턴, pairs원본, 소스모음vocab, 타겟모음vocab
    return pairs, source_vocab, target_vocab

In [94]:
preprocess(raw, 10, 12)

[preprocess] reading corpus...
[preprocess] Read 4 sentence pairs
[preprocess] Trimmed to 4 sentence pairs
[preprocess] Counting words...
[preprocess] source vocab size = 17
[preprocess] target vocab size = 13


([['i feel hungry.', '나는 배가 고프다.'],
  ['pytorch is very easy.', '파이토치는 매우 쉽다.'],
  ['pytorch is a framework for deep learning.', '파이토치는 딥러닝을 위한 프레임워크이다.'],
  ['pytorch is very clear to use.', '파이토치는 사용하기 매우 직관적이다.']],
 <__main__.Vocab at 0x7f2995468438>,
 <__main__.Vocab at 0x7f2995468198>)

In [147]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden
    
    def initHidden(self):
        zero_init = Variable(torch.zeros(1, 1, self.hidden_size)).to(device)    
        return zero_init

In [121]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))                        # 3D → 2D  # tensor([[[-0.1669, ... ,  0.4492]]])  → tensor([[-0.1669, ... ,  0.4492]]) ※ 요소 : 16개(hidden_size)
        return output, hidden
    
    def initSoS(self):
        sos_init = Variable(torch.Tensor([[SOS_token]]).long().to(device))
        return sos_init

In [122]:
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size  # 추가
        self.dropout_p   = dropout_p    # 추가
        self.max_length = max_length # 추가
        
        self.embedding = nn.Embedding(output_size, hidden_size)

        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)                   # attn (Linear) Layer추가
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size) # attn_combine (Linear) Layer추가
        self.dropout = nn.Dropout(self.dropout_p)                                               # dropout Layer추가
        
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)                                                   # 주석 처리

    def forward(self, input, hidden, encoder_outputs):  # encoder_outputs 추가
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        
        ### Attention Decoder 구조 추가
        embedded = self.dropout(embedded)
        attn_weights = F.softmax( self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1 )
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)
        output = F.relu(output)
        ### 
        
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))                        # 3D → 2D ( 한겹 벗기기 ) ※ 요소 : 16개(hidden_size)
        return output, hidden, attn_weights                                  # attn_weights  리턴 추가
    
    def initSoS(self):
        sos_init = Variable(torch.Tensor([[SOS_token]]).long().to(device))
        return sos_init

#### 토큰화

In [123]:
def tensorize(vocab, sentence):
    indexes = [vocab.vocab2index[word] for word in sentence.split(" ")]
    indexes.append(vocab.vocab2index["<EOS>"])
    return torch.Tensor(indexes).long().to(device).view(-1, 1) # 특성1개 모양으로 : [2, 3, 4, 3, 5, 6, 1] → [ [2], [3], [4], [3], [5], [6], [1] ] 

##### 토큰화 테스트

In [124]:
print(test_sentence)
print(test_vocab.index2vocab)
tensorize(test_vocab, test_sentence)

hello seq to seq model 
{0: '<SOS>', 1: '<EOS>', 2: 'hello', 3: 'seq', 4: 'to', 5: 'model', 6: ''}


tensor([[2],
        [3],
        [4],
        [3],
        [5],
        [6],
        [1]])

##### n_iter만큼 랜덤 묶음 batch만큼 Pair 구성 테스트

In [125]:
[random.choice(pairs) for _ in range(10)]

[['pytorch is very clear to use.', '파이토치는 사용하기 매우 직관적이다.'],
 ['i feel hungry.', '나는 배가 고프다.'],
 ['pytorch is very easy.', '파이토치는 매우 쉽다.'],
 ['i feel hungry.', '나는 배가 고프다.'],
 ['pytorch is very clear to use.', '파이토치는 사용하기 매우 직관적이다.'],
 ['pytorch is very easy.', '파이토치는 매우 쉽다.'],
 ['pytorch is very clear to use.', '파이토치는 사용하기 매우 직관적이다.'],
 ['pytorch is very clear to use.', '파이토치는 사용하기 매우 직관적이다.'],
 ['i feel hungry.', '나는 배가 고프다.'],
 ['i feel hungry.', '나는 배가 고프다.']]

### Train

In [144]:
def train(pairs, source_vocab, target_vocab, encoder, decoder, n_iter, print_every=1000, learning_rate=0.01, max_length=MAX_LENGTH):
    loss_total = 0

    # opt, loss  설정
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()
    
    # 배치단위로 src, tgt를 토큰화
    training_batch = [random.choice(pairs) for _ in range(n_iter)] # n_iter만큼 랜덤묶음 batch 구성해줌
    training_source = [tensorize(source_vocab, pair[0]) for pair in training_batch]
    training_target = [tensorize(target_vocab, pair[1]) for pair in training_batch]

    # batch 단위로 보면 될 듯(?)
    for i in range(1, n_iter + 1):
        source_tensor = training_source[i - 1] # idx를 1부터 시작해서 1씩 빼준 것 뿐
        target_tensor = training_target[i - 1]   
        
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        source_length = source_tensor.size(0)
        target_length = target_tensor.size(0)

        loss = 0
        
        # 최초 hidden은 0으로 채워진 모양으로 준비
        encoder_hidden=encoder.initHidden()        
        
        #attn 추가
        encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size).to(device))
        
        # source문장 돌면서 토큰단위로 Encoding
        for ei in range(source_length):                                 
            encoder_output, encoder_hidden = encoder(source_tensor[ei], encoder_hidden) # encoder_output 활용 시작         
            encoder_outputs[ei] = encoder_output[0][0]             
    
        # Decoding 준비
        decoder_input = decoder.initSoS()
        decoder_hidden = encoder_hidden

        # target문장 돌면서 Decoder Loss →  훈련(back, step)
        for di in range(target_length):                                                          # 시작 [ output의 마지막 hidden →연결→ decoder의 hidden ]                                                
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)             # enc_out 입력, weight 출력 부분 추가
            loss += criterion(decoder_output, target_tensor[di])              # 이후 결과로 나온 decoder_hidden→ 다음 hidden으로 계속 활용                
            decoder_input = target_tensor[di]  # teacher forcing
            
        loss.backward()
        encoder_optimizer.step()
        decoder_optimizer.step()

        loss_iter = loss.item() / target_length
        loss_total += loss_iter

        if i % print_every == 0:
            loss_avg = loss_total / print_every
            loss_total = 0
            print("[{} - {}%] loss = {:05.4f}".format(i, i / n_iter * 100, loss_avg))

In [136]:
#  # RuntimeError: legacy constructor expects device type: cpubut device type: cuda was passed
#  torch.Tensor([0], device=device)

torch.Tensor([0]).to(device)

tensor([0.])

### 평가

In [149]:
def evaluate(pairs, source_vocab, target_vocab, encoder, decoder, target_max_length, max_length=MAX_LENGTH):
    for pair in pairs:
        print(">", pair[0])
        print("=", pair[1])

        source_tensor = tensorize(source_vocab, pair[0])
        source_length = source_tensor.size()[0]
        
        # Encoding 준비
        encoder_hidden = encoder.initHidden()

        #attn 추가
        encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size).to(device))

        # source문장 돌면서 Encoding     
        for ei in range(source_length):       
            encoder_output, encoder_hidden = encoder(source_tensor[ei], encoder_hidden)             #  encoder_output 이제 attnDecoder에 활용 
            encoder_outputs[ei] = encoder_outputs[ei] + encoder_output[0][0]                

        # Decoding 준비
        decoder_input = decoder.initSoS()   
        decoder_hidden = encoder_hidden                                                      
        
        # Decoding
        decoded_words = []
        for di in range(target_max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)             # enc_out 입력, weight 출력 부분 추가
            _, top_index = decoder_output.data.topk(1)
            
            # 디코더 끝이면(<EOS>) 종료
            if top_index.item() == EOS_token:
                decoded_words.append("<EOS>")
                break
            else:
                decoded_words.append(target_vocab.index2vocab[top_index.item()])

            decoder_input = top_index.squeeze().detach()

        predict_words = decoded_words
        predict_sentence = " ".join(predict_words)
        print("<", predict_sentence)
        print("")

### main

In [150]:
SOURCE_MAX_LENGTH = 10
TARGET_MAX_LENGTH = 12

load_pairs, load_source_vocab, load_target_vocab = preprocess(raw, SOURCE_MAX_LENGTH, TARGET_MAX_LENGTH)
print("\n▶Pair모양 확인(random) : ", random.choice(load_pairs))

print("\n▶인코더/디코더 선언")
enc_hidden_size = 16
dec_hidden_size = enc_hidden_size

enc = EncoderRNN(load_source_vocab.n_vocab, enc_hidden_size).to(device)
# dec = DecoderRNN(dec_hidden_size, load_target_vocab.n_vocab).to(device)
attn_dec = AttnDecoderRNN(dec_hidden_size, load_target_vocab.n_vocab, dropout_p=0.1).to(device)

print("\n▶훈련시작 >>>>>>>>>>> train()")
train(load_pairs, load_source_vocab, load_target_vocab, enc, attn_dec, 100, print_every=1000)

print("\n▶전체 평가 >>>>>>>>>>> evaluate()")
evaluate(load_pairs, load_source_vocab, load_target_vocab, enc, attn_dec, TARGET_MAX_LENGTH)

[preprocess] reading corpus...
[preprocess] Read 4 sentence pairs
[preprocess] Trimmed to 4 sentence pairs
[preprocess] Counting words...
[preprocess] source vocab size = 17
[preprocess] target vocab size = 13

▶Pair모양 확인(random) :  ['i feel hungry.', '나는 배가 고프다.']

▶인코더/디코더 선언

▶훈련시작 >>>>>>>>>>> train()

▶전체 평가 >>>>>>>>>>> evaluate()
> i feel hungry.
= 나는 배가 고프다.
< <EOS>

> pytorch is very easy.
= 파이토치는 매우 쉽다.
< <EOS>

> pytorch is a framework for deep learning.
= 파이토치는 딥러닝을 위한 프레임워크이다.
< <EOS>

> pytorch is very clear to use.
= 파이토치는 사용하기 매우 직관적이다.
< <EOS>

