In [90]:
'''
  code by Minho Ryu @bzantium
  reference : https://github.com/graykode/nlp-tutorial/blob/master/4-1.Seq2Seq/Seq2Seq_Torch.ipynb
  
'''
import numpy as np
import torch
import torch.nn as nn

from torch import LongTensor as LT

# S: Symbol that marks the start of the decoder input
# E: Symbol that marks the end of the decoder output
# P: Padding symbol that fills the blank positions when a sequence is shorter than the number of time steps

# Vocabulary: the three control symbols first, then the lowercase alphabet.
char_arr = [ch for ch in 'SEPabcdefghijklmnopqrstuvwxyz']
# Reverse lookup: character -> index into char_arr.
num_dic = dict(zip(char_arr, range(len(char_arr))))

# Training pairs, each stored as [source word, target word].
seq_data = [
    ['man', 'woman'],
    ['men', 'women'],
    ['black', 'white'],
    ['king', 'queen'],
    ['girl', 'boy'],
    ['up', 'down'],
    ['high', 'low'],
]

# Seq2Seq hyper-parameters
vocab_size = len(num_dic)  # number of distinct symbols
n_embed = 5                # width of each character embedding
n_step = 5                 # length every word is padded to with 'P'
n_hidden = 128             # LSTM hidden state size
n_class = vocab_size       # one output class per vocabulary symbol

def make_batch(seq_data):
    """Turn word pairs into index tensors for the encoder and decoder.

    Each element of ``seq_data`` is indexed as ``pair[0]`` (source word) and
    ``pair[1]`` (target word). Both words are right-padded with ``'P'`` up to
    ``n_step`` characters; words already longer than ``n_step`` are left
    as they are (no truncation). The caller's data is not modified.

    Returns a tuple of three ``LongTensor`` objects:
      * encoder input  - indices of the padded source word
      * decoder input  - indices of ``'S'`` + padded target word
      * decoder target - indices of padded target word + ``'E'``

    Raises ``KeyError`` if a word contains a character missing from
    ``num_dic``.
    """
    input_batch, output_batch, target_batch = [], [], []

    for pair in seq_data:
        # Pad local copies so the lists passed in by the caller stay untouched.
        source = pair[0].ljust(n_step, 'P')
        target = pair[1].ljust(n_step, 'P')

        input_batch.append([num_dic[ch] for ch in source])
        output_batch.append([num_dic[ch] for ch in 'S' + target])
        target_batch.append([num_dic[ch] for ch in target + 'E'])

    return LT(input_batch), LT(output_batch), LT(target_batch)


# Model
class Seq2Seq(nn.Module):
    """Encoder-decoder network made of two LSTMs sharing one embedding table.

    The encoder LSTM reads the embedded source sequence; its final
    (hidden, cell) state initialises the decoder LSTM, whose per-step outputs
    are projected onto ``n_class`` scores by a linear layer.
    """

    def __init__(self, vocab_size, n_embed, n_hidden, n_class):
        super().__init__()

        # Creation order is kept as-is so seeded parameter initialisation
        # yields the same weights.
        self.enc_lstm = nn.LSTM(input_size=n_embed, hidden_size=n_hidden)
        self.dec_lstm = nn.LSTM(input_size=n_embed, hidden_size=n_hidden)
        self.embedding = nn.Embedding(vocab_size, n_embed)
        self.linear = nn.Linear(n_hidden, n_class)

    def forward(self, enc_input, dec_input):
        """Return class scores laid out as [batch_size, n_class, decoder_steps].

        ``enc_input`` and ``dec_input`` are index tensors of shape
        [batch_size, steps]; the two step counts may differ.
        """
        # The LSTMs are built with the default time-major layout, so the batch
        # and step axes are swapped after the embedding lookup.
        enc_embedded = self.embedding(enc_input)
        enc_embedded = enc_embedded.transpose(0, 1)  # [enc_steps, batch_size, n_embed]
        dec_embedded = self.embedding(dec_input)
        dec_embedded = dec_embedded.transpose(0, 1)  # [dec_steps, batch_size, n_embed]

        # Only the encoder's final (h, c) state is used; it seeds the decoder.
        _, enc_states = self.enc_lstm(enc_embedded)
        dec_hidden, _ = self.dec_lstm(dec_embedded, enc_states)  # [dec_steps, batch_size, n_hidden]

        batch_major = dec_hidden.transpose(0, 1)  # [batch_size, dec_steps, n_hidden]
        scores = self.linear(batch_major)         # [batch_size, dec_steps, n_class]
        # Move the class axis to position 1 before returning.
        return scores.transpose(1, 2)


# The whole data set is converted once and used as a single batch.
input_batch, output_batch, target_batch = make_batch(seq_data)

model = Seq2Seq(vocab_size, n_embed, n_hidden, n_class)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(1000):
    # Gradients accumulate across backward() calls, so reset them first.
    optimizer.zero_grad()

    # Forward pass and loss on the full batch.
    output = model(input_batch, output_batch)
    loss = criterion(output, target_batch)

    # Backward pass and parameter update.
    loss.backward()
    optimizer.step()

    # Report the loss computed above on every 100th epoch (1-based count).
    if epoch % 100 == 99:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))


# Test
def translate(word):
    """Decode ``word`` with the trained global ``model`` and return a string.

    The decoder is fed a dummy target consisting only of padding symbols
    (``make_batch`` prefixes it with ``'S'``). The most likely character is
    picked at every decoder step; the result is cut at the first ``'E'`` (if
    any) and all ``'P'`` padding symbols are removed.

    Raises ``KeyError`` (from ``make_batch``) if ``word`` contains a character
    that is not in the vocabulary.
    """
    pair = [word, 'P' * len(word)]
    input_batch, output_batch, _ = make_batch([pair])

    # Inference only: disable autograd tracking rather than reading `.data`.
    with torch.no_grad():
        # output: [batch_size(=1), n_class, decoder_steps]
        output = model(input_batch, output_batch)

    # Arg-max over the class axis, then take the single batch row. Indexing
    # with [0] (instead of squeeze()) keeps a 1-D result even for one step.
    predict = output.argmax(dim=1)[0].tolist()
    decoded = ''.join(char_arr[idx] for idx in predict)

    # Everything before the first end symbol, with padding stripped.
    return decoded.split('E', 1)[0].replace('P', '')

print('test')
print('man ->', translate('man'))
print('men ->', translate('men'))
print('king ->', translate('king'))
print('black ->', translate('black'))
print('upp ->', translate('upp'))

Epoch: 0100 cost = 0.370827
Epoch: 0200 cost = 0.033180
Epoch: 0300 cost = 0.012336
Epoch: 0400 cost = 0.006958
Epoch: 0500 cost = 0.004573
Epoch: 0600 cost = 0.003269
Epoch: 0700 cost = 0.002465
Epoch: 0800 cost = 0.001925
Epoch: 0900 cost = 0.001549
Epoch: 1000 cost = 0.001274
test
man -> woman
men -> women
king -> queen
black -> white
upp -> down
