In [115]:
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable

In [116]:
# Alias for torch's CPU 32-bit float tensor type.
# NOTE(review): no other cell visible in this notebook reads `dtype` -- confirm it is still needed.
dtype = torch.FloatTensor

In [117]:
# Vocabulary: three control symbols followed by the lowercase alphabet.
# In make_batch, 'S' is prepended to decoder inputs, 'E' is appended to
# targets, and 'P' right-pads words shorter than n_step.
char_arr = list('SEPabcdefghijklmnopqrstuvwxyz')

# Symbol -> position in char_arr; the position doubles as the class index.
num_dic = dict(zip(char_arr, range(len(char_arr))))

In [118]:
# Display the vocabulary list to check the symbol order.
char_arr

['S',
 'E',
 'P',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [119]:
# Display the symbol -> index mapping.
num_dic

{'S': 0,
 'E': 1,
 'P': 2,
 'a': 3,
 'b': 4,
 'c': 5,
 'd': 6,
 'e': 7,
 'f': 8,
 'g': 9,
 'h': 10,
 'i': 11,
 'j': 12,
 'k': 13,
 'l': 14,
 'm': 15,
 'n': 16,
 'o': 17,
 'p': 18,
 'q': 19,
 'r': 20,
 's': 21,
 't': 22,
 'u': 23,
 'v': 24,
 'w': 25,
 'x': 26,
 'y': 27,
 'z': 28}

In [120]:
# Training pairs, one per row: [source word, target word].
# make_batch feeds element 0 to the encoder and element 1 to the decoder.
seq_data = [
    ['man', 'women'],
    ['black', 'white'],
    ['king', 'queen'],
    ['girl', 'boy'],
    ['up', 'down'],
    ['high', 'low'],
]


In [121]:
# Display the training pairs.
seq_data

[['man', 'women'],
 ['black', 'white'],
 ['king', 'queen'],
 ['girl', 'boy'],
 ['up', 'down'],
 ['high', 'low']]

In [122]:
# Seq2seq hyper-parameters
n_step, n_hidden = 5, 128       # pad length for words (see make_batch), RNN hidden size
n_class = len(num_dic)          # number of symbols in the vocabulary
batch_size = len(seq_data)      # every training pair goes into one batch

In [123]:
def make_batch(seq_data):
    """Encode word pairs as tensors for the encoder, the decoder and the loss.

    Each element of ``seq_data`` is a pair ``[source, target]``. Words shorter
    than ``n_step`` are right-padded with ``'P'``. The padding is applied to
    local copies, so ``seq_data`` itself is never modified (pairs may therefore
    be tuples as well as lists).

    Args:
        seq_data: iterable of pairs of strings whose characters are all keys
            of ``num_dic``.

    Returns:
        A tuple ``(input_batch, output_batch, target_batch)``:
          * ``input_batch``  - float32 tensor ``[batch, len(source), n_class]``,
            one-hot encoding of the padded source word.
          * ``output_batch`` - float32 tensor ``[batch, len(target) + 1, n_class]``,
            one-hot encoding of ``'S' + target`` (decoder input).
          * ``target_batch`` - int64 tensor ``[batch, len(target) + 1]``,
            class indices of ``target + 'E'`` (not one-hot).

    Raises:
        KeyError: if a character is not in ``num_dic``.
        ValueError: if, after padding, the source words (or the target words)
            do not all have the same length, i.e. some word is longer than
            ``n_step``.
    """
    enc_rows, dec_rows, tgt_rows = [], [], []

    for pair in seq_data:
        # Pad copies instead of writing back into the caller's data.
        source, target = [word + 'P' * (n_step - len(word)) for word in (pair[0], pair[1])]

        enc_rows.append([num_dic[ch] for ch in source])
        dec_rows.append([num_dic[ch] for ch in 'S' + target])
        tgt_rows.append([num_dic[ch] for ch in target + 'E'])

    # 'P' * negative is '', so over-long words are left unpadded; catch the
    # resulting ragged batch here with a readable message.
    for label, rows in (('source', enc_rows), ('target', dec_rows)):
        if len({len(row) for row in rows}) > 1:
            raise ValueError(
                "{} words have different lengths after padding to n_step={}; "
                "is a word longer than n_step?".format(label, n_step)
            )

    # One-hot rows are picked out of an identity matrix by fancy indexing,
    # which builds each batch as a single contiguous array.
    one_hot = np.eye(n_class, dtype=np.float32)
    input_batch = torch.from_numpy(one_hot[np.array(enc_rows, dtype=np.int64)])
    output_batch = torch.from_numpy(one_hot[np.array(dec_rows, dtype=np.int64)])
    target_batch = torch.from_numpy(np.array(tgt_rows, dtype=np.int64))
    return input_batch, output_batch, target_batch


In [124]:
class Seq2Seq(nn.Module):
    """Encoder-decoder built from two single-layer RNNs and a linear output layer.

    The encoder's final hidden state is used as the decoder's initial hidden
    state; every decoder output step is projected to ``n_class`` scores.
    """

    def __init__(self):
        super(Seq2Seq, self).__init__()

        # No `dropout` argument: nn.RNN applies dropout only between stacked
        # layers, so with a single layer it has no effect and merely emits a
        # UserWarning at construction time.
        self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        self.fc = nn.Linear(n_hidden, n_class)

    def forward(self, enc_input, enc_hidden, dec_input):
        """Run the encoder, then the decoder, and score every decoder step.

        Args:
            enc_input: batch-first encoder input, [batch_size, enc_len, n_class].
            enc_hidden: initial encoder state,
                [num_layers(=1) * num_directions(=1), batch_size, n_hidden].
            dec_input: batch-first decoder input, [batch_size, dec_len, n_class].

        Returns:
            Scores of shape [dec_len, batch_size, n_class] (time-major).
        """
        # nn.RNN is time-major by default, so move the time axis to the front.
        enc_input = enc_input.transpose(0, 1)  # [enc_len, batch_size, n_class]
        dec_input = dec_input.transpose(0, 1)  # [dec_len, batch_size, n_class]

        # enc_states : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        _, enc_states = self.enc_cell(enc_input, enc_hidden)

        # outputs : [dec_len, batch_size, n_hidden]
        outputs, _ = self.dec_cell(dec_input, enc_states)

        # logits : [dec_len, batch_size, n_class]
        logits = self.fc(outputs)
        return logits

In [125]:
# Build the training tensors once. No other cell visible in this notebook
# defines input_batch / output_batch / target_batch, so without this line the
# training cells raise NameError after "Restart Kernel -> Run All".
input_batch, output_batch, target_batch = make_batch(seq_data)

# Model, loss and optimizer used by the training cells.
model = Seq2Seq()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
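
UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.5 and num_layers=1
  "num_layers={}".format(dropout, num_layers))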


In [106]:
# Training, variant 1: add up the cross-entropy loss one sample at a time.
# Shapes below assume the batches were produced by make_batch(seq_data):
#   input_batch  : [batch_size, seq_len,   n_class]
#   output_batch : [batch_size, seq_len+1, n_class]
#   target_batch : [batch_size, seq_len+1], class indices (not one-hot)
for epoch in range(5000):
    # All-zero initial encoder state: [num_layers * num_directions, batch_size, n_hidden]
    hidden = Variable(torch.zeros(1, batch_size, n_hidden))

    optimizer.zero_grad()

    # The model returns [seq_len+1, batch_size, n_class]; make it batch-first
    # so that output[i] holds the scores of sample i: [seq_len+1, n_class(=29)].
    output = model(input_batch, hidden, output_batch).transpose(0, 1)

    # One criterion call per sample, summed over the batch.
    loss = 0
    for i, sample_target in enumerate(target_batch):
        loss += criterion(output[i], sample_target)

    # Report every 1000th epoch (1-based), before the parameter update.
    if epoch % 1000 == 999:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    loss.backward()
    optimizer.step()

Epoch: 1000 cost = 0.003328
Epoch: 2000 cost = 0.000919
Epoch: 3000 cost = 0.000395
Epoch: 4000 cost = 0.000198
Epoch: 5000 cost = 0.000107


In [127]:
# Display the number of classes (vocabulary size).
n_class

29

In [137]:
# Training, variant 2: flatten batch and time so the loss is a single call.
# NOTE: this cell does not re-create `model` or `optimizer`; it continues
# training from whatever state they are in when the cell runs.
# Shapes below assume the batches were produced by make_batch(seq_data):
#   input_batch  : [batch_size, seq_len,   n_class]
#   output_batch : [batch_size, seq_len+1, n_class]
#   target_batch : [batch_size, seq_len+1], class indices (not one-hot)
for epoch in range(5000):
    # All-zero initial encoder state: [num_layers * num_directions, batch_size, n_hidden]
    hidden = Variable(torch.zeros(1, batch_size, n_hidden))

    optimizer.zero_grad()

    # [seq_len+1, batch_size, n_class] -> [batch_size, seq_len+1, n_class(=29)]
    output = model(input_batch, hidden, output_batch).transpose(0, 1)

    # The transpose leaves the tensor non-contiguous, so copy before view().
    output = output.contiguous().view(-1, n_class)  # [batch_size*(seq_len+1), n_class(=29)]
    target = target_batch.view(-1)                  # [batch_size*(seq_len+1)]
    loss = criterion(output, target)

    # Report every 1000th epoch (1-based), before the parameter update.
    if epoch % 1000 == 999:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    loss.backward()
    optimizer.step()

Epoch: 1000 cost = 0.000577
Epoch: 2000 cost = 0.000161
Epoch: 3000 cost = 0.000069
Epoch: 4000 cost = 0.000035
Epoch: 5000 cost = 0.000019
