In [51]:
'''
  code by Minho Ryu @bzantium
  
'''
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F

from torch import LongTensor as LT

# Toy corpus: the model is trained to predict, at every position, the next
# word of this single sentence.
sentence = (
    'Lorem ipsum dolor sit amet consectetur adipisicing elit '
    'sed do eiusmod tempor incididunt ut labore et dolore magna '
    'aliqua Ut enim ad minim veniam quis nostrud exercitation'
)

# Sort the unique words so the word -> index assignment is identical on every
# run; iterating a bare set of strings depends on hash randomization.
word_dict = {w: i for i, w in enumerate(sorted(set(sentence.split())))}
word_dict['<E>'] = len(word_dict)  # end-of-sentence marker takes the last index
number_dict = {i: w for w, i in word_dict.items()}  # inverse mapping: index -> word
vocab_size = len(word_dict)      # unique words plus the '<E>' marker
n_embed = 5                      # embedding dimension
n_class = len(word_dict)         # one output class per vocabulary entry
n_step = len(sentence.split())   # number of words in the sentence
n_hidden = 5                     # LSTM hidden size (per direction)
n_batch = 32                     # number of copies of the sentence per batch

def make_batch(sentence, n_batch, vocab=None):
    """Build a batch of identical next-word-prediction examples.

    Args:
        sentence: Whitespace-separated text. Every token must be a key of
            the vocabulary.
        n_batch: Number of times the encoded sentence is repeated.
        vocab: Optional word -> index mapping that also contains the
            '<E>' end marker. Defaults to the module-level ``word_dict``.

    Returns:
        A pair ``(inputs, targets)`` of LongTensors built from ``n_batch``
        identical rows. An input row holds the indices of the sentence's
        words; a target row holds the same sequence shifted left by one
        position with the index of '<E>' appended.

    Raises:
        KeyError: If a token (or '<E>') is missing from the vocabulary.
    """
    if vocab is None:
        vocab = word_dict

    words = sentence.split()
    # Encode once: every row of the batch is the same sequence, so there is
    # no need to repeat the dictionary lookups for each row.
    input_ids = [vocab[w] for w in words]
    target_ids = [vocab[w] for w in (words + ['<E>'])[1:]]

    return LT([input_ids] * n_batch), LT([target_ids] * n_batch)

class BiLSTM(nn.Module):
    """Single-layer bidirectional LSTM that scores every class at every position.

    The input indices are embedded, run through a bidirectional LSTM, and the
    concatenated forward/backward outputs of each time step are projected to
    ``n_class`` scores.
    """

    def __init__(self, vocab_size, n_embed, n_hidden, n_class):
        """Create the layers.

        Args:
            vocab_size: Number of rows of the embedding table.
            n_embed: Embedding dimension (LSTM input size).
            n_hidden: LSTM hidden size per direction.
            n_class: Number of output scores per time step.
        """
        super(BiLSTM, self).__init__()
        # Remember the hidden size on the instance so forward() does not have
        # to rely on a module-level variable of the same name.
        self.n_hidden = n_hidden
        self.lstm = nn.LSTM(input_size=n_embed, hidden_size=n_hidden, bidirectional=True)
        self.embedding = nn.Embedding(vocab_size, n_embed)
        # The LSTM output concatenates both directions, hence 2 * n_hidden.
        self.linear = nn.Linear(2*n_hidden, n_class)

    def forward(self, X):
        """Return class scores for every position of every sequence.

        Args:
            X: Integer tensor of token indices, indexed as [batch, seq].

        Returns:
            Tensor indexed as [batch, n_class, seq].
        """
        inputs = self.embedding(X)       # [batch, seq, n_embed]
        inputs = inputs.transpose(0, 1)  # [seq, batch, n_embed]; the LSTM is not batch_first

        batch_size = X.size(0)
        # Initial states: [num_layers(=1) * num_directions(=2), batch_size, n_hidden].
        # new_zeros keeps them on the same device and dtype as the embeddings.
        hidden_state = inputs.new_zeros(1*2, batch_size, self.n_hidden)
        cell_state = inputs.new_zeros(1*2, batch_size, self.n_hidden)

        outputs, _ = self.lstm(inputs, (hidden_state, cell_state))
        outputs = outputs.transpose(0, 1)  # [batch, seq, 2 * n_hidden]
        # Move the class dimension to position 1: [batch, n_class, seq].
        return self.linear(outputs).transpose(1, 2)
      
# Build the (identical-row) training batch and the model.
input_batch, target_batch = make_batch(sentence, n_batch)

model = BiLSTM(vocab_size, n_embed, n_hidden, n_class)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(1000):
    optimizer.zero_grad()

    output = model(input_batch)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 100 == 0:
        # .item() extracts the Python float instead of formatting a tensor.
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

    loss.backward()
    optimizer.step()

# Prediction on the first sequence of the batch. no_grad() skips building the
# autograd graph, replacing the legacy `.data` access.
with torch.no_grad():
    logits = model(input_batch[0].unsqueeze(0))
# Highest-scoring class at each position; keepdim keeps the [1, 1, seq] layout.
predict = logits.argmax(dim=1, keepdim=True)
print(sentence)
# flatten() always yields a 1-D tensor, so this also works for a single-token
# sequence (squeeze() would produce a 0-d tensor that cannot be iterated).
print([number_dict[idx] for idx in predict.flatten().tolist()])

Epoch: 0100 cost = 3.061696
Epoch: 0200 cost = 2.355835
Epoch: 0300 cost = 1.712905
Epoch: 0400 cost = 1.251645
Epoch: 0500 cost = 0.912084
Epoch: 0600 cost = 0.666684
Epoch: 0700 cost = 0.497319
Epoch: 0800 cost = 0.379404
Epoch: 0900 cost = 0.294939
Epoch: 1000 cost = 0.233430
Lorem ipsum dolor sit amet consectetur adipisicing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Ut enim ad minim veniam quis nostrud exercitation
['ipsum', 'dolor', 'sit', 'amet', 'consectetur', 'adipisicing', 'elit', 'sed', 'do', 'eiusmod', 'tempor', 'incididunt', 'ut', 'labore', 'et', 'dolore', 'magna', 'aliqua', 'Ut', 'enim', 'ad', 'minim', 'veniam', 'quis', 'nostrud', 'exercitation', '<E>']
