In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import random
import matplotlib.pyplot as plt

In [5]:
# Size of the token-id space handed to the model; characters are encoded
# by their integer code point.
vocublary_size = 256
# Source and target words as lists of code points.
x_ = [ord(ch) for ch in "hello"]
y_ = [ord(ch) for ch in "hola"]
print("hello:", x_)
print("hola:", y_)

hello: [104, 101, 108, 108, 111]
hola: [104, 111, 108, 97]


In [6]:
# Wrap the code-point lists as 1-D int64 index tensors.
x = torch.tensor(x_, dtype=torch.long)
y = torch.tensor(y_, dtype=torch.long)

In [7]:
class squence(nn.Module):
    """GRU encoder-decoder over token ids, decoded with teacher forcing.

    A single embedding table is shared by the encoder and the decoder. The
    final encoder hidden state seeds the decoder, and every decoder output is
    projected to unnormalised scores over the vocabulary.

    Args:
        vocab_size: Number of distinct token ids (embedding rows and logits
            produced per decoding step).
        hidden_size: Width of the embeddings and of both GRU hidden states.
    """

    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        self.n_layers = 1
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        # num_layers is passed explicitly so the GRUs always agree with the
        # state shape built by _init_state().
        self.encoder = nn.GRU(hidden_size, hidden_size, num_layers=self.n_layers)
        self.decoder = nn.GRU(hidden_size, hidden_size, num_layers=self.n_layers)
        self.project = nn.Linear(hidden_size, vocab_size)

    def forward(self, inputs, targets):
        """Encode ``inputs`` and decode one step per target token.

        Args:
            inputs: 1-D integer tensor of source token ids.
            targets: 1-D integer tensor of target token ids. It fixes the
                number of decoding steps and supplies the teacher-forced
                decoder inputs. Must contain at least one token.

        Returns:
            Tensor of shape ``(len(targets), vocab_size)`` holding the
            unnormalised scores for each decoding step. The leading time
            dimension is kept even when there is a single target token.
        """
        device = self.embedding.weight.device

        # (seq_len,) -> (seq_len, batch=1, hidden): the GRUs are sequence-first.
        embedded = self.embedding(inputs).unsqueeze(1)
        _, decoder_state = self.encoder(embedded, self._init_state())

        # Token id 0 is fed as the first decoder input (start symbol); it is
        # created on the model's device rather than unconditionally on CPU.
        decoder_input = torch.zeros(1, dtype=torch.long, device=device)
        outputs = []
        for i in range(targets.size(0)):
            step_embedding = self.embedding(decoder_input).unsqueeze(1)
            decoder_output, decoder_state = self.decoder(step_embedding, decoder_state)
            # (1, 1, hidden) -> (1, 1, vocab) -> (1, vocab). Only the batch
            # axis is squeezed, so the time axis cannot be dropped by accident.
            outputs.append(self.project(decoder_output).squeeze(1))
            # Teacher forcing: the next input is the ground-truth token.
            decoder_input = targets[i].reshape(1).to(device=device, dtype=torch.long)
        return torch.cat(outputs, dim=0)

    def _init_state(self, batch_size=1):
        """Return an all-zero hidden state of shape (n_layers, batch_size, hidden_size).

        The state takes its dtype and device from the module's first parameter.
        """
        reference = next(self.parameters())
        return reference.new_zeros((self.n_layers, batch_size, self.hidden_size))

In [9]:
# Model with a 16-unit hidden state over the whole token-id space.
seq_model = squence(vocab_size=vocublary_size, hidden_size=16)
# Cross-entropy between the per-step vocabulary scores and the target ids.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(seq_model.parameters(), lr=0.001)

In [12]:
# Fit the model to the single (x, y) pair for 1000 optimisation steps.
log = []  # one detached scalar loss tensor per step
for i in range(1000):
    prediction = seq_model(x, y)
    loss = criterion(prediction, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # detach() is the supported replacement for the legacy .data accessor.
    loss_val = loss.detach()
    log.append(loss_val)
    if i % 100 == 0:
        print("\n Epoch:%d Error: %s" % (i, loss_val.item()))
        # Highest-scoring token id for every row of the prediction.
        top1 = prediction.detach().topk(1, 1).indices
        # reshape(-1) always yields a 1-D sequence; squeeze() would collapse
        # a single row to a bare int and break the comprehension.
        print([chr(c) for c in top1.reshape(-1).tolist()])


 Epoch:0 Error: 0.03135327994823456
['h', 'o', 'l', 'a']

 Epoch:100 Error: 0.026706485077738762
['h', 'o', 'l', 'a']

 Epoch:200 Error: 0.022573135793209076
['h', 'o', 'l', 'a']

 Epoch:300 Error: 0.019104862585663795
['h', 'o', 'l', 'a']

 Epoch:400 Error: 0.016450950875878334
['h', 'o', 'l', 'a']

 Epoch:500 Error: 0.014325978234410286
['h', 'o', 'l', 'a']

 Epoch:600 Error: 0.012592888437211514
['h', 'o', 'l', 'a']

 Epoch:700 Error: 0.011159168556332588
['h', 'o', 'l', 'a']

 Epoch:800 Error: 0.009958023205399513
['h', 'o', 'l', 'a']

 Epoch:900 Error: 0.008939617313444614
['h', 'o', 'l', 'a']
