In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [5]:
# Training pair: label_str is input_str shifted one character ahead, so the
# target at each position is the character that follows the input character.
input_str='appl'
label_str='pple'
# Vocabulary = every distinct character appearing in either string, sorted so
# the character order (and therefore the id assignment) is deterministic.
char_vocab=sorted(set(input_str)|set(label_str))
vocab_size=len(char_vocab)
print(char_vocab, vocab_size)

['a', 'e', 'l', 'p'] 4


In [6]:
# Model / optimiser hyperparameters.
input_size=vocab_size   # width of each input step (one slot per vocabulary entry)
hidden_size=4           # width of the RNN hidden state
# The classifier needs one logit per vocabulary entry, so derive the output
# width from the vocabulary instead of hard-coding 4; otherwise changing the
# training strings silently produces a model with the wrong number of classes.
output_size=vocab_size
learning_rate=0.1

In [7]:
# Map every vocabulary character to its integer id (its position in char_vocab).
char_to_index={ch: idx for idx, ch in enumerate(char_vocab)}
print(char_to_index)

{'a': 0, 'e': 1, 'l': 2, 'p': 3}


In [9]:
# Reverse lookup: integer id -> character, built by inverting char_to_index.
index_to_char={idx: ch for ch, idx in char_to_index.items()}
print(index_to_char)

{0: 'a', 1: 'e', 2: 'l', 3: 'p'}


In [10]:
# Encode both strings as lists of integer character ids.
x_data=[char_to_index[ch] for ch in input_str]
y_data=[char_to_index[ch] for ch in label_str]
# One list per output line.
print(x_data, y_data, sep='\n')

[0, 3, 3, 2]
[3, 3, 2, 1]


In [11]:
# Add a leading batch dimension (a batch containing one sequence).
# The lists are rebuilt from the source strings instead of re-wrapping
# x_data / y_data in place, so re-running this cell is idempotent and does not
# nest the lists one level deeper each time.
x_data=[[char_to_index[c] for c in input_str]]
y_data=[[char_to_index[c] for c in label_str]]
print(x_data, y_data)

[[0, 3, 3, 2]] [[3, 3, 2, 1]]


In [12]:
# One-hot encode each sequence: indexing the identity matrix with a list of ids
# selects the matching rows, yielding one 2-D array per sequence.
x_one_hot=[np.identity(vocab_size)[ids] for ids in x_data]
print(x_one_hot)

[array([[1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.]])]


In [14]:
# Stack the per-sequence arrays into a single ndarray before handing them to
# PyTorch: building a tensor straight from a *list* of ndarrays takes a slow
# element-wise path and triggers a UserWarning.
# torch.tensor(..., dtype=...) replaces the legacy FloatTensor / LongTensor
# constructors; the resulting dtypes (float32 / int64) are the same.
X=torch.tensor(np.array(x_one_hot), dtype=torch.float32)
Y=torch.tensor(y_data, dtype=torch.long)
print(X.shape, X)
print(Y.shape, Y)

torch.Size([1, 4, 4]) tensor([[[1., 0., 0., 0.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 1., 0.]]])
torch.Size([1, 4]) tensor([[3, 3, 2, 1]])


In [17]:
class Net(nn.Module):
    """A single nn.RNN layer followed by a linear layer applied to every time step."""

    def __init__(self, input_size, hidden_size, output_size):
        """Build the recurrent layer and the linear layer that follows it.

        input_size:  feature width of each input step.
        hidden_size: width of the RNN hidden state.
        output_size: width of the linear layer's output.
        """
        super().__init__()
        # batch_first=True -> tensors are laid out as (batch, seq, feature).
        self.rnn=nn.RNN(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.fc=nn.Linear(in_features=hidden_size, out_features=output_size, bias=True)

    def forward(self, x):
        """Run the RNN over x and project every step's hidden state through fc."""
        # The final hidden state returned by the RNN is not needed here.
        hidden_seq, _=self.rnn(x)
        return self.fc(hidden_seq)

In [18]:
# Instantiate the model and run one forward pass with the freshly initialised
# (untrained) weights to inspect the output shape.
net=Net(input_size=input_size, hidden_size=hidden_size, output_size=output_size)

outputs=net(X)
print(outputs.shape, outputs)

torch.Size([1, 4, 4]) tensor([[[ 1.0088, -0.4070, -0.0286, -0.4452],
         [ 1.0270, -0.1272,  0.3263, -0.5567],
         [ 0.8653, -0.0964,  0.3555, -0.5505],
         [ 0.9027, -0.1660,  0.2742, -0.5294]]], grad_fn=<ViewBackward0>)


In [20]:
# Collapse the batch and time axes so each row holds the logits of one time step.
# The last axis is the model's *output* width, so read it from the tensor
# itself; reshaping with input_size only worked because the two sizes happen
# to be equal in this notebook.
print(outputs.view(-1, outputs.size(-1)).shape)

torch.Size([4, 4])


In [21]:
# Label tensor shape before and after flattening it to a single axis.
print(Y.shape, Y.view(-1).shape, sep='\n')

torch.Size([1, 4])
torch.Size([4])


In [22]:
# Cross-entropy loss on the logits; Adam updates every parameter of `net`.
criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(params=net.parameters(), lr=learning_rate)

In [24]:
# Train for 10 steps on the single (input, label) pair, printing the loss and
# the decoded prediction after each step.
for i in range(10):
    optimizer.zero_grad()
    outputs=net(X)
    # Flatten to (batch*seq, classes) for the loss. The class axis is the
    # model's output width, so take it from the tensor rather than reusing
    # input_size (the two are only coincidentally equal here).
    loss=criterion(outputs.view(-1, outputs.size(-1)), Y.view(-1))
    loss.backward()
    optimizer.step()

    # `outputs` was computed before optimizer.step(), so this is the prediction
    # of the weights *before* this step's update.
    # detach() is the supported way to leave the autograd graph; `.data` skips
    # autograd's safety checks.
    result=outputs.detach().numpy().argmax(axis=2)
    # Decode the first (only) sequence of the batch. Indexing keeps the row 1-D
    # even for a length-1 sequence, where np.squeeze would return a 0-d array
    # that cannot be iterated.
    result_str=''.join(index_to_char[c] for c in result[0])
    print(i, "loss", loss.item(), "prediction:", result, "true Y", y_data, "prediction_str:", result_str)

0 loss 1.5138822793960571 prediction: [[0 2 2 2]] true Y [[3, 3, 2, 1]] prediction_str: alll
1 loss 1.33321213722229 prediction: [[0 2 2 2]] true Y [[3, 3, 2, 1]] prediction_str: alll
2 loss 1.1580781936645508 prediction: [[0 2 2 1]] true Y [[3, 3, 2, 1]] prediction_str: alle
3 loss 0.9515467882156372 prediction: [[0 2 2 1]] true Y [[3, 3, 2, 1]] prediction_str: alle
4 loss 0.800633430480957 prediction: [[0 3 2 1]] true Y [[3, 3, 2, 1]] prediction_str: aple
5 loss 0.6639522314071655 prediction: [[0 3 2 1]] true Y [[3, 3, 2, 1]] prediction_str: aple
6 loss 0.4802587032318115 prediction: [[3 3 2 1]] true Y [[3, 3, 2, 1]] prediction_str: pple
7 loss 0.34566566348075867 prediction: [[3 3 2 1]] true Y [[3, 3, 2, 1]] prediction_str: pple
8 loss 0.25749266147613525 prediction: [[3 3 2 1]] true Y [[3, 3, 2, 1]] prediction_str: pple
9 loss 0.19284559786319733 prediction: [[3 3 2 1]] true Y [[3, 3, 2, 1]] prediction_str: pple
