In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

In [2]:
device = torch.device("cuda:1")

In [3]:
sentence = ('Imagine you obtained some data from a particular collection of things. \
It could be the heights of individuals within a group of people, the weights of cats in a clowder, \
the number of petals in a bouquet of flowers, and so on. Such collections are called samples \
and you can use the obtained data in two ways. The most straightforward thing you can do is \
give a detailed description of the sample. For example, you can calculate some of its useful properties. I love this article!')

In [4]:
char_set = list(set(sentence))
char_dic = {c: i for i, c in enumerate(char_set)}

In [5]:
print(len(sentence))
print(char_dic)

480
{'o': 0, 'r': 1, 't': 2, 'q': 3, 'I': 4, 'g': 5, 's': 6, 'b': 7, 'u': 8, 'f': 9, 'c': 10, 'h': 11, ',': 12, 'e': 13, 'x': 14, 'i': 15, 'p': 16, '.': 17, 'v': 18, 'S': 19, '!': 20, 'a': 21, 'l': 22, 'm': 23, 'n': 24, 'T': 25, 'd': 26, ' ': 27, 'w': 28, 'y': 29, 'F': 30}


In [6]:
input_size = len(char_dic)  # 각 char를 one-hot으로 변경 예정이므로 input_size = len(char_dic)
hidden_state = 16
sequence_length = 20
learning_rate = 0.01

In [7]:
# data preprocessing
x_data = []
y_data = []

for i in range(len(sentence) - sequence_length):
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1: i + sequence_length + 1]

    x_data.append([char_dic[c] for c in x_str]) 
    y_data.append([char_dic[c] for c in y_str])

x_one_hot = [np.eye(input_size)[x] for x in x_data]

In [8]:
print(x_str)
print([char_dic[c] for c in x_str])

 I love this article
[27, 4, 27, 22, 0, 18, 13, 27, 2, 11, 15, 6, 27, 21, 1, 2, 15, 10, 22, 13]


In [9]:
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

- Vanilla RNN
- $h_t = \text{tanh}(W_{ih} x_t + b_{ih} + W_{hh} h_{(t-1)} + b_{hh})$

In [10]:
class CharacterRNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers):
        super(CharacterRNN, self).__init__()
        self.rnn = nn.RNN(input_dim, hidden_dim, num_layers=num_layers, nonlinearity ='tanh', batch_first=True)
        self.fc = nn.Linear(hidden_dim, len(char_dic), bias=True)

    def forward(self, x):
        outputs, next_state = self.rnn(x)
        outputs = self.fc(outputs)
        return outputs

In [11]:
len(char_dic)

31

In [12]:
np.shape(X)

torch.Size([460, 20, 31])

In [13]:
model = CharacterRNN(input_size, hidden_state, num_layers=2)

- output, next_state = nn.RNN()
- **output** of shape `(batch, seq_len, num_directions * hidden_size)`: tensor containing the output features (`h_t`) from the last layer of the RNN for each `t`.
    - 각 time step마다 마지막 state를 output
- **next_state** of shape `(num_layers * num_directions, batch, hidden_size)`: tensor containing the hidden state for `t = last time step`.
    - multi-layer RNN, bi-directional RNN 일 경우, 여러 next state를 output

In [14]:
output, next_state = model.rnn(X)

In [15]:
print('output:', output.shape)
print('next_state:', next_state.shape) #two layer RNN

output: torch.Size([460, 20, 16])
next_state: torch.Size([2, 460, 16])


- nn.CrossEntropyLoss()
    - The `input` is expected to contain raw, **unnormalized scores for each class**.
`input` has to be a Tensor of size `(minibatch, num_class)` 
$$\text{loss}(x, class) = -\log\left(\frac{\exp(x[class])}{\sum_j \exp(x[j])}\right)$$

In [16]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), learning_rate)

In [17]:
for i in range(500):    
    outputs = model(X)
    loss = criterion(outputs.view(-1, len(char_dic)), Y.view(-1))
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if i%50==0:
        results = outputs.argmax(dim=2)  # outptus.shape : [460, 20, 31] 각 time step마다 31개 중 하나 prediction
        predict_str = ""
        for j, result in enumerate(results):
            if j == 0:
                predict_str += ''.join([char_set[t] for t in result])  #처음 문장은 그대로 붙이고
            else:
                predict_str += char_set[result[-1]]  # 두번째부터는 마지막 character만 붙여가면 됨

        print('epoch:',i)        
        print(predict_str)
        print('-----------------------------------------------------------------')
        

epoch: 0
,I,,,l,I,,,,,,,,,,,.l,,,,I,,,,,,.,,,,,,,,,,l,,,,,,,,,,ll,,,,,,,,,,,,,,,,ll,,,,,,,,I,,,I,,,l,,,I,,l,,,,l,,,,,,I,,,,,,,,,,ll,,,,ll,,,,,,,I,,,I,,,l,,,I,,l,,,,I,,,,,,,,,,,.lI,,,I,,,,,.l,,l,,,,,,I,,,,,,,,,,,ll,,l,,,,,.,,I,,,l,,,,,,,,Il,,,,,,,.l,,,,I,,,Il,,,,,l,,,,,,,I,,,l,,,,,,,,,,,I,,,I,,,,,,,.l,,,,,,,,,l,,,,v,,,,,,I,,,l,,,l,,,,,,,,l,,,,,,,,,l,l,,,,,,,,,,,I,,l,I,,,,,,,,,,l,,,l,,,,,,,,,,,,,,I,,,,,,,,,,,l,I,,,,,,I,,,,,,,,l,,l,,,,,I,,,,I,,,,,,I,,,I,,,,,,,,.l,,,,,,I,,l,I,,,,I,,,,l,,
-----------------------------------------------------------------
epoch: 50
    te to  aftaln   tf e t ta o oe a panleonle  aanle taon af ahen    th oonle ao ohe th ohee of an  oe      aeleen a chon  on aaotae  the th ohee of aana on a panle    the t  e   of aaoaoe an a can el  af a oee    to  t  af  tflaeoanle taon  tf  talce  to ele  ao  toe aan a   the tf aln   thla an cheedh p  the ao   oo olohee u e   ahen  oo  aan a tan ahoe arp ta ne  th  aooeeon af ahe th ele  tne a a e e  toe aan aaleele   oo e tf

- 각 step output을 다음 input으로 이어받아 자유롭게 문장 생성

In [18]:
init_data=[]
init_str= 'what can I do for yo'
init_data.append([char_dic[c] for c in x_str]) 
data = torch.FloatTensor([np.eye(input_size)[x] for x in init_data])

predict_str = init_str
next_data=None

for step in range(800):
    output = model(data) #output.shape = [1, 20, 31]
    xx, yy = torch.max(output, dim=2)
    results = output.argmax(dim=2) # results.shape = [1,20]
    next_char = torch.FloatTensor(np.eye(31)[results[:,-1]]) #next_char.shape = [31]
    next_data = data[:,1:,:] # [1, 19, 31]
    next_data = torch.cat((next_data, next_char.view(1,1,31)), dim=1)
    predict_str += char_set[results[:, -1]]  # 두번째부터는 마지막 character만 붙여가면 됨
    data = next_data

In [19]:
predict_str

'what can I do for yo, the weights of cats in a clowder, the number of petals in a bouquet of flowers, and so on. Such collection of things. It could be the heights of individuals within a group of people, the weights of cats in a clowder, the number of petals in a bouquet of flowers, and so on. Such collection of things. It could be the heights of individuals within a group of people, the weights of cats in a clowder, the number of petals in a bouquet of flowers, and so on. Such collection of things. It could be the heights of individuals within a group of people, the weights of cats in a clowder, the number of petals in a bouquet of flowers, and so on. Such collection of things. It could be the heights of individuals within a group of people, the weights of cats in a clowder, the number of petals in a bouque'