# Long Short Term Memory
 

### Inports


In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# for reproducibility
torch.manual_seed(100)

<torch._C.Generator at 0x7fa97ea35630>

### 1 Data Construction


In [4]:
# Dictionary
sample_sentences = ['howareyou', 'whats up?', 'iamgreat.']
char_set = list(set(''.join(sample_sentences)))
dic = {c: i for i, c in enumerate(char_set)}

# Parameters
dic_size = len(dic)
input_size = dic_size
hidden_size = dic_size * 2
output_size = dic_size

# Dataset setting
input_batch = []
target_batch = []

for sentence in sample_sentences:
  x_data = [dic[c] for c in sentence[:-1]]
  x_one_hot = [np.eye(dic_size)[x] for x in x_data]
  y_data = [dic[c] for c in sentence[1:]]
  input_batch.append(x_one_hot)
  target_batch.append(y_data)

# To torch tensors
X = torch.FloatTensor(np.array(input_batch))
Y = torch.LongTensor(np.array(target_batch))

print(X.shape)
print(Y.shape)

torch.Size([3, 8, 17])
torch.Size([3, 8])


### 2 RNN model
* Input (입력의 형태)
  + Input type: torch.Tensor
  + Input shape: (N x S x E) 
    - N: Batch size, S: Sequence length, E: Embedding size
* Hidden (출력의 형태)
  + Hidden type: torch.Tensor


In [20]:
# Model
class Custom_RNN(torch.nn.Module):
  def __init__(self, input_dim, hidden_dim, output_dim, layers):
    super(Custom_RNN, self).__init__()
    self.rnn = torch.nn.RNN(input_dim, hidden_dim, num_layers=layers)
    self.fc = torch.nn.Linear(hidden_dim, output_dim, bias=True)

  def forward(self, x):
    x, _status = self.rnn(x)
    x = self.fc(x)
    return x

learning_rate = 0.05
training_epochs = 100
model = Custom_RNN(input_size, hidden_size, output_size, 2)

### 3 Train

In [21]:
# define cost/loss & optimizer
criterion = nn.CrossEntropyLoss()    # Softmax
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# train
for epoch in range(training_epochs):
  optimizer.zero_grad()
  outputs = model(X)
  loss = criterion(outputs.reshape(-1, dic_size), Y.reshape(-1))
  loss.backward()
  optimizer.step()
  if epoch % 10 == 9:
    print('epoch: ',epoch, 'loss: ', loss.item())

result = outputs.data.numpy().argmax(axis=2)
for sentence in result:
  print(''.join([char_set[c] for c in np.squeeze(sentence)]))


epoch:  9 loss:  0.09576673060655594
epoch:  19 loss:  0.05868130922317505
epoch:  29 loss:  0.05805883929133415
epoch:  39 loss:  0.057936299592256546
epoch:  49 loss:  0.05782328546047211
epoch:  59 loss:  0.057829175144433975
epoch:  69 loss:  0.05781327188014984
epoch:  79 loss:  0.057803377509117126
epoch:  89 loss:  0.057801321148872375
epoch:  99 loss:  0.05779866501688957
owareyow
hats up?
amgreat.


### 3 Assignment
### 다음 미완성 코드를 활용해 좀 더 긴 문장을 학습해보자
* Sample sentences
  + "if you want to build a ship, don't drum up people together to "
  + "collect wood and don't assign them tasks and work, but rather "
  + "teach them to long for the endless immensity of the sea."

* Training data sentence
  + Shape: (N, S, E)
  
* Hidden 
  + Shape: (N, S, E *2)

* Output
  + Shape: (N, S, E)

In [72]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# for reproducibility
torch.manual_seed(100)

# Dictionary
sample_sentence_1 = "if you want to build a ship, don't drum up people together to "
sample_sentence_2 = "collect wood and don't assign them tasks and work, but rather "
sample_sentence_3 = "teach them to long for the endless immensity of the sea."
sample_sentence = sample_sentence_1 + sample_sentence_2 + sample_sentence_3
char_set = list(set(sample_sentence))
dic = {c: i for i, c in enumerate(char_set)}

# Parameters
dic_size = len(dic)
input_size = dic_size
hidden_size = dic_size * 2
output_size = dic_size
unit_sequence_length = 20

# Dataset setting
input_batch = []
target_batch = []
"""
  문장의 시작부터 끝까지 unit_sequence_length 만큼의 window size로 이동하며 sequence를 잘라내서, 
  여러 개 batch로 이루어진 X와, Y dataset을 만든다
"""

# To torch tensors
X = torch.FloatTensor(np.array(input_batch))
Y = torch.LongTensor(np.array(target_batch))
print(X.shape)
print(Y.shape)

# Model
""" 
  실습 때 사용한 모델을 기초로 해서, 
  RNN쪽 부분을 RNN, LSTM, GRU 세 가지로 바꾸어가며 테스트해 결과 accuracy를 비교해본다
"""
learning_rate = 0.05
training_epochs = 100
model = Custom_RNN(input_size, hidden_size, output_size, 2)

# define cost/loss & optimizer
criterion = nn.CrossEntropyLoss()    # Softmax
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# train
for epoch in range(training_epochs):
  optimizer.zero_grad()
  outputs = model(X)
  loss = criterion(outputs.reshape(-1, dic_size), Y.reshape(-1))
  loss.backward()
  optimizer.step()
  if epoch % 10 == 9:
    print('epoch: ',epoch, 'loss: ', loss.item())

# result
results = outputs.data.numpy().argmax(axis=2)
"""
  학습이 종료된 후, 학습된 모델을 이용해 문장 전체를 순차적으로 예측해 완성한 후, 
  원본과 비교해 어느정도 일치한지 Accuracy를 계산한다.
"""

torch.Size([160, 20, 25])
torch.Size([160, 20])
epoch:  9 loss:  2.7191596031188965
epoch:  19 loss:  1.671645164489746
epoch:  29 loss:  0.6607065796852112
epoch:  39 loss:  0.27085548639297485
epoch:  49 loss:  0.17204250395298004
epoch:  59 loss:  0.14130252599716187
epoch:  69 loss:  0.12837830185890198
epoch:  79 loss:  0.12267328053712845
epoch:  89 loss:  0.11952614784240723
epoch:  99 loss:  0.11799737066030502
p you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
Accuracy =  0.994413407821229
