- Char RNN

  문자 단위 RNN

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

input_str = 'apple'
label_str = 'pple!'
char_vocab = sorted(list(set(input_str+label_str)))
vocab_size = len(char_vocab)

input_size = vocab_size
hidden_size = 5
output_size = 5
learning_rate = 0.1

char_to_index = dict((c, i) for i,c in enumerate(char_vocab))

index_to_char = {}
for key,value in char_to_index.items():
  index_to_char[value] = key 

index_to_char

{0: '!', 1: 'a', 2: 'e', 3: 'l', 4: 'p'}

In [5]:
x_data = [char_to_index[c] for c in input_str]
y_data = [char_to_index[c] for c in label_str]
print(x_data)
print(y_data)

[1, 4, 4, 3, 2]
[4, 4, 3, 2, 0]


In [6]:
#  Pytorch nn.RNN() 은 3차원 텐서를 입력 받기 때문에 배치 차원 추가
x_data = [x_data]
y_data = [y_data]

# 각 문자 one-hot vector 화

x_one_hot = [np.eye(vocab_size)[x] for x in x_data] # np.eye(a) : n * n 의 단위 행렬 생성
x_one_hot

[array([[0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 0.],
        [0., 0., 1., 0., 0.]])]

In [7]:
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

  """Entry point for launching an IPython kernel.


In [8]:
class Net(torch.nn.Module):
  def __init__(self, input_size, hidden_size, output_size):
    super(Net,self).__init__()
    self.rnn = torch.nn.RNN(input_size, hidden_size, batch_first = True)
    self.fc = torch.nn.Linear(hidden_size, output_size, bias = True)

  def forward(self,x):
    x, _status = self.rnn(x)
    x = self.fc(x)
    return x

model = Net(input_size, hidden_size, output_size)

outputs = model(X)
outputs.shape

torch.Size([1, 5, 5])

In [10]:
outputs.view(-1,input_size).shape # batch  차원 제거

torch.Size([5, 5])

In [11]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), learning_rate)

for i in range(100):
  optimizer.zero_grad()
  outputs=model(X)
  loss = criterion(outputs.view(-1, input_size), Y.view(-1))
  loss.backward()
  optimizer.step()

  result = outputs.data.numpy().argmax(axis=2)
  result_str = ''.join([index_to_char[c] for c in np.squeeze(result)])
  print(i,'loss: ', loss.item(), 'prediction: ', result, 'true Y: ', y_data, 'prediction str: ',result_str)

0 loss:  1.619940996170044 prediction:  [[4 4 4 4 4]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  ppppp
1 loss:  1.4305297136306763 prediction:  [[4 4 4 4 4]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  ppppp
2 loss:  1.1994540691375732 prediction:  [[4 4 4 2 3]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pppel
3 loss:  1.04063880443573 prediction:  [[4 4 3 0 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  ppl!!
4 loss:  0.8797737956047058 prediction:  [[4 4 3 0 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  ppl!!
5 loss:  0.7086673974990845 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
6 loss:  0.5679048299789429 prediction:  [[4 4 4 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pppe!
7 loss:  0.4600202143192291 prediction:  [[4 4 4 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pppe!
8 loss:  0.35548874735832214 prediction:  [[4 4 4 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pppe!
9 loss:  0.2531050443649292 prediction:  [[4 4 3 2 0]] tr

 - word RNN - 임베딩 사용
 

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim

sentence = 'Repeat is the vest medicine for memory'.split()

vocab = list(set(sentence))

word2 = {tkn: i for i, tkn in enumerate(vocab,1)}
word2['<unk>']=0

index2 = {v: k for k, v in word2.items()}
index2

{0: '<unk>',
 1: 'memory',
 2: 'medicine',
 3: 'vest',
 4: 'the',
 5: 'is',
 6: 'for',
 7: 'Repeat'}

- Text classification using PyTorch

  RNN의 다-대-일 문제

  IMDB Movie Review Sentiment Analysis

In [1]:
from torch.backends.cuda import cuBLASModule
import os
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torchtext.legacy import data,datasets
import random

batch_size = 64
lr = 0.001
epochs = 10

device = 'cuda'

text = data.Field(sequential = True, batch_first = True, lower = True) 
label = data.Field(sequential = False, batch_first = True)

trainset, testset = datasets.IMDB.splits(text, label)

text.build_vocab(trainset,min_freq = 5)
label.build_vocab(trainset)

vocab_size = len(text.vocab)
n_classes = 2


trainset, valset = trainset.split(split_ratio=0.8)

train_iter, val_iter, test_iter = data.BucketIterator.splits((trainset, valset, testset), batch_size = batch_size, shuffle = True, repeat = False)

downloading aclImdb_v1.tar.gz


100%|██████████| 84.1M/84.1M [00:02<00:00, 29.4MB/s]


In [9]:
class GRU(nn.Module):
  def __init__(self, n_layers, hidden_dim, n_vocab, embed_dim, n_classes, dropout_p = 0.2):
    super(GRU, self).__init__()
    self.n_layers = n_layers
    self.hidden_dim = hidden_dim

    self.embed = nn.Embedding(n_vocab, embed_dim)
    self.dropout = nn.Dropout(dropout_p)
    self.gru = nn.GRU(embed_dim, self.hidden_dim, num_layers = self.n_layers, batch_first = True)
    self.out = nn.Linear(self.hidden_dim, n_classes)

  def forward(self, x):
    x =self.embed(x)
    h = self._init_state(batch_size = x.size(0))
    x, _ = self.gru(x,h)
    h_t = x[:,-1,:]
    self.dropout(h_t)
    logit = self.out(h_t)
    return logit

  def _init_state(self,batch_size = 1):
    weight = next(self.parameters()).data
    return weight.new(self.n_layers, batch_size, self.hidden_dim).zero_()

In [10]:
model = GRU(1,256, vocab_size, 128, n_classes, 0.5).to(device)
optimizer = torch.optim.Adam(model.parameters(),lr = lr)

In [11]:
def train(model, optimizer,train_iter):
  model.train()
  for b, batch in enumerate(train_iter):
    x,y = batch.text.to(device), batch.label.to(device)
    y.data.sub_(1)
    optimizer.zero_grad()

    logit = model(x)
    loss = F.cross_entropy(logit,y)
    loss.backward()
    optimizer.step()

In [12]:
def evaluate(model, val_iter):
    model.eval()
    corrects, total_loss = 0, 0
    for batch in val_iter:
        x, y = batch.text.to(device), batch.label.to(device)
        y.data.sub_(1) 
        logit = model(x)
        loss = F.cross_entropy(logit, y, reduction='sum')
        total_loss += loss.item()
        corrects += (logit.max(1)[1].view(y.size()).data == y.data).sum()
    size = len(val_iter.dataset)
    avg_loss = total_loss / size
    avg_accuracy = 100.0 * corrects / size
    return avg_loss, avg_accuracy


In [13]:
best_val_loss = None
for e in range(1, epochs+1):
    train(model, optimizer, train_iter)
    val_loss, val_accuracy = evaluate(model, val_iter)

    print("[Epoch: %d] val loss : %5.2f | val accuracy : %5.2f" % (e, val_loss, val_accuracy))

    # 검증 오차가 가장 적은 최적의 모델을 저장
    if not best_val_loss or val_loss < best_val_loss:
        if not os.path.isdir("snapshot"):
            os.makedirs("snapshot")
        torch.save(model.state_dict(), './snapshot/txtclassification.pt')
        best_val_loss = val_loss

[Epoch: 1] val loss :  0.70 | val accuracy : 49.52
[Epoch: 2] val loss :  0.69 | val accuracy : 50.28
[Epoch: 3] val loss :  0.69 | val accuracy : 51.02
[Epoch: 4] val loss :  0.69 | val accuracy : 52.94
[Epoch: 5] val loss :  0.69 | val accuracy : 52.08
[Epoch: 6] val loss :  0.63 | val accuracy : 68.64
[Epoch: 7] val loss :  0.41 | val accuracy : 82.76
[Epoch: 8] val loss :  0.35 | val accuracy : 85.78
[Epoch: 9] val loss :  0.35 | val accuracy : 85.70
[Epoch: 10] val loss :  0.35 | val accuracy : 86.40
