In [1]:
import numpy as np
import matplotlib.pyplot as plt

import os
import torch
from torch import nn, optim

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [2]:
def torch_seed(seed=123, deter=False):

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = deter
    torch.use_deterministic_algorithms = deter

In [19]:
input_size = 2
hidden_size = 4

inputs = torch.rand((1, 5, input_size))

print(inputs)
print(inputs.shape)
print(inputs.dtype)

tensor([[[0.7388, 0.7179],
         [0.7058, 0.9156],
         [0.4340, 0.0772],
         [0.3565, 0.1479],
         [0.5331, 0.4066]]])
torch.Size([1, 5, 2])
torch.float32


In [20]:
rnn = nn.RNN(input_size, hidden_size, batch_first=True)

In [21]:
torch_seed()

print(list(rnn.parameters()))

[Parameter containing:
tensor([[-0.2682, -0.0455],
        [ 0.4737, -0.0394],
        [ 0.0159, -0.0780],
        [ 0.0786,  0.4455]], requires_grad=True), Parameter containing:
tensor([[ 0.3057,  0.1775,  0.1087,  0.1179],
        [ 0.1932, -0.0646, -0.4647, -0.3092],
        [ 0.4268,  0.0299, -0.4050,  0.0789],
        [ 0.4131, -0.4725, -0.3366, -0.1991]], requires_grad=True), Parameter containing:
tensor([ 0.0201, -0.1166, -0.0549, -0.4874], requires_grad=True), Parameter containing:
tensor([ 0.2341,  0.4389,  0.3056, -0.3541], requires_grad=True)]


In [22]:
outputs, _status = rnn(inputs)
print(outputs)
print(_status)

tensor([[[ 0.0234,  0.5675,  0.2035, -0.4330],
         [ 0.1019,  0.5564,  0.1005, -0.5503],
         [ 0.2072,  0.5594,  0.2237, -0.7250],
         [ 0.2481,  0.5437,  0.1995, -0.6948],
         [ 0.2020,  0.6003,  0.2107, -0.6054]]], grad_fn=<TransposeBackward1>)
tensor([[[ 0.2020,  0.6003,  0.2107, -0.6054]]], grad_fn=<StackBackward0>)


In [23]:
inputs = torch.Tensor(1, 5, 2)
cell = nn.RNN(input_size=2, hidden_size=6, num_layers=2, batch_first=True)

outputs, _status = cell(inputs)
print(outputs)
print(_status)

print(_status[0])
print(_status[1])

tensor([[[-0.0506, -0.3345, -0.0627, -0.4197,  0.3749,  0.3338],
         [-0.2343, -0.1329,  0.1991, -0.2963,  0.4662,  0.1552],
         [-0.1321, -0.2785,  0.2445, -0.3711,  0.6102,  0.1623],
         [-0.0717, -0.2599,  0.3451, -0.3760,  0.6385,  0.1538],
         [-0.0166, -0.2569,  0.3646, -0.3898,  0.6689,  0.1642]]],
       grad_fn=<TransposeBackward1>)
tensor([[[ 0.1740, -0.2262,  0.0954,  0.4356,  0.1677, -0.2853]],

        [[-0.0166, -0.2569,  0.3646, -0.3898,  0.6689,  0.1642]]],
       grad_fn=<StackBackward0>)
tensor([[ 0.1740, -0.2262,  0.0954,  0.4356,  0.1677, -0.2853]],
       grad_fn=<SelectBackward0>)
tensor([[-0.0166, -0.2569,  0.3646, -0.3898,  0.6689,  0.1642]],
       grad_fn=<SelectBackward0>)


In [24]:
input_str = 'apple'
label_str = 'pple!'

In [29]:
char_vocab = sorted(list(set(input_str+label_str)))
vocab_size = len(char_vocab)
print(char_vocab)
print(vocab_size)

['!', 'a', 'e', 'l', 'p']
5


In [26]:
char_to_index = dict((c, i) for i, c in enumerate(char_vocab))
print(char_to_index)

index_to_char = dict((i, c) for i, c in enumerate(char_vocab))
print(index_to_char)

{'!': 0, 'a': 1, 'e': 2, 'l': 3, 'p': 4}
{0: '!', 1: 'a', 2: 'e', 3: 'l', 4: 'p'}


In [27]:
x_data = [char_to_index[c] for c in input_str]
y_data = [char_to_index[c] for c in label_str]

print(x_data)
print(y_data)

[1, 4, 4, 3, 2]
[4, 4, 3, 2, 0]


In [28]:
x_data = [x_data]
y_data = [y_data]

print(x_data)
print(y_data)

[[1, 4, 4, 3, 2]]
[[4, 4, 3, 2, 0]]


In [38]:
x_one_hot = np.eye(vocab_size)[x_data]

print(x_one_hot)

[[[0. 1. 0. 0. 0.]
  [0. 0. 0. 0. 1.]
  [0. 0. 0. 0. 1.]
  [0. 0. 0. 1. 0.]
  [0. 0. 1. 0. 0.]]]


In [39]:
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

print(X.shape)
print(Y.shape)

print(X)
print(Y)

torch.Size([1, 5, 5])
torch.Size([1, 5])
tensor([[[0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 1.],
         [0., 0., 0., 1., 0.],
         [0., 0., 1., 0., 0.]]])
tensor([[4, 4, 3, 2, 0]])


In [40]:
input_size = vocab_size
hidden_size = 6
output_size = 5
learning_rate = 0.1

In [41]:
class VanillaRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size, bias=True)

    def forward(self, x):
        x, _status = self.rnn(x)
        x = self.fc(x)
        return x

In [42]:
net = VanillaRNN(input_size, hidden_size, output_size)

In [43]:
outputs = net(X)
print(outputs.shape)
print(outputs)

torch.Size([1, 5, 5])
tensor([[[ 0.6672, -0.4526,  0.0573, -0.2197,  0.4151],
         [ 0.4566, -0.4126, -0.1461, -0.2106,  0.1750],
         [ 0.3333, -0.4490, -0.1274, -0.1861,  0.2743],
         [ 0.7592, -0.4006, -0.0284, -0.1635,  0.2178],
         [ 0.5743, -0.4294, -0.0412, -0.3804,  0.3693]]],
       grad_fn=<ViewBackward0>)


In [44]:
print(outputs)
print(Y)

tensor([[[ 0.6672, -0.4526,  0.0573, -0.2197,  0.4151],
         [ 0.4566, -0.4126, -0.1461, -0.2106,  0.1750],
         [ 0.3333, -0.4490, -0.1274, -0.1861,  0.2743],
         [ 0.7592, -0.4006, -0.0284, -0.1635,  0.2178],
         [ 0.5743, -0.4294, -0.0412, -0.3804,  0.3693]]],
       grad_fn=<ViewBackward0>)
tensor([[4, 4, 3, 2, 0]])


In [50]:
print(outputs.view(-1, output_size))
print(Y.view(-1))

print(outputs.argmax(axis=2).numpy().squeeze())

tensor([[ 0.6672, -0.4526,  0.0573, -0.2197,  0.4151],
        [ 0.4566, -0.4126, -0.1461, -0.2106,  0.1750],
        [ 0.3333, -0.4490, -0.1274, -0.1861,  0.2743],
        [ 0.7592, -0.4006, -0.0284, -0.1635,  0.2178],
        [ 0.5743, -0.4294, -0.0412, -0.3804,  0.3693]],
       grad_fn=<ViewBackward0>)
tensor([4, 4, 3, 2, 0])
[0 0 0 0 0]


In [51]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

for i in range(100):
    outputs = net(X)
    loss = criterion(outputs.view(-1, output_size), Y.view(-1))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    result = outputs.argmax(axis=2)
    result_str = ''.join([index_to_char[c] for c in np.squeeze(result).numpy()])
    print(i, "loss:", round(loss.item(), 3), "prediction:", result, 'true Y:', y_data, "prediction str:", result_str)
    

0 loss: 1.513 prediction: tensor([[0, 0, 0, 0, 0]]) true Y: [[4, 4, 3, 2, 0]] prediction str: !!!!!
1 loss: 1.239 prediction: tensor([[4, 4, 3, 3, 4]]) true Y: [[4, 4, 3, 2, 0]] prediction str: ppllp
2 loss: 0.96 prediction: tensor([[4, 4, 3, 2, 4]]) true Y: [[4, 4, 3, 2, 0]] prediction str: pplep
3 loss: 0.69 prediction: tensor([[4, 4, 3, 2, 0]]) true Y: [[4, 4, 3, 2, 0]] prediction str: pple!
4 loss: 0.465 prediction: tensor([[4, 4, 3, 2, 0]]) true Y: [[4, 4, 3, 2, 0]] prediction str: pple!
5 loss: 0.302 prediction: tensor([[4, 4, 3, 2, 0]]) true Y: [[4, 4, 3, 2, 0]] prediction str: pple!
6 loss: 0.202 prediction: tensor([[4, 4, 3, 2, 0]]) true Y: [[4, 4, 3, 2, 0]] prediction str: pple!
7 loss: 0.13 prediction: tensor([[4, 4, 3, 2, 0]]) true Y: [[4, 4, 3, 2, 0]] prediction str: pple!
8 loss: 0.086 prediction: tensor([[4, 4, 3, 2, 0]]) true Y: [[4, 4, 3, 2, 0]] prediction str: pple!
9 loss: 0.057 prediction: tensor([[4, 4, 3, 2, 0]]) true Y: [[4, 4, 3, 2, 0]] prediction str: pple!
10 

## 한글을 이용한 RNN

In [52]:
text = '안녕하세요 여러분'
input_str_kr = text
label_str_kr = text[1:] +'!'

print(input_str_kr)
print(label_str_kr)

안녕하세요 여러분
녕하세요 여러분!


In [53]:
voc_set = set(input_str_kr+label_str_kr)
char_vocab = sorted(list(voc_set))
vocab_size = len(char_vocab)

print(voc_set)
print(char_vocab)
print(vocab_size)

{'러', '하', '!', '녕', ' ', '분', '여', '세', '요', '안'}
[' ', '!', '녕', '러', '분', '세', '안', '여', '요', '하']
10


In [54]:
char_to_index = dict((c, i) for i, c in enumerate(char_vocab))
print(char_to_index)

index_to_char = dict((i, c) for i, c in enumerate(char_vocab))
print(index_to_char)

{' ': 0, '!': 1, '녕': 2, '러': 3, '분': 4, '세': 5, '안': 6, '여': 7, '요': 8, '하': 9}
{0: ' ', 1: '!', 2: '녕', 3: '러', 4: '분', 5: '세', 6: '안', 7: '여', 8: '요', 9: '하'}


In [55]:
x_data = [[char_to_index[c] for c in input_str_kr]]
y_data = [[char_to_index[c] for c in label_str_kr]]

print(x_data)
print(y_data)

[[6, 2, 9, 5, 8, 0, 7, 3, 4]]
[[2, 9, 5, 8, 0, 7, 3, 4, 1]]


In [57]:
x_one_hot = np.eye(vocab_size)[x_data]

print(x_one_hot)

[[[0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
  [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
  [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]]


In [58]:
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

print(X)
print(Y)

tensor([[[0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]]])
tensor([[2, 9, 5, 8, 0, 7, 3, 4, 1]])


In [61]:
vocab_size = len(char_vocab)
input_size = vocab_size
hidden_size = 20
output_size = vocab_size
learning_rate = 0.1

class VanillaRNN_Kor(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size, bias=True)

    def forward(self, x):
        x , _status = self.rnn(x)
        x = self.fc(x)
        return x
    
net = VanillaRNN_Kor(input_size, hidden_size, output_size)
print(net.parameters)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

for i in range(100):
    outputs = net(X)
    loss = criterion(outputs.view(-1, output_size), Y.view(-1))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    result = outputs.argmax(axis=2)
    result_str = ''.join([index_to_char[c] for c in np.squeeze(result).numpy()])
    print(i, "loss:", round(loss.item(), 3), "prediction:", result, 'true Y:', y_data, "prediction str:", result_str)
    

<bound method Module.parameters of VanillaRNN_Kor(
  (rnn): RNN(10, 20, batch_first=True)
  (fc): Linear(in_features=20, out_features=10, bias=True)
)>
0 loss: 2.327 prediction: tensor([[9, 9, 8, 9, 6, 9, 9, 9, 9]]) true Y: [[2, 9, 5, 8, 0, 7, 3, 4, 1]] prediction str: 하하요하안하하하하
1 loss: 1.8 prediction: tensor([[2, 9, 5, 9, 0, 9, 3, 2, 1]]) true Y: [[2, 9, 5, 8, 0, 7, 3, 4, 1]] prediction str: 녕하세하 하러녕!
2 loss: 1.074 prediction: tensor([[2, 9, 5, 8, 0, 7, 3, 5, 1]]) true Y: [[2, 9, 5, 8, 0, 7, 3, 4, 1]] prediction str: 녕하세요 여러세!
3 loss: 0.446 prediction: tensor([[2, 9, 5, 8, 0, 7, 3, 4, 1]]) true Y: [[2, 9, 5, 8, 0, 7, 3, 4, 1]] prediction str: 녕하세요 여러분!
4 loss: 0.163 prediction: tensor([[2, 9, 5, 8, 0, 7, 3, 4, 1]]) true Y: [[2, 9, 5, 8, 0, 7, 3, 4, 1]] prediction str: 녕하세요 여러분!
5 loss: 0.055 prediction: tensor([[2, 9, 5, 8, 0, 7, 3, 4, 1]]) true Y: [[2, 9, 5, 8, 0, 7, 3, 4, 1]] prediction str: 녕하세요 여러분!
6 loss: 0.02 prediction: tensor([[2, 9, 5, 8, 0, 7, 3, 4, 1]]) true Y: [[2, 9, 5, 

## LSTM을 이용한 영화 리뷰 분류

In [63]:
import pandas as pd
from urllib import request

input_dim = 5
hidden_size = 3

inputs = torch.rand(1, 3, 5)
print(inputs)
print(inputs.dtype)
print(inputs.shape)

tensor([[[0.7200, 0.8107, 0.0901, 0.3766, 0.2162],
         [0.2487, 0.4383, 0.6422, 0.2299, 0.3147],
         [0.5300, 0.8756, 0.4635, 0.6490, 0.7249]]])
torch.float32
torch.Size([1, 3, 5])


In [64]:
lstm = nn.LSTM(input_dim, hidden_size, batch_first=True)
print(list(lstm.parameters()))

[Parameter containing:
tensor([[-0.2246,  0.5499, -0.1073, -0.1129, -0.1378],
        [ 0.5190,  0.2085, -0.3787, -0.0854, -0.1555],
        [-0.0650,  0.4903, -0.2045,  0.0726, -0.4082],
        [-0.5200, -0.2374,  0.0030,  0.1109, -0.3820],
        [ 0.5747,  0.4850, -0.2531, -0.1494,  0.4621],
        [-0.5327, -0.4004,  0.5042,  0.5536,  0.4995],
        [ 0.4724,  0.4621,  0.0836, -0.0287, -0.0046],
        [-0.5560, -0.2912, -0.0042, -0.2184,  0.4152],
        [-0.2537,  0.1111, -0.2759,  0.3105, -0.5026],
        [ 0.2394,  0.5469, -0.2024,  0.5086,  0.0399],
        [-0.4681, -0.5723,  0.2233,  0.2187, -0.3615],
        [ 0.3662,  0.2126, -0.4324, -0.1043, -0.3270]], requires_grad=True), Parameter containing:
tensor([[ 0.3815, -0.1068,  0.5448],
        [ 0.0606,  0.3318,  0.4958],
        [-0.0583, -0.1228,  0.1885],
        [ 0.0096,  0.4449, -0.0524],
        [ 0.5760,  0.3786,  0.4486],
        [-0.0960, -0.0086,  0.4365],
        [-0.1709,  0.4585,  0.0352],
        [ 0.22

In [65]:
request.urlretrieve("https://raw.githubusercontent.com/e9t/nsmc/master/ratings_train.txt", 
                    filename="ratings_train.txt")
request.urlretrieve("https://raw.githubusercontent.com/e9t/nsmc/master/ratings_test.txt", 
                    filename="ratings_test.txt")

('ratings_test.txt', <http.client.HTTPMessage at 0x241e1929d30>)

In [68]:
train_data = pd.read_table('ratings_train.txt', sep = "\t", nrows = 10000)
test_data = pd.read_table('ratings_test.txt', sep = "\t", nrows = 10000)

print(len(train_data))
print(len(test_data))

train_data.head()

10000
10000


Unnamed: 0,id,document,label
0,9976970,아 더빙.. 진짜 짜증나네요 목소리,0
1,3819312,흠...포스터보고 초딩영화줄....오버연기조차 가볍지 않구나,1
2,10265843,너무재밓었다그래서보는것을추천한다,0
3,9045019,교도소 이야기구먼 ..솔직히 재미는 없다..평점 조정,0
4,6483659,사이몬페그의 익살스런 연기가 돋보였던 영화!스파이더맨에서 늙어보이기만 했던 커스틴 ...,1


In [69]:
train_data.dropna(inplace=True, how='any')
train_data.drop_duplicates(subset=['document'], inplace=True)
print(len(train_data))

9918


In [71]:
train_data['document'] = train_data['document'].str.replace("[^ㄱ-ㅎㅏ-ㅣ가-힣]", " ", regex=True)
train_data.head(10)

Unnamed: 0,id,document,label
0,9976970,아 더빙 진짜 짜증나네요 목소리,0
1,3819312,흠 포스터보고 초딩영화줄 오버연기조차 가볍지 않구나,1
2,10265843,너무재밓었다그래서보는것을추천한다,0
3,9045019,교도소 이야기구먼 솔직히 재미는 없다 평점 조정,0
4,6483659,사이몬페그의 익살스런 연기가 돋보였던 영화 스파이더맨에서 늙어보이기만 했던 커스틴 ...,1
5,5403919,막 걸음마 뗀 세부터 초등학교 학년생인 살용영화 ㅋㅋㅋ 별반개도 아까움,0
6,7797314,원작의 긴장감을 제대로 살려내지못했다,0
7,9443947,별 반개도 아깝다 욕나온다 이응경 길용우 연기생활이몇년인지 정말 발로해도 그것보단...,0
8,7156791,액션이 없는데도 재미 있는 몇안되는 영화,1
9,5912145,왜케 평점이 낮은건데 꽤 볼만한데 헐리우드식 화려함에만 너무 길들여져 있나,1


In [75]:
train_data['document'] = train_data['document'].str.strip()
train_data['document'] = train_data['document'].replace('', np.nan)
train_data.dropna(how='any', inplace=True)
print(train_data.isnull().sum())
print(train_data.shape)
train_data.head(10)

id          0
document    0
label       0
dtype: int64
(9858, 3)


Unnamed: 0,id,document,label
0,9976970,아 더빙 진짜 짜증나네요 목소리,0
1,3819312,흠 포스터보고 초딩영화줄 오버연기조차 가볍지 않구나,1
2,10265843,너무재밓었다그래서보는것을추천한다,0
3,9045019,교도소 이야기구먼 솔직히 재미는 없다 평점 조정,0
4,6483659,사이몬페그의 익살스런 연기가 돋보였던 영화 스파이더맨에서 늙어보이기만 했던 커스틴 ...,1
5,5403919,막 걸음마 뗀 세부터 초등학교 학년생인 살용영화 ㅋㅋㅋ 별반개도 아까움,0
6,7797314,원작의 긴장감을 제대로 살려내지못했다,0
7,9443947,별 반개도 아깝다 욕나온다 이응경 길용우 연기생활이몇년인지 정말 발로해도 그것보단...,0
8,7156791,액션이 없는데도 재미 있는 몇안되는 영화,1
9,5912145,왜케 평점이 낮은건데 꽤 볼만한데 헐리우드식 화려함에만 너무 길들여져 있나,1
