# 19장 순환 신경망 (Recurrent neural network)
- Vanilla RNN, LSTM

* "부록3 매트플롯립 입문"에서 한글 폰트를 올바르게 출력하기 위한 설치 방법을 설명했다. 설치 방법은 다음과 같다.

In [None]:
!sudo apt-get install -y fonts-nanum* | tail -n 1
!sudo fc-cache -fv
!rm -rf ~/.cache/matplotlib

In [None]:
# 필요 라이브러리 설치

!pip install torchviz | tail -n 1
!pip install torchinfo | tail -n 1

In [1]:
# 라이브러리 임포트

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display

# 폰트 관련 용도
import matplotlib.font_manager as fm

# Colab, Linux
# 나눔 고딕 폰트의 경로 명시
path = '/usr/share/fonts/truetype/nanum/NanumGothic.ttf'
font_name = fm.FontProperties(fname=path, size=10).get_name()

# Window 
# font_name = "NanumBarunGothic"

# Mac
# font_name = "AppleGothic"

In [2]:
# warning 표시 끄기
import warnings
warnings.simplefilter('ignore')

import os
import torch
from torch import nn, optim

In [3]:
# 기본 폰트 설정
plt.rcParams['font.family'] = font_name

# 기본 폰트 사이즈 변경
plt.rcParams['font.size'] = 14

# 기본 그래프 사이즈 변경
plt.rcParams['figure.figsize'] = (6,6)

# 기본 그리드 표시
# 필요에 따라 설정할 때는, plt.grid()
plt.rcParams['axes.grid'] = True
plt.rcParams["grid.linestyle"] = ":"

# 마이너스 기호 정상 출력
plt.rcParams['axes.unicode_minus'] = False

# 넘파이 부동소수점 자릿수 표시
np.set_printoptions(suppress=True, precision=4)

In [4]:
# GPU 디바이스 할당

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


# **Vanillla RNN**

### RNN 모델 구조

In [5]:
def torch_seed(seed=123, deter = False):
  
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = deter
    torch.use_deterministic_algorithms = deter

In [9]:
## parameter 
input_size = 2
hidden_size = 4

# (batch_size, time_steps, input_size)
inputs = torch.rand((1, 5, input_size)) # U(0, 1)
print(inputs)
print(inputs.dtype)

tensor([[[0.1241, 0.4324],
         [0.2235, 0.2961],
         [0.9725, 0.1091],
         [0.7995, 0.5880],
         [0.7150, 0.0430]]])
torch.float32


In [None]:
input_size = 2
hidden_size = 4
rnn = nn.RNN(input_size, hidden_size, batch_first=True)

In [14]:
torch_seed()

## parameter 
input_size = 2
hidden_size = 4
rnn = nn.RNN(input_size, hidden_size, batch_first=True) # 맨 앞이 batch
print('hidden weights = \n')
list(rnn.parameters())

hidden weights = 



[Parameter containing:
 tensor([[-0.2039,  0.0166],
         [-0.2483,  0.1886],
         [-0.4260,  0.3665],
         [-0.3634, -0.3975]], requires_grad=True),
 Parameter containing:
 tensor([[-0.3159,  0.2264, -0.1847,  0.1871],
         [-0.4244, -0.3034, -0.1836, -0.0983],
         [-0.3814,  0.3274, -0.1179,  0.1605],
         [ 0.3536,  0.0932,  0.1367,  0.4826]], requires_grad=True),
 Parameter containing:
 tensor([-0.2255,  0.1584, -0.2225,  0.3573], requires_grad=True),
 Parameter containing:
 tensor([ 0.3993, -0.4610,  0.4268,  0.2388], requires_grad=True)]

In [16]:
outputs, _status = rnn(inputs) # inputs = (1, 5, 2)
print("hiddens = \n", outputs)  ## 모든 노드
print("terminal = \n", _status)  ## 마지막 노드

hiddens = 
 tensor([[[ 0.1544, -0.2467,  0.3004,  0.3619],
         [ 0.0407, -0.3659,  0.1003,  0.5680],
         [-0.0306, -0.4653, -0.2222,  0.4361],
         [ 0.0475, -0.2338,  0.0348,  0.1952],
         [-0.0091, -0.4193, -0.1508,  0.3911]]], grad_fn=<TransposeBackward1>)
terminal = 
 tensor([[[-0.0091, -0.4193, -0.1508,  0.3911]]], grad_fn=<StackBackward0>)


### 2개 이상의 RNN layer

In [21]:
inputs = torch.Tensor(1, 5, 2)
# (batch_size, time_steps, input_size)
cell = nn.RNN(input_size = 2, hidden_size = 6, num_layers = 2, 
              batch_first=True)

outputs, _status = cell(inputs)
print("hidden shape = \n", outputs)  ## 모든 노드
print()
print("terminal = \n",_status)  ## 마지막 노드

print("_status[0] = \n", _status[0])  ## first layer 마지막 노드
print("_status[1] = \n", _status[1])  ## second layer 마지막 노드

hidden shape = 
 tensor([[[-0.0987, -0.6732, -0.4266,  0.0146, -0.0286,  0.3485],
         [-0.2571, -0.7404, -0.3544,  0.1276, -0.0586,  0.5508],
         [-0.1535, -0.6743, -0.2998, -0.0404, -0.1162,  0.5495],
         [-0.1249, -0.7686, -0.3432,  0.0732, -0.0550,  0.5247],
         [-0.1860, -0.7401, -0.3549,  0.0213, -0.0958,  0.5711]]],
       grad_fn=<TransposeBackward1>)

terminal = 
 tensor([[[ 0.6543, -0.5262,  0.0933, -0.8319,  0.3218, -0.0678]],

        [[-0.1860, -0.7401, -0.3549,  0.0213, -0.0958,  0.5711]]],
       grad_fn=<StackBackward0>)
_status[0] = 
 tensor([[ 0.6543, -0.5262,  0.0933, -0.8319,  0.3218, -0.0678]],
       grad_fn=<SelectBackward0>)
_status[1] = 
 tensor([[-0.1860, -0.7401, -0.3549,  0.0213, -0.0958,  0.5711]],
       grad_fn=<SelectBackward0>)


# **문자단위 RNN**

In [23]:
input_str = 'apple'
label_str = 'pple!'

### Vocab set

In [25]:
# 1. Vocab set
char_vocab = sorted(list(set(input_str+label_str)))
vocab_size = len(char_vocab)
print ('문자 집합의 크기 : {}'.format(vocab_size))
print(char_vocab)

문자 집합의 크기 : 5
['!', 'a', 'e', 'l', 'p']


### 문자 집합에 고유한 정수를 부여

In [26]:
# 2. 문자 집합에 고유한 정수를 부여
char_to_index = dict((c, i) for i, c in enumerate(char_vocab)) # 문자에 고유한 정수 인덱스 부여
print(char_to_index)

index_to_char = dict((i, c) for i, c in enumerate(char_vocab)) # 문자에 고유한 정수 인덱스 부여
print(index_to_char)

{'!': 0, 'a': 1, 'e': 2, 'l': 3, 'p': 4}
{0: '!', 1: 'a', 2: 'e', 3: 'l', 4: 'p'}


### Label encoding 만들기

In [28]:
# 3. Label encoding 
x_data = [char_to_index[c] for c in input_str]
y_data = [char_to_index[c] for c in label_str]
print(x_data)
print(y_data)


[1, 4, 4, 3, 2]
[4, 4, 3, 2, 0]


In [29]:
# 배치 차원 추가
x_data = [x_data]
y_data = [y_data]
print(x_data)
print(y_data)

[[1, 4, 4, 3, 2]]
[[4, 4, 3, 2, 0]]


### Onehot encoding

In [34]:
# 4. Onehot encoding 
# x_one_hot = [np.eye(vocab_size)[x] for x in x_data]
x_one_hot = np.eye(vocab_size)[x_data]
print(x_one_hot)

[[[0. 1. 0. 0. 0.]
  [0. 0. 0. 0. 1.]
  [0. 0. 0. 0. 1.]
  [0. 0. 0. 1. 0.]
  [0. 0. 1. 0. 0.]]]


### Tensor input 만들기

In [35]:
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)
print('훈련 데이터의 크기 : {}'.format(X.shape))
print('레이블의 크기 : {}'.format(Y.shape))
print("X = \n", X)
print("Y = \n", Y)

훈련 데이터의 크기 : torch.Size([1, 5, 5])
레이블의 크기 : torch.Size([1, 5])
X = 
 tensor([[[0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 1.],
         [0., 0., 0., 1., 0.],
         [0., 0., 1., 0., 0.]]])
Y = 
 tensor([[4, 4, 3, 2, 0]])


### RNN 모델 만들기

In [36]:
input_size = vocab_size # 입력의 크기는 문자 집합의 크기
hidden_size = 6
output_size = 5
learning_rate = 0.1

In [38]:
class VanillaRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(VanillaRNN, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True) # RNN 셀 구현
        self.fc = nn.Linear(hidden_size, output_size, bias=True) # 출력층 구현

    def forward(self, x): # 구현한 RNN 셀과 출력층을 연결
        x, _status = self.rnn(x)
        x = self.fc(x)
        return x

In [52]:
net = VanillaRNN(input_size, hidden_size, output_size)

### Output 크기

In [53]:
outputs = net(X) # X.shape = torch.Size([1, 5, 5])
print(outputs.shape) # 3차원 텐서
print(outputs)

torch.Size([1, 5, 5])
tensor([[[ 0.1416, -0.0020,  0.1332,  0.0077, -0.0202],
         [-0.3027, -0.1178,  0.1346, -0.0147,  0.0964],
         [-0.2770, -0.0168,  0.1117, -0.2443,  0.1217],
         [ 0.0175,  0.0066,  0.0873, -0.0874,  0.0303],
         [-0.0669,  0.2396,  0.1866,  0.0601,  0.2300]]],
       grad_fn=<ViewBackward0>)


In [41]:
print(outputs)
print(Y)


tensor([[[ 0.3650,  0.7164, -0.0127,  0.1906, -0.3810],
         [ 0.3215,  0.6482, -0.0727,  0.2921, -0.0229],
         [ 0.4450,  0.6361, -0.2579,  0.3304,  0.0214],
         [ 0.5763,  0.4344, -0.1728,  0.3319, -0.4698],
         [ 0.2496,  0.8205,  0.1673,  0.0787, -0.2713]]],
       grad_fn=<ViewBackward0>)
tensor([[4, 4, 3, 2, 0]])


In [54]:
print(outputs.view(-1, input_size)) # 2차원 텐서로 변환
print(Y.view(-1))


tensor([[ 0.1416, -0.0020,  0.1332,  0.0077, -0.0202],
        [-0.3027, -0.1178,  0.1346, -0.0147,  0.0964],
        [-0.2770, -0.0168,  0.1117, -0.2443,  0.1217],
        [ 0.0175,  0.0066,  0.0873, -0.0874,  0.0303],
        [-0.0669,  0.2396,  0.1866,  0.0601,  0.2300]],
       grad_fn=<ViewBackward0>)
tensor([4, 4, 3, 2, 0])


### Loss function, optimizer 정의

In [55]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), learning_rate)

### RNN 모델 학습

In [57]:
# output shape = (1, 5, 5)
# Y shape = (1, 5)

for i in range(100):
    
    outputs = net(X)
    loss = criterion(outputs.view(-1, output_size), Y.view(-1)) # view를 하는 이유는 Batch 차원 제거를 위해, CrossEntropyLoss: outputs should be (N, C), and Y should be (N, )

    optimizer.zero_grad()
    loss.backward() # 기울기 계산
    optimizer.step() # 아까 optimizer 선언 시 넣어둔 파라미터 업데이트

    # 아래 세 줄은 모델이 실제 어떻게 예측했는지를 확인하기 위한 코드.
    result = outputs.argmax(axis=2) # 최종 예측값인 각 time-step 별 5차원 벡터에 대해서 가장 높은 값의 인덱스를 선택
    result_str = ''.join([index_to_char[c] for c in np.squeeze(result.numpy())]) # np.squeeze(result.numpy()) : 1d vector
    print(i, "loss: ", round(loss.item(), 3), "prediction: ", result, "true Y: ", y_data, "prediction str: ", result_str)

0 loss:  0.0 prediction:  tensor([[4, 4, 3, 2, 0]]) true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
1 loss:  0.0 prediction:  tensor([[4, 4, 3, 2, 0]]) true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
2 loss:  0.0 prediction:  tensor([[4, 4, 3, 2, 0]]) true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
3 loss:  0.0 prediction:  tensor([[4, 4, 3, 2, 0]]) true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
4 loss:  0.0 prediction:  tensor([[4, 4, 3, 2, 0]]) true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
5 loss:  0.0 prediction:  tensor([[4, 4, 3, 2, 0]]) true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
6 loss:  0.0 prediction:  tensor([[4, 4, 3, 2, 0]]) true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
7 loss:  0.0 prediction:  tensor([[4, 4, 3, 2, 0]]) true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
8 loss:  0.0 prediction:  tensor([[4, 4, 3, 2, 0]]) true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
9 loss:  0.0 prediction:  tensor([[4, 4, 3, 2, 0]]) true Y:  [[4, 4, 3, 2, 0]] pre

### 한글을 이용한 RNN

In [59]:
# text = "공복에 드셔야 흡수가 잘되기 때문입니다. 공복에 드셨을 때 소화가 잘 되지 않는 분들은 식후에 드시는 것이 좋아요. 생들기름은 성인 기준으로 하루 5g정도 드시면 충분합니다. 숟가락에 따랐을 때 반절이면 5g이에요."
text = "안녕하세요 여러분"
input_str_kr = text
label_str_kr = text[1:] + "!"

print(input_str_kr)
print(label_str_kr)

# input_str_kr = '안녕하세요 여러분'
# label_str_kr = '녕하세요 여러분!'


안녕하세요 여러분
녕하세요 여러분!


In [60]:

## 1. Vocabulary set
voc_set = set(input_str_kr+label_str_kr)
char_vocab = sorted(list(voc_set))
vocab_size = len(char_vocab)
print ('문자 집합의 크기 : {}'.format(vocab_size))
print("Vocabulary set = ", voc_set)
print("="*50)


문자 집합의 크기 : 10
Vocabulary set =  {'요', '러', '분', '녕', '여', '세', ' ', '!', '하', '안'}


In [61]:

# 2. 문자 집합에 고유한 정수를 부여
char_to_index = dict((c, i) for i, c in enumerate(char_vocab)) # 문자에 고유한 정수 인덱스 부여
print("Char to index = \n", char_to_index)

index_to_char = dict((i, c) for i, c in enumerate(char_vocab)) # 문자에 고유한 정수 인덱스 부여
print("Idx to char = \n",index_to_char)
print("="*50)


Char to index = 
 {' ': 0, '!': 1, '녕': 2, '러': 3, '분': 4, '세': 5, '안': 6, '여': 7, '요': 8, '하': 9}
Idx to char = 
 {0: ' ', 1: '!', 2: '녕', 3: '러', 4: '분', 5: '세', 6: '안', 7: '여', 8: '요', 9: '하'}


In [62]:

# 3. Label encoding
x_data = [char_to_index[c] for c in input_str_kr]
y_data = [char_to_index[c] for c in label_str_kr]

# 배치 차원 추가
x_data = [x_data]
y_data = [y_data]

print("Label encoding = \n")
print("input_str = \n", x_data)
print("output_str = \n",y_data)
print("="*50)


Label encoding = 

input_str = 
 [[6, 2, 9, 5, 8, 0, 7, 3, 4]]
output_str = 
 [[2, 9, 5, 8, 0, 7, 3, 4, 1]]


In [64]:

# 4. Onehot encoding 
print('input_str One hot encoding =')
# x_one_hot = [np.eye(vocab_size)[x] for x in x_data]
x_one_hot = np.eye(vocab_size)[x_data]
print(x_one_hot)


input_str One hot encoding =
[[[0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
  [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
  [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]]


In [65]:

# 5. Tensor vector
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)
print('훈련 데이터의 크기 : {}'.format(X.shape))
print('레이블의 크기 : {}'.format(Y.shape))
print(X)
print(Y)

훈련 데이터의 크기 : torch.Size([1, 9, 10])
레이블의 크기 : torch.Size([1, 9])
tensor([[[0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]]])
tensor([[2, 9, 5, 8, 0, 7, 3, 4, 1]])


In [66]:
# 5. Model
vocab_size = len(char_vocab)
input_size = vocab_size # 입력의 크기는 문자 집합의 크기
hidden_size = 20
output_size = vocab_size
learning_rate = 0.1

class VanillaRNN_Kor(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(VanillaRNN_Kor, self).__init__()
        self.rnn = torch.nn.RNN(input_size, hidden_size, batch_first=True) # RNN 셀 구현
        self.fc = torch.nn.Linear(hidden_size, output_size, bias=True) # 출력층 구현

    def forward(self, x): # 구현한 RNN 셀과 출력층을 연결
        x, _status = self.rnn(x)
        x = self.fc(x)
        return x

net = VanillaRNN_Kor(input_size, hidden_size, output_size)
print("RNN parameters = \n", net.parameters)
# list(net.parameters())

outputs = net(X)
print(outputs.shape) # 3차원 텐서
# print(outputs)
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), learning_rate)

for i in range(100):
    
    outputs = net(X)
    loss = criterion(outputs.view(-1, input_size), Y.view(-1)) # view를 하는 이유는 Batch 차원 제거를 위해
    # loss = criterion(outputs, Y) # view를 하는 이유는 Batch 차원 제거를 위해, doesnot work

    optimizer.zero_grad()
    loss.backward() # 기울기 계산
    optimizer.step() # 아까 optimizer 선언 시 넣어둔 파라미터 업데이트

    # 아래 세 줄은 모델이 실제 어떻게 예측했는지를 확인하기 위한 코드.
    result = outputs.argmax(axis=2) # 최종 예측값인 각 time-step 별 5차원 벡터에 대해서 가장 높은 값의 인덱스를 선택
    result_str = ''.join([index_to_char[c] for c in np.squeeze(result.numpy())]) # np.squeeze(result.numpy()) : 1d vector
    print(i, "loss: ", loss.item(), "prediction: ", result, "true Y: ", y_data, "prediction str: ", result_str)

RNN parameters = 
 <bound method Module.parameters of VanillaRNN_Kor(
  (rnn): RNN(10, 20, batch_first=True)
  (fc): Linear(in_features=20, out_features=10, bias=True)
)>
torch.Size([1, 9, 10])
0 loss:  2.2905445098876953 prediction:  tensor([[8, 1, 1, 1, 8, 7, 8, 8, 8]]) true Y:  [[2, 9, 5, 8, 0, 7, 3, 4, 1]] prediction str:  요!!!요여요요요
1 loss:  1.9417288303375244 prediction:  tensor([[3, 9, 7, 7, 0, 7, 7, 7, 7]]) true Y:  [[2, 9, 5, 8, 0, 7, 3, 4, 1]] prediction str:  러하여여 여여여여
2 loss:  1.3427523374557495 prediction:  tensor([[2, 9, 5, 8, 0, 7, 3, 9, 1]]) true Y:  [[2, 9, 5, 8, 0, 7, 3, 4, 1]] prediction str:  녕하세요 여러하!
3 loss:  0.6551940441131592 prediction:  tensor([[2, 9, 5, 8, 0, 7, 3, 4, 1]]) true Y:  [[2, 9, 5, 8, 0, 7, 3, 4, 1]] prediction str:  녕하세요 여러분!
4 loss:  0.22501195967197418 prediction:  tensor([[2, 9, 5, 8, 0, 7, 3, 4, 1]]) true Y:  [[2, 9, 5, 8, 0, 7, 3, 4, 1]] prediction str:  녕하세요 여러분!
5 loss:  0.07384788990020752 prediction:  tensor([[2, 9, 5, 8, 0, 7, 3, 4, 1]]) 

# LSTM을 이용한 네이버 영화 리뷰 분류

In [67]:
import pandas as pd
from urllib import request

### LSTM 모델 확인하기

In [71]:
## parameter 
input_dim = 5
hidden_size = 3

# (batch_size, time_steps, input_size)
inputs = torch.rand(1, 3, 5)
print(inputs)
print(inputs.dtype)

tensor([[[0.7100, 0.9957, 0.4907, 0.9389, 0.7806],
         [0.3214, 0.4088, 0.0813, 0.1504, 0.1035],
         [0.4708, 0.6658, 0.8689, 0.5481, 0.7580]]])
torch.float32


In [None]:

input_dim = 5
hidden_size = 3
lstm = nn.LSTM(input_dim, hidden_size, batch_first=True)

In [72]:
lstm = nn.LSTM(input_dim, hidden_size, batch_first=True)
print(list(lstm.parameters()))

[Parameter containing:
tensor([[ 0.5397,  0.5626,  0.3286,  0.3228,  0.2959],
        [ 0.0164, -0.2817, -0.3701, -0.3762,  0.5716],
        [ 0.2709, -0.1316, -0.2345,  0.3070, -0.0737],
        [-0.5672,  0.5385,  0.0761, -0.2042,  0.1676],
        [ 0.0562, -0.2714,  0.2077,  0.3398, -0.0219],
        [-0.1687,  0.1323, -0.2917,  0.5124,  0.2712],
        [-0.2456, -0.2141,  0.5528,  0.5555,  0.1807],
        [-0.5257, -0.1093,  0.0794,  0.4418,  0.4990],
        [ 0.1540,  0.5476,  0.5192,  0.3305,  0.3985],
        [-0.4080,  0.1440, -0.2368,  0.4245, -0.5680],
        [-0.1213, -0.3567,  0.1859,  0.3139,  0.2983],
        [ 0.5680, -0.4044,  0.0663, -0.1984,  0.2319]], requires_grad=True), Parameter containing:
tensor([[-0.0327, -0.2666, -0.3233],
        [ 0.0801, -0.3055, -0.4298],
        [ 0.4196, -0.4396, -0.5388],
        [-0.4854, -0.3523, -0.5207],
        [-0.0070, -0.3771, -0.0437],
        [-0.4638,  0.0256, -0.3320],
        [ 0.2137,  0.5540, -0.2854],
        [-0.12

### 데이터 다운 로드

In [64]:
# 데이터 로드하기
# 각 각 ratings_train.txt, ratings_test.txt 저장
request.urlretrieve("https://raw.githubusercontent.com/e9t/nsmc/master/ratings_train.txt", 
                    filename="ratings_train.txt")
request.urlretrieve("https://raw.githubusercontent.com/e9t/nsmc/master/ratings_test.txt", 
                    filename="ratings_test.txt")

('ratings_test.txt', <http.client.HTTPMessage at 0x2c0172d5550>)

In [73]:
train_data = pd.read_table('ratings_train.txt', sep = "\t", nrows = 10000)
test_data = pd.read_table('ratings_test.txt', sep = "\t", nrows = 10000)
print('총 샘플의 수 :',len(train_data))
train_data.head()

총 샘플의 수 : 10000


Unnamed: 0,id,document,label
0,9976970,아 더빙.. 진짜 짜증나네요 목소리,0
1,3819312,흠...포스터보고 초딩영화줄....오버연기조차 가볍지 않구나,1
2,10265843,너무재밓었다그래서보는것을추천한다,0
3,9045019,교도소 이야기구먼 ..솔직히 재미는 없다..평점 조정,0
4,6483659,사이몬페그의 익살스런 연기가 돋보였던 영화!스파이더맨에서 늙어보이기만 했던 커스틴 ...,1


### 데이터 전처리

In [None]:
train_data.dropna(inplace=True, how = "any")
train_data.drop_duplicates(subset=['document'], inplace=True)
print('총 샘플의 수 :',len(train_data)) 
train_data.head()

총 샘플의 수 : 9918


Unnamed: 0,id,document,label
0,9976970,아 더빙.. 진짜 짜증나네요 목소리,0
1,3819312,흠...포스터보고 초딩영화줄....오버연기조차 가볍지 않구나,1
2,10265843,너무재밓었다그래서보는것을추천한다,0
3,9045019,교도소 이야기구먼 ..솔직히 재미는 없다..평점 조정,0
4,6483659,사이몬페그의 익살스런 연기가 돋보였던 영화!스파이더맨에서 늙어보이기만 했던 커스틴 ...,1


In [86]:
### 한글과 공백을 제외하고 모두 제거
train_data['document'] = train_data['document'].str.replace("[^ㄱ-ㅎㅏ-ㅣ가-힣 ]"," ",
                                                             regex=True)
train_data.head()

Unnamed: 0,id,document,label
0,9976970,아 더빙 진짜 짜증나네요 목소리,0
1,3819312,흠 포스터보고 초딩영화줄 오버연기조차 가볍지 않구나,1
2,10265843,너무재밓었다그래서보는것을추천한다,0
3,9045019,교도소 이야기구먼 솔직히 재미는 없다 평점 조정,0
4,6483659,사이몬페그의 익살스런 연기가 돋보였던 영화 스파이더맨에서 늙어보이기만 했던 커스틴 ...,1


In [87]:
### 빈자료 제거
train_data['document'] = train_data['document'].str.strip()
train_data['document'] = train_data['document'].replace('', np.nan)
train_data.dropna(how = 'any', inplace=True)
print(train_data.isnull().sum())
print(train_data.shape) # (145393, 3)

id          0
document    0
label       0
dtype: int64
(9858, 3)


### 토큰화 (Tokenizing)

In [None]:
!pip install konlpy

In [88]:
from konlpy.tag import Okt
from tqdm import tqdm

In [89]:
stopwords = ['도', '는', '다', '의', '가', '이', '은', '한', '에', '하', '고', '을', '를', '인', '듯', '과', '와', '네', '들', '듯', '지', '임', '게']
okt = Okt()
okt.morphs("교도소 이야기구먼 솔직히 재미는 없다평점 조정")

['교도소', '이야기', '구먼', '솔직히', '재미', '는', '없다', '평점', '조정']

In [105]:
X_data = []
for sentence in tqdm(train_data['document']):
    tokenized_sentence = okt.morphs(sentence, stem=True) # 토큰화
    stopwords_removed_sentence = [word for word in tokenized_sentence if not word in stopwords] # 불용어 제거
    X_data.append(stopwords_removed_sentence)

100%|██████████| 9858/9858 [00:46<00:00, 211.89it/s]


In [106]:
y_data = train_data['label']

print(len(X_data))
print(len(y_data))

print(X_data[:5])
print(y_data[:5])

9858
9858
[['아', '더빙', '진짜', '짜증나다', '목소리'], ['흠', '포스터', '보고', '초딩', '영화', '줄', '오버', '연기', '조차', '가볍다', '않다'], ['너', '무재', '밓었', '다그', '래서', '보다', '추천'], ['교도소', '이야기', '구먼', '솔직하다', '재미', '없다', '평점', '조정'], ['사이', '몬페', '그', '익살스럽다', '연기', '돋보이다', '영화', '스파이더맨', '에서', '늙다', '보이다', '하다', '커스틴', '던스트', '너무나도', '이쁘다', '보이다']]
0    0
1    1
2    0
3    0
4    1
Name: label, dtype: int64


### 학습데이터, 검증데이터

In [108]:
from sklearn.model_selection import train_test_split

X_train, X_valid, y_train, y_valid = train_test_split(X_data, y_data, test_size=0.2, 
                                                      random_state=0, stratify=y_data)
print("X_train shape = ", len(X_train))
print("X_valid shape = ", len(X_valid))
print("y_train shape = ", len(y_train))
print("y_valid shape = ", len(y_valid))


X_train shape =  7886
X_valid shape =  1972
y_train shape =  7886
y_valid shape =  1972


### 단어 집합 만들기

In [109]:
from collections import Counter

word_list = []
for sent in X_train:
    for word in sent:
      word_list.append(word)

word_counts = Counter(word_list) # == len(set(word_list))
print('총 단어수 :', len(word_counts))
display(word_counts)


총 단어수 : 10876


Counter({'영화': 2678,
         '보다': 2244,
         '하다': 2241,
         '없다': 858,
         '이다': 733,
         '있다': 675,
         '좋다': 579,
         '정말': 537,
         '너무': 526,
         '만': 497,
         '재밌다': 477,
         '같다': 471,
         '점': 466,
         '되다': 461,
         '적': 456,
         '진짜': 440,
         '아니다': 420,
         '으로': 416,
         '로': 406,
         '않다': 385,
         '나오다': 364,
         '연기': 363,
         '에서': 362,
         '만들다': 335,
         '평점': 334,
         '나': 323,
         '최고': 322,
         '것': 322,
         '내': 318,
         '안': 317,
         '그': 310,
         '못': 293,
         '사람': 280,
         '스토리': 274,
         '드라마': 273,
         '왜': 271,
         '보고': 265,
         '감동': 258,
         '생각': 255,
         '감독': 249,
         '말': 248,
         '이렇다': 247,
         '때': 246,
         'ㅋㅋ': 238,
         '그냥': 237,
         '아깝다': 236,
         '아': 234,
         '거': 229,
         '재미없다': 224,
         '시간': 223,
  

In [110]:
print('훈련 데이터에서의 단어 영화의 등장 횟수 :', word_counts['영화']) # class Counter (= dictionary)
print('훈련 데이터에서의 단어 공감의 등장 횟수 :', word_counts['공감'])

훈련 데이터에서의 단어 영화의 등장 횟수 : 2678
훈련 데이터에서의 단어 공감의 등장 횟수 : 63


In [112]:
vocab = sorted(word_counts, key=word_counts.get, reverse=True)
vocab

['영화',
 '보다',
 '하다',
 '없다',
 '이다',
 '있다',
 '좋다',
 '정말',
 '너무',
 '만',
 '재밌다',
 '같다',
 '점',
 '되다',
 '적',
 '진짜',
 '아니다',
 '으로',
 '로',
 '않다',
 '나오다',
 '연기',
 '에서',
 '만들다',
 '평점',
 '나',
 '최고',
 '것',
 '내',
 '안',
 '그',
 '못',
 '사람',
 '스토리',
 '드라마',
 '왜',
 '보고',
 '감동',
 '생각',
 '감독',
 '말',
 '이렇다',
 '때',
 'ㅋㅋ',
 '그냥',
 '아깝다',
 '아',
 '거',
 '재미없다',
 '시간',
 '배우',
 '더',
 '내용',
 '중',
 '재미',
 '요',
 '자다',
 '지루하다',
 '가다',
 '까지',
 '재미있다',
 '하고',
 '뭐',
 '들다',
 '모르다',
 '주다',
 '작품',
 '쓰레기',
 '수',
 '좀',
 '알다',
 '사랑',
 '하나',
 '볼',
 '싶다',
 '이건',
 'ㅋ',
 '잘',
 '마지막',
 '정도',
 '그렇다',
 '개',
 '차다',
 '액션',
 '연출',
 '돈',
 '이렇게',
 '저',
 '다시',
 '걸',
 '주인공',
 '최악',
 '안되다',
 '지금',
 '완전',
 '기',
 '많다',
 '나다',
 '받다',
 '느낌',
 '오다',
 '처음',
 '장면',
 'ㅠㅠ',
 '역시',
 'ㅋㅋㅋ',
 '별',
 '명작',
 '이야기',
 '인데',
 '라',
 '넘다',
 '별로',
 '부터',
 '일',
 'ㅡㅡ',
 '면',
 '먹다',
 '남다',
 '이나',
 '좋아하다',
 '꼭',
 '괜찮다',
 '년',
 '버리다',
 '또',
 '아름답다',
 '인생',
 '이해',
 '끝',
 '난',
 '느끼다',
 '라고',
 '이영화',
 '이런',
 '무슨',
 '그리고',
 '멋지다',
 '해주다',
 '야',
 '서',
 '전',
 '줄',
 '많이',


In [113]:
### 
word_to_index = {}
word_to_index['<PAD>'] = 0
word_to_index['<UNK>'] = 1

for index, word in enumerate(vocab) :
  word_to_index[word] = index + 2

vocab_size = len(word_to_index)
print('패딩 토큰과 UNK 토큰을 고려한 단어 집합의 크기 :', vocab_size)

패딩 토큰과 UNK 토큰을 고려한 단어 집합의 크기 : 10878


In [114]:
print(word_to_index)
print('단어 <PAD>와 맵핑되는 정수 :', word_to_index['<PAD>'])
print('단어 <UNK>와 맵핑되는 정수 :', word_to_index['<UNK>'])
print('단어 영화와 맵핑되는 정수 :', word_to_index['영화'])

{'<PAD>': 0, '<UNK>': 1, '영화': 2, '보다': 3, '하다': 4, '없다': 5, '이다': 6, '있다': 7, '좋다': 8, '정말': 9, '너무': 10, '만': 11, '재밌다': 12, '같다': 13, '점': 14, '되다': 15, '적': 16, '진짜': 17, '아니다': 18, '으로': 19, '로': 20, '않다': 21, '나오다': 22, '연기': 23, '에서': 24, '만들다': 25, '평점': 26, '나': 27, '최고': 28, '것': 29, '내': 30, '안': 31, '그': 32, '못': 33, '사람': 34, '스토리': 35, '드라마': 36, '왜': 37, '보고': 38, '감동': 39, '생각': 40, '감독': 41, '말': 42, '이렇다': 43, '때': 44, 'ㅋㅋ': 45, '그냥': 46, '아깝다': 47, '아': 48, '거': 49, '재미없다': 50, '시간': 51, '배우': 52, '더': 53, '내용': 54, '중': 55, '재미': 56, '요': 57, '자다': 58, '지루하다': 59, '가다': 60, '까지': 61, '재미있다': 62, '하고': 63, '뭐': 64, '들다': 65, '모르다': 66, '주다': 67, '작품': 68, '쓰레기': 69, '수': 70, '좀': 71, '알다': 72, '사랑': 73, '하나': 74, '볼': 75, '싶다': 76, '이건': 77, 'ㅋ': 78, '잘': 79, '마지막': 80, '정도': 81, '그렇다': 82, '개': 83, '차다': 84, '액션': 85, '연출': 86, '돈': 87, '이렇게': 88, '저': 89, '다시': 90, '걸': 91, '주인공': 92, '최악': 93, '안되다': 94, '지금': 95, '완전': 96, '기': 97, '많다': 98, '나다': 99, '받다': 100, 

### 단어 인코딩 (label)

In [117]:
def texts_to_sequences(tokenized_X_data, word_to_index):
  encoded_X_data = []
  for sent in tokenized_X_data:
    index_sequences = []
    for word in sent:
      try:
          index_sequences.append(word_to_index[word])
      except KeyError:
          index_sequences.append(word_to_index['<UNK>'])
    encoded_X_data.append(index_sequences)
  return encoded_X_data

In [118]:
encoded_X_train = texts_to_sequences(X_train, word_to_index)
encoded_X_valid = texts_to_sequences(X_valid, word_to_index)

[[39,
  1186,
  843,
  976,
  73,
  2771,
  1718,
  272,
  24,
  573,
  49,
  164,
  3651,
  2,
  1279,
  11,
  4,
  49,
  399,
  514,
  5321,
  20,
  37,
  25,
  1949],
 [62, 3652, 1537, 247],
 [136,
  373,
  2,
  192,
  5322,
  202,
  155,
  75,
  1538,
  1950,
  666,
  2772,
  18,
  479,
  1045,
  25,
  5323,
  4],
 [5324,
  3653,
  3654,
  910,
  5325,
  1280,
  185,
  977,
  2773,
  4,
  91,
  113,
  74,
  797,
  74,
  294,
  1046,
  1187,
  1719,
  5326,
  41,
  2774,
  441,
  1720,
  4,
  186,
  80,
  172,
  1721,
  5327,
  2288,
  15,
  104,
  844,
  1539],
 [56,
  74,
  5,
  1951,
  911,
  3655,
  5328,
  452,
  16,
  54,
  798,
  63,
  3,
  912,
  1407,
  124,
  978,
  54,
  165,
  762,
  62,
  1120,
  16,
  85,
  54,
  425,
  65,
  845,
  156,
  92,
  217,
  137,
  5329,
  3656,
  1540,
  1722,
  4,
  7],
 [46, 480, 20, 1121, 227, 89, 353, 11, 3, 187, 176, 1952],
 [64, 273, 70, 164, 200, 26],
 [132, 114, 280, 5],
 [3657, 552, 846, 1047, 391, 3657, 93, 5330, 100, 2, 315, 261,

In [121]:
index_to_word = {}
for key, value in word_to_index.items():
    index_to_word[value] = key

decoded_sample = [index_to_word[word] for word in encoded_X_train[0]]
print('기존의 첫번째 샘플 :', X_train[0])
print('복원된 첫번째 샘플 :', decoded_sample)

기존의 첫번째 샘플 : ['감동', '교육', '계', '비판', '사랑', '손대다', '방향', '모두', '에서', '확실하다', '거', '없이', '어정쩡하다', '영화', '바라보다', '만', '하다', '거', '라면', '대체', '극영화', '로', '왜', '만들다', '거야']
복원된 첫번째 샘플 : ['감동', '교육', '계', '비판', '사랑', '손대다', '방향', '모두', '에서', '확실하다', '거', '없이', '어정쩡하다', '영화', '바라보다', '만', '하다', '거', '라면', '대체', '극영화', '로', '왜', '만들다', '거야']


### Zero 패딩 (Padding)

In [124]:
encoded_X_train[:10]

[[39,
  1186,
  843,
  976,
  73,
  2771,
  1718,
  272,
  24,
  573,
  49,
  164,
  3651,
  2,
  1279,
  11,
  4,
  49,
  399,
  514,
  5321,
  20,
  37,
  25,
  1949],
 [62, 3652, 1537, 247],
 [136,
  373,
  2,
  192,
  5322,
  202,
  155,
  75,
  1538,
  1950,
  666,
  2772,
  18,
  479,
  1045,
  25,
  5323,
  4],
 [5324,
  3653,
  3654,
  910,
  5325,
  1280,
  185,
  977,
  2773,
  4,
  91,
  113,
  74,
  797,
  74,
  294,
  1046,
  1187,
  1719,
  5326,
  41,
  2774,
  441,
  1720,
  4,
  186,
  80,
  172,
  1721,
  5327,
  2288,
  15,
  104,
  844,
  1539],
 [56,
  74,
  5,
  1951,
  911,
  3655,
  5328,
  452,
  16,
  54,
  798,
  63,
  3,
  912,
  1407,
  124,
  978,
  54,
  165,
  762,
  62,
  1120,
  16,
  85,
  54,
  425,
  65,
  845,
  156,
  92,
  217,
  137,
  5329,
  3656,
  1540,
  1722,
  4,
  7],
 [46, 480, 20, 1121, 227, 89, 353, 11, 3, 187, 176, 1952],
 [64, 273, 70, 164, 200, 26],
 [132, 114, 280, 5],
 [3657, 552, 846, 1047, 391, 3657, 93, 5330, 100, 2, 315, 261,

In [125]:
max_len = 30
def pad_sequences(sentences, max_len):
  features = np.zeros((len(sentences), max_len), dtype=int)
  for index, sentence in enumerate(sentences):
    if len(sentence) != 0:
      features[index, :len(sentence)] = np.array(sentence)[:max_len]
  return features

padded_X_train = pad_sequences(encoded_X_train, max_len=max_len)
padded_X_valid = pad_sequences(encoded_X_valid, max_len=max_len)

print('훈련 데이터의 크기 :', padded_X_train.shape)
print('검증 데이터의 크기 :', padded_X_valid.shape)


훈련 데이터의 크기 : (7886, 30)
검증 데이터의 크기 : (1972, 30)


In [126]:
print('첫번째 샘플의 길이 :', len(padded_X_train[0]))
print('첫번째 샘플 :', padded_X_train[0])

첫번째 샘플의 길이 : 30
첫번째 샘플 : [  39 1186  843  976   73 2771 1718  272   24  573   49  164 3651    2
 1279   11    4   49  399  514 5321   20   37   25 1949    0    0    0
    0    0]


### LSTM을 이용한 네이버 영화 리뷰 분류 모델

In [128]:
train_label_tensor = torch.tensor(np.array(y_train))
valid_label_tensor = torch.tensor(np.array(y_valid))

In [131]:
embedding_dim = 100
hidden_dim = 128
output_dim = 2
learning_rate = 0.1
num_epochs = 10

In [132]:
class TextClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super(TextClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # x: (batch_size, seq_length)
        embedded = self.embedding(x)  # (batch_size, seq_length, embedding_dim)

        # LSTM은 (hidden state, cell state)의 튜플을 반환합니다
        lstm_out, (hidden, cell) = self.lstm(embedded)  # lstm_out: (batch_size, seq_length, hidden_dim), hidden: (1, batch_size, hidden_dim)

        last_hidden = hidden.squeeze(0)  # (batch_size, hidden_dim)
        logits = self.fc(last_hidden)  # (batch_size, output_dim)
        
        return logits


In [134]:
torch.tensor(padded_X_train)

tensor([[  39, 1186,  843,  ...,    0,    0,    0],
        [  62, 3652, 1537,  ...,    0,    0,    0],
        [ 136,  373,    2,  ...,    0,    0,    0],
        ...,
        [ 221,   60,  459,  ...,    0,    0,    0],
        [ 365,  910,  386,  ...,    0,    0,    0],
        [ 312,  361,  235,  ...,    0,    0,    0]], dtype=torch.int32)

In [138]:
# encoded_train = torch.tensor(padded_X_train).to(torch.int64)
encoded_train = torch.tensor(padded_X_train)
train_dataset = torch.utils.data.TensorDataset(encoded_train,   # 2d
                                               train_label_tensor) # 1d
train_dataloader = torch.utils.data.DataLoader(train_dataset, 
                                               shuffle=True, 
                                               batch_size=32)

# encoded_valid = torch.tensor(padded_X_valid).to(torch.int64)
encoded_valid = torch.tensor(padded_X_valid)
valid_dataset = torch.utils.data.TensorDataset(encoded_valid, 
                                               valid_label_tensor)
valid_dataloader = torch.utils.data.DataLoader(valid_dataset, 
                                               shuffle=True, 
                                               batch_size=1)


In [139]:
model = TextClassifier(vocab_size, embedding_dim, hidden_dim, output_dim)
model.to(device)

TextClassifier(
  (embedding): Embedding(10878, 100)
  (lstm): LSTM(100, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=2, bias=True)
)

In [140]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [141]:
def calculate_accuracy(logits, labels):
    # _, predicted = torch.max(logits, 1)
    predicted = torch.argmax(logits, dim=1)
    correct = (predicted == labels).sum().item()
    total = labels.size(0)
    accuracy = correct / total
    return accuracy

In [146]:
def evaluate(model, valid_dataloader, criterion, device):
    val_loss = 0
    val_correct = 0
    val_total = 0

    model.eval()
    with torch.no_grad():
        # 데이터로더로부터 배치 크기만큼의 데이터를 연속으로 로드
        for batch_X, batch_y in valid_dataloader:
            batch_X, batch_y = batch_X.to(device), batch_y.long().to(device)

            # 모델의 예측값
            logits = model(batch_X)

            # 손실을 계산
            loss = criterion(logits, batch_y)

            # 정확도와 손실을 계산함
            val_loss += loss.item()
            val_correct += calculate_accuracy(logits, batch_y) * batch_y.size(0)
            val_total += batch_y.size(0)

    val_accuracy = val_correct / val_total
    val_loss /= len(valid_dataloader)

    return val_loss, val_accuracy


In [143]:
num_epochs = 20

# Training loop
best_val_loss = float('inf')

# Training loop
for epoch in range(num_epochs):
    # Training
    train_loss = 0
    train_correct = 0
    train_total = 0
    model.train()
    for batch_X, batch_y in tqdm(train_dataloader):
        # Forward pass
        batch_X, batch_y = batch_X.to(device), batch_y.long().to(device)
        # batch_X.shape == (batch_size, max_len)
        # batch_y = batch_y.long()
        logits = model(batch_X)

        # Compute loss
        loss = criterion(logits, batch_y)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Calculate training accuracy and loss
        train_loss += loss.item()
        train_correct += calculate_accuracy(logits, batch_y) * batch_y.size(0)
        train_total += batch_y.size(0)

    train_accuracy = train_correct / train_total
    train_loss /= len(train_dataloader)

    val_loss, val_accuracy = evaluate(model, valid_dataloader, criterion, device)

    print(f'Epoch {epoch+1}/{num_epochs}:')
    print(f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}')
    print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    # 검증 손실이 최소일 때 체크포인트 저장
    if val_loss < best_val_loss:
        print(f'Validation loss improved from {best_val_loss:.4f} to {val_loss:.4f}. 체크포인트를 저장합니다.')
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_model_checkpoint.pth')


100%|██████████| 247/247 [00:01<00:00, 225.75it/s]


Epoch 1/20:
Train Loss: 0.0097, Train Accuracy: 0.9980
Validation Loss: 1.2554, Validation Accuracy: 0.7556
Validation loss improved from inf to 1.2554. 체크포인트를 저장합니다.


100%|██████████| 247/247 [00:01<00:00, 241.60it/s]


Epoch 2/20:
Train Loss: 0.0079, Train Accuracy: 0.9985
Validation Loss: 1.4400, Validation Accuracy: 0.7581


100%|██████████| 247/247 [00:01<00:00, 152.97it/s]


Epoch 3/20:
Train Loss: 0.0112, Train Accuracy: 0.9970
Validation Loss: 1.1156, Validation Accuracy: 0.7601
Validation loss improved from 1.2554 to 1.1156. 체크포인트를 저장합니다.


100%|██████████| 247/247 [00:01<00:00, 239.18it/s]


Epoch 4/20:
Train Loss: 0.0137, Train Accuracy: 0.9963
Validation Loss: 1.2092, Validation Accuracy: 0.7541


100%|██████████| 247/247 [00:01<00:00, 167.19it/s]


Epoch 5/20:
Train Loss: 0.0295, Train Accuracy: 0.9906
Validation Loss: 1.0336, Validation Accuracy: 0.7465
Validation loss improved from 1.1156 to 1.0336. 체크포인트를 저장합니다.


100%|██████████| 247/247 [00:01<00:00, 238.53it/s]


Epoch 6/20:
Train Loss: 0.0132, Train Accuracy: 0.9966
Validation Loss: 1.3661, Validation Accuracy: 0.7581


100%|██████████| 247/247 [00:01<00:00, 181.97it/s]


Epoch 7/20:
Train Loss: 0.0096, Train Accuracy: 0.9970
Validation Loss: 1.3687, Validation Accuracy: 0.7612


100%|██████████| 247/247 [00:01<00:00, 226.56it/s]


Epoch 8/20:
Train Loss: 0.0097, Train Accuracy: 0.9968
Validation Loss: 1.3568, Validation Accuracy: 0.7698


100%|██████████| 247/247 [00:01<00:00, 238.93it/s]


Epoch 9/20:
Train Loss: 0.0130, Train Accuracy: 0.9958
Validation Loss: 1.2913, Validation Accuracy: 0.7556


100%|██████████| 247/247 [00:01<00:00, 238.29it/s]


Epoch 10/20:
Train Loss: 0.0073, Train Accuracy: 0.9977
Validation Loss: 1.4615, Validation Accuracy: 0.7520


100%|██████████| 247/247 [00:00<00:00, 261.97it/s]


Epoch 11/20:
Train Loss: 0.0042, Train Accuracy: 0.9989
Validation Loss: 1.4505, Validation Accuracy: 0.7627


100%|██████████| 247/247 [00:01<00:00, 232.29it/s]


Epoch 12/20:
Train Loss: 0.0042, Train Accuracy: 0.9987
Validation Loss: 1.4539, Validation Accuracy: 0.7632


100%|██████████| 247/247 [00:01<00:00, 244.68it/s]


Epoch 13/20:
Train Loss: 0.0035, Train Accuracy: 0.9990
Validation Loss: 1.5447, Validation Accuracy: 0.7667


100%|██████████| 247/247 [00:00<00:00, 253.20it/s]


Epoch 14/20:
Train Loss: 0.0035, Train Accuracy: 0.9987
Validation Loss: 1.5068, Validation Accuracy: 0.7622


100%|██████████| 247/247 [00:00<00:00, 248.80it/s]


Epoch 15/20:
Train Loss: 0.0031, Train Accuracy: 0.9989
Validation Loss: 1.8140, Validation Accuracy: 0.7667


100%|██████████| 247/247 [00:00<00:00, 250.06it/s]


Epoch 16/20:
Train Loss: 0.0036, Train Accuracy: 0.9991
Validation Loss: 1.6003, Validation Accuracy: 0.7612


100%|██████████| 247/247 [00:00<00:00, 252.45it/s]


Epoch 17/20:
Train Loss: 0.0026, Train Accuracy: 0.9991
Validation Loss: 1.7392, Validation Accuracy: 0.7652


100%|██████████| 247/247 [00:01<00:00, 242.97it/s]


Epoch 18/20:
Train Loss: 0.0038, Train Accuracy: 0.9986
Validation Loss: 1.6365, Validation Accuracy: 0.7622


100%|██████████| 247/247 [00:00<00:00, 255.37it/s]


Epoch 19/20:
Train Loss: 0.0026, Train Accuracy: 0.9990
Validation Loss: 1.6332, Validation Accuracy: 0.7586


100%|██████████| 247/247 [00:00<00:00, 263.38it/s]


Epoch 20/20:
Train Loss: 0.0032, Train Accuracy: 0.9989
Validation Loss: 1.4980, Validation Accuracy: 0.7546


### 모델 로드 및 평가

In [147]:
# 모델 로드
model.load_state_dict(torch.load('best_model_checkpoint.pth'))

# 모델을 device에 올립니다.
model.to(device)


TextClassifier(
  (embedding): Embedding(10878, 100)
  (lstm): LSTM(100, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=2, bias=True)
)

In [148]:
# 검증 데이터에 대한 정확도와 손실 계산
val_loss, val_accuracy = evaluate(model, valid_dataloader, criterion, device)

print(f'Best model validation loss: {val_loss:.4f}')
print(f'Best model validation accuracy: {val_accuracy:.4f}')


Best model validation loss: 1.0336
Best model validation accuracy: 0.7465
