# 0.1 라이브러리 임포트

In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as dset
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch.autograd import Variable
import numpy as np

In [2]:
!pip install torchtext==0.4.0



# 0.2 하이퍼파라미터 세팅

In [3]:
# Hyper-parameters
batch_size = 128  # mini-batch size handed to the data loaders
num_epochs = 10  # number of passes over the training loader

word_vec_size = 256  # word-embedding dimension passed to the RNN model
dropout_p = 0.3  # dropout probability passed to the RNN model

hidden_size = 512  # LSTM hidden-state size passed to the RNN model
num_layers = 4  # number of stacked LSTM layers passed to the RNN model

## added by yhk
learning_rate = 0.001  # learning rate for the Adam optimizer (default 0.001)

In [4]:
# Device configuration: use the GPU when CUDA is available, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 1. SMS train, test dataset 가져오기

In [5]:
from data_loader import DataLoader

In [6]:
# Build the loaders for the SMS training file. The object exposes
# train_loader / valid_loader plus the text and label fields used below.
loaders = DataLoader(
    train_fn = './sms.maxlen.uniq.shuf.train.tsv',
    batch_size = batch_size,
    valid_ratio = .2,  # presumably the fraction held out for validation — confirm in data_loader.py
    device = -1,  # presumably selects the CPU (legacy torchtext convention) — TODO confirm
    max_vocab = 999999,
    min_freq = 5,
)

In [7]:
# Build loaders for the SMS test file.
# NOTE(review): this is a separate DataLoader instance, so it presumably builds
# its own vocabulary instead of reusing the training one — confirm before
# feeding its batches to a model trained on `loaders`. It is not referenced by
# any of the cells shown here.
test_loaders = DataLoader(
    train_fn = './sms.maxlen.uniq.shuf.test.tsv',
    batch_size = batch_size,
    valid_ratio = .01,  # keep (almost) everything in the train split
    device = -1,  # presumably selects the CPU (legacy torchtext convention) — TODO confirm
    max_vocab = 999999,
    min_freq = 5,
)

# 2. 대략적인 데이터 형태

In [8]:
# Report how many examples ended up in the train and validation splits.
print("|train| =", len(loaders.train_loader.dataset),
      '|valid| =', len(loaders.valid_loader.dataset))

# Vocabulary sizes of the text and label fields; they are used later as the
# model's input size and number of output classes.
vocab_size = len(loaders.text.vocab)
num_classes = len(loaders.label.vocab)
print('|vocab| =', vocab_size, '|classes| =', num_classes)

|train| = 3722 |valid| = 931
|vocab| = 1564 |classes| = 2


# 3. 데이터 로드함수
## 학습시킬 때 batch_size 단위로 끊어서 로드하기 위함
## 데이터 로드함수 이해하기

In [9]:
n = 3  # number of samples to print per batch
for i, data in enumerate(loaders.train_loader):
    labels = data.label
    texts = data.text

    # Stops once i exceeds n, so batches 0..n (n + 1 batches) are shown.
    if i > n:
        break
    print("[%d]" %i)
    print("한 번에 로드되는 데이터 크기:", len(labels))

    # Print the label and the token-sequence shape of the first n examples
    for j in range(n):
        label = labels[j].numpy()  # tensor -> numpy
        text = texts[j].numpy()
        print("label: ", label)
        print("text: ", text.shape)

[0]
한 번에 로드되는 데이터 크기: 128
label:  0
text:  (7,)
label:  0
text:  (7,)
label:  0
text:  (7,)
[1]
한 번에 로드되는 데이터 크기: 128
label:  0
text:  (6,)
label:  0
text:  (6,)
label:  0
text:  (6,)
[2]
한 번에 로드되는 데이터 크기: 128
label:  0
text:  (24,)
label:  0
text:  (24,)
label:  1
text:  (24,)
[3]
한 번에 로드되는 데이터 크기: 128
label:  1
text:  (23,)
label:  0
text:  (23,)
label:  0
text:  (23,)


# 4. 모델 선언

In [10]:
# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    """Many-to-one text classifier built on a stacked bidirectional LSTM.

    Token ids are embedded, fed through the LSTM, and the LSTM output at the
    last time step is projected to per-class log-probabilities.
    """

    def __init__(self,
                 input_size,
                 word_vec_size,
                 hidden_size,
                 n_classes,
                 num_layers = 4,
                 dropout_p = 0.3
                 ):
        """Create the embedding, LSTM, projection and log-softmax layers.

        Args:
            input_size: number of rows of the embedding table (vocabulary size).
            word_vec_size: dimension of each word-embedding vector.
            hidden_size: hidden/cell state size of the LSTM, per direction.
            n_classes: number of output classes.
            num_layers: number of stacked LSTM layers.
            dropout_p: dropout probability handed to nn.LSTM.
        """
        super().__init__()

        # Keep the constructor arguments available on the instance.
        self.input_size = input_size
        self.word_vec_size = word_vec_size
        self.hidden_size = hidden_size
        self.n_classes = n_classes
        self.num_layers = num_layers
        self.dropout_p = dropout_p

        # Lookup table: token id -> vector of size word_vec_size.
        self.emb = nn.Embedding(num_embeddings = input_size,
                                embedding_dim = word_vec_size)

        # batch_first = True makes the LSTM consume (batch, length, feature).
        self.lstm = nn.LSTM(input_size = word_vec_size,
                            hidden_size = hidden_size,
                            num_layers = num_layers,
                            batch_first = True,
                            dropout = dropout_p,
                            bidirectional = True)

        # Both directions are concatenated, hence 2 * hidden_size inputs.
        self.fc = nn.Linear(in_features = hidden_size * 2,
                            out_features = n_classes)

        # LogSoftmax over the class dimension; meant to be paired with NLLLoss.
        self.activation = nn.LogSoftmax(dim = -1)

    def forward(self, x):
        """Return log-probabilities of shape (batch_size, n_classes).

        Args:
            x: integer token ids of shape (batch_size, length).
        """
        # (batch_size, length) -> (batch_size, length, word_vec_size)
        embedded = self.emb(x)

        # (batch_size, length, hidden_size * 2); the final (h, c) states
        # returned by the LSTM are not used.
        sequence_out, _ = self.lstm(embedded)

        # Output at the last time step: (batch_size, hidden_size * 2).
        # NOTE(review): at this position the backward direction has only seen
        # the final token, and for padded batches it may be a padding step.
        last_step = sequence_out[:, -1]

        # (batch_size, n_classes)
        logits = self.fc(last_step)
        return self.activation(logits)

In [11]:
# Instantiate the classifier from the hyper-parameters and the vocabulary /
# class counts measured on the loaders, then move it to the selected device.
model = RNN(input_size = vocab_size,
            word_vec_size = word_vec_size,
            hidden_size = hidden_size,
            n_classes = num_classes,
            num_layers = num_layers,
            dropout_p = dropout_p).to(device)

In [12]:
def ComputeAccr(dloader, imodel):
    """Return the classification accuracy (in percent) of `imodel` on `dloader`.

    Args:
        dloader: iterable of batches exposing `.text` (the tensor fed to the
            model) and `.label` (a tensor of class indices).
        imodel: the `nn.Module` to evaluate. Its output is arg-maxed over
            dim 1 to obtain the predicted class of each example.

    Returns:
        A 0-d float32 numpy array holding `100 * correct / total`.

    Raises:
        ZeroDivisionError: if `dloader` yields no examples.
    """
    # Evaluate on whatever device the model lives on; a module without
    # parameters falls back to the CPU.
    first_param = next(imodel.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    correct = 0
    total = 0

    was_training = imodel.training
    imodel.eval()  # test mode (e.g. disables dropout)
    try:
        with torch.no_grad():  # no autograd graph is needed for evaluation
            for data in dloader:  # one mini-batch at a time
                texts = data.text.to(run_device)    # (batch_size, length)
                labels = data.label.to(run_device)  # (batch_size,) class indices

                # Forward prop.
                output = imodel(texts)                 # (batch_size, num_classes)
                output_index = output.argmax(dim = 1)  # (batch_size,)

                total += labels.size(0)
                correct += (output_index == labels).sum().item()
    finally:
        # Put the model back in the mode the caller had it in.
        imodel.train(was_training)

    # Built from a Python float, so this also works when the model is on a GPU.
    return torch.tensor(100.0 * correct / total).numpy()  # tensor -> numpy

In [13]:
# Baseline accuracy of the freshly initialised (untrained) model.
# NOTE(review): the message says "Test Data" but loaders.valid_loader (the
# validation split) is what gets evaluated here.
print("Accuracy of Test Data: %.2f" %ComputeAccr(loaders.valid_loader, model))

Accuracy of Test Data: 15.57


# 5. loss, optimizer

In [14]:
# Loss and optimizer
# NLLLoss is paired with the LogSoftmax output of the model;
# nn.CrossEntropyLoss() would be the alternative for a model returning raw logits.
loss_func = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# 6. 학습

In [16]:
# Train the model
total_step = len(loaders.train_loader)  # number of mini-batches per epoch
for epoch in range(num_epochs):
    for i, data in enumerate(loaders.train_loader):
        texts = data.text.to(device)  # (batch_size, length)
        labels = data.label.to(device)  # (batch_size,) class indices

        print("[%d]" %i)  # per-step progress marker

        # Forward prop.
        outputs = model(texts)
        loss = loss_func(outputs, labels)

        # Backward prop. & optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Every 10 steps, report the loss of the current batch and the
        # accuracy on the validation split.
        if (i + 1) % 10 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accr: {:.2f}'
                  .format(epoch + 1, num_epochs, i + 1, total_step,
                          loss.item(),
                          ComputeAccr(loaders.valid_loader, model)))

[0]
[1]
[2]
[3]
[4]
[5]
[6]
[7]
[8]
[9]
Epoch [1/10], Step [10/30], Loss: 0.4040, Accr: 87.54
[10]
[11]
[12]
[13]
[14]
[15]
[16]
[17]
[18]
[19]
Epoch [1/10], Step [20/30], Loss: 0.2672, Accr: 87.54
[20]
[21]
[22]
[23]
[24]
[25]
[26]
[27]
[28]
[29]
Epoch [1/10], Step [30/30], Loss: 0.8364, Accr: 87.54
[0]
[1]
[2]
[3]
[4]
[5]
[6]
[7]
[8]
[9]
Epoch [2/10], Step [10/30], Loss: 0.2233, Accr: 87.54
[10]
[11]
[12]
[13]
[14]
[15]
[16]
[17]
[18]
[19]
Epoch [2/10], Step [20/30], Loss: 0.2384, Accr: 87.54
[20]
[21]
[22]
[23]
[24]
[25]
[26]
[27]
[28]
[29]
Epoch [2/10], Step [30/30], Loss: 0.9934, Accr: 87.54
[0]
[1]
[2]
[3]
[4]
[5]
[6]
[7]
[8]
[9]
Epoch [3/10], Step [10/30], Loss: 0.0829, Accr: 87.54
[10]
[11]
[12]
[13]
[14]
[15]
[16]
[17]
[18]
[19]
Epoch [3/10], Step [20/30], Loss: 0.1494, Accr: 87.54
[20]
[21]
[22]
[23]
[24]
[25]
[26]
[27]
[28]
[29]
Epoch [3/10], Step [30/30], Loss: 0.2840, Accr: 87.43
[0]
[1]
[2]
[3]
[4]
[5]
[6]
[7]
[8]
[9]
Epoch [4/10], Step [10/30], Loss: 0.9117, Accr: 87.43


# 7. 테스트

In [17]:
# Accuracy of the trained model on the validation split.
# NOTE(review): this is the validation loader, not the separately loaded test file.
print("Accuracy of Valid Data: %.2f" %ComputeAccr(loaders.valid_loader, model))

Accuracy of Valid Data: 96.89


# 8. 학습된 파라미터 저장

In [21]:
import os

netname = './nets/rnn_weight.pkl'
# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing.
os.makedirs(os.path.dirname(netname), exist_ok = True)
# NOTE: this pickles the whole nn.Module object (not only a state_dict), so
# the RNN class must be defined/importable wherever the file is loaded.
torch.save(model, netname)

# 9. 학습된 파라미터 로드
## 실무에서 학습된(pretrained) 파라미터 로드하고 싶다면: 5, 6, 8 과정 생략한 채 실행

In [22]:
netname = './nets/rnn_weight.pkl'
# map_location remaps the stored tensors onto the current `device`, so a
# checkpoint saved on a GPU machine also loads on a CPU-only one.
# SECURITY: torch.load unpickles arbitrary Python objects; only load files
# from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects whole-module pickles like this one — pass weights_only=False there.
model = torch.load(netname, map_location = device)

In [23]:
# Accuracy of the reloaded model on the validation split.
print("Accuracy of Valid Data: %.2f" %ComputeAccr(loaders.valid_loader, model))

Accuracy of Valid Data: 96.89
