In [1]:
import os
import sys
sys.path.append('..')

In [2]:
import import_ipynb
from utils.dataset_loader import CreateDataset
from utils.training import Learning

importing Jupyter notebook from ..\utils\dataset_loader.ipynb
importing Jupyter notebook from ..\utils\training.ipynb


In [3]:
import torch
from torch import nn
from torch import optim

import random
import numpy as np

from tqdm import tqdm

In [4]:
### Select the compute device: GPU when CUDA is available, otherwise CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

### Whether to use stopwords (forwarded to CreateDataset)
use_stopword = True

### batch_size
batch_size = 32

### Seed every RNG the notebook relies on so that weight initialisation, dropout
### and the teacher-forcing coin flips (Python `random`) repeat on Restart & Run All.
### (Some GPU kernels may still be non-deterministic.)
seed = 1234
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

In [5]:
### Load the pre-built dataset (CreateDataset is imported from utils.dataset_loader)
dataset = CreateDataset(device=device, use_stopword=use_stopword)

### Pull only the train / validation / test iterators out of the dataset
train_iterator, valid_iterator, test_iterator = dataset.get_iterator(batch_size=batch_size)

In [6]:
### Encoder: nothing more than an embedding layer feeding an LSTM
class Encoder(nn.Module):
    """Embed source token ids and summarise them with an LSTM.

    Only the LSTM's final hidden and cell states are returned; the per-step
    outputs are discarded.

    Args:
        input_dim: number of rows in the embedding table.
        emb_dim: size of each embedding vector.
        hid_dim: LSTM hidden size.
        n_layers: number of stacked LSTM layers.
        dropout: probability applied to the embeddings and between LSTM layers.
    """

    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=input_dim, embedding_dim=emb_dim)
        self.rnn = nn.LSTM(
            input_size=emb_dim,
            hidden_size=hid_dim,
            num_layers=n_layers,
            dropout=dropout,
        )
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Encode a batch of token ids.

        Args:
            x: integer tensor of token ids. The LSTM is built with the default
                ``batch_first=False``, so batched input is laid out as
                (src_len, batch).

        Returns:
            Tuple ``(hidden, cell)`` with the LSTM's final states.
        """
        token_vectors = self.embedding(x)
        # The per-step outputs are not needed, only the final (h_n, c_n) pair.
        _, final_state = self.rnn(self.dropout(token_vectors))
        hidden, cell = final_state
        return hidden, cell

In [7]:
### Decoder: also just an LSTM; it receives the context (hidden, cell) produced by the Encoder
class Decoder(nn.Module):
    """Single-step LSTM decoder that maps the previous token to next-token scores.

    Args:
        output_dim: number of rows in the embedding table and width of the
            final linear layer.
        emb_dim: size of each embedding vector.
        hid_dim: LSTM hidden size.
        n_layers: number of stacked LSTM layers.
        dropout: probability applied to the embeddings and between LSTM layers.
    """

    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.output_dim = output_dim
        self.embedding = nn.Embedding(num_embeddings=output_dim, embedding_dim=emb_dim)
        self.rnn = nn.LSTM(
            input_size=emb_dim,
            hidden_size=hid_dim,
            num_layers=n_layers,
            dropout=dropout,
        )
        self.dropout = nn.Dropout(p=dropout)
        self.fc_out = nn.Linear(in_features=hid_dim, out_features=output_dim)

    def forward(self, x, hidden, cell):
        """Run one decoding step.

        Args:
            x: token ids for the current step, one per batch element.
            hidden: LSTM hidden state to start from.
            cell: LSTM cell state to start from.

        Returns:
            Tuple ``(output, hidden, cell)``: the linear-layer scores for this
            step and the updated LSTM states.
        """
        # Add a leading length-1 sequence axis so the LSTM sees a one-step sequence.
        step_tokens = torch.unsqueeze(x, 0)
        step_embedded = self.dropout(self.embedding(step_tokens))
        rnn_out, (hidden, cell) = self.rnn(step_embedded, (hidden, cell))
        # Project to vocabulary scores and drop the length-1 sequence axis again.
        scores = self.fc_out(rnn_out).squeeze(0)
        return scores, hidden, cell

In [8]:
### The Encoder can process the whole source in one pass, but the Decoder has to
### predict recursively, one token at a time.
### During training all label tokens could be fed at once (no for-loop needed), but
### here each step is predicted separately and the prediction can be fed back in to
### predict the token of the next step.
### The same code can therefore also be used at inference time.
class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes step by step with optional teacher forcing.

    Args:
        enc: module called as ``enc(src)`` and returning ``(hidden, cell)``.
        dec: module called as ``dec(token, hidden, cell)`` and returning
            ``(output, hidden, cell)``; must expose ``output_dim``.
        device: device on which the result tensor is allocated.
    """

    def __init__(self, enc, dec, device):
        super().__init__()
        self.enc = enc
        self.dec = dec
        self.device = device
        self.output_dim = dec.output_dim

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        """Decode ``trg.shape[0] - 1`` steps starting from ``trg[0]``.

        Args:
            src: source batch, passed to the encoder unchanged.
            trg: target batch indexed as (trg_len, batch); ``trg[0]`` is the
                first decoder input.
            teacher_forcing_ratio: a step feeds the decoder's own argmax
                prediction forward when ``random.random()`` exceeds this value,
                and the ground-truth ``trg[t]`` otherwise. This does not depend
                on ``self.training``, so pass 0 to decode without ground truth.

        Returns:
            Tensor of shape (trg_len, batch, output_dim). Row 0 is never
            written and stays all zeros.
        """
        hidden, cell = self.enc(src)

        trg_len = trg.shape[0]
        batch_size = trg.shape[1]

        # Allocate directly on the target device instead of building the tensor
        # on the CPU and copying it over on every forward pass.
        outputs = torch.zeros(trg_len, batch_size, self.output_dim, device=self.device)

        dec_input = trg[0]
        for t in range(1, trg_len):
            output, hidden, cell = self.dec(dec_input, hidden, cell)
            outputs[t] = output

            # Highest-scoring token for each batch element.
            top1 = torch.argmax(output, dim=1)

            dec_input = top1 if random.random() > teacher_forcing_ratio else trg[t]

        return outputs

In [11]:
# Vocabulary sizes: rows of the encoder / decoder embedding tables
# (output_dim is also the width of the decoder's final linear layer).
input_dim = len(dataset.SRC.vocab)
output_dim = len(dataset.TRG.vocab)
emb_dim = 256  # embedding size used by both Encoder and Decoder
hid_dim = 512  # LSTM hidden size used by both Encoder and Decoder
n_layers = 2  # stacked LSTM layers in each of Encoder and Decoder
dropout = 0.1  # dropout probability (on embeddings and between LSTM layers)
clip = 1  # forwarded to Learning.train; presumably a gradient-clipping threshold -- TODO confirm

In [12]:
# Build both halves, wrap them, and move the whole model in a single call:
# nn.Module.to() moves registered sub-modules in place, so enc and dec end up
# on `device` as well.
enc = Encoder(input_dim, emb_dim, hid_dim, n_layers, dropout)
dec = Decoder(output_dim, emb_dim, hid_dim, n_layers, dropout)
model = Seq2Seq(enc, dec, device).to(device)

# Number of iterations of the training loop.
epochs = 10

In [13]:
# Index of the padding token in the target vocabulary.
pad_index = dataset.TRG.vocab.stoi[dataset.TRG.pad_token]

# Cross-entropy loss that ignores positions whose target is the padding index.
criterion = nn.CrossEntropyLoss(ignore_index=pad_index)
# Adam over all parameters of the model (encoder and decoder) with learning rate 1e-4.
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [14]:
# Echo the assembled model so the notebook displays its module tree.
model

Seq2Seq(
  (enc): Encoder(
    (embedding): Embedding(7854, 256)
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.1)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (dec): Decoder(
    (embedding): Embedding(5893, 256)
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.1)
    (dropout): Dropout(p=0.1, inplace=False)
    (fc_out): Linear(in_features=512, out_features=5893, bias=True)
  )
)

In [15]:
def count_parameters(model):
    """Return the total number of elements in the model's trainable parameters.

    Parameters whose ``requires_grad`` is False are skipped.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report the trainable-parameter count, formatted with thousands separators.
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 13,898,757 trainable parameters


In [16]:
# Learning is imported from utils.training and provides the train / evaluation routines.
learn = Learning()

# NOTE(review): the output saved with this cell ends in
# "NameError: name 'np' is not defined" after a single 907-step progress bar.
# This notebook imports numpy as np, so the missing import is presumably inside
# utils/training -- confirm and fix it there.
for epoch in range(epochs):
    # train() returns the model together with a training loss; `clip` is forwarded unchanged.
    model, train_loss = learn.train(model, criterion, optimizer, train_iterator, clip)
    # evaluation() is run on the validation iterator.
    eval_loss = learn.evaluation(model, criterion, valid_iterator)
    print(train_loss, eval_loss)

100%|████████████████████████████████████████████████████████████████████████████████| 907/907 [01:08<00:00, 13.15it/s]


NameError: name 'np' is not defined