## 1. Import libraries

In [8]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
import pickle
import data_utils
from data_utils import *

## 2. Preparing data

In [9]:
with open('word_dict.pickle', 'rb') as handle:
    word_dict = pickle.load(handle)
    
train_x, train_y = build_word_dataset('train', word_dict,50) ##최대 50 token 나머지는 패딩
test_x, test_y = build_word_dataset('test', word_dict,50)

  warn('"Twitter" has changed to "Okt" since KoNLPy v0.4.5.')


In [12]:
# Bo-LSTM(Attention) parameters
embedding_dim = 256
n_hidden = 128 # number of hidden units in one cell
num_classes = 2 # 0 or 1
batch_size = 64
num_epochs = 5
seq_len = 50
vocab_size = len(word_dict)

In [13]:
class BiLSTM_Attention(nn.Module):
    def __init__(self):
        super(BiLSTM_Attention,self).__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm  = nn.LSTM(embedding_dim, n_hidden, bidirectional= True)
        self.out = nn.Linear(n_hidden*2, num_classes)
        
    # lstm_output : [batch_size, n_step, n_hidden*num_directions(=2)], F matrix
    def attention_net(self, lstm_output, final_state):
        
        hidden = final_state.view(-1,n_hidden*2,1) # hidden:[batch_size, n_hidden*num_directions(=2), 1(=n_layer)]

        attn_weights = torch.bmm(lstm_output,hidden).squeeze(2)      # attn_weights:[batch_size, len_seq]
        soft_attn_weights = F.softmax(attn_weights,1)                # soft_attn_weights : [batch_size, len_seq, 1]
        
        context = torch.bmm(lstm_output.transpose(1,2), soft_attn_weights.unsqueeze(2)).squeeze(2) # lstm_output.transpose(1,2): [batch_size, n_hidden, len_seq]
        return context, soft_attn_weights.data.numpy() # context : [batch_size, n_hidden * num_directions(=2)]
    
    def forward(self, X):
        input = self.embedding(X) # input :[batch_size, len_seq, n_hidden]
        input = input.permute(1,0,2) # input : [len_seq, batch_size, n_hidden]
        
        hidden_state = Variable(torch.zeros(1*2, len(X), n_hidden)) # [num_layers(=1)*num_directions(=2),batch_size,n_hidden] 0으로 초기화
        cell_state = Variable(torch.zeros(1*2,len(X), n_hidden))    # [num_layers(=1)*num_directions(=2), batch_size, n_hidden] 0으로 초기화
        
        output, (final_hidden_state, final_cell_state) = self.lstm(input, (hidden_state, cell_state))
        
        output = output.permute(1,0,2) # output : [batch_size, len_seq, n_hidden*2]
        attn_output, attention = self.attention_net(output, final_hidden_state)
        return self.out(attn_output), attention # model: [batch_size, num_classes], attention: [batch_size, n_step]
    
model = BiLSTM_Attention()
    
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.01)

In [None]:
# Training
train_batches = batch_iter(train_x,train_y,batch_size,num_epochs)
num_batches_per_epochs = (len(train_x)-1) // batch_size +1
step = 0
running_loss = 0
    
for x_batch,y_batch in train_batches:
        
    #FeedForward
        
    step += 1
    x_batch = Variable(torch.LongTensor(x_batch))
    y_batch = Variable(torch.LongTensor(y_batch))
    output, attention = model(x_batch)
    loss = criterion(output,y_batch)
        
    #Backprop and update weight
       
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
        
    #loss 출력
    running_loss += loss.item()
    if (step % 100 == 0):
        print("Loss: {}".format(running_loss/100),'Step: {}'.format(step))
        running_loss = 0

Loss: 0.4268805567920208 Step: 100
Loss: 0.5120247811079025 Step: 200
Loss: 0.48895525962114333 Step: 300
Loss: 0.46548069149255755 Step: 400
Loss: 0.4339281308650971 Step: 500
Loss: 0.4307232415676117 Step: 600
Loss: 0.4171468159556389 Step: 700
Loss: 0.42360946282744405 Step: 800
Loss: 0.42844311326742174 Step: 900
Loss: 0.4167817008495331 Step: 1000
Loss: 0.4175323759019375 Step: 1100
Loss: 0.4243927384912968 Step: 1200
Loss: 0.42445930540561677 Step: 1300
Loss: 0.413636591732502 Step: 1400
Loss: 0.41041492521762846 Step: 1500
Loss: 0.4123778466880321 Step: 1600
Loss: 0.41610816329717637 Step: 1700
Loss: 0.4009539279341698 Step: 1800
Loss: 0.40881083846092225 Step: 1900
Loss: 0.39300848796963694 Step: 2000
Loss: 0.4137266191840172 Step: 2100
Loss: 0.41189706161618234 Step: 2200
Loss: 0.38762944623827933 Step: 2300
Loss: 0.3676491089165211 Step: 2400
Loss: 0.352407388985157 Step: 2500
Loss: 0.36746615439653396 Step: 2600
Loss: 0.3631678073108196 Step: 2700
Loss: 0.35467688605189324 S