In [13]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt

In [14]:
dtype = torch.FloatTensor

In [28]:
# Bo-LSTM(Attention) parameters
embedding_dim = 2
n_hidden = 5 # number of hidden units in one cell
num_classes = 2 # 0 or 1
batch_size = 6

In [29]:
# 3 words sentences (=sequence_length is 3)
sentences = ["i love you", "he loves me", 'shee likes baseball','I hate you','sorry for that','this is awful']
labels = [1,1,1,0,0,0] # 1: 긍정 0: 부정

In [30]:
word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
vocab_size = len(word_dict)


In [31]:
inputs = []
for sen in sentences:
    inputs.append(np.asarray([word_dict[n] for n in sen.split()]))

In [32]:
targets = []
for out in labels:
    targets.append(out) # To using Torch Softmax Loss function

In [33]:
targets

[1, 1, 1, 0, 0, 0]

In [34]:
input_batch = Variable(torch.LongTensor(inputs))
target_batch = Variable(torch.LongTensor(targets))

In [35]:
input_batch

tensor([[ 7,  8, 16],
        [ 5, 15,  3],
        [ 1, 14,  6],
        [13,  9, 16],
        [ 4, 10,  2],
        [11,  0, 12]])

In [36]:
len(input_batch)

6

In [37]:
input_batch.size()

torch.Size([6, 3])

In [38]:
class BiLSTM_Attention(nn.Module):
    def __init__(self):
        super(BiLSTM_Attention,self).__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm  = nn.LSTM(embedding_dim, n_hidden, bidirectional= True)
        self.out = nn.Linear(n_hidden*2, num_classes)
        
    # lstm_output : [batch_size, n_step, n_hidden*num_directions(=2)], F matrix
    def attention_net(self, lstm_output, final_state):
        
        hidden = final_state.view(-1,n_hidden*2,1) # hidden:[batch_size, n_hidden*num_directions(=2), 1(=n_layer)]

        attn_weights = torch.bmm(lstm_output,hidden).squeeze(2)      # attn_weights:[batch_size, len_seq]
        soft_attn_weights = F.softmax(attn_weights,1)                # soft_attn_weights : [batch_size, len_seq, 1]
        
        context = torch.bmm(lstm_output.transpose(1,2), soft_attn_weights.unsqueeze(2)).squeeze(2) # lstm_output.transpose(1,2): [batch_size, n_hidden, len_seq]
        return context, soft_attn_weights.data.numpy() # context : [batch_size, n_hidden * num_directions(=2)]
    
    def forward(self, X):
        input = self.embedding(X) # input :[batch_size, len_seq, n_hidden]
        input = input.permute(1,0,2) # input : [len_seq, batch_size, n_hidden]
        
        hidden_state = Variable(torch.zeros(1*2, len(X), n_hidden)) # [num_layers(=1)*num_directions(=2),batch_size,n_hidden] 0으로 초기화
        cell_state = Variable(torch.zeros(1*2,len(X), n_hidden))    # [num_layers(=1)*num_directions(=2), batch_size, n_hidden] 0으로 초기화
        
        output, (final_hidden_state, final_cell_state) = self.lstm(input, (hidden_state, cell_state))
        
        output = output.permute(1,0,2) # output : [batch_size, len_seq, n_hidden*2]
        attn_output, attention = self.attention_net(output, final_hidden_state)
        return self.out(attn_output), attention # model: [batch_size, num_classes], attention: [batch_size, n_step]
    
model = BiLSTM_Attention()
    
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.01)

In [39]:
# Training
for epoch in range(5000):
    optimizer.zero_grad()
    output,attention = model(input_batch)
    loss = criterion(output, target_batch)
    if (epoch +1) % 1000 == 0:
        print('Epoch: ', "%04d" % (epoch + 1), 'cost = ', '{:.6f}'.format(loss))
        
    loss.backward()
    optimizer.step()


Epoch:  1000 cost =  0.000084
Epoch:  2000 cost =  0.000024
Epoch:  3000 cost =  0.000010
Epoch:  4000 cost =  0.000005
Epoch:  5000 cost =  0.000003


In [40]:
# Test
test_text = 'sorry hate you'
tests = [np.asarray([word_dict[n] for n in test_text.split()])]
test_batch = Variable(torch.LongTensor(tests))

In [41]:
test_batch.size()

torch.Size([1, 3])

In [42]:
model(test_batch)

(tensor([[ 7.6350, -7.6986]], grad_fn=<AddmmBackward>),
 array([[0.65199715, 0.32035777, 0.02764505]], dtype=float32))

In [43]:
predict,_ = model(test_batch)
predict = predict.data.max(1,keepdim=True)[1]
if predict[0][0] == 0:
    print(test_text,"is Bad Mean...")
else:
    print(test_text,"is Good Mean!!")
    
    

sorry hate you is Bad Mean...
