# Speech tagging with RNN

In [28]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np

train = [
    ("I want chocolate latte".lower().split(), ["N", "V", "N", "N"]),
    ("He study the computer science".lower().split(), ["N", "V", "D", "N", "N"]),
    ("She loves coding".lower().split(), ["N", "V", "N"]),
    ("Jun loves me".lower().split(), ["N", "V", "N"]), 
]

test = [
    ("Jun study coding".lower().split(), ["N", "V", "N"]),
    ("Jun loves me".lower().split(), ["N", "V", "N"]),
    ("She loves latte".lower().split(), ["N", "V", "N"]),
    ("Computer science want chocolate study".lower().split(), ["N", "N", "V", "N", "V"]),
]

word2idx = {}
for sentence, tags in train:
    for word in sentence:
        if word not in word2idx:
            word2idx[word] = len(word2idx)

tag2idx = {"D": 0, "N": 1, "V": 2}
print(word2idx)

{'i': 0, 'want': 1, 'chocolate': 2, 'latte': 3, 'he': 4, 'study': 5, 'the': 6, 'computer': 7, 'science': 8, 'she': 9, 'loves': 10, 'coding': 11, 'jun': 12, 'me': 13}


In [6]:
def sequence_to_tensor(sentence, word2idx):
    idxs = np.array([word2idx[word] for word in sentence])
    return torch.from_numpy(idxs)

# check out what prepare_sequence does for one of our training sentences:
example = sequence_to_tensor("Jun want study computer science".lower().split(), word2idx)
print(example)

tensor([12,  1,  5,  7,  8])


# Create model and define loss and optimizer

# NLLLoss is the negative log likelihood loss function
텐서에 log 확률값이 들어있는 상황에서 사용하며, 이는 모델의 출력 단에 nn.LogSoftmax와 같은 함수를 적용하여 log 확률 형태로 변환한 뒤, 해당 log 확률과 실제 정답 레이블과 비교하여 손실을 계산하는 구조입니다.
로그가능도가 클 수록(가능도가 높을 수록) 모델이 데이터를 잘 표현하고 있는 것이므로 이를 최소화 문제로 바꿔 optimizer를 적용하려면 음수를 취해 최소화 문제로 바꿔서 사용합니다.

nn.CrossEntropyLoss 내부에서는 자동으로 nn.LogSoftmax를 적용하고 nn.NLLLoss를 계산하는 과정을 함께 처리합니다.
즉, 별도로 log_softmax를 적용해야 하는지(= NLLLoss를 써야 하는지), 혹은 그냥 한 번에 CrossEntropyLoss를 쓸지는 모델의 출력 형태에 달려 있습니다.

아래 모델에서는 `F.log_softmax(tag_outputs, dim=1)` 을 마지막 단에 적용하여 Log 확률 분포를 출력하므로 NLLLoss를 사용하여 음의 로그 가능도 기반으로 학습을 할 수 있습니다.


In [29]:
class Model(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        ''' Initialize the layers of this model.'''
        super(Model, self).__init__()
        
        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        
        self.hidden = self.init_hidden()

        
    def init_hidden(self):
        # (n_layers, batch_size, hidden_dim)
        return (torch.zeros(1, 1, self.hidden_dim),
                torch.zeros(1, 1, self.hidden_dim))

    def forward(self, sentence):
        ''' Define the feedforward behavior of the model.'''
        embeds = self.word_embeddings(sentence)
        lstm_out, self.hidden = self.lstm(embeds.view(len(sentence), 1, -1), self.hidden)
        
        tag_outputs = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_outputs, dim=1)
        
        return tag_scores


model = Model(embedding_dim=5, hidden_dim=5, vocab_size=len(word2idx), tagset_size=len(tag2idx))
loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

test_sentence = "The cheese loves the elephant".lower().split()

tag_scores = model(example)
print(tag_scores)
_, predicted_tags = torch.max(tag_scores, 1)
print('\n')
print('Predicted tags: \n',predicted_tags)

tensor([[-1.0164, -1.2582, -1.0386],
        [-1.0163, -1.2089, -1.0802],
        [-1.0908, -1.2276, -0.9914],
        [-1.1119, -1.2051, -0.9905],
        [-1.0996, -1.2283, -0.9830]], grad_fn=<LogSoftmaxBackward0>)


Predicted tags: 
 tensor([0, 0, 2, 2, 2])


In [30]:
# normally these epochs take a lot longer 
# but with our toy data (only 3 sentences), we can do many epochs in a short time
n_epochs = 300

for epoch in range(n_epochs):
    
    epoch_loss = 0.0
    
    # get all sentences and corresponding tags in the training data
    for sentence, tags in train:
        
        # zero the gradients
        model.zero_grad()

        # zero the hidden state of the LSTM, this detaches it from its history
        model.hidden = model.init_hidden()

        # prepare the inputs for processing by out network, 
        # turn all sentences and targets into Tensors of numerical indices
        sentence_in = sequence_to_tensor(sentence, word2idx)
        targets = sequence_to_tensor(tags, tag2idx)

        # forward pass to get tag scores
        tag_scores = model(sentence_in)

        # compute the loss, and gradients 
        loss = loss_function(tag_scores, targets)
        epoch_loss += loss.item()
        loss.backward()
        
        # update the model parameters with optimizer.step()
        optimizer.step()
        
    # print out avg loss per 20 epochs
    if(epoch%20 == 19):
        print("Epoch: %d, loss: %1.5f" % (epoch + 1, epoch_loss/len(train)))


test_sentence = "Jun loves coding".lower().split()

# see what the scores are after training
inputs = sequence_to_tensor(test_sentence, word2idx)
tag_scores = model(inputs)
print(tag_scores)

# print the most likely tag index, by grabbing the index with the maximum score!
# recall that these numbers correspond to tag2idx = {"DET": 0, "NN": 1, "V": 2}
_, predicted_tags = torch.max(tag_scores, 1)
print('\n')
print('Predicted tags: \n',predicted_tags)

idx2tag = {v: k for k, v in tag2idx.items()}
for sentence, tags in test:
    inputs = sequence_to_tensor(sentence, word2idx)
    tag_scores = model(inputs)
    _, predicted_tags = torch.max(tag_scores, 1)
    print(f"{sentence} -> {tags} : {[idx2tag[idx] for idx in predicted_tags.cpu().tolist()]}")


Epoch: 20, loss: 0.00379
Epoch: 40, loss: 0.00130
Epoch: 60, loss: 0.00079
Epoch: 80, loss: 0.00051
Epoch: 100, loss: 0.00037
Epoch: 120, loss: 0.00026
Epoch: 140, loss: 0.00018
Epoch: 160, loss: 0.00014
Epoch: 180, loss: 0.00012
Epoch: 200, loss: 0.00010
Epoch: 220, loss: 0.00009
Epoch: 240, loss: 0.00007
Epoch: 260, loss: 0.00006
Epoch: 280, loss: 0.00005
Epoch: 300, loss: 0.00005
tensor([[-1.7831e+01, -1.9073e-05, -1.0869e+01],
        [-1.3935e+01, -1.1782e+01, -8.5830e-06],
        [-1.3681e+01, -1.3113e-06, -1.6356e+01]],
       grad_fn=<LogSoftmaxBackward0>)


Predicted tags: 
 tensor([1, 2, 1])
['jun', 'study', 'coding'] -> ['N', 'V', 'N'] : ['N', 'V', 'N']
['jun', 'loves', 'me'] -> ['N', 'V', 'N'] : ['N', 'V', 'N']
['she', 'loves', 'latte'] -> ['N', 'V', 'N'] : ['V', 'V', 'N']
['computer', 'science', 'want', 'chocolate', 'study'] -> ['N', 'N', 'V', 'N', 'V'] : ['N', 'N', 'V', 'N', 'V']
