导入包

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

定义语句及词性

In [2]:
train_data = [("The cat ate the fish".split(), ["DET", "NN", "V", "DET", "NN"]), 
             ("They read that book".split(), ["NN", "V", "DET", "NN"])]

test_data = [("They ate the fish".split())]

构建每个单词的索引字典

In [3]:
word_to_ix = {}
for sent, tags in train_data :
    for word in sent :
        if word not in word_to_ix :
            word_to_ix[word] = len(word_to_ix)
word_to_ix

{'The': 0,
 'cat': 1,
 'ate': 2,
 'the': 3,
 'fish': 4,
 'They': 5,
 'read': 6,
 'that': 7,
 'book': 8}

手工设置词性的索引字典

In [4]:
tag_to_ix = {"DET" : 0, "NN" : 1, "V" : 2}

构建网络

In [5]:
class LSTMTagger(nn.Module) :
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size) :
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim
        self.word_embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        self.hidden = self.init_hidden()
        
    def init_hidden(self) :
        return (torch.zeros(1, 1, self.hidden_dim), torch.zeros(1, 1, self.hidden_dim))
    
    def forward(self, sentence) :
        embeds = self.word_embedding(sentence)
        lstm_out, self.hidden = self.lstm(embeds.view(len(sentence), 1, -1), self.hidden)
        tag_space = self.hidden2tag(lstm_out[ : , -1, : ])
        tag_score = F.log_softmax(tag_space, dim = 1)
        
        return tag_score

将数据转换成torch.LongTensor张量

In [6]:
def prepare_sequence(seq, to_ix) :
    idxs = [to_ix[w] for w in seq] 
    tensor = torch.LongTensor(idxs)
    return tensor

定义超参数，实例化模型，损失函数，优化器等。

In [7]:
EMBEDDING_DIM = 10
HIDDEN_DIM = 3
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum = 0.9)

训练

In [8]:
for epoch in range(400) :
    for sentence, tags in train_data :
        model.zero_grad()
        model.hidden = model.init_hidden()
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)
#         print(targets)
        
        tag_score = model(sentence_in)
        loss = loss_function(tag_score, targets)
        loss.backward()
        optimizer.step()
        
inputs = prepare_sequence(train_data[0][0], word_to_ix)
tag_score = model(inputs)
_, pred = torch.max(tag_score, 1)
pred = pred.data.numpy()
# print(pred.dtype)
print(train_data[0][0])
print(tag_score)
print(torch.max(tag_score, 1))

print(train_data[0][1])

['The', 'cat', 'ate', 'the', 'fish']
tensor([[-0.3272, -1.4703, -3.0117],
        [-4.8951, -0.0264, -3.9855],
        [-5.0057, -2.9583, -0.0604],
        [-0.0183, -4.7081, -4.6945],
        [-4.8977, -0.0199, -4.4018]], grad_fn=<LogSoftmaxBackward0>)
torch.return_types.max(
values=tensor([-0.3272, -0.0264, -0.0604, -0.0183, -0.0199], grad_fn=<MaxBackward0>),
indices=tensor([0, 1, 2, 0, 1]))
['DET', 'NN', 'V', 'DET', 'NN']


测试模型

In [9]:
test_inputs = prepare_sequence(test_data[0], word_to_ix)
tag_score = model(test_inputs)
print(test_data[0])
print(test_inputs)
print(tag_score)
print(torch.max(tag_score, 1))

['They', 'ate', 'the', 'fish']
tensor([5, 2, 3, 4])
tensor([[-6.2753, -0.0133, -4.4787],
        [-4.9165, -2.6408, -0.0819],
        [-0.0186, -4.4871, -4.9349],
        [-4.8952, -0.0196, -4.4273]], grad_fn=<LogSoftmaxBackward0>)
torch.return_types.max(
values=tensor([-0.0133, -0.0819, -0.0186, -0.0196], grad_fn=<MaxBackward0>),
indices=tensor([1, 2, 0, 1]))


In [10]:
prediction = torch.rand((1, 3))
target = torch.tensor([1])

Crossentropy = nn.CrossEntropyLoss()
loss = Crossentropy(prediction, target)
print(loss)

loss = F.log_softmax(prediction)
nll = nn.NLLLoss()
loss = nll(loss, target)
print(loss)

tensor(1.1135)
tensor(1.1135)


  loss = F.log_softmax(prediction)
