In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

### A simple example of LSTM

In [21]:
# Fix the RNG seed so the LSTM weights and the random tensors below are reproducible.
torch.manual_seed(1)

# LSTM with 3 input features per step and 3 hidden (= output) features.
lstm = nn.LSTM(3, 3)

# torch.randn(*sizes) returns a tensor of standard-normal random numbers.
# Build a sequence of length 5 as a list of five (1, 3) tensors.
inputs = [torch.randn(1, 3) for _ in range(5)]

# Random initial (hidden state, cell state) pair; each tensor is (1, 1, 3).
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))

# Variant 1: walk the sequence one element at a time, feeding the state
# returned by each call back into the next call.
for i in inputs:
    # unsqueeze(0) prepends the sequence axis: (1, 3) -> (1, 1, 3).
    # out: (1, 1, 3); hidden: ((1, 1, 3), (1, 1, 3))
    out, hidden = lstm(i.unsqueeze(0), hidden)

# Variant 2: process the whole sequence in a single call.
# Stacking the five (1, 3) tensors gives one tensor of shape (5, 1, 3).
inputs = torch.stack(inputs)

# Draw a fresh random state so this pass does not continue from the loop above.
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))
out, hidden = lstm(inputs, hidden)
print(out)
print(hidden)

tensor([[[-0.0187,  0.1713, -0.2944]],

        [[-0.3521,  0.1026, -0.2971]],

        [[-0.3191,  0.0781, -0.1957]],

        [[-0.1634,  0.0941, -0.1637]],

        [[-0.3368,  0.0959, -0.0538]]])
(tensor([[[-0.3368,  0.0959, -0.0538]]]), tensor([[[-0.9825,  0.4715, -0.0633]]]))


### An LSTM for Part-of-Speech Tagging

In [4]:
def prepare_sequence(seq, to_ix):
    """Convert a sequence of tokens into a 1-D tensor of integer indices.

    Args:
        seq: iterable of tokens; each one is looked up as ``to_ix[token]``.
        to_ix: mapping from token to integer index.

    Returns:
        A ``torch.long`` tensor holding the index of each token, in order.

    Raises:
        KeyError: if ``to_ix`` is a dict and a token is missing from it.
    """
    indices = []
    for token in seq:
        indices.append(to_ix[token])
    return torch.tensor(indices, dtype=torch.long)

# Toy corpus: each entry pairs a tokenized sentence with one tag per token.
training_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"]),
]

# Give every distinct word the next free integer id, in order of first appearance.
word_to_ix = {}
for sent, tags in training_data:
    for word in sent:
        # setdefault inserts only when the word is new, so existing ids are kept.
        word_to_ix.setdefault(word, len(word_to_ix))

print(word_to_ix)

# Tag vocabulary: DET -> 0, NN -> 1, V -> 2.
tag_to_ix = {tag: ix for ix, tag in enumerate(("DET", "NN", "V"))}

# Real models usually use something like 32 or 64 dimensions.
# Both sizes are kept small here so the weight changes during training are easy to see.
EMBEDDING_DIM = HIDDEN_DIM = 6

{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}


In [5]:
class LSTMTagger(nn.Module):
    
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tager_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim
        
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)