This is the Pytorch LSTM tutorial code

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x291dfd721f0>

In [9]:
lstm = nn.LSTM(3, 3)

inputs = [torch.randn(1, 3) for _ in range(5)]

hidden_orig = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))

print("1 at a time")

hidden = (hidden_orig[0].clone(), hidden_orig[1].clone())

for i in inputs:
    out, hidden = lstm(i.view(1, 1, -1), hidden)
    print(out)

print("All at once")

hidden = (hidden_orig[0].clone(), hidden_orig[1].clone())

inputs = torch.cat(inputs).view(len(inputs), 1, -1)
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))
out, hidden = lstm(inputs, hidden)
print(out)

1 at a time
tensor([[[ 0.0487, -0.0815,  0.0485]]], grad_fn=<StackBackward>)
tensor([[[0.0622, 0.1425, 0.2317]]], grad_fn=<StackBackward>)
tensor([[[0.0100, 0.0433, 0.2201]]], grad_fn=<StackBackward>)
tensor([[[-0.0230,  0.0558,  0.2682]]], grad_fn=<StackBackward>)
tensor([[[-0.0705, -0.1328,  0.1770]]], grad_fn=<StackBackward>)
All at once
tensor([[[-0.1812, -0.2320,  0.1414]],

        [[-0.1048, -0.1775,  0.4293]],

        [[-0.0504, -0.0545,  0.3019]],

        [[-0.0522,  0.0229,  0.3295]],

        [[-0.0771, -0.1430,  0.1931]]], grad_fn=<StackBackward>)


In [10]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)


training_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]
word_to_ix = {}
for sent, tags in training_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
print(word_to_ix)
tag_to_ix = {"DET": 0, "NN": 1, "V": 2}

EMBEDDING_DIM = 6
HIDDEN_DIM = 6

{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}


In [14]:
class LSTMTagger(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        
        self.hidden_dim = hidden_dim
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
    
    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

In [16]:
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_func = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)
    print(tag_scores)

for epoch in range(300):
    for sentence, tags in training_data:
        
        model.zero_grad()
        
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)
        
        tag_scores = model(sentence_in)
        
        loss = loss_func(tag_scores, targets)
        loss.backward()
        optimizer.step()

with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)

    # The sentence is "the dog ate the apple".  i,j corresponds to score for tag j
    # for word i. The predicted tag is the maximum scoring tag.
    # Here, we can see the predicted sequence below is 0 1 2 0 1
    # since 0 is index of the maximum value of row 1,
    # 1 is the index of maximum value of row 2, etc.
    # Which is DET NOUN VERB DET NOUN, the correct sequence!
    print(tag_scores)

tensor([[-1.1744, -1.2153, -0.9304],
        [-1.2151, -1.2137, -0.9009],
        [-1.2834, -1.2965, -0.7998],
        [-1.1434, -1.1411, -1.0166],
        [-1.2393, -1.2144, -0.8830]])
tensor([[-0.0197, -4.1972, -5.4001],
        [-4.4541, -0.0188, -4.9650],
        [-3.9025, -4.2872, -0.0345],
        [-0.0230, -4.2312, -4.7976],
        [-4.4421, -0.0229, -4.5176]])


From this blog post
https://blog.floydhub.com/long-short-term-memory-from-zero-to-hero-with-pytorch/

In [1]:
import torch
import torch.nn as nn

In [2]:
input_dim = 5
hidden_dim = 10
n_layers = 2

lstm_layer = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True)

In [5]:
batch_size = 3
seq_len = 4

input = torch.randn(batch_size, seq_len, input_dim)
hidden_state = torch.randn(n_layers, batch_size, hidden_dim)
cell_state = torch.randn(n_layers, batch_size, hidden_dim)
hidden = (hidden_state, cell_state)

In [10]:
out, hidden = lstm_layer(input, hidden)
print(out.shape)
out = out[-1]
print(out.shape)
print(hidden)

torch.Size([3, 4, 10])
torch.Size([4, 10])
(tensor([[[ 0.0330, -0.0881, -0.0078, -0.1163,  0.0874,  0.0760, -0.0197,
          -0.0803, -0.1018,  0.0603],
         [-0.1470, -0.0346,  0.0479, -0.0801,  0.0127,  0.1999,  0.0457,
           0.0786, -0.0935,  0.0099],
         [-0.1871,  0.2550,  0.0072,  0.0504, -0.2007,  0.1470,  0.0471,
          -0.0156, -0.1872, -0.0632]],

        [[ 0.1027, -0.1853, -0.1014, -0.0551, -0.1181, -0.0447,  0.1280,
           0.1501, -0.0275, -0.1062],
         [ 0.1135, -0.1897, -0.1240, -0.0356, -0.1347, -0.0254,  0.1359,
           0.1381, -0.0315, -0.1119],
         [ 0.1088, -0.1700, -0.1523, -0.0407, -0.1275, -0.0368,  0.0894,
           0.1293, -0.0243, -0.1123]]], grad_fn=<StackBackward>), tensor([[[ 0.0521, -0.2119, -0.0128, -0.2276,  0.1629,  0.3429, -0.0384,
          -0.1511, -0.2867,  0.0971],
         [-0.2274, -0.0660,  0.1111, -0.1635,  0.0197,  0.4681,  0.1017,
           0.1376, -0.2106,  0.0196],
         [-0.3255,  0.4055,  0.0214,  