In [68]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x15b31edf110>

In [70]:
# lstm = nn.LSTM(3,3)
# inputs = [torch.randn(1,3) for _ in range(5)]
# hidden_1 = torch.randn(1, 1, 3)
# hidden_2 = torch.randn(1, 1, 3)
# hidden = (
#     hidden_1,
#     hidden_2
# )
# for i in inputs:
#     out, hidden = lstm(i.view(1, 1, -1), hidden)
#     print(f"Out: {out}")
#     print(f"Hidden: {hidden}")

# inputs = torch.cat(inputs).view(len(inputs), 1, -1)
# hidden = (hidden_1, hidden_2)
# out, hidden = lstm(inputs, hidden)
# print(f"Out: {out}")
# print(f"Hidden_1: {hidden[0]}")

In [94]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)

training_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read the book".split(), ["NN", "V", "DET", "NN"])
]

word_to_ix = {}
for sent, tags in training_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)

print(word_to_ix)
tag_to_ix = {"DET": 0, "NN":1, "V":2}

EMBEDDING_DIM = 6
HIDDEN_DIM = 3
    

{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'book': 7}


In [95]:
class LSTMTagger(nn.Module):
    """Some Information about LSTMTagger"""
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        print(f"LSTM input shape: {embeds.view(len(sentence),1,-1).shape}")
        lstm_out, _ = self.lstm(embeds.view(len(sentence),1,-1))
        print(f"LSTM output shape: {lstm_out.shape}")
        
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

        

In [96]:
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)
    print(tag_scores)


LSTM input shape: torch.Size([5, 1, 6])
LSTM output shape: torch.Size([5, 1, 3])
tensor([[-1.5289, -1.1466, -0.7647],
        [-1.6191, -1.0554, -0.7899],
        [-1.4730, -1.0420, -0.8723],
        [-1.4912, -0.9880, -0.9098],
        [-1.5096, -1.0932, -0.8122]])


In [97]:
for epoch in range(300):
    for sentence, tags in training_data:
        model.zero_grad()
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)
        tag_scores = model(sentence_in)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()

with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)
    print(tag_scores)


LSTM input shape: torch.Size([5, 1, 6])
LSTM output shape: torch.Size([5, 1, 3])
LSTM input shape: torch.Size([4, 1, 6])
LSTM output shape: torch.Size([4, 1, 3])
LSTM input shape: torch.Size([5, 1, 6])
LSTM output shape: torch.Size([5, 1, 3])
LSTM input shape: torch.Size([4, 1, 6])
LSTM output shape: torch.Size([4, 1, 3])
LSTM input shape: torch.Size([5, 1, 6])
LSTM output shape: torch.Size([5, 1, 3])
LSTM input shape: torch.Size([4, 1, 6])
LSTM output shape: torch.Size([4, 1, 3])
LSTM input shape: torch.Size([5, 1, 6])
LSTM output shape: torch.Size([5, 1, 3])
LSTM input shape: torch.Size([4, 1, 6])
LSTM output shape: torch.Size([4, 1, 3])
LSTM input shape: torch.Size([5, 1, 6])
LSTM output shape: torch.Size([5, 1, 3])
LSTM input shape: torch.Size([4, 1, 6])
LSTM output shape: torch.Size([4, 1, 3])
LSTM input shape: torch.Size([5, 1, 6])
LSTM output shape: torch.Size([5, 1, 3])
LSTM input shape: torch.Size([4, 1, 6])
LSTM output shape: torch.Size([4, 1, 3])
LSTM input shape: torch.Size

In [98]:
print(model.parameters)

<bound method Module.parameters of LSTMTagger(
  (word_embeddings): Embedding(8, 6)
  (lstm): LSTM(6, 3)
  (hidden2tag): Linear(in_features=3, out_features=3, bias=True)
)>


In [99]:
for name, param in model.named_parameters():
    print(f"Name: {name}, size: {param.size()}, values: {param[:2]}")

Name: word_embeddings.weight, size: torch.Size([8, 6]), values: tensor([[ 0.4914,  1.3239, -0.0324,  0.9681,  0.8729, -0.3303],
        [ 0.0134, -1.9481, -0.0861, -0.1936,  0.0443, -0.9951]],
       grad_fn=<SliceBackward0>)
Name: lstm.weight_ih_l0, size: torch.Size([12, 6]), values: tensor([[-0.2571, -0.4307, -0.3790, -0.1553,  0.5264,  0.0968],
        [-0.0364,  0.8907, -0.5684, -0.0628, -0.4939, -0.5911]],
       grad_fn=<SliceBackward0>)
Name: lstm.weight_hh_l0, size: torch.Size([12, 3]), values: tensor([[ 0.4737,  0.1235, -0.4917],
        [ 0.6304, -0.5039,  0.7168]], grad_fn=<SliceBackward0>)
Name: lstm.bias_ih_l0, size: torch.Size([12]), values: tensor([ 0.5138, -0.2785], grad_fn=<SliceBackward0>)
Name: lstm.bias_hh_l0, size: torch.Size([12]), values: tensor([ 0.6383, -0.5067], grad_fn=<SliceBackward0>)
Name: hidden2tag.weight, size: torch.Size([3, 3]), values: tensor([[-2.0949,  1.0853, -1.7926],
        [ 2.6875,  1.1739,  1.4454]], grad_fn=<SliceBackward0>)
Name: hidden2ta