In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import nltk


# Seed PyTorch's global RNG so the random tensors and freshly initialised
# weights created below are repeatable from run to run.
torch.manual_seed(1)

<torch._C.Generator at 0x7fa7b0aa21d0>

In [3]:
# Setting up an LSTM as a basic warm-up, taking a top-down approach.

# Based on the PyTorch tutorial:
# https://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html

lstm = nn.LSTM(3, 3)  # Input dimension 3, hidden/output dimension 3

In [4]:
# Toy input sequence: five time steps, each a random 1x3 tensor.
inputs = [torch.randn(1,3) for _ in range(5)]
inputs # Sequence of length 5

[tensor([[-0.5525,  0.6355, -0.3968]]),
 tensor([[-0.6571, -1.6428,  0.9803]]),
 tensor([[-0.0421, -0.8206,  0.3133]]),
 tensor([[-1.1352,  0.3773, -0.2824]]),
 tensor([[-2.5667, -1.4303,  0.5009]])]

In [5]:
# Initial state pair passed to the LSTM (hidden state, cell state); each
# tensor is random and shaped (1, 1, 3).
hidden = (torch.randn(1,1,3),
          torch.randn(1,1,3))

for ii in inputs:
    # Step through the sequence one element at a time; each 1x3 input is
    # reshaped to (1, 1, 3) before being fed to the LSTM.
    # `hidden` is rebound to the state returned by this step, so it is what
    # gets carried into the next iteration.
    out, hidden = lstm(ii.view(1,1,-1), hidden) 
    print(out, hidden)

tensor([[[-0.2682,  0.0304, -0.1526]]], grad_fn=<StackBackward0>) (tensor([[[-0.2682,  0.0304, -0.1526]]], grad_fn=<StackBackward0>), tensor([[[-1.0766,  0.0972, -0.5498]]], grad_fn=<StackBackward0>))
tensor([[[-0.5370,  0.0346, -0.1958]]], grad_fn=<StackBackward0>) (tensor([[[-0.5370,  0.0346, -0.1958]]], grad_fn=<StackBackward0>), tensor([[[-1.1552,  0.1214, -0.2974]]], grad_fn=<StackBackward0>))
tensor([[[-0.3947,  0.0391, -0.1217]]], grad_fn=<StackBackward0>) (tensor([[[-0.3947,  0.0391, -0.1217]]], grad_fn=<StackBackward0>), tensor([[[-1.0727,  0.1104, -0.2179]]], grad_fn=<StackBackward0>))
tensor([[[-0.1854,  0.0740, -0.0979]]], grad_fn=<StackBackward0>) (tensor([[[-0.1854,  0.0740, -0.0979]]], grad_fn=<StackBackward0>), tensor([[[-1.0530,  0.1836, -0.1731]]], grad_fn=<StackBackward0>))
tensor([[[-0.3600,  0.0893,  0.0215]]], grad_fn=<StackBackward0>) (tensor([[[-0.3600,  0.0893,  0.0215]]], grad_fn=<StackBackward0>), tensor([[[-1.1298,  0.4467,  0.0254]]], grad_fn=<StackBackward

In [6]:
# Using an LSTM for POS tagging

def prep_seq(seq, to_ix):
    """Encode a sequence of items as a 1-D ``torch.long`` tensor of indices.

    Every item of ``seq`` is looked up in the ``to_ix`` mapping, so an item
    that is not a key of ``to_ix`` raises ``KeyError``.
    """
    indices = []
    for token in seq:
        indices.append(to_ix[token])
    return torch.tensor(indices, dtype=torch.long)



In [47]:
# NOTE(review): `zip_object` is only assigned in a cell further down this
# notebook, so this cell fails when the notebook is run top to bottom on a
# fresh kernel.
# The slice starts at index 1, so element 0 of `zip_object` is never used
# -- TODO confirm that is intended.
training_data = zip_object[1:100]

# Assign each distinct word an integer id in order of first appearance.
word_to_ix = {}
for sent, tags in training_data:
    for word in sent:
        # setdefault only inserts when the word is new; the id is the
        # vocabulary size at that moment.
        word_to_ix.setdefault(word, len(word_to_ix))

# Tag label -> class index used as the training target.
tag_to_ix = {
    "ADJ": 0,
    "ADP": 1,
    "ADV": 2,
    "CONJ": 3,
    "DET": 4,
    "NOUN": 5,
    "NUM": 6,
    "PRT": 7,
    "PRON": 8,
    "VERB": 9,
    ".": 10,
    "X": 11,
}

# Sizes of the embedding vectors and of the LSTM hidden state.
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

In [48]:
class LSTMTagger(nn.Module):
    """Sequence tagger: word embedding -> LSTM -> linear layer -> log-softmax.

    Args:
        embedding_dim: Size of each word embedding vector.
        hidden_dim: Size of the LSTM hidden state.
        vocab_size: Number of rows in the embedding table.
        tagset_size: Number of output scores produced per token.
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super().__init__()
        self.hidden_dim = hidden_dim

        # Sub-modules are created in embedding, LSTM, linear order; keeping
        # that order keeps seeded weight initialisation unchanged.
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        """Score every token of a single sentence.

        Args:
            sentence: Tensor of word indices for one sentence.

        Returns:
            Tensor with ``len(sentence)`` rows holding the log-softmax of the
            tag scores, taken over dimension 1.
        """
        n_tokens = len(sentence)
        embedded = self.word_embeddings(sentence)
        # The LSTM is fed (n_tokens, 1, features): the middle dimension of
        # size 1 is the batch, since one sentence is processed at a time.
        hidden_states, _ = self.lstm(embedded.view(n_tokens, 1, -1))
        logits = self.hidden2tag(hidden_states.view(n_tokens, -1))
        return F.log_softmax(logits, dim=1)

In [49]:
# Build the tagger sized to the vocabulary and tag set. NLLLoss is paired
# with the log-softmax scores that LSTMTagger.forward returns.
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)


# Print the scores for the first training example before any training.
# no_grad is used because nothing is back-propagated here.
# NOTE: this rebinds `inputs`, which an earlier cell used for a list of
# random tensors.
with torch.no_grad():
    inputs = prep_seq(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)
    print(tag_scores)

# Train for 100 passes over training_data, updating the weights after every
# single (sentence, tags) example.
for epoch in range(100):  
    for sentence, tags in training_data:
        
        # Reset gradients so the previous example's gradients do not
        # accumulate into this step.
        model.zero_grad()


        # Encode the words and their tags as index tensors.
        sentence_in = prep_seq(sentence, word_to_ix)
        targets = prep_seq(tags, tag_to_ix)


        # Forward pass: one row of log-softmax tag scores per word.
        tag_scores = model(sentence_in)


        # Compute the loss, back-propagate, and apply one SGD update.
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()


# Re-score the first training example now that training has finished.
with torch.no_grad():
    inputs = prep_seq(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)

    # Entry [i, j] is the log-softmax score of tag index j (see tag_to_ix)
    # for word i, so there is one row per word and one column per tag.
    # The predicted tag for a word is the column holding the maximum value
    # in that word's row.
    # NOTE(review): this cell only prints the raw scores; it does not compare
    # the predictions against the true tags, so nothing here checks accuracy.
    print(tag_scores)

tensor([[-2.3495, -2.2596, -2.6705, -2.8485, -2.2588, -2.8152, -2.5928, -2.8620,
         -2.1860, -2.4379, -2.4248, -2.4247],
        [-2.2112, -2.2662, -2.6749, -2.9363, -2.3828, -2.7587, -2.7073, -2.9391,
         -2.1649, -2.4900, -2.3206, -2.3691],
        [-2.3951, -2.3126, -2.7538, -2.7509, -2.2297, -2.7975, -2.6108, -2.8141,
         -2.2154, -2.3957, -2.4084, -2.4064],
        [-2.3913, -2.1993, -2.5799, -2.9197, -2.2754, -2.8156, -2.5082, -2.9434,
         -2.1399, -2.4680, -2.4516, -2.4940],
        [-2.4136, -2.2777, -2.6898, -2.8358, -2.1953, -2.8558, -2.5945, -2.7952,
         -2.2150, -2.3867, -2.3995, -2.4658],
        [-2.4158, -2.2605, -2.6351, -2.8462, -2.2117, -2.8272, -2.5450, -2.8392,
         -2.1799, -2.4229, -2.4395, -2.5002],
        [-2.3626, -2.1972, -2.5301, -2.9704, -2.3089, -2.7904, -2.5379, -2.9664,
         -2.0923, -2.5040, -2.4166, -2.5497],
        [-2.4242, -2.2510, -2.5840, -2.8824, -2.2458, -2.7855, -2.5346, -2.8901,
         -2.1271, -2.4513, -2.

In [22]:
# Set up my own training data using NLTK

# (word, tag) pairs from the Brown corpus, requesting the 'universal' tagset.
# NOTE(review): presumably requires the Brown corpus and the universal tagset
# mapping to be downloaded already (nltk.download) -- confirm.
b = nltk.corpus.brown.tagged_words(tagset='universal')

In [25]:
# Preview the (word, tag) pairs held in `b`.
b

[('The', 'DET'), ('Fulton', 'NOUN'), ...]

In [16]:
# Display the current value of training_data.
# NOTE(review): the output saved under this cell shows a two-sentence toy
# dataset with tags such as 'NN' and 'V', which are not keys of tag_to_ix.
# It appears to come from an earlier run, before training_data was rebuilt
# from the Brown corpus -- re-run to refresh it.
training_data

[(['The', 'dog', 'ate', 'the', 'apple'], ['DET', 'NN', 'V', 'DET', 'NN']),
 (['Everybody', 'read', 'that', 'book'], ['NN', 'V', 'DET', 'NN'])]

In [27]:
def chunks(lst, n):
    """Yield successive n-sized chunks from lst.

    Args:
        lst: A sequence that supports ``len()`` and slicing.
        n: Chunk length; must be positive.

    Yields:
        Consecutive slices ``lst[i:i + n]``. The final chunk may be shorter
        than ``n`` when ``len(lst)`` is not a multiple of ``n``.

    Raises:
        ValueError: If ``n`` is zero or negative. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop all data,
    # so reject every non-positive size explicitly.
    if n <= 0:
        raise ValueError(f"chunk size n must be positive, got {n!r}")
    for start in range(0, len(lst), n):
        yield lst[start:start + n]

In [28]:
# Split the (word, tag) pairs in `b` into two parallel lists: one with the
# words, one with their tags, in the same order.
words_b = [word for word, _ in b]
pos_b = [tag for _, tag in b]

In [29]:
# Both streams must be cut with the same chunk length so that the i-th chunk
# of words lines up with the i-th chunk of tags; a single constant keeps the
# two calls in sync.
CHUNK_SIZE = 25

# chunks() is a generator function, so each of these can be iterated only once.
chunk_words = chunks(words_b, CHUNK_SIZE)
chunk_pos = chunks(pos_b, CHUNK_SIZE)


In [30]:
# Pair each chunk of words with the matching chunk of tags and materialise
# the pairs as a list so they can be indexed and sliced.
# NOTE: chunk_words and chunk_pos come from a generator function, so they are
# exhausted after this line; re-running this cell without re-creating them
# produces an empty list.
zip_object = list(zip(chunk_words, chunk_pos))

In [35]:
# Training set: elements 1 through 999 of zip_object (at most 999 pairs).
# NOTE(review): the slice starts at 1, so element 0 is left out -- TODO
# confirm that is intended. Another cell in this notebook rebinds
# training_data to zip_object[1:100], so the value in use depends on which
# cell ran last.
training_data = zip_object[1:1000]