## Recurrent Neural Network with LSTM layers
### Remixed into PyTorch from

https://machinelearningmastery.com/develop-word-based-neural-language-models-python-keras/

In [1]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

In [2]:
def onehot(values, num_classes):
    output = torch.zeros((len(values), num_classes))
    output[np.arange(len(values)), values] = 1.
    return output

In [3]:
def split_words(paragraph):
    return paragraph.replace('\n', ' ').lower().split()

In [4]:
def generate_encoding(tokens):
    counts = {}
    for token in tokens:
        if token not in counts:
            counts[token] = 0
            
        counts[token] += 1
    
    sorted_tokens = sorted(counts.keys(), key=counts.get, reverse=True)

    encoding = dict(zip(sorted_tokens, range(1, 1+len(sorted_tokens))))

    return encoding

def tokenize(tokens, encoding):
    output = []
    for token in tokens:
        output.append(encoding[token])
    return output

In [5]:
def generate_seq(model, encoding, seed_text, n_words):
    curr_text, result = seed_text, seed_text

    for _ in range(n_words):
        # Text -> Int
        encoded = tokenize(split_words(curr_text), encoding)
        encoded = torch.Tensor(encoded).reshape((1, -1)).to(torch.long)

        # predict word
        word_vec = model.forward(encoded).argmax()

        # predicted idx -> word
        for word, idx in encoding.items():
            if idx == word_vec:
                curr_text = word
                break

        #
        result += f" {curr_text}"

    return result

### Model 1: One word in, one word out

In [6]:
data = """ Jack and Jill went up the hill\n
		To fetch a pail of water\n
		Jack fell down and broke his crown\n
		And Jill came tumbling after\n """

# Text -> Integer
encoding = generate_encoding(split_words(data))
encoded = tokenize(split_words(data), encoding)

vocab_size = len(encoding) + 1
print(f"Vocabulary Size: {vocab_size}")

# Word : Word sequences
sequences = [encoded[i-1:i+1] for i in range(1, len(encoded))]
sequences = torch.Tensor(sequences).to(torch.long)
print(f"Total Sequences: {sequences.shape[0]}")

X, y = sequences[:, 0], sequences[:, 1]
y = onehot(y, num_classes=vocab_size)

Vocabulary Size: 22
Total Sequences: 24


In [7]:
# Model
class RNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.l1 = nn.Embedding(vocab_size, 10)
        self.l2 = nn.LSTM(10, 50)  # NOTE Recurrent actiavtion = sigmoid
        self.l3 = nn.Linear(50, vocab_size)

    def forward(self, x):
        x = self.l1(x)
        x, _ = self.l2(x)
        x = F.tanh(x)
        x = F.sigmoid(self.l3(x))
        return x

model = RNN()
print(repr(model))

RNN(
  (l1): Embedding(22, 10)
  (l2): LSTM(10, 50)
  (l3): Linear(in_features=50, out_features=22, bias=True)
)


In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=.001)

In [11]:
# Train
for epoch in range(500):

    total_loss = 0
    for i, inputs in enumerate(X.reshape((-1, 1, 1))):
        labels = y[i].reshape((-1)).argmax().reshape(1)

        optimizer.zero_grad()

        outputs = model(inputs)
        outputs = outputs.reshape((1, -1))

        loss = criterion(outputs, labels.to(torch.long))
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    if epoch % 25 == 24:
        print(f'{epoch+1:4.0f} | loss: {total_loss / 2000:.3f}')
        total_loss = 0.0

  25 | loss: 0.026
  50 | loss: 0.026
  75 | loss: 0.026
 100 | loss: 0.026
 125 | loss: 0.026
 150 | loss: 0.026
 175 | loss: 0.026
 200 | loss: 0.026
 225 | loss: 0.026
 250 | loss: 0.026
 275 | loss: 0.026
 300 | loss: 0.026
 325 | loss: 0.026
 350 | loss: 0.026
 375 | loss: 0.026
 400 | loss: 0.026
 425 | loss: 0.026
 450 | loss: 0.026
 475 | loss: 0.026
 500 | loss: 0.026


In [12]:
#
print(generate_seq(model, encoding, 'Jack', 6))

Jack and jill came tumbling after crown
