## Recurrent Neural Network with LSTM layers
### Remixed into PyTorch from

https://machinelearningmastery.com/develop-word-based-neural-language-models-python-keras/

In [2]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

In [3]:
def onehot(values, num_classes):
    output = torch.zeros((len(values), num_classes))
    output[np.arange(len(values)), values] = 1.
    return output

In [4]:
def split_words(paragraph):
    return paragraph.replace('\n', ' ').lower().split()

In [5]:
def generate_encoding(tokens):
    counts = {}
    for token in tokens:
        if token not in counts:
            counts[token] = 0
            
        counts[token] += 1
    
    sorted_tokens = sorted(counts.keys(), key=counts.get, reverse=True)

    encoding = dict(zip(sorted_tokens, range(1, 1+len(sorted_tokens))))

    return encoding

def tokenize(tokens, encoding):
    output = []
    for token in tokens:
        output.append(encoding[token])
    return output

In [6]:
def pre_pad(sequences, max_length, pad_value=0):
    padded = torch.full((len(sequences), max_length), pad_value, dtype=torch.long)
    for i, sequence in enumerate(sequences):
        padded[i, -len(sequence):] = torch.Tensor(sequence)

    return padded

In [7]:
def train_model(model, loss, optimizer, X, y, n_epoch=500):
    for epoch in range(n_epoch):

        total_loss = 0
        for i, inputs in enumerate(X):
            labels = y[i]

            optimizer.zero_grad()

            outputs = model(inputs)
            outputs = outputs.reshape((1, -1))

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        if epoch % (n_epoch // 10) == (n_epoch // 10) - 1:
            print(f'{epoch+1:4.0f} | loss: {total_loss / 2000:.3f}')
            total_loss = 0.0
        
    return model

### Model 1: One word in, one word out

In [8]:
def word_by_word_seq(model, encoding, seed_text, n_words):
    curr_text, result = seed_text, seed_text

    for _ in range(n_words):
        # Text -> Int
        encoded = tokenize(split_words(curr_text), encoding)
        encoded = torch.Tensor(encoded).reshape((1, -1)).to(torch.long)

        # predict word
        word_vec = model.forward(encoded).argmax()

        # predicted idx -> word
        for word, idx in encoding.items():
            if idx == word_vec:
                curr_text = word
                break

        #
        result += f" {curr_text}"

    return result

In [9]:
data = """ Jack and Jill went up the hill\n
		To fetch a pail of water\n
		Jack fell down and broke his crown\n
		And Jill came tumbling after\n """

# Text -> Integer
encoding = generate_encoding(split_words(data))
encoded = tokenize(split_words(data), encoding)

vocab_size = len(encoding) + 1
print(f"Vocabulary Size: {vocab_size}")

# Word : Word sequences
sequences = [encoded[i-1:i+1] for i in range(1, len(encoded))]
sequences = torch.Tensor(sequences).to(torch.long)
print(f"Total Sequences: {sequences.shape[0]}")

X, y = sequences[:, 0], sequences[:, 1]
y = onehot(y, num_classes=vocab_size)

Vocabulary Size: 22
Total Sequences: 24


In [10]:
# Model
class RNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.l1 = nn.Embedding(vocab_size, 10)
        self.l2 = nn.LSTM(10, 50)
        self.l3 = nn.Linear(50, vocab_size)

    def forward(self, x):
        x = self.l1(x)
        x, _ = self.l2(x)
        x = F.tanh(x)
        x = F.sigmoid(self.l3(x))
        return x

model = RNN()
print(repr(model))

RNN(
  (l1): Embedding(22, 10)
  (l2): LSTM(10, 50)
  (l3): Linear(in_features=50, out_features=22, bias=True)
)


In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=.001)

In [12]:
# Train
model = train_model(model, criterion, optimizer, X.reshape((-1, 1, 1)), y.argmax(1).reshape((-1, 1)).to(torch.long), n_epoch=500)

  50 | loss: 0.029
 100 | loss: 0.027
 150 | loss: 0.026
 200 | loss: 0.026
 250 | loss: 0.026
 300 | loss: 0.026
 350 | loss: 0.026
 400 | loss: 0.026
 450 | loss: 0.026
 500 | loss: 0.026


In [13]:
print(word_by_word_seq(model, encoding, 'Jack', 6))

Jack and jill went up the hill


### Model 2: Line by line sequence

In [14]:
def seq_to_word_seq(model, encoding, max_length, seed_text, n_words):
    in_text = seed_text

    for _ in range(n_words):
        encoded = tokenize(split_words(in_text), encoding)
        encoded = pre_pad([encoded], max_length)

        yhat = model.forward(encoded)
        print(yhat)
        out_word = ''
        for word, index in encoding.items():
            if index == yhat:
                out_word = word
                break

        in_text += f" {out_word}"

    return in_text

In [15]:
data = """ Jack and Jill went up the hill\n
		To fetch a pail of water\n
		Jack fell down and broke his crown\n
		And Jill came tumbling after\n """

# Text -> Integer
encoding = generate_encoding(split_words(data))
encoded = tokenize(split_words(data), encoding)

vocab_size = len(encoding) + 1
print(f"Vocabulary Size: {vocab_size}")

# Line based sequences
sequences = list()
for line in data.split('\n'):
    encoded = tokenize(split_words(line), encoding)
    for i in range(1, len(encoded)):
        sequence = encoded[:i+1]
        sequences.append(sequence)
print('Total Sequences: %d' % len(sequences))

# Pad
max_length = max([len(seq) for seq in sequences])
sequences = pre_pad(sequences, max_length)
print('Max Sequence Length: %d' % max_length)

X, y = sequences[:,:-1], sequences[:,-1]
y = onehot(y, num_classes=vocab_size)

Vocabulary Size: 22
Total Sequences: 21
Max Sequence Length: 7


In [16]:
# Model
class RNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.l1 = nn.Embedding(vocab_size, 10)
        self.l2 = nn.LSTM(10, 50)
        self.l3 = nn.Linear(50, vocab_size)

    def forward(self, x):
        x = self.l1(x)
        x, _ = self.l2(x)
        x = F.tanh(x)
        x = F.sigmoid(self.l3(x))
        return x

model = RNN()
print(repr(model))

RNN(
  (l1): Embedding(22, 10)
  (l2): LSTM(10, 50)
  (l3): Linear(in_features=50, out_features=22, bias=True)
)


In [17]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=.001)

In [18]:
# Train
model = train_model(model, criterion, optimizer, X.reshape((-1, 1, max_length)), y.argmax(1).reshape((-1, 1)).to(torch.long), n_epoch=500)

  50 | loss: 0.040
 100 | loss: 0.039
 150 | loss: 0.039
 200 | loss: 0.039
 250 | loss: 0.039
 300 | loss: 0.039
 350 | loss: 0.039
 400 | loss: 0.039
 450 | loss: 0.039
 500 | loss: 0.039


In [19]:
print(seq_to_word_seq(model, encoding, max_length-1, 'Jack', 4))
print(seq_to_word_seq(model, encoding, max_length-1, 'Jill', 4))

tensor([[[1.7461e-05, 9.9994e-01, 1.7323e-05, 1.0000e+00, 9.9961e-01,
          1.5241e-06, 1.4573e-04, 1.5090e-04, 1.7473e-05, 9.9959e-01,
          9.9951e-01, 1.5208e-04, 1.0835e-04, 9.9952e-01, 1.5560e-04,
          9.9962e-01, 1.3832e-04, 9.9957e-01, 1.4960e-06, 1.8293e-05,
          1.7916e-05, 1.7286e-05],
         [1.7461e-05, 9.9994e-01, 1.7323e-05, 1.0000e+00, 9.9961e-01,
          1.5241e-06, 1.4573e-04, 1.5090e-04, 1.7473e-05, 9.9959e-01,
          9.9951e-01, 1.5208e-04, 1.0835e-04, 9.9952e-01, 1.5560e-04,
          9.9962e-01, 1.3832e-04, 9.9957e-01, 1.4960e-06, 1.8293e-05,
          1.7916e-05, 1.7286e-05],
         [1.7461e-05, 9.9994e-01, 1.7323e-05, 1.0000e+00, 9.9961e-01,
          1.5241e-06, 1.4573e-04, 1.5090e-04, 1.7473e-05, 9.9959e-01,
          9.9951e-01, 1.5208e-04, 1.0835e-04, 9.9952e-01, 1.5560e-04,
          9.9962e-01, 1.3832e-04, 9.9957e-01, 1.4960e-06, 1.8293e-05,
          1.7916e-05, 1.7286e-05],
         [1.7461e-05, 9.9994e-01, 1.7323e-05, 1.0000e+0

RuntimeError: Boolean value of Tensor with more than one value is ambiguous