**Getting the Data from the Site**

In [5]:
import urllib3

# Download the plain-text edition of the book and keep it as a str in `Data`.

# Create a PoolManager instance
http = urllib3.PoolManager()

# Specify the URL you want to request
url = "https://www.gutenberg.org/cache/epub/11/pg11-images.html" #link to the Book (not requested below)
url_2 = "https://www.gutenberg.org/cache/epub/11/pg11.txt" #Link of plain text of the above mentioned book

# Send an HTTP GET request
response = http.request("GET", url_2)

# urllib3 does not raise on 4xx/5xx responses by itself; fail loudly here so
# an error page is never decoded and used as the training corpus.
if response.status != 200:
    raise RuntimeError(
        "Failed to download %s (HTTP status %d)" % (url_2, response.status)
    )

# Decode the response body into text (nothing is printed here)
Data = response.data.decode('utf-8')


In [6]:
import numpy as np

# Take the downloaded text and convert it to lowercase, so that upper- and
# lower-case forms of a letter share one symbol.
raw_text = Data.lower()

# Vocabulary: every distinct character in sorted order, mapped to its index.
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}

# Summarize the loaded data: corpus length and vocabulary size.
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  167711
Total Vocab:  66


**Prepare the dataset of input to output pairs encoded as integers**

In [7]:
# Slide a window of `seq_length` characters over the text: each window is one
# input pattern and the character immediately after it is the target.
seq_length = 100

# Encode the whole text once. Each window is then a cheap list slice instead
# of `seq_length` dictionary lookups repeated for every overlapping window.
encoded_text = [char_to_int[char] for char in raw_text]

dataX = []
dataY = []
for i in range(n_chars - seq_length):
    dataX.append(encoded_text[i:i + seq_length])
    dataY.append(encoded_text[i + seq_length])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  167611


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim

# Inputs: shape [samples, time steps, features] with a single feature per
# step, scaled by the vocabulary size so the character codes fall in [0, 1).
X = (
    torch.tensor(dataX, dtype=torch.float32)
    .reshape(n_patterns, seq_length, 1)
    .div(float(n_vocab))
)
# Targets: one integer class index (the next character) per sample.
y = torch.tensor(dataY)
print(X.shape, y.shape)

torch.Size([167611, 100, 1]) torch.Size([167611])


In [9]:
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

class CharModel(nn.Module):
    """Single-layer LSTM that predicts the next character from a window of character codes.

    Args:
        vocab_size: Number of output classes (distinct characters). When
            omitted, the notebook-level ``n_vocab`` is read at construction
            time, so a plain ``CharModel()`` call keeps working.
        hidden_size: Width of the LSTM hidden state (default 256).
        dropout: Dropout probability applied before the output layer (default 0.2).
    """
    def __init__(self, vocab_size=None, hidden_size=256, dropout=0.2):
        super().__init__()
        if vocab_size is None:
            # Backward-compatible fallback to the global defined by the notebook.
            vocab_size = n_vocab
        self.lstm = nn.LSTM(input_size=1, hidden_size=hidden_size, num_layers=1, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        """Return logits of shape (batch, vocab_size) for ``x`` of shape (batch, time steps, 1)."""
        x, _ = self.lstm(x)
        # take only the last output
        x = x[:, -1, :]
        # produce output
        x = self.linear(self.dropout(x))
        return x

In [8]:
# Train the character model and save the best-scoring weights to disk.
import copy

n_epochs = 50
batch_size = 128
model = CharModel()

optimizer = optim.Adam(model.parameters())
# reduction="sum" so the per-batch losses below add up to a whole-dataset total.
loss_fn = nn.CrossEntropyLoss(reduction="sum")
loader = data.DataLoader(data.TensorDataset(X, y), shuffle=True, batch_size=batch_size)

best_model = None
best_loss = np.inf
for epoch in range(n_epochs):
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Validation
    # NOTE: this pass reuses the training loader (there is no held-out split),
    # so the number reported is the training loss with dropout disabled.
    model.eval()
    loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in loader:
            y_pred = model(X_batch)
            # .item() accumulates a plain Python float instead of a tensor.
            loss += loss_fn(y_pred, y_batch).item()
    if loss < best_loss:
        best_loss = loss
        # state_dict() hands back references to the live parameter tensors,
        # which later optimizer steps keep mutating. Deep-copy so the snapshot
        # really is the best epoch rather than whatever the last epoch left.
        best_model = copy.deepcopy(model.state_dict())
    print("Epoch %d: Cross-entropy: %.4f" % (epoch, loss))

torch.save([best_model, char_to_int], "single-char.pth")

Epoch 0: Cross-entropy: 479210.7812
Epoch 1: Cross-entropy: 457004.2500
Epoch 2: Cross-entropy: 443024.7188
Epoch 3: Cross-entropy: 431901.3125
Epoch 4: Cross-entropy: 422259.3750
Epoch 5: Cross-entropy: 414639.7500
Epoch 6: Cross-entropy: 408252.2812
Epoch 7: Cross-entropy: 397344.5000
Epoch 8: Cross-entropy: 391695.0938
Epoch 9: Cross-entropy: 383857.7188
Epoch 10: Cross-entropy: 377220.1562
Epoch 11: Cross-entropy: 370299.9062
Epoch 12: Cross-entropy: 364141.9062
Epoch 13: Cross-entropy: 357340.0625
Epoch 14: Cross-entropy: 353803.5938
Epoch 15: Cross-entropy: 348545.3750
Epoch 16: Cross-entropy: 340985.1562
Epoch 17: Cross-entropy: 338929.0000
Epoch 18: Cross-entropy: 332969.2500
Epoch 19: Cross-entropy: 328854.5625
Epoch 20: Cross-entropy: 324042.2188
Epoch 21: Cross-entropy: 320053.8125
Epoch 22: Cross-entropy: 315985.5625
Epoch 23: Cross-entropy: 312282.5000
Epoch 24: Cross-entropy: 307640.5000
Epoch 25: Cross-entropy: 303933.3125
Epoch 26: Cross-entropy: 302122.6562
Epoch 27: C

**Generating Text**

In [10]:
# Pick a random window of `seq_length` characters from the text as a prompt.
seq_length = 100
n_starts = len(raw_text) - seq_length  # exclusive upper bound for the start index
start = np.random.randint(n_starts)
prompt = raw_text[slice(start, start + seq_length)]

In [14]:
import numpy as np
import torch
import torch.nn as nn

# Restore the checkpoint written by the training cell: [state_dict, char_to_int].
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
best_model, char_to_int = torch.load("single-char.pth")
n_vocab = len(char_to_int)
# Inverse lookup (integer code -> character) for decoding predictions.
int_to_char = {code: ch for ch, code in char_to_int.items()}

# reload the model
class CharModel(nn.Module):
    """Single-layer LSTM that predicts the next character from a window of character codes.

    Args:
        vocab_size: Number of output classes (distinct characters). When
            omitted, the notebook-level ``n_vocab`` is read at construction
            time, so a plain ``CharModel()`` call keeps working.
        hidden_size: Width of the LSTM hidden state (default 256).
        dropout: Dropout probability applied before the output layer (default 0.2).
    """
    def __init__(self, vocab_size=None, hidden_size=256, dropout=0.2):
        super().__init__()
        if vocab_size is None:
            # Backward-compatible fallback to the global defined by the notebook.
            vocab_size = n_vocab
        self.lstm = nn.LSTM(input_size=1, hidden_size=hidden_size, num_layers=1, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        """Return logits of shape (batch, vocab_size) for ``x`` of shape (batch, time steps, 1)."""
        x, _ = self.lstm(x)
        # take only the last output
        x = x[:, -1, :]
        # produce output
        x = self.linear(self.dropout(x))
        return x

# Rebuild the network and restore the weights loaded from the checkpoint.
model = CharModel()
model.load_state_dict(best_model)

# Seed generation with a random excerpt of the text.
# NOTE(review): the prompt is 50 characters here, while the training windows
# built earlier in the notebook are 100 characters long - confirm this is intended.
seq_length = 50
raw_text = Data.lower()
start = np.random.randint(0, len(raw_text) - seq_length)
prompt = raw_text[start:start + seq_length]
pattern = list(map(char_to_int.__getitem__, prompt))

model.eval()
print('Prompt: "%s"' % prompt)
with torch.no_grad():
    for i in range(1000):
        # Scale the integer window the same way as the training inputs and
        # shape it as (batch=1, time steps, features=1).
        x = np.asarray(pattern).reshape(1, len(pattern), 1) / float(n_vocab)
        x = torch.tensor(x, dtype=torch.float32)
        # Logits over the vocabulary for the next character.
        prediction = model(x)
        # Greedy decoding: always take the highest-scoring character.
        index = prediction.argmax().item()
        result = int_to_char[index]
        print(result, end="")
        # Slide the window: drop the oldest code, add the newly predicted one.
        pattern = pattern[1:] + [index]
print()
print("Done.")

Prompt: "—“and perhaps you were never even introduced to a "

erere if lhrtle thitg thie she
toeee tai she was not in the
tooe of the tire, “ho wou make
toenting the darten of the
court woth the cart, and the dound
to the sooe, and thnn allie the
aourd tet an to coes and thing
an she could, and sai an the
cal no tie was ootning to the 
tooe of the sare oate and toe
prmee of the sime and the war.
and sande toel a little bool and all
to she shate oaat the whsle so
the foomt on the tas an the could
to the saade to the sheee toine an
the foumouse so he a det fntt
co ant the saadit sote the soeee
an      aed tie project gutenbered th hes bednd
thet she was soiniigg to the
tooe of the sooe, “in aset_ 
io whe winte tase toin thet
iire aod toned an the coum
teat the darte sait, “ho mo toene
io whu hid toe bare in the
courd toted the carerell an the
poosensn a aievert coun and tonne
an                                                                                                               