In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

In [4]:
# Load the training corpus as one string.
filename = "wonderland.txt"
# A context manager closes the file handle as soon as the text has been read,
# instead of leaving it open until the garbage collector gets to it.
with open(filename, 'r', encoding='utf-8') as corpus_file:
    raw_text = corpus_file.read()
# Lowercase everything so upper- and lower-case letters share one character id.
raw_text = raw_text.lower()

In [5]:
# Vocabulary: every distinct character that occurs in the corpus, in sorted order.
chars = sorted(set(raw_text))
# Encoder: character -> integer id, where the id is the character's position in `chars`.
char_to_int = {ch: idx for idx, ch in enumerate(chars)}
# Decoder: integer id -> character (the inverse of `char_to_int`).
int_to_char = {idx: ch for ch, idx in char_to_int.items()}

In [6]:
n_chars = len(raw_text)  # total number of characters in the corpus
n_vocab = len(chars)  # number of distinct characters (size of `chars`, not of the corpus)
dataX = []  # encoded 100-character input windows
dataY = []  # encoded character that follows each window
# Slide a 100-character window over the corpus one position at a time; the
# training target for each window is the character immediately after it.
for start in range(n_chars - 100):
    end = start + 100
    dataX.append([char_to_int[ch] for ch in raw_text[start:end]])
    dataY.append(char_to_int[raw_text[end]])

In [7]:
n_sequence = len(dataX)  # number of training windows
# Inputs as float32 with shape (n_sequence, 100, 1): one feature per time step.
X = torch.tensor(dataX, dtype=torch.float32)
# Character ids run from 0 to n_vocab - 1, so dividing by n_vocab scales them into [0, 1).
X = X.reshape(n_sequence, 100, 1) / float(n_vocab)
# Targets stay as integer class ids, one per window.
y = torch.tensor(dataY)

In [8]:
class LanguageModel(nn.Module):
    """Character-level next-character predictor: LSTM -> dropout -> linear.

    Args:
        vocab_size: Number of output classes (one logit per character). When
            omitted, the module-level ``n_vocab`` is used, so ``LanguageModel()``
            keeps working exactly as before.
        hidden_size: Size of the LSTM hidden state (default 256).
        dropout: Dropout probability applied to the last LSTM output (default 0.2).
    """
    def __init__(self, vocab_size=None, hidden_size=256, dropout=0.2):
        super().__init__()
        if vocab_size is None:
            # Fall back to the notebook-level vocabulary size.
            vocab_size = n_vocab
        # input_size=1: each time step carries a single scalar feature.
        self.lstm = nn.LSTM(input_size=1, hidden_size=hidden_size, num_layers=1, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        """Return logits of shape (batch, vocab_size) for input of shape (batch, seq_len, 1)."""
        # With batch_first=True the LSTM output is (batch, seq_len, hidden_size).
        x, _ = self.lstm(x)
        # Keep only the output at the final time step.
        x = x[:, -1, :]
        x = self.dropout(x)
        # Project to one raw score (logit) per vocabulary entry.
        x = self.linear(x)
        return x

In [9]:
# Seed PyTorch's global RNG before the model is created so that weight
# initialisation, batch shuffling and dropout masks are repeatable when the
# notebook is re-run from a fresh kernel (same machine and library versions).
SEED = 42
torch.manual_seed(SEED)

model = LanguageModel()
optimizer = optim.Adam(model.parameters())  # Adam with its default hyper-parameters
loss_fn = nn.CrossEntropyLoss()  # expects raw logits and integer class targets
loader = data.DataLoader(data.TensorDataset(X, y), shuffle=True, batch_size=128)
model.train()  # training mode: the dropout layer is active
for epoch in range(5):
    print("Running Epoch %d ..." % epoch)
    for X_batch, y_batch in loader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        # Clear gradients left over from the previous step before back-propagating.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
print("Finished training")

Running Epoch 0 ...
Running Epoch 1 ...
Running Epoch 2 ...
Running Epoch 3 ...
Running Epoch 4 ...
Finished training


In [17]:
def predict(prompt, length=1000):
    """Generate text by repeatedly predicting the next character after `prompt`.

    The prompt seeds a sliding window of character ids. At each step the model
    predicts the most likely next character (greedy argmax); that character is
    appended to the window and the oldest one is dropped, so the window keeps
    its length.

    Put the model in evaluation mode (``model.eval()``) before calling this,
    otherwise dropout stays active during generation.

    Args:
        prompt: Seed text. It is lowercased, because the training corpus was
            lowercased, and characters outside the vocabulary are ignored.
        length: Number of characters to generate (default 1000).

    Returns:
        The `length` generated characters as a string; the prompt itself is
        not included.

    Raises:
        ValueError: If the prompt contains no character from the vocabulary.
    """
    # Normalise the prompt like the training text and skip unknown characters
    # instead of failing with a KeyError on, e.g., upper-case letters.
    window = [char_to_int[c] for c in prompt.lower() if c in char_to_int]
    if not window:
        raise ValueError("prompt must contain at least one character from the training vocabulary")
    generated = []  # every predicted id, kept separately from the sliding window
    with torch.no_grad():
        for _ in range(length):
            # Shape (1, window_len, 1), scaled by n_vocab exactly like the training inputs.
            x = torch.tensor(window, dtype=torch.float32).reshape(1, len(window), 1) / float(n_vocab)
            prediction = model(x)  # logits, one per vocabulary entry
            index = int(prediction.argmax())  # id of the highest-scoring character
            generated.append(index)
            # Slide the window: add the new character, drop the oldest.
            window.append(index)
            window = window[1:]
    return ''.join(int_to_char[i] for i in generated)

In [28]:
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell at the top.
import gradio as gr
# Switch to evaluation mode so the model's Dropout layer does not drop units while generating text.
model.eval()
def text_generation(prompt):
    """Gradio callback: return whatever `predict` produces for `prompt`."""
    return predict(prompt)
# Build a web UI with one text input and one text output around the callback and start serving it.
gr.Interface(fn=text_generation, inputs=["text"], outputs=["text"]).launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


