# Importing Packages

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

# load ASCII text and convert to lowercase

In [2]:
# Read the whole corpus into memory and normalise it to lowercase so the
# vocabulary does not contain separate upper- and lower-case symbols.
filename = "wonderland.txt"
# Use a context manager so the file handle is closed deterministically
# instead of being left open until garbage collection.
with open(filename, 'r', encoding='utf-8') as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()

# create mapping of unique chars to integers

In [3]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(raw_text))
# Lookup table from a character to its index in `chars`.
char_to_int = {symbol: index for index, symbol in enumerate(chars)}

# summarize the loaded data

In [4]:
# Corpus length (in characters) and number of distinct characters.
n_chars, n_vocab = len(raw_text), len(chars)
for label, count in (("Total Characters: ", n_chars), ("Total Vocab: ", n_vocab)):
    print(label, count)

Total Characters:  13637
Total Vocab:  41


# prepare the dataset of input to output pairs encoded as integers

In [5]:
# Build (input window, next character) training pairs with a stride of 1.
seq_length = 100
# Encode the corpus once; every window below is then a plain slice of it.
encoded_text = [char_to_int[symbol] for symbol in raw_text]
window_starts = range(n_chars - seq_length)
# dataX[k] holds the `seq_length` character codes starting at position k ...
dataX = [encoded_text[start:start + seq_length] for start in window_starts]
# ... and dataY[k] holds the code of the character that follows that window.
dataY = [encoded_text[start + seq_length] for start in window_starts]
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  13537


# reshape X to be [samples, time steps, features]

In [6]:
# Inputs: float tensor of shape (n_patterns, seq_length, 1), with the integer
# character codes scaled by the vocabulary size.
encoded_windows = torch.tensor(dataX, dtype=torch.float32)
X = encoded_windows.reshape(n_patterns, seq_length, 1) / float(n_vocab)
# Targets: integer character code of the character following each window.
y = torch.tensor(dataY)

In [7]:
class CharModel(nn.Module):
    """Next-character predictor: a stacked LSTM followed by a linear head.

    The network reads a sequence with one feature per time step, keeps the
    LSTM output of the last time step only, applies dropout and projects it
    to one unnormalized score per vocabulary entry.

    Args:
        vocab_size: Number of output classes. When ``None`` (the default) the
            notebook-level ``n_vocab`` variable is used, which preserves the
            original ``CharModel()`` behavior.
        hidden_size: Width of each LSTM layer (default 256).
        num_layers: Number of stacked LSTM layers (default 2).
        dropout: Dropout probability used between LSTM layers and before the
            linear head (default 0.2).
    """

    def __init__(self, vocab_size=None, hidden_size=256, num_layers=2, dropout=0.2):
        super().__init__()
        if vocab_size is None:
            # Backward-compatible fallback to the global defined by the notebook.
            vocab_size = n_vocab
        # nn.LSTM only applies dropout between layers and warns when asked for
        # dropout with a single layer, so disable it in that case.
        lstm_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size=1, hidden_size=hidden_size, num_layers=num_layers,
                            batch_first=True, dropout=lstm_dropout)
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        """Return per-class scores for the element following each sequence.

        Args:
            x: Tensor of shape (batch, time steps, 1).

        Returns:
            Tensor of shape (batch, vocab_size) with unnormalized scores.
        """
        x, _ = self.lstm(x)
        # take only the last output
        x = x[:, -1, :]
        # produce output
        x = self.linear(self.dropout(x))
        return x

In [8]:
# Training hyper-parameters: number of passes over the data and mini-batch size.
n_epochs, batch_size = 40, 128
# Fresh, untrained network.
model = CharModel()

In [9]:
# Adam with its default settings over all model parameters.
optimizer = optim.Adam(model.parameters())
# Summed (not averaged) cross-entropy, so per-batch losses can be added up
# into a total over the whole dataset.
loss_fn = nn.CrossEntropyLoss(reduction="sum")
# Shuffled mini-batches of (input window, next character) pairs.
train_set = data.TensorDataset(X, y)
loader = data.DataLoader(train_set, batch_size=batch_size, shuffle=True)

In [10]:
# Train for `n_epochs` epochs and remember the weights of the epoch with the
# lowest total cross-entropy.
best_model = None
best_loss = np.inf
for epoch in range(n_epochs):
    # --- optimisation pass ---
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # --- evaluation pass ---
    # NOTE: this iterates over the same `loader` used for training, so the
    # reported number is the training-set loss (with dropout disabled), not a
    # held-out validation loss.
    model.eval()
    loss = 0
    with torch.no_grad():
        for X_batch, y_batch in loader:
            y_pred = model(X_batch)
            loss += loss_fn(y_pred, y_batch)
        if loss < best_loss:
            best_loss = loss
            # state_dict() returns references to the live parameter tensors,
            # which the optimizer keeps updating in place. Clone them so the
            # snapshot really holds this epoch's weights rather than whatever
            # the model contains when training ends.
            best_model = {name: tensor.clone() for name, tensor in model.state_dict().items()}
        print("Epoch %d: Cross-entropy: %.4f" % (epoch, loss))

Epoch 0: Cross-entropy: 39880.2852
Epoch 1: Cross-entropy: 39856.3789
Epoch 2: Cross-entropy: 39745.0703
Epoch 3: Cross-entropy: 38755.0742
Epoch 4: Cross-entropy: 36768.9492
Epoch 5: Cross-entropy: 35420.2539
Epoch 6: Cross-entropy: 34578.8750
Epoch 7: Cross-entropy: 33950.4766
Epoch 8: Cross-entropy: 33060.2422
Epoch 9: Cross-entropy: 32418.5059
Epoch 10: Cross-entropy: 31766.1328
Epoch 11: Cross-entropy: 31211.8066
Epoch 12: Cross-entropy: 30502.1895
Epoch 13: Cross-entropy: 29871.3340
Epoch 14: Cross-entropy: 29196.6133
Epoch 15: Cross-entropy: 28696.2520
Epoch 16: Cross-entropy: 27747.6758
Epoch 17: Cross-entropy: 27323.6309
Epoch 18: Cross-entropy: 26494.3848
Epoch 19: Cross-entropy: 25882.6211
Epoch 20: Cross-entropy: 25064.1777
Epoch 21: Cross-entropy: 24283.6074
Epoch 22: Cross-entropy: 23618.2051
Epoch 23: Cross-entropy: 22911.4004
Epoch 24: Cross-entropy: 22503.9043
Epoch 25: Cross-entropy: 21355.6230
Epoch 26: Cross-entropy: 20680.0879
Epoch 27: Cross-entropy: 20011.5215
Ep

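The cross-entropy decreases in almost every epoch. This suggests the model has probably not fully converged, so we could train it for more epochs. Note that this loss is measured on the training data itself, so it shows how well the model fits the text, not how well it generalizes.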

In [11]:
# Persist the best weights together with the character-to-index table, which
# is needed to encode text the same way when the model is loaded again.
checkpoint = [best_model, char_to_int]
torch.save(checkpoint, "single-char_v1.1.pth")