Run the code and compute the prediction accuracy. Replace the LSTM layer with a regular
RNN layer and compare the prediction accuracy of the two designs. Replace the LSTM layer with two regular RNN layers, and compute the prediction accuracy. 

Final design: an LSTM class, a single-layer RNN class, and a double-layer RNN class. All three are trained and their accuracies compared. Note that, to save training time, I trained on only the first 20,000 characters and for only three epochs per model; a full run would take significantly longer (increase the text size and the number of epochs, and run on a GPU instead of my CPU). Also note that the reported accuracy is computed on the same data the models were trained on, so it measures how well each model fits the training text rather than how well it generalizes.

In [9]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

#define dataset class
class TextDataset(Dataset):
    """Character-level next-character dataset built from sliding windows.

    Item ``i`` is the pair ``(inputs, targets)``: ``inputs`` holds the indices
    of characters ``i .. i + seq_length - 1`` of the text and ``targets`` holds
    the same window shifted one character to the right.

    Args:
        text: The raw text the windows are drawn from.
        seq_length: Number of characters in every input/target window.
        vocab: Iterable of characters; a character's position in it becomes
            its integer index.

    Raises:
        KeyError: If ``text`` contains a character that is not in ``vocab``.
    """

    def __init__(self, text, seq_length, vocab):
        self.seq_length = seq_length
        self.char2idx = {ch: i for i, ch in enumerate(vocab)}
        self.data = [self.char2idx[ch] for ch in text]

    def __len__(self):
        # Clamp at zero: a text no longer than seq_length has no complete
        # window, and len() raises ValueError on a negative __len__.
        return max(0, len(self.data) - self.seq_length)

    def __getitem__(self, idx):
        """Return the ``(inputs, targets)`` index tensors for window ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            idx += size
        if not 0 <= idx < size:
            # Without this check an out-of-range index silently produces
            # short or empty tensors, and iterating the dataset directly
            # (which relies on IndexError to stop) never terminates.
            raise IndexError(
                f"index {idx} out of range for dataset of length {size}"
            )
        stop = idx + self.seq_length
        return (
            torch.tensor(self.data[idx:stop], dtype=torch.long),
            torch.tensor(self.data[idx + 1:stop + 1], dtype=torch.long),
        )

# original lstm model
class LSTMModel(nn.Module):
    """Sequence model: token embedding -> one LSTM layer -> linear output head.

    Args:
        vocab_size: Number of distinct tokens; sizes both the embedding table
            and the width of the output scores.
        embed_dim: Size of each token embedding.
        hidden_size: Size of the LSTM hidden state.
    """

    def __init__(self, vocab_size, embed_dim, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embed_dim,
        )
        # batch_first=True: the recurrent layer reads its input batch-major.
        self.rnn = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_size,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=vocab_size)

    def forward(self, x):
        """Map token indices ``x`` to one score vector per input position."""
        embedded = self.embedding(x)
        # Only the per-step outputs are used; the final state is dropped.
        step_outputs, _final_state = self.rnn(embedded)
        scores = self.fc(step_outputs)
        return scores

# single layer rnn
class SingleRNNModel(nn.Module):
    """Sequence model: token embedding -> one plain ``nn.RNN`` layer -> linear head.

    Takes the same constructor arguments as ``LSTMModel`` so the two can be
    swapped for one another.

    Args:
        vocab_size: Number of distinct tokens (embedding rows and output width).
        embed_dim: Size of each token embedding.
        hidden_size: Size of the RNN hidden state.
    """

    def __init__(self, vocab_size, embed_dim, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.RNN(
            input_size=embed_dim,
            hidden_size=hidden_size,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        """Map token indices ``x`` to one score vector per input position."""
        # nn.RNN returns (per-step outputs, final hidden state); keep the first.
        per_step = self.rnn(self.embedding(x))[0]
        return self.fc(per_step)

# double layer rnn
class StackedRNNModel(nn.Module):
    """Sequence model: token embedding -> two chained ``nn.RNN`` layers -> linear head.

    The second recurrent layer consumes the per-step outputs of the first, so
    both layers use ``hidden_size`` as their hidden width.

    Args:
        vocab_size: Number of distinct tokens (embedding rows and output width).
        embed_dim: Size of each token embedding.
        hidden_size: Hidden-state size of both recurrent layers.
    """

    def __init__(self, vocab_size, embed_dim, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        # First layer maps embeddings to hidden states; second maps hidden to hidden.
        self.rnn1 = nn.RNN(input_size=embed_dim, hidden_size=hidden_size, batch_first=True)
        self.rnn2 = nn.RNN(input_size=hidden_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        """Map token indices ``x`` to one score vector per input position."""
        features = self.embedding(x)
        # Feed each layer the per-step outputs of the previous one; the final
        # hidden states are not needed.
        for layer in (self.rnn1, self.rnn2):
            features, _ = layer(features)
        return self.fc(features)

# function for calculating accuracy
def compute_accuracy(model, dataloader, device):
    """Return the fraction of target positions the model predicts correctly.

    The prediction at each position is the argmax over the last axis of the
    model output; it counts as correct when it equals the target there.

    Args:
        model: ``nn.Module`` whose output carries the class scores on its
            last axis.
        dataloader: Iterable yielding ``(inputs, targets)`` tensor pairs.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``correct / total`` as a float, or ``0.0`` when the dataloader yields
        no targets (instead of raising ZeroDivisionError).
    """
    # Remember the caller's mode so evaluation does not leave the model
    # switched to eval mode as a hidden side effect.
    was_training = model.training
    model.eval()
    correct, total = 0, 0
    try:
        with torch.no_grad():
            for x, y in dataloader:
                x, y = x.to(device), y.to(device)
                # dim=-1 selects the class axis whatever the output rank is.
                preds = model(x).argmax(dim=-1)
                correct += (preds == y).sum().item()
                total += y.numel()
    finally:
        model.train(was_training)
    return correct / total if total else 0.0

# prepare dataset
# Read the corpus and keep only its first 20,000 characters.
with open('input.txt', 'r', encoding='utf-8') as f:
    text = f.read()[:20000]

# The vocabulary is the sorted set of characters occurring in the truncated text.
vocab = sorted(set(text))
vocab_size = len(vocab)

# Window length and model sizes shared by every model trained below.
seq_length = 25
embed_dim = 64
hidden_size = 128

# One training example per sliding window, served in shuffled batches of 16.
dataset = TextDataset(text, seq_length, vocab)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

# Use CUDA when PyTorch reports it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# train-and-evaluate function
def train_and_eval(ModelClass, name, *, epochs=3, lr=0.003,
                   train_loader=None, eval_loader=None):
    """Train a fresh ``ModelClass`` instance and print its loss and accuracy.

    The model is built from the module-level ``vocab_size``, ``embed_dim`` and
    ``hidden_size``, moved to the module-level ``device``, and optimised with
    Adam on a cross-entropy loss. After each epoch the sum of the per-batch
    losses is printed; after training the accuracy returned by
    ``compute_accuracy`` is printed.

    Args:
        ModelClass: Callable taking ``(vocab_size, embed_dim, hidden_size)``
            and returning an ``nn.Module``.
        name: Label used in the printed progress lines.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.
        train_loader: Iterable of ``(inputs, targets)`` batches to train on;
            defaults to the module-level ``dataloader``.
        eval_loader: Iterable of batches used for the final accuracy; defaults
            to ``train_loader``, so pass a held-out loader here to measure
            generalisation rather than fit to the training data.

    Returns:
        None. Results are reported through ``print``.
    """
    if train_loader is None:
        train_loader = dataloader
    if eval_loader is None:
        eval_loader = train_loader

    model = ModelClass(vocab_size, embed_dim, hidden_size).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    print(f"Training {name}...")
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            out = model(x)
            # Collapse all leading axes so the loss sees one row of scores
            # per target; the width is taken from the output itself rather
            # than from the global vocab_size.
            loss = criterion(out.reshape(-1, out.size(-1)), y.reshape(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"{name} - Epoch {epoch+1} complete - Loss: {total_loss:.4f}")

    acc = compute_accuracy(model, eval_loader, device)
    print(f"{name} Accuracy: {acc:.4f}\n")

# train ALL models, print accuracies
# Same training/evaluation routine for each architecture, in a fixed order.
for model_cls, label in (
    (LSTMModel, "LSTM"),
    (SingleRNNModel, "Single-layer RNN"),
    (StackedRNNModel, "Stacked RNN (2 layers)"),
):
    train_and_eval(model_cls, label)


Training LSTM...
LSTM - Epoch 1 complete - Loss: 2224.3572
LSTM - Epoch 2 complete - Loss: 1463.7744
LSTM - Epoch 3 complete - Loss: 1151.1786
LSTM Accuracy: 0.7527

Training Single-layer RNN...
Single-layer RNN - Epoch 1 complete - Loss: 2161.0434
Single-layer RNN - Epoch 2 complete - Loss: 1593.2497
Single-layer RNN - Epoch 3 complete - Loss: 1429.4207
Single-layer RNN Accuracy: 0.6719

Training Stacked RNN (2 layers)...
Stacked RNN (2 layers) - Epoch 1 complete - Loss: 1983.2438
Stacked RNN (2 layers) - Epoch 2 complete - Loss: 1298.2049
Stacked RNN (2 layers) - Epoch 3 complete - Loss: 1117.8939
Stacked RNN (2 layers) Accuracy: 0.7484

