In [1]:
# Load the whole training corpus into memory as a single string.
with open("shakespeare.txt", "r", encoding="utf-8") as corpus_file:
    text = corpus_file.read()

In [2]:
# Character-level vocabulary: every distinct character of the corpus, sorted.
chars = sorted(set(text))
vocab_size = len(chars)
print(f"Total characters: {vocab_size}")

Total characters: 65


In [3]:
# Build index -> character from the sorted vocabulary, then invert it to get
# character -> index.
idx2char = dict(enumerate(chars))
char2idx = {ch: i for i, ch in idx2char.items()}

In [4]:
# Encode the corpus as a list of integer ids, one per character.
text_tokenized = list(map(char2idx.__getitem__, text))

In [5]:
# Length, in characters, of each sliding-window input.
seq_len = 100

# Two separate, initially empty containers for the windows and their targets.
input_seq, output_seq = [], []

In [6]:
# Build every sliding window of seq_len tokens together with the token that
# immediately follows it.
# The lists are rebuilt from scratch instead of appended to, so re-running
# this cell cannot duplicate examples.
num_windows = len(text_tokenized) - seq_len
input_seq = [text_tokenized[i:i + seq_len] for i in range(num_windows)]
output_seq = [text_tokenized[i + seq_len] for i in range(num_windows)]


In [7]:
import torch
# Convert windows 20000..69999 (and their targets) to long tensors.
subset = slice(20000, 70000)
X, y = (
    torch.tensor(seqs[subset], dtype=torch.long)
    for seqs in (input_seq, output_seq)
)
print(X.shape, y.shape)

torch.Size([50000, 100]) torch.Size([50000])


In [8]:
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader


In [9]:
class CharDataset(Dataset):
    """Sliding-window next-character dataset over a single text.

    Sample ``i`` is the pair ``(x, y)`` where ``x`` holds the token ids of
    ``text[i : i + seq_length]`` and ``y`` is the id of the character that
    immediately follows that window.

    Args:
        text: Source string; every character must be a key of ``char2idx``.
        seq_length: Number of characters in each input window.
        char2idx: Mapping from character to integer token id.

    Raises:
        KeyError: If ``text`` contains a character missing from ``char2idx``.
    """

    def __init__(self, text, seq_length, char2idx):
        self.text = text
        self.seq_length = seq_length
        self.char2idx = char2idx
        # Encode once up front so __getitem__ only has to slice.
        self.data = [char2idx[c] for c in text]

    def __len__(self):
        """Return the number of complete (window, next-character) pairs."""
        # A text no longer than the window yields no samples; clamp to zero so
        # that len() does not raise on a negative value.
        return max(0, len(self.data) - self.seq_length)

    def __getitem__(self, idx):
        """Return ``(x, y)`` as ``torch.long`` tensors for sample ``idx``.

        Negative indices count from the end, as with a list.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            # Not "+=": idx may be a tensor and must not be modified in place.
            idx = idx + size
        if not 0 <= idx < size:
            raise IndexError(f"sample index out of range for dataset of size {size}")
        stop = idx + self.seq_length
        x = torch.tensor(self.data[idx:stop], dtype=torch.long)
        y = torch.tensor(self.data[stop], dtype=torch.long)
        return x, y


In [10]:
class ShakespeareLSTM(nn.Module):
    """Next-character model: embedding -> stacked LSTM -> linear head.

    For a batch of token-id sequences it produces one row of vocabulary
    logits per sequence, computed from the LSTM output at the final time step.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_size, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden=None):
        """Run one forward pass.

        Args:
            x: Batch-first tensor of token ids.
            hidden: Optional LSTM state to continue from; ``None`` lets the
                LSTM start from its default initial state.

        Returns:
            ``(logits, hidden)``: logits for the last time step of every
            sequence, and the LSTM state after consuming ``x``.
        """
        embedded = self.embedding(x)
        lstm_out, next_hidden = self.lstm(embedded, hidden)
        # Only the final time step is used for the prediction.
        final_step = lstm_out[:, -1, :]
        logits = self.fc(final_step)
        return logits, next_hidden

In [11]:
# --- Model architecture ---
embedding_dim = 128   # size of each character embedding vector
hidden_size = 512     # LSTM hidden units per layer
num_layers = 2        # number of stacked LSTM layers

# --- Data ---
seq_len = 150         # window length; overrides the seq_len = 100 set in an earlier cell
batch_size = 512

# --- Optimisation ---
num_epochs = 30
lr = 1e-3

In [12]:
# Sliding-window dataset over the raw text, served as shuffled mini-batches
# from pinned host memory.
dataset = CharDataset(text=text, seq_length=seq_len, char2idx=char2idx)
train_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    pin_memory=True,
)

In [13]:
# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

model = ShakespeareLSTM(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_size=hidden_size,
    num_layers=num_layers,
).to(device)

# Cross-entropy over the vocabulary logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [14]:
# Train for num_epochs passes over train_loader and print the mean per-batch
# loss after each epoch.
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for X_batch, y_batch in train_loader:
        # train_loader pins host memory, so non_blocking=True lets these
        # host-to-device copies run asynchronously instead of stalling the
        # host; for CPU-to-CPU moves the flag has no effect.
        X_batch = X_batch.to(device, non_blocking=True)
        y_batch = y_batch.view(-1).to(device, non_blocking=True)

        optimizer.zero_grad()

        # The returned LSTM state is discarded: every batch starts from the
        # default initial state.
        outputs, _ = model(X_batch)
        loss = criterion(outputs, y_batch)

        loss.backward()
        optimizer.step()

        # .item() yields a plain Python number, so the running sum does not
        # hold on to the autograd graph.
        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")


Epoch 1, Loss: 1.6175
Epoch 2, Loss: 1.3190
Epoch 3, Loss: 1.2428
Epoch 4, Loss: 1.1923
Epoch 5, Loss: 1.1516
Epoch 6, Loss: 1.1152
Epoch 7, Loss: 1.0838
Epoch 8, Loss: 1.0541
Epoch 9, Loss: 1.0255
Epoch 10, Loss: 1.0027
Epoch 11, Loss: 0.9778
Epoch 12, Loss: 0.9572
Epoch 13, Loss: 0.9397
Epoch 14, Loss: 0.9217
Epoch 15, Loss: 0.9085
Epoch 16, Loss: 0.8984
Epoch 17, Loss: 0.8892
Epoch 18, Loss: 0.8771
Epoch 19, Loss: 0.8764
Epoch 20, Loss: 0.8655
Epoch 21, Loss: 0.8647
Epoch 22, Loss: 0.8598
Epoch 23, Loss: 0.8573
Epoch 24, Loss: 0.8607
Epoch 25, Loss: 0.8666
Epoch 26, Loss: 0.8614
Epoch 27, Loss: 0.8614
Epoch 28, Loss: 0.8655
Epoch 29, Loss: 0.8714
Epoch 30, Loss: 0.8768


In [17]:
import torch
import torch.nn.functional as F

def generate_text(model, start_text, char2idx, idx2char, length=500, temperature=1.0, device="cpu"):
    """Sample ``length`` characters from ``model``, continuing ``start_text``.

    The whole prompt is fed through the model once with no initial state;
    afterwards each sampled character is fed back on its own together with
    the hidden state returned by the previous call.

    Args:
        model: Callable as ``model(input_ids, hidden)`` returning
            ``(logits, hidden)`` where ``logits`` has shape
            ``[1, vocab_size]``. It is switched to eval mode and left there.
        start_text: Non-empty prompt; every character must be a key of
            ``char2idx``.
        char2idx: Mapping from character to token id.
        idx2char: Mapping from token id back to character.
        length: Number of characters to generate after the prompt.
        temperature: Positive divisor applied to the logits before softmax.
            Values below 1 sharpen the distribution, values above 1 flatten it.
        device: Device on which the input tensors are created; it must match
            the device the model lives on.

    Returns:
        ``start_text`` followed by the ``length`` sampled characters.

    Raises:
        ValueError: If ``start_text`` is empty or ``temperature`` is not
            positive.
        KeyError: If ``start_text`` contains a character missing from
            ``char2idx``.
    """
    # Fail early with a clear message: an empty prompt would otherwise crash
    # inside the recurrent layer, and a non-positive temperature would turn
    # the logits into inf/NaN before sampling.
    if not start_text:
        raise ValueError("start_text must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    model.eval()
    prompt_ids = [char2idx[c] for c in start_text]
    generated = list(start_text)

    # Shape [1, len(start_text)]: a batch holding the single prompt.
    input_seq = torch.tensor([prompt_ids], dtype=torch.long, device=device)

    hidden = None
    with torch.no_grad():
        for _ in range(length):
            output, hidden = model(input_seq, hidden)

            # Drop the batch dimension, rescale by temperature, then sample.
            probs = F.softmax(output.squeeze(0) / temperature, dim=-1)
            idx = torch.multinomial(probs, 1).item()
            generated.append(idx2char[idx])

            # The next step sees only the new character; earlier context is
            # carried by the hidden state.
            input_seq = torch.tensor([[idx]], dtype=torch.long, device=device)

    return "".join(generated)


In [18]:
# Sample 500 characters continuing a "ROMEO:" prompt at temperature 0.8.
start_text = "ROMEO:"
generated_text = generate_text(
    model,
    start_text,
    char2idx,
    idx2char,
    length=500,
    temperature=0.8,
    device=device,
)
print(generated_text)

ROMEO:
My noble lords,
Of whether I have my tongue presently;
And so before the corse of true and boldness
To Bolingbroke, to take up your bodies sound
With thee and grace I hate him from my soldiers.
My absence, here and hear my soul and weep!
Good world, the king's, and thou art not.

BENVOLIO:
Mistress, we owe to say the fault's to mother:
And so it is more sleep or tears are thus,
That in the time shall be thy wife to fly.

WARWICK:
Thou hast not wed to do some ill him.

QUEEN ELIZABETH:
Thou dids


In [21]:
# Sample 700 characters continuing a "WARWICK:" prompt at temperature 1.5.
start_text = "WARWICK:"
generated_text = generate_text(
    model,
    start_text,
    char2idx,
    idx2char,
    length=700,
    temperature=1.5,
    device=device,
)
print(generated_text)

WARWICK:
Dyed sleep unsworn, poor-lanched cle.
Which negt islead o'cur destroying fire;
There's good till Minoly.
Now, my Saint George, ebect us thyself,
Upon request in much jeholding heart; and, Edward,
Will not Italy to make mine ear to flie?
Spick loss that full of your deeds are been
Beauted.

BAPTISTA:
Ay, father.
This art, my dearh, and if thy foot at home?
Yet, baggag-ear, you make a forehalf love:
Give no wood-heart of retreaty, bohe not lights.
Riarch, sluile, to the tackle; without his Jen;'
But, we not so, is upridging told
I' hang upon reverent and mild interch
Red-ass, Norder Plottenly, unhappy indeed;
Pots thy Edward's coast commandeds strong bale tale.
At thy ill-skitful and be soy w
