In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random

# Sample text dataset
text = "hello world this is a simple text generation example using LSTM."
chars = sorted(set(text))  # Unique characters
char_to_idx = {ch: i for i, ch in enumerate(chars)}
idx_to_char = {i: ch for ch, i in char_to_idx.items()}
vocab_size = len(chars)

# Convert text to sequences
def text_to_sequence(text, seq_length):
    inputs, targets = [], []
    for i in range(len(text) - seq_length):
        inputs.append([char_to_idx[ch] for ch in text[i:i+seq_length]])
        targets.append(char_to_idx[text[i+seq_length]])
    return torch.tensor(inputs), torch.tensor(targets)

seq_length = 10
X, Y = text_to_sequence(text, seq_length)

# Define RNN model
class CharRNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_size, num_layers):
        super(CharRNN, self).__init__()
        self.embed = nn.Embedding(vocab_size, embedding_dim)  # Convert to embeddings
        self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)
        self.num_layers = num_layers
        self.hidden_size = hidden_size

    def forward(self, x, hidden):
        x = self.embed(x)  # Convert to embeddings
        out, hidden = self.lstm(x, hidden)
        out = self.fc(out[:, -1, :])  # Predict last character in sequence
        return out, hidden

    def init_hidden(self, batch_size):
        return (torch.zeros(self.num_layers, batch_size, self.hidden_size),
                torch.zeros(self.num_layers, batch_size, self.hidden_size))

# Hyperparameters
embedding_dim = 16
hidden_size = 128
num_layers = 2
learning_rate = 0.005
num_epochs = 500
batch_size = X.shape[0]
teacher_forcing_ratio = 0.5  # Teacher forcing probability

# Model, loss function, optimizer
model = CharRNN(vocab_size, embedding_dim, hidden_size, num_layers)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop with Teacher Forcing
for epoch in range(num_epochs):
    hidden = model.init_hidden(batch_size)
    optimizer.zero_grad()

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    if use_teacher_forcing:
        # Use real target as input (Teacher Forcing)
        outputs, _ = model(X, hidden)
    else:
        # Free Running Mode (use predicted output as next input)
        outputs = []
        input_seq = X[:, 0].unsqueeze(1)  # Start with first char in each batch
        for _ in range(seq_length):
            output, hidden = model(input_seq, hidden)
            outputs.append(output)
            input_seq = torch.argmax(output, dim=1).unsqueeze(1)  # Next input = predicted char
        outputs = torch.stack(outputs, dim=1)[:, -1, :]  # Take last prediction

    loss = criterion(outputs, Y)
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 50 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Text generation function with sampling
def generate_text(start_text, length=100, temperature=1.0, greedy=False):
    model.eval()
    input_seq = torch.tensor([[char_to_idx[ch] for ch in start_text]])
    hidden = model.init_hidden(1)

    result = start_text
    for _ in range(length):
        with torch.no_grad():
            output, hidden = model(input_seq, hidden)
            if greedy:
                predicted_idx = torch.argmax(output).item()  # Greedy search
            else:
                probabilities = torch.softmax(output / temperature, dim=1)
                predicted_idx = torch.multinomial(probabilities, 1).item()  # Sampling

            result += idx_to_char[predicted_idx]
            input_seq = torch.tensor([[predicted_idx]])

    return result

# Generate text with greedy search and temperature sampling
print("Greedy Search Output:")
print(generate_text("hello", 100, greedy=True))

print("\nTemperature Sampling (T=0.8):")
print(generate_text("hello", 100, temperature=0.8, greedy=False))


Epoch [50/500], Loss: 1.4572
Epoch [100/500], Loss: 0.2569
Epoch [150/500], Loss: 1.5625
Epoch [200/500], Loss: 0.0174
Epoch [250/500], Loss: 1.2188
Epoch [300/500], Loss: 0.0040
Epoch [350/500], Loss: 1.2114
Epoch [400/500], Loss: 0.0074
Epoch [450/500], Loss: 1.4155
Epoch [500/500], Loss: 0.0057
Greedy Search Output:
hellodddiiiiiiddiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii

Temperature Sampling (T=0.8):
hellodddidiiiiiiidiiiiiiiiiiiiiiiiiiii iii iini  iiiiiii iiiiininniiiiiiniiiiniinniiiiiiiiiiii iiii iiiii
