# 08. RNNs & LSTMs

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/gaurav-redhat/pytorch_tutorial/blob/main/08_rnn_lstm/demo.ipynb)

---

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

## Character-Level Language Model

In [None]:
# Build a character vocabulary from a small in-memory corpus.
text = "hello world, this is a simple example of character level language model."

# Unique characters in sorted order, so index assignment is deterministic.
chars = sorted(set(text))
vocab_size = len(chars)

# Two-way lookup tables: index -> character, then its inverse.
idx_to_char = dict(enumerate(chars))
char_to_idx = {ch: idx for idx, ch in idx_to_char.items()}

print(f'Vocab size: {vocab_size}')
print(f'Characters: {chars}')

In [None]:
class CharLSTM(nn.Module):
    """Character-level language model: embedding -> LSTM -> linear projection.

    Args:
        vocab_size: Number of distinct token ids; sizes both the embedding
            table and the output layer.
        embed_dim: Width of each token embedding (LSTM input size).
        hidden_dim: Width of the LSTM hidden state.
    """

    def __init__(self, vocab_size, embed_dim=32, hidden_dim=64):
        super().__init__()
        # Keep this creation order: parameter initialisation consumes the
        # global RNG, so reordering would change seeded initial weights.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embed_dim,
        )
        self.lstm = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)

    def forward(self, x, hidden=None):
        """Score the next token at every position of ``x``.

        Args:
            x: Integer tensor of token ids. The LSTM is built with
                ``batch_first=True``, so batched input is expected as
                (batch, seq_len).
            hidden: Optional LSTM state from a previous call, forwarded
                unchanged to ``nn.LSTM``; ``None`` uses the LSTM's default
                initial state.

        Returns:
            A ``(logits, hidden)`` pair: ``logits`` has ``vocab_size``
            entries along its last dimension, and ``hidden`` is the state
            returned by the LSTM, which can be passed back in to continue
            the sequence.
        """
        token_vectors = self.embedding(x)
        lstm_out, next_hidden = self.lstm(token_vectors, hidden)
        return self.fc(lstm_out), next_hidden

# One output logit per vocabulary character; embedding and hidden sizes
# fall back to the class defaults.
model = CharLSTM(vocab_size=vocab_size)
print(model)

In [None]:
# Prepare data: encode the corpus as a 1-D tensor of character indices.
seq_len = 20
data = torch.tensor([char_to_idx[c] for c in text])

# Create sequences: every window of `seq_len` characters is an input, and the
# same window shifted right by one character is its next-character target.
num_windows = len(data) - seq_len
X = torch.stack([data[s:s + seq_len] for s in range(num_windows)])
Y = torch.stack([data[s + 1:s + seq_len + 1] for s in range(num_windows)])
print(f'X shape: {X.shape}, Y shape: {Y.shape}')

In [None]:
# Train: full-batch next-character prediction, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

for epoch in range(200):
    # Gradients accumulate across backward() calls, so reset them each step.
    optimizer.zero_grad()
    logits, _ = model(X)

    # Merge the batch and time dimensions so each character position is
    # scored as one independent classification example.
    flat_logits = logits.view(-1, vocab_size)
    flat_targets = Y.view(-1)
    loss = criterion(flat_logits, flat_targets)

    loss.backward()
    optimizer.step()

    # Report on every 50th epoch (epochs are counted from 1 in the output).
    if epoch % 50 == 49:
        print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')

In [None]:
# Generate text: prime the model with a prompt, then sample 50 characters,
# one at a time, feeding each sample back in.
model.eval()
start = 'hello'
generated = [*start]
hidden = None  # no carried state yet; the first call starts fresh

# A single-sequence batch holding the whole prompt.
x = torch.tensor([[char_to_idx[c] for c in start]])
with torch.no_grad():
    for _ in range(50):
        logits, hidden = model(x, hidden)
        # Only the final time step predicts the character that comes next.
        probs = logits[0, -1].softmax(dim=-1)
        # Sample from the distribution rather than taking the arg-max.
        next_idx = torch.multinomial(probs, num_samples=1).item()
        generated.append(idx_to_char[next_idx])
        # Feed back just the sampled character; earlier context is carried
        # in `hidden`.
        x = torch.tensor([[next_idx]])

print('Generated:', ''.join(generated))