### Import & Load Dataset

In [45]:
!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt -O tiny_shakespeare.txt

with open("tiny_shakespeare.txt", "r", encoding="utf-8") as f:
    text = f.read()

print(text[:500])



--2025-11-08 16:17:42--  https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘tiny_shakespeare.txt’


2025-11-08 16:17:42 (19.8 MB/s) - ‘tiny_shakespeare.txt’ saved [1115394/1115394]

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citize

### Build the vocabulary

In [46]:
chars = sorted(list(set(text)))
vocab_size = len(chars)
stoi = {ch:i for i,ch in enumerate(chars)}
itos = {i:ch for ch,i in stoi.items()}

def encode(s):
  return [stoi[c] for c in s]

def decode(l):
  return "".join([itos[i] for i in l])

### Create Training Batches

In [47]:
import torch

data = torch.tensor(encode(text), dtype=torch.long)
block_size = 64
batch_size = 32

def get_batch():
  idx = torch.randint(len(data) - block_size, (batch_size,))
  x = torch.stack([data[i:i+block_size] for i in idx])
  y = torch.stack([data[i+1:i+block_size+1] for i in idx])
  return x, y

### Defining RNN Model

In [48]:
import torch.nn as nn

class RNNLanguageModel(nn.Module):
  def __init__(self, vocab_size, hidden_size=128):
    super().__init__()
    self.embed = nn.Embedding(vocab_size, hidden_size)
    self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
    self.fc = nn.Linear(hidden_size, vocab_size)

  def forward(self, x, h=None):
    x = self.embed(x)
    out, h = self.rnn(x, h)
    logits = self.fc(out)
    return logits, h

### Training the RNN

In [49]:
model = RNNLanguageModel(vocab_size)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

for step in range(2000):
  x, y = get_batch()

  logits, _ = model(x)
  loss = criterion(logits.view(-1, vocab_size), y.view(-1))

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  if step % 200 == 0:
    print(f"Step {step}, loss {loss.item():.4f}")

Step 0, loss 4.2498
Step 200, loss 2.1256
Step 400, loss 1.9521
Step 600, loss 1.8858
Step 800, loss 1.8207
Step 1000, loss 1.8082
Step 1200, loss 1.7318
Step 1400, loss 1.7691
Step 1600, loss 1.6490
Step 1800, loss 1.7549


### Generate Text

In [53]:
model.eval()
context = torch.tensor([encode("KING: ")], dtype=torch.long) # Corrected typo and removed .cuda()
h = None

for _ in range(300):
  logits, h = model(context, h)
  probs = torch.softmax(logits[:, -1, :], dim=-1) # Corrected softmax calculation
  idx = torch.multinomial(probs, num_samples=1)
  context = torch.cat([context, idx], dim=1) # Corrected typo

print(decode(context[0].tolist()))

KING: let? so how and father? ba hend Birtom's will enco for the juys lasting in fire one fair nod thy hearn my fear dames letters thy orle.
The simes doth'd adgitert. The cheek so pleath?

TRANION:
My thy France of enjuch good
Ang fans!

ANGELO:
Poly sweet cyodalo in peven a have underaclest with gralken
