# Problem 2: No Positional Awareness

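Demonstrates why transformers need positional encodings: self-attention on its own is permutation-equivariant, so reordering the input tokens merely reorders the outputs and word order is lost.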

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/gaurav-redhat/transformer_problems/blob/main/02_positional_awareness/demo.ipynb)


In [None]:
!pip install torch matplotlib numpy -q
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np

# Self-attention without positional encoding
def attention(x, W_q, W_k, W_v):
    """Single-head scaled dot-product self-attention with no positional signal.

    Args:
        x: Token embeddings of shape (..., seq_len, d_in). Leading batch
            dimensions are optional.
        W_q: Query projection matrix of shape (d_in, d_k).
        W_k: Key projection matrix of shape (d_in, d_k).
        W_v: Value projection matrix of shape (d_in, d_v).

    Returns:
        Tensor of shape (..., seq_len, d_v): for each token, the
        softmax-weighted sum of the value vectors.
    """
    Q, K, V = x @ W_q, x @ W_k, x @ W_v
    # Swap only the last two axes. `K.T` reverses *every* axis, which breaks
    # (and is deprecated) for inputs that carry a batch dimension.
    scores = (Q @ K.transpose(-2, -1)) / (K.shape[-1] ** 0.5)
    # Normalise over the key axis so each query's weights sum to 1.
    return F.softmax(scores, dim=-1) @ V

# "Dog bites man" vs "man bites Dog"
torch.manual_seed(42)

# Three random 8-dimensional "word embeddings", drawn in a fixed order so the
# seeded values are reproducible.
dog = torch.randn(8)
bites = torch.randn(8)
man = torch.randn(8)

# A single random projection is shared by the query, key and value roles.
W_q = torch.randn(8, 8) * 0.1
W_k = W_v = W_q

# The same three tokens in opposite orders.
x1 = torch.stack((dog, bites, man))  # Dog bites man
x2 = torch.stack((man, bites, dog))  # man bites Dog

out1 = attention(x1, W_q, W_k, W_v)
out2 = attention(x2, W_q, W_k, W_v)

# Row 0 of out1 and row 2 of out2 are both the output for the "dog" token;
# if they match, the token's position had no effect on its representation.
dog_output_unchanged = torch.allclose(out1[0], out2[2], atol=1e-4)

print("Without positional encoding:")
print(f"  'Dog bites man' output[0] ≈ 'man bites Dog' output[2]: {dog_output_unchanged}")
print("\n⚠️ The model CAN'T distinguish word order!")


In [None]:
# Solution: Sinusoidal Positional Encoding
def sinusoidal_pe(max_len, d_model):
    """Build a table of sinusoidal positional encodings.

    Even-indexed columns hold ``sin(pos * w_i)`` and odd-indexed columns hold
    ``cos(pos * w_i)``, where ``w_i = 10000 ** (-2i / d_model)``.

    Args:
        max_len: Number of positions (rows) to encode.
        d_model: Encoding width (columns). May be even or odd.

    Returns:
        Float tensor of shape (max_len, d_model).
    """
    pe = torch.zeros(max_len, d_model)
    position = torch.arange(max_len).unsqueeze(1).float()
    # One frequency per sin/cos pair; computed in log space.
    div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(np.log(10000.0) / d_model))
    angles = position * div_term
    pe[:, 0::2] = torch.sin(angles)
    # For odd d_model there is one fewer odd column than even column, so trim
    # the angles to match; for even d_model the slice is a no-op.
    pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
    return pe

# Encode 50 positions with 64 dimensions and show the table as a heatmap,
# transposed so positions run along the x-axis and dimensions along the y-axis.
pe = sinusoidal_pe(50, 64)

fig, ax = plt.subplots(figsize=(10, 4))
heatmap = ax.imshow(pe.T, cmap='RdBu', aspect='auto')
ax.set_xlabel('Position')
ax.set_ylabel('Dimension')
ax.set_title('Sinusoidal Positional Encoding - Each position has unique pattern!')
fig.colorbar(heatmap, ax=ax)
plt.show()

print("✓ Now each position has a unique encoding that tells the model about order!")
