## Positional Encoding

<img src="../assets/positional-encoding.webp" width="900" height="500"/>

In [1]:
import torch
import torch.nn as nn

In [2]:
import math


class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to token embeddings.

    A table of shape ``(maxlen, emb_size)`` is computed once at construction:
    even columns hold ``sin(pos * 10000^(-2i / emb_size))`` and odd columns
    hold the matching ``cos``. The table is registered as a buffer, so it
    follows the module across devices and is saved in the state dict, but it
    is not a parameter and receives no gradient updates.

    Args:
        emb_size: Size of the embedding dimension. Must be even so that every
            sine column has a paired cosine column.
        dropout: Dropout probability applied to the sum of the token
            embedding and the positional encoding.
        maxlen: Number of positions to precompute.

    Raises:
        ValueError: If ``emb_size`` is odd.
    """

    def __init__(self, emb_size: int, dropout: float = 0.0, maxlen: int = 5000):
        if emb_size % 2 != 0:
            # ValueError is a subclass of Exception, so existing handlers
            # that catch Exception keep working.
            raise ValueError("Embedding size must be even")
        super().__init__()

        # exp(-2i * ln(10000) / emb_size) == 10000^(-2i / emb_size);
        # one frequency per (sin, cos) column pair.
        den = torch.exp(-torch.arange(0, emb_size, 2) * math.log(10000) / emb_size)
        # Column vector of positions so that pos * den broadcasts to
        # (maxlen, emb_size // 2).
        pos = torch.arange(0, maxlen).reshape(maxlen, 1)
        pos_embedding = torch.zeros((maxlen, emb_size))

        pos_embedding[:, 0::2] = torch.sin(pos * den)  # even columns
        pos_embedding[:, 1::2] = torch.cos(pos * den)  # odd columns

        self.dropout = nn.Dropout(dropout)
        # Buffer, not parameter: part of the module state but never trained.
        self.register_buffer("pos_embedding", pos_embedding)

    def forward(self, token_embedding: torch.Tensor) -> torch.Tensor:
        """Return ``dropout(token_embedding + positional_encoding)``.

        Args:
            token_embedding: Tensor whose last two dimensions are
                ``(seq_len, emb_size)``; any leading dimensions (e.g. batch)
                are broadcast over. ``seq_len`` is expected to be at most
                ``maxlen``.

        Returns:
            A tensor with the same shape as ``token_embedding``.
        """
        # Use only the first seq_len rows of the table. Adding the full table
        # would fail for most shorter sequences and would silently broadcast
        # a length-1 sequence up to maxlen rows.
        seq_len = token_embedding.size(-2)
        return self.dropout(token_embedding + self.pos_embedding[:seq_len])

### Testing Positional Encoding

1. Without batches

In [3]:
# Unbatched smoke test: the input has exactly maxlen (10) rows, so it lines up
# row-for-row with the (10, 6) positional table.
pe = PositionalEncoding(emb_size=6, dropout=0, maxlen=10)
x = torch.zeros(10, 6)  # 10 is the number of tokens, 6 is the embedding size
y = pe(x)

y.shape, pe.pos_embedding.shape

(torch.Size([10, 6]), torch.Size([10, 6]))

---

2. With batches

In [4]:
# Batched smoke test: the (10, 6) positional table is broadcast over the
# leading batch dimension of the (7, 10, 6) input. Only shapes are inspected.
pe = PositionalEncoding(emb_size=6, dropout=0.1, maxlen=10)
x = torch.zeros(
    7, 10, 6
)  # 7 is the batch size, 10 is the number of tokens, 6 is the embedding size
y = pe(x)

y.shape, pe.pos_embedding.shape

(torch.Size([7, 10, 6]), torch.Size([10, 6]))