# Positional Encoding

This notebook experiments with different Positional Encoding techniques.

# Setup Notebook

## Imports

In [1]:
# Import Standard Libraries
import torch
import torch.nn as nn
import numpy as np

# Sinusoidal

In [2]:
class PositionalEncoder(nn.Module):
    """
    Add fixed sinusoidal positional encodings to a sequence, then apply dropout.

    PE(pos, 2i)     = sin(pos / 10000^(2i / d_model))
    PE(pos, 2i + 1) = cos(pos / 10000^(2i / d_model))

    pos: position in the sequence (0-based)
    i: index of the sine/cosine pair (2i and 2i + 1 are the column indices of that pair)
    d_model: is the model dimension (the embedding dimension)

    NOTE: even columns (2i) hold the sine values and odd columns (2i + 1) hold the cosine values.
    When d_model is odd, the last column is a sine column without a matching cosine column.

    Args:
        embeddings_size: the embedding dimension d_model (even or odd)
        dropout_probability: probability given to the nn.Dropout layer applied on the output
        max_len_sequence: number of positions for which encodings are precomputed
    """
    def __init__(self, embeddings_size, dropout_probability=0.1, max_len_sequence=5000):

        # Initialise the super class
        super().__init__()

        # Set the dropout layer
        self.dropout = nn.Dropout(p=dropout_probability)

        # Initialise positional encoding matrix of dimension (max_len_sequence x embeddings_size)
        positional_encoding_matrix = torch.zeros(max_len_sequence, embeddings_size)

        # Create the positions from 0 to max_len_sequence - 1 (reshape x -> (x, 1))
        position = torch.arange(0, max_len_sequence, dtype=torch.float).unsqueeze(1)

        # Create the inverse frequencies 10000^(-2i/d), computed as exp(2i * (-ln(10000) / d))
        # NOTE: multiplying by this term is the same as dividing the position by 10000^(2i/d)
        inverse_frequency = torch.exp(
            torch.arange(0, embeddings_size, 2).float() * (-np.log(10000.0) / embeddings_size)
        )

        # Angle for every (position, pair index): shape (max_len_sequence, ceil(embeddings_size / 2))
        angles = position * inverse_frequency

        # Compute positional encoding for even and odd columns in the Positional Encoding Matrix
        # NOTE: there are only floor(embeddings_size / 2) odd columns, so the angles are trimmed for the
        # cosine part; without this an odd embeddings_size fails with a shape mismatch
        positional_encoding_matrix[:, 0::2] = torch.sin(angles)
        positional_encoding_matrix[:, 1::2] = torch.cos(angles[:, :embeddings_size // 2])

        # Add a broadcastable dimension through 'unsqueeze(0)' in the first index and then transpose
        # NOTE: (max_len_sequence, embeddings_size) -> (max_len_sequence, 1, embeddings_size)
        positional_encoding_matrix = positional_encoding_matrix.unsqueeze(0).transpose(0, 1)

        # Save the PE matrix as a buffer (not a trainable parameter)
        self.register_buffer('positional_encoding_matrix', positional_encoding_matrix)

    def forward(self, sequence):
        """
        Sum the positional encodings onto the input and apply dropout.

        Args:
            sequence: tensor whose first dimension indexes the position; the stored encodings of shape
                (max_len_sequence, 1, embeddings_size) are sliced on that dimension and broadcast
                over the middle one

        Returns:
            The result of dropout applied to (sequence + positional encodings)
        """

        # Add the positional encoding to the input sequence (Just sum them up)
        # NOTE: only the first sequence.size(0) positions of the precomputed matrix are used
        output = sequence + self.positional_encoding_matrix[:sequence.size(0), :]

        # Apply dropout
        return self.dropout(output)


In [3]:
# Toy walkthrough of how the positional encoding matrix is laid out and reshaped
# NOTE: plain linear values stand in for sin/cos so the even/odd column pattern is easy to read
example_sequence_len = 5
example_embeddings_size = 4

# Positions as a column vector (sequence_len, 1) and one term per even column index
example_position = torch.arange(example_sequence_len, dtype=torch.float).unsqueeze(1)
example_dividend_term = torch.arange(0, example_embeddings_size, 2, dtype=torch.float)

# Even columns receive position * term; odd columns receive the same product minus 1
example_pe = torch.zeros(example_sequence_len, example_embeddings_size)
example_pe[:, 0::2] = example_position * example_dividend_term
example_pe[:, 1::2] = example_pe[:, 0::2] - 1

# Insert a singleton middle dimension: (sequence_len, embeddings_size) -> (sequence_len, 1, embeddings_size)
example_pe_batched = example_pe.unsqueeze(1)

# Toy input: row k (1-based) is k * [1, 11, 111, 1111]
example_sequence = torch.arange(1, example_sequence_len + 1).unsqueeze(1) * torch.tensor([1, 11, 111, 1111])

# Report every intermediate value and the shapes before and after the reshape
print('Sequence Length: ', example_sequence_len)
print('Embeddings Size: ', example_embeddings_size)
print('Position: ', example_position)
print('Dividend term: ', example_dividend_term)
print('Positional Encoding (Pre Transformation): ', example_pe)
print('Positional Encoding (Transformed): ', example_pe_batched)
print('Positional Encoding (Pre Shape)', example_pe.shape)
print('Positional Encoding (After Shape)', example_pe_batched.shape)
print('Sequence: ', example_sequence)
print('Sequence shape: ', example_sequence.shape)

Sequence Length:  5
Embeddings Size:  4
Position:  tensor([[0.],
        [1.],
        [2.],
        [3.],
        [4.]])
Dividend term:  tensor([0., 2.])
Positional Encoding (Pre Transformation):  tensor([[ 0., -1.,  0., -1.],
        [ 0., -1.,  2.,  1.],
        [ 0., -1.,  4.,  3.],
        [ 0., -1.,  6.,  5.],
        [ 0., -1.,  8.,  7.]])
Positional Encoding (Transformed):  tensor([[[ 0., -1.,  0., -1.]],

        [[ 0., -1.,  2.,  1.]],

        [[ 0., -1.,  4.,  3.]],

        [[ 0., -1.,  6.,  5.]],

        [[ 0., -1.,  8.,  7.]]])
Positional Encoding (Pre Shape) torch.Size([5, 4])
Positional Encoding (After Shape) torch.Size([5, 1, 4])
Sequence:  tensor([[   1,   11,  111, 1111],
        [   2,   22,  222, 2222],
        [   3,   33,  333, 3333],
        [   4,   44,  444, 4444],
        [   5,   55,  555, 5555]])
Sequence shape:  torch.Size([5, 4])
