# Positional Embedding

In [2]:
import torch.nn as nn
import torch
import math

# Model hyperparameters used as reference values in this notebook.
embed_size = 768  # width of each token embedding
n_layer = 12  # number of layers
n_head = 12  # number of attention heads
# The original note only said ">30 k", which is not valid Python.
# NOTE(review): 30_522 is the BERT-base-uncased WordPiece vocabulary size and
# is assumed here because the other values match BERT-base -- confirm against
# the tokenizer actually used.
vocab_size = 30_522

In [1]:



class PositionalEmbedding(nn.Module):
    """Fixed (non-learned) sinusoidal positional embedding table.

    The table is built once in ``__init__`` and stored as the buffer ``pe``
    with shape ``(1, max_len, d_model)``. Even columns hold sines and odd
    columns hold cosines of ``position * div_term``.

    Args:
        d_model: size of the embedding dimension (may be odd).
        max_len: number of positions to precompute.
    """

    def __init__(self, d_model: int, max_len: int = 512):
        super().__init__()

        # The table is a constant, not a trainable weight.
        pe = torch.zeros(max_len, d_model).float()
        pe.requires_grad = False

        position = torch.arange(0, max_len).float().unsqueeze(1)  # (max_len, 1)
        # Frequencies 10000^(-2i / d_model), evaluated as exp(-2i * log(10000) / d_model).
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
        angles = position * div_term  # (max_len, ceil(d_model / 2))

        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns,
        # so drop the last frequency instead of failing on a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        pe = pe.unsqueeze(0)  # leading axis of size 1 so it broadcasts over a batch
        # Registered as a buffer: part of the module state, but not a parameter.
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the table rows for the first ``x.size(1)`` positions.

        Only the size of ``x`` along dimension 1 is used; the values of ``x``
        are not read. The result has shape ``(1, min(x.size(1), max_len), d_model)``.
        """
        return self.pe[:, :x.size(1)]


In [50]:
# TensorBoard: log the sinusoidal table rows for the first few positions so
# they can be compared as curves over the embedding dimension.
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='./bert/')

max_len = 512  # length of the sequence
d_model = 512  # embedding length

# Build the table once; it is a constant, so it never needs gradients.
pe = torch.zeros(max_len, d_model).float()
print(f'pe_shape : {pe.shape}')
pe.requires_grad = False

position = torch.arange(0, max_len).float().unsqueeze(1)
# Frequencies 10000^(-2i / d_model), evaluated as exp(-2i * log(10000) / d_model).
div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)

pe = pe.unsqueeze(0)  # (1, max_len, d_model)

# One curve per position ("pos0" .. "pos3"). The x-axis (global_step) is the
# index along the embedding dimension, so the loop is bounded by d_model.
n_plotted_positions = 4
for pos in range(n_plotted_positions):
    for dim in range(d_model):
        writer.add_scalars(
            main_tag='positional_embeding',
            tag_scalar_dict={f"pos{pos}": pe[0, pos, dim]},
            global_step=dim,
        )

# Alternative view of the whole table:
# writer.add_embedding(mat=pe[0], tag='embedding')

# Close the writer so all pending events are written to disk.
writer.close()

pe_shape : torch.Size([512, 512])


Positional embedding: my implementation

![position_embedding](./assets/position_embedding.png)

In [79]:
# Step-by-step rebuild of the sinusoidal frequencies on a tiny example.
max_token_length = 20
n_dim = 6
pos_len = max_token_length
dim_len = n_dim

# required shape (1,20,6)

max_len = pos_len  # length of the sequence
d_model = dim_len  # embedding length
pe = torch.zeros(max_len, d_model).float()
print(f'pe_shape : {pe.shape}')
pe.requires_grad = False

position = torch.arange(start=0, end=max_len, step=1, dtype=torch.float)

# unsqueeze(dim=1) appends an axis: shape (max_len,) -> (max_len, 1), so that
# position * div_term broadcasts to one row per position.
# (unsqueeze(dim=0) would instead give shape (1, max_len).)
position = position.unsqueeze(dim=1)

# Even embedding indices 0, 2, 4, ... -- one per sine/cosine column pair.
term = torch.arange(0, d_model, 2).float()

# The base 10000, repeated once per frequency (variable name kept from the
# original cell even though the value is 10000).
term_1000 = torch.ones(term.shape) * 10000.0
# Direct form of the frequencies: 1 / 10000^(2i / d_model).
div_term = term_1000 ** (-term / d_model)
# Same frequencies written with exp/log, as in PositionalEmbedding.
div_term = torch.exp(term * -(math.log(10000.0) / d_model))


print(term)





pe_shape : torch.Size([20, 6])
tensor([0., 2., 4.])


In [3]:
import numpy as np
class PositionalEncoding(nn.Module):
    """Adds a precomputed sinusoidal position table to its input.

    The table has shape ``(1, n_position, d_hid)`` and is stored as the
    buffer ``pos_table``.
    """

    def __init__(self, d_hid, n_position=200):
        super().__init__()

        # Stored as a buffer, not a parameter.
        table = self._get_sinusoid_encoding_table(n_position, d_hid)
        self.register_buffer('pos_table', table)

    def _get_sinusoid_encoding_table(self, n_position, d_hid):
        """Build the sinusoid position encoding table with numpy."""
        # TODO: make it with torch instead of numpy

        # One divisor per hidden index; columns 2i and 2i+1 share a divisor.
        divisors = [np.power(10000, 2 * (col // 2) / d_hid) for col in range(d_hid)]

        angles = np.array(
            [[row / divisor for divisor in divisors] for row in range(n_position)]
        )
        angles[:, 0::2] = np.sin(angles[:, 0::2])  # even columns -> sine
        angles[:, 1::2] = np.cos(angles[:, 1::2])  # odd columns -> cosine

        return torch.FloatTensor(angles).unsqueeze(0)

    def forward(self, x):
        """Return ``x`` plus the table rows for its first ``x.size(1)`` positions."""
        positions = self.pos_table[:, :x.size(1)]
        return x + positions.clone().detach()

In [9]:
# Smoke test: adding the position table must keep the input shape.
model = PositionalEncoding(d_hid=768)

# Random tensor of shape (10, 200, 768). The last size matches d_hid, and
# 200 matches the default n_position of PositionalEncoding.
# Named `batch` rather than `input` so the builtin input() is not shadowed.
batch = torch.rand((10, 200, 768), dtype=torch.float)

batch.shape

output = model(batch)
output.shape

torch.Size([10, 200, 768])