In [26]:
import torch 
import math
from torch import nn,Tensor
from torch.nn import MultiheadAttention,LayerNorm,Embedding
import numpy as np


In [7]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a tensor, then apply dropout.

    The encoding table is computed once in ``__init__`` and stored as the
    buffer ``pe`` with shape ``[max_len, 1, d_model]``: even feature indices
    hold ``sin(position * div_term)`` and odd feature indices hold
    ``cos(position * div_term)``.

    Arguments:
        d_model: size of the last (feature) dimension of the input; may be odd.
        dropout: dropout probability applied to the sum.
        max_len: number of positions precomputed in the table.
        batch_first: if ``True``, ``forward`` expects
            ``[batch_size, seq_len, embedding_dim]`` instead of the default
            ``[seq_len, batch_size, embedding_dim]``.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000,
                 batch_first: bool = False):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.batch_first = batch_first

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = position * div_term  # [max_len, ceil(d_model / 2)]

        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd index than even index, so
        # drop the surplus angle column; for even d_model the slice is a no-op.
        pe[:, 0, 1::2] = torch.cos(angles[:, :d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        """
        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``, or
                ``[batch_size, seq_len, embedding_dim]`` when the module was
                built with ``batch_first=True``.

        Returns:
            Tensor of the same shape as ``x``: ``x`` plus the positional
            encoding of each sequence position, passed through dropout.
        """
        if self.batch_first:
            # [seq_len, 1, d_model] -> [1, seq_len, d_model] so that it
            # broadcasts over the leading batch dimension.
            encoding = self.pe[:x.size(1)].transpose(0, 1)
        else:
            encoding = self.pe[:x.size(0)]
        x = x + encoding
        return self.dropout(x)

In [24]:
# Test inputs for the positional-encoding checks: a small batch of random token ids.
batch_size = 2
seq_len = 10
embedding_dim = 256
num_vocabs = 1000


# Draw ids from the whole vocabulary [0, num_vocabs) rather than a hard-coded
# range, so the sample stays valid if num_vocabs changes.
x = torch.randint(high=num_vocabs, size=(batch_size, seq_len), dtype=torch.long)
x.shape


torch.Size([2, 10])

In [25]:
# Look up an embedding_dim-sized vector for every token id in x; the result has
# x's shape with one extra trailing dimension of size embedding_dim.
embedding_layer = Embedding(num_vocabs, embedding_dim)
embedded_test_tensor = embedding_layer(x)
embedded_test_tensor.size()


torch.Size([2, 10, 256])

In [13]:
# PositionalEncoding.forward expects [seq_len, batch_size, embedding_dim], but the
# embedding output is batch-first ([batch_size, seq_len, embedding_dim]). Passing it
# unchanged still broadcasts, so the shape looks right while the encoding is
# indexed by batch element instead of by position. Transpose in and back out.
pos_encoder = PositionalEncoding(d_model=embedding_dim)
encoded_test_tensor = pos_encoder(embedded_test_tensor.transpose(0, 1)).transpose(0, 1)
encoded_test_tensor.shape

torch.Size([2, 10, 256])

In [14]:
# LayerNorm's constructor takes the size of the trailing dimension(s) to
# normalize, not the tensor itself; build the module first, then apply it.
LayerNorm(embedding_dim)(embedded_test_tensor).shape

torch.Size([2, 10, 256])


In [27]:
def gen_pe(max_length, d_model, n=10000):
  """Build a sinusoidal positional-encoding matrix with NumPy.

  For position ``k`` and even feature index ``2*i``:

      theta        = k / n ** (2*i / d_model)
      pe[k, 2*i]   = sin(theta)
      pe[k, 2*i+1] = cos(theta)

  Arguments:
    max_length: number of positions (rows) to generate.
    d_model: number of features (columns) per position. May be odd, in which
      case the last column is a sine term with no cosine partner.
    n: base of the geometric frequency progression; defaults to 10000.

  Returns:
    ``numpy.ndarray`` of shape ``(max_length, d_model)`` and dtype float64.
  """
  # column vector of positions so that it broadcasts against the feature axis
  positions = np.arange(max_length, dtype=np.float64)[:, np.newaxis]

  # the even feature indices 0, 2, 4, ... are exactly the ``2*i`` of the formula
  even_dims = np.arange(0, d_model, 2, dtype=np.float64)
  theta = positions / np.power(float(n), even_dims / d_model)

  pe = np.zeros((max_length, d_model))

  # even dims: sin
  pe[:, 0::2] = np.sin(theta)

  # odd dims: cos (for odd d_model there is one fewer odd column than even)
  pe[:, 1::2] = np.cos(theta[:, :d_model // 2])

  return pe

In [None]:
# gen_pe needs d_model and n as well; the original call passed only max_length
# and would raise TypeError. Rows index sequence positions, so size the table by
# seq_len rather than by the vocabulary size.
pe = gen_pe(max_length=seq_len, d_model=embedding_dim, n=10000)
pe.shape