## CLS token / positional encoding testing

In [4]:
import torch

# Toy dimensions used to check how a CLS token is prepended to a batch.
d_model, seq_len, batch_size = 512, 500, 32

# Learnable CLS embedding: a single row of width d_model.
cls = torch.nn.Parameter(torch.rand(1, d_model))
# All-zero stand-in for the embedded sequence, so the CLS row is easy to spot.
input = torch.zeros(batch_size, seq_len, d_model)

# Broadcast the CLS row to (batch_size, 1, d_model) and place it at position 0
# of every sample by concatenating along the sequence axis.
output = torch.cat((cls.expand(batch_size, 1, d_model), input), dim=1)

In [5]:
# Sanity check: the sequence axis should be seq_len + 1 (CLS row prepended to the input).
output.shape

torch.Size([32, 501, 512])

In [7]:
# Channel 500 at every position of sample 0: only position 0 (the CLS row) can be
# nonzero, because `input` is all zeros.
output[0, :, 500]

tensor([0.6024, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 

In [12]:
import torch
from torch import nn, Tensor
import math
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a batch-first input, then apply dropout.

    The encoding table is precomputed once for ``max_len`` positions and stored
    as the (non-trainable) buffer ``pe`` of shape ``[1, max_len, d_model]``.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 500):
        '''
        Positional encoding module based on: https://pytorch.org/tutorials/beginner/transformer_tutorial.html

        Arguments:
            d_model: size of the embedding (last) dimension.
            dropout: dropout probability applied after the encoding is added.
            max_len: number of positions to precompute; longest sequence supported.
        '''

        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        # angles[p, i] = p * div_term[i]; shape [max_len, ceil(d_model / 2)].
        angles = position * div_term
        pe = torch.zeros(1, max_len, d_model)

        # Even channels carry the sine, odd channels the cosine.
        pe[0, :, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd channel than even channel,
        # so trim the cosine columns to match (a no-op when d_model is even).
        pe[0, :, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        """
        Arguments:
            x: Tensor, shape ``[batch_size, seq_len, embedding_dim]``

        Returns:
            Tensor of the same shape as ``x``: ``x`` plus the encoding of its
            first ``seq_len`` positions, passed through dropout.

        Raises:
            ValueError: if ``x`` is not 3-dimensional, or ``seq_len`` exceeds
                the ``max_len`` the module was built with.
        """
        _, seq_len, _ = x.shape
        max_len = self.pe.size(1)
        if seq_len > max_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds max_len {max_len} of the positional encoding"
            )
        # pe is [1, max_len, d_model]: slice the position axis (dim 1) and let
        # the leading size-1 axis broadcast over the batch.
        x = x + self.pe[:, :seq_len]
        return self.dropout(x)

In [17]:
# dropout=0 makes the dropout layer a no-op; max_len covers the sequence plus the
# prepended CLS position.
pos_enc = PositionalEncoding(d_model, dropout=0, max_len=seq_len + 1)

In [23]:
# Channel 0 (an even channel, filled with sin) of the encoding at every position.
pos_enc.pe[0, :, 0]

tensor([ 0.0000e+00,  8.4147e-01,  9.0930e-01,  1.4112e-01, -7.5680e-01,
        -9.5892e-01, -2.7942e-01,  6.5699e-01,  9.8936e-01,  4.1212e-01,
        -5.4402e-01, -9.9999e-01, -5.3657e-01,  4.2017e-01,  9.9061e-01,
         6.5029e-01, -2.8790e-01, -9.6140e-01, -7.5099e-01,  1.4988e-01,
         9.1295e-01,  8.3666e-01, -8.8513e-03, -8.4622e-01, -9.0558e-01,
        -1.3235e-01,  7.6256e-01,  9.5638e-01,  2.7091e-01, -6.6363e-01,
        -9.8803e-01, -4.0404e-01,  5.5143e-01,  9.9991e-01,  5.2908e-01,
        -4.2818e-01, -9.9178e-01, -6.4354e-01,  2.9637e-01,  9.6380e-01,
         7.4511e-01, -1.5862e-01, -9.1652e-01, -8.3177e-01,  1.7702e-02,
         8.5090e-01,  9.0179e-01,  1.2357e-01, -7.6825e-01, -9.5375e-01,
        -2.6237e-01,  6.7023e-01,  9.8663e-01,  3.9593e-01, -5.5879e-01,
        -9.9976e-01, -5.2155e-01,  4.3616e-01,  9.9287e-01,  6.3674e-01,
        -3.0481e-01, -9.6612e-01, -7.3918e-01,  1.6736e-01,  9.2003e-01,
         8.2683e-01, -2.6551e-02, -8.5552e-01, -8.9

In [25]:
# Channel 1 (an odd channel, filled with cos) of the encoding at every position.
pos_enc.pe[0, :, 1]

tensor([ 1.0000,  0.5403, -0.4161, -0.9900, -0.6536,  0.2837,  0.9602,  0.7539,
        -0.1455, -0.9111, -0.8391,  0.0044,  0.8439,  0.9074,  0.1367, -0.7597,
        -0.9577, -0.2752,  0.6603,  0.9887,  0.4081, -0.5477, -1.0000, -0.5328,
         0.4242,  0.9912,  0.6469, -0.2921, -0.9626, -0.7481,  0.1543,  0.9147,
         0.8342, -0.0133, -0.8486, -0.9037, -0.1280,  0.7654,  0.9551,  0.2666,
        -0.6669, -0.9873, -0.4000,  0.5551,  0.9998,  0.5253, -0.4322, -0.9923,
        -0.6401,  0.3006,  0.9650,  0.7422, -0.1630, -0.9183, -0.8293,  0.0221,
         0.8532,  0.8999,  0.1192, -0.7711, -0.9524, -0.2581,  0.6735,  0.9859,
         0.3919, -0.5625, -0.9996, -0.5178,  0.4401,  0.9934,  0.6333, -0.3090,
        -0.9673, -0.7362,  0.1717,  0.9218,  0.8243, -0.0310, -0.8578, -0.8960,
        -0.1104,  0.7767,  0.9497,  0.2495, -0.6800, -0.9844, -0.3837,  0.5698,
         0.9994,  0.5102, -0.4481, -0.9944, -0.6264,  0.3174,  0.9695,  0.7302,
        -0.1804, -0.9251, -0.8193,  0.03

In [20]:
# pe has shape (1, max_len, d_model): slice along the position axis (dim 1), not the
# size-1 leading axis, so this stays correct when max_len exceeds output.size(1).
test = output + pos_enc.pe[:, :output.size(1)]

In [21]:
# Should equal output's shape: the (1, max_len, d_model) buffer broadcasts over the batch axis.
test.shape

torch.Size([32, 501, 512])