In [1]:
import torch
from torch import nn
from torch.nn import functional as F
import math

In [3]:
# Scratch check: draw a 4x4 tensor of uniform samples from [0, 1) and display it.
# No seed is set, so the values differ on every run.
random_torch = torch.rand((4, 4))
random_torch

tensor([[0.4947, 0.4680, 0.1846, 0.4052],
        [0.7304, 0.0817, 0.8249, 0.0355],
        [0.9310, 0.1803, 0.1162, 0.6853],
        [0.0309, 0.6938, 0.6365, 0.2480]])

In [6]:
class TokenEmbedding(nn.Embedding):
    """Lookup table that maps token ids to ``d_model``-wide vectors.

    A thin subclass of ``nn.Embedding`` whose only customisation is that
    ``padding_idx`` is fixed to 1.

    Args:
        vocab_size: number of rows in the table (``num_embeddings``).
        d_model: width of each embedding vector (``embedding_dim``).
    """

    def __init__(self, vocab_size, d_model):
        # padding_idx=1 is hard-coded: nn.Embedding initialises that row to
        # zeros and does not accumulate gradient for it.
        super().__init__(
            num_embeddings=vocab_size,
            embedding_dim=d_model,
            padding_idx=1,
        )

In [5]:
class PositionalEmbedding(nn.Module):
    """Fixed (non-trainable) sinusoidal position table.

    For position ``p`` and even column index ``2i`` the table holds
    ``sin(p / 10000 ** (2i / d_model))``; the following odd column holds the
    cosine of the same angle. The table is built once in the constructor.

    Args:
        d_model: number of columns in the table (embedding width).
        max_len: number of rows in the table (largest supported position + 1).
        device: device on which the table is created.
    """

    def __init__(self, d_model, max_len, device):
        super().__init__()

        # Column vector of positions, shape (max_len, 1).
        pos = torch.arange(0, max_len, device=device).float().unsqueeze(dim=1)
        # Even column indices 0, 2, 4, ...; shape (ceil(d_model / 2),).
        _2i = torch.arange(0, d_model, step=2, device=device).float()
        # Broadcasts to (max_len, ceil(d_model / 2)); shared by sin and cos.
        angles = pos / (10000 ** (_2i / d_model))

        encoding = torch.zeros(max_len, d_model, device=device)
        encoding[:, 0::2] = torch.sin(angles)
        # The original wrote to a non-existent ``self.decoding`` here, which
        # raised AttributeError on construction. The slice keeps the shapes
        # aligned when d_model is odd (one fewer odd column than even).
        encoding[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # A buffer follows the module through .to()/.cuda() and never requires
        # grad; persistent=False keeps it out of state_dict so the set of
        # checkpoint keys is unchanged.
        self.register_buffer("encoding", encoding, persistent=False)

    def forward(self, x):
        """Return the first ``seq_len`` rows of the table.

        Args:
            x: 2-D tensor of shape (batch_size, seq_len); only its size is read.

        Returns:
            Tensor of shape (min(seq_len, max_len), d_model). It has no batch
            dimension, and rows beyond ``max_len`` are not available.
        """
        _, seq_len = x.size()
        return self.encoding[:seq_len, :]
        

In [None]:
class TransformerEmbedding(nn.Module):
    """Token embedding plus positional encoding, followed by dropout.

    Args:
        vocab_size: vocabulary size passed to ``TokenEmbedding``.
        d_model: embedding width shared by both sub-embeddings.
        max_len: maximum sequence length passed to ``PositionalEmbedding``.
        drop_prob: dropout probability applied to the summed embedding.
        device: device passed to ``PositionalEmbedding``. The token embedding
            is created on the default device, so the caller is responsible
            for moving the module if the two differ.
    """

    def __init__(self, vocab_size, d_model, max_len, drop_prob, device):
        super().__init__()
        self.tok_emb = TokenEmbedding(vocab_size, d_model)
        self.pos_emb = PositionalEmbedding(d_model, max_len, device)
        self.drop_out = nn.Dropout(p=drop_prob)

    def forward(self, x):
        """Embed a batch of token ids.

        Args:
            x: integer tensor of token ids, shape (batch_size, seq_len).

        Returns:
            Tensor of shape (batch_size, seq_len, d_model).
        """
        # The sub-modules must be *called* on x. The original added the module
        # objects themselves, which raised TypeError and ignored the input.
        tok_emb = self.tok_emb(x)
        pos_emb = self.pos_emb(x)
        # pos_emb is (seq_len, d_model) and broadcasts over the batch dimension.
        return self.drop_out(tok_emb + pos_emb)
    