# **Implementing Transformer by Replicating *Attention Is All You Need***

### 1. Input Embeddings -> 

In [5]:
%%writefile modules/inputEmbeddings.py
import torch
import torch.nn as nn
import math

class InputEmbeddings(nn.Module):
    """Token-embedding lookup whose output is multiplied by sqrt(d_model).

    Args:
        d_model: Size of each embedding vector (e.g. 512 means every token
            is represented by a 512-dimensional vector).
        vocab_size: Number of rows in the embedding table, i.e. the number
            of distinct token ids.
    """

    def __init__(self, d_model: int, vocab_size: int):
        super().__init__()
        # Keep both sizes on the module; forward() reads d_model for the scale factor.
        self.d_model = d_model
        self.vocab_size = vocab_size
        # Learnable lookup table: one d_model-wide row per token id.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=d_model,
        )

    def forward(self, x):
        """Look up the embeddings for the token ids in ``x`` and scale them by sqrt(d_model)."""
        looked_up = self.embedding(x)
        scale = math.sqrt(self.d_model)
        return looked_up * scale
        

Overwriting modules/inputEmbeddings.py


### 2. Positional Encoding -> 

\begin{align}
PE(pos, 2i)   &= \sin \left( \frac{pos}{10000^{\frac{2i}{d_{\text{model}}}}} \right) \\
PE(pos, 2i+1) &= \cos \left( \frac{pos}{10000^{\frac{2i}{d_{\text{model}}}}} \right)
\end{align}


In [None]:
import torch
import torch.nn as nn
import math
class PositionalEncoding(nn.Module):
    
    def __init__(self,d_model:int, seq_len:int, dropout:float ):
        super().__init__()
        self.d_model = d_model
        self.seq_len = seq_len
        self.dropout = dropout
        
        #Creating a Matrix of shape (seq_len , d_model)
        pe = torch.zeros(seq_len,d_model)
        
        #Creating a position vector of length seq_len
        position = torch.arange(0,seq_len,dtype=torch.float).unsqueeze(1)
        