<a href="https://colab.research.google.com/github/cyborgx0x/ai-resources/blob/main/Dora.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn

# Transformer Encoder Layer
class TransformerEncoderLayer(nn.Module):
    """Single post-norm Transformer encoder layer.

    Runs multi-head self-attention followed by a two-layer ReLU
    feed-forward network. Each sub-layer output goes through dropout, is
    added back to its input (residual connection), and is then layer
    normalized.

    Inputs and outputs are batch-first: ``(batch, seq_len, d_model)``.

    Args:
        d_model: Size of the last (embedding) dimension of the input.
        nhead: Number of attention heads (``nn.MultiheadAttention`` requires
            ``d_model`` to be divisible by it).
        dim_feedforward: Hidden width of the feed-forward network.
        dropout: Dropout probability applied to each sub-layer output.
    """

    def __init__(
        self,
        d_model: int,
        nhead: int,
        dim_feedforward: int,
        dropout: float = 0.1,
    ) -> None:
        super().__init__()
        # batch_first=True is required for (batch, seq, d_model) inputs.
        # With the default (batch_first=False) dim 0 is treated as the
        # sequence axis, so attention would mix information across batch
        # samples instead of across positions within each sample.
        self.self_attn = nn.MultiheadAttention(d_model, nhead, batch_first=True)
        self.feed_forward = nn.Sequential(
            nn.Linear(d_model, dim_feedforward),
            nn.ReLU(),
            nn.Linear(dim_feedforward, d_model),
        )
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src: torch.Tensor) -> torch.Tensor:
        """Encode ``src`` and return a tensor of the same shape.

        Args:
            src: Input of shape ``(batch, seq_len, d_model)``.

        Returns:
            Tensor of shape ``(batch, seq_len, d_model)``.
        """
        # Self-attention sub-layer. The attention weights are never used,
        # so skip computing/returning them.
        attn_out = self.self_attn(src, src, src, need_weights=False)[0]
        src = self.norm1(src + self.dropout(attn_out))

        # Position-wise feed-forward sub-layer.
        ff_out = self.feed_forward(src)
        src = self.norm2(src + self.dropout(ff_out))
        return src

# Transformer Encoder
class TransformerEncoder(nn.Module):
    """Stack of ``num_layers`` encoder layers applied one after another.

    Args:
        num_layers: How many ``TransformerEncoderLayer`` instances to chain.
        d_model: Embedding size forwarded to every layer.
        nhead: Number of attention heads forwarded to every layer.
        dim_feedforward: Feed-forward hidden width forwarded to every layer.
        dropout: Dropout probability forwarded to every layer.
    """

    def __init__(self, num_layers, d_model, nhead, dim_feedforward, dropout=0.1):
        super().__init__()
        # Layers are created lazily, in order, while being unpacked into
        # the Sequential container.
        layer_stack = (
            TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout)
            for _ in range(num_layers)
        )
        self.encoder = nn.Sequential(*layer_stack)

    def forward(self, src):
        """Pass ``src`` through every layer in order and return the result."""
        encoded = self.encoder(src)
        return encoded

# Example usage
# Hyperparameters for the demo model
d_model, nhead = 512, 8
dim_feedforward = 2048
num_layers = 6
dropout = 0.1

# Build the encoder stack from the hyperparameters above
encoder = TransformerEncoder(
    num_layers=num_layers,
    d_model=d_model,
    nhead=nhead,
    dim_feedforward=dim_feedforward,
    dropout=dropout,
)

# Random input batch laid out as (batch_size, sequence_length, d_model)
input_data = torch.randn((64, 20, d_model))

# Run the batch through every encoder layer
output = encoder(input_data)

print("Input shape:", input_data.shape)
print("Output shape:", output.shape)


Input shape: torch.Size([64, 20, 512])
Output shape: torch.Size([64, 20, 512])


In [2]:
output

tensor([[[-0.0139,  0.8545, -0.7068,  ...,  0.6641, -0.3143, -0.2923],
         [ 0.2339, -0.4133, -0.9518,  ...,  0.5721,  1.2003, -1.3061],
         [-1.1030,  1.9347,  2.2691,  ..., -0.0179, -0.8216, -0.7187],
         ...,
         [-0.7892, -1.2245,  1.0740,  ...,  0.1568,  0.7171, -0.2822],
         [-1.7963, -0.5028,  0.3921,  ...,  0.7665, -0.2996, -1.7582],
         [-2.4838, -0.9078,  0.8984,  ..., -1.2333,  0.5882,  0.1788]],

        [[-2.4267,  0.2237, -0.4580,  ...,  0.8302, -1.1847, -0.4490],
         [-0.0203, -1.4386, -0.6680,  ...,  0.1567, -0.7015, -0.1183],
         [-1.3083,  0.8586,  0.2094,  ...,  0.6625, -0.8445, -1.1805],
         ...,
         [ 0.7975, -0.1029, -0.3853,  ...,  0.7752,  1.2238,  0.4952],
         [ 1.3483, -0.5947, -0.5905,  ...,  0.3817,  1.5105,  0.6956],
         [-2.2890,  1.6490,  0.2945,  ..., -0.1354,  1.4039,  0.3567]],

        [[-0.3118, -0.3506, -1.3435,  ..., -1.2020, -0.1497,  0.0778],
         [-1.9191, -0.1921, -1.1249,  ...,  0