<a href="https://colab.research.google.com/github/hobezhang/NLP-based-information-retrieval-system/blob/main/Transformer-Encoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import math
class MultiHeadAttention(nn.Module):
  """Scaled dot-product self-attention.

  Despite its name, this layer uses a single attention head and has no
  output projection: the inputs are linearly projected to queries, keys and
  values, which are combined as ``softmax(Q K^T / sqrt(hidden_size)) V``.

  Args:
    hidden_size: Size of the last (feature) dimension of the inputs and of
      every projection. Defaults to 512.
  """

  def __init__(self, hidden_size: int = 512):
    super().__init__()
    self.hidden_size = hidden_size
    self.q_proj = nn.Linear(hidden_size, hidden_size)
    self.k_proj = nn.Linear(hidden_size, hidden_size)
    self.v_proj = nn.Linear(hidden_size, hidden_size)

  def forward(self, inputs: torch.Tensor) -> torch.Tensor:
    """Attend every position of ``inputs`` to every other position.

    Args:
      inputs: Tensor of shape ``(..., seq_len, hidden_size)``; any leading
        dimensions are treated as batch dimensions.

    Returns:
      Tensor of the same shape as ``inputs``.
    """
    q = self.q_proj(inputs)
    k = self.k_proj(inputs)
    v = self.v_proj(inputs)

    # Scale the logits *before* the softmax: dividing afterwards would leave
    # attention weights that sum to 1/sqrt(hidden_size) instead of 1.
    # transpose(-2, -1) rather than .T so batched (>2-D) inputs work too.
    scores = q @ k.transpose(-2, -1) / math.sqrt(self.hidden_size)
    weights = torch.softmax(scores, dim=-1)
    return weights @ v

class AddAndNorm(nn.Module):
  """Residual connection followed by layer normalization.

  Args:
    hidden_size: Size of the last (feature) dimension that is normalized.
      Defaults to 512.
  """

  def __init__(self, hidden_size: int = 512):
    super().__init__()
    self.hidden_size = hidden_size
    self.layer_norm = nn.LayerNorm(hidden_size)

  def forward(self, inputs: torch.Tensor, sublayer_outputs: torch.Tensor) -> torch.Tensor:
    """Return ``LayerNorm(inputs + sublayer_outputs)``.

    Args:
      inputs: Tensor fed into the sub-layer (the residual branch); its last
        dimension must equal ``hidden_size``.
      sublayer_outputs: Output of the sub-layer, added element-wise to
        ``inputs`` before normalization.

    Returns:
      The normalized sum, with the shape of ``inputs + sublayer_outputs``.
    """
    return self.layer_norm(inputs + sublayer_outputs)

class SubModule1(nn.Module):
  """First encoder sub-layer: attention wrapped in a residual add-and-norm."""

  def __init__(self):
    super().__init__()
    self.hidden_size = 512
    self.multi_head_attention = MultiHeadAttention()
    self.add_and_norm = AddAndNorm()

  def forward(self, inputs):
    """Run attention on ``inputs`` and add-and-normalize it with ``inputs``."""
    return self.add_and_norm(inputs, self.multi_head_attention(inputs))

class FeedForward(nn.Module):
  """Position-wise feed-forward network: ``Linear -> ReLU -> Linear``.

  Args:
    hidden_size: Size of the input and output feature dimension.
      Defaults to 512.
    inner_size: Width of the hidden layer between the two linear maps.
      Defaults to ``hidden_size`` when omitted.
  """

  def __init__(self, hidden_size: int = 512, inner_size=None):
    super().__init__()
    if inner_size is None:
      inner_size = hidden_size
    self.hidden_size = hidden_size
    self.linear1 = nn.Linear(hidden_size, inner_size)
    self.linear2 = nn.Linear(inner_size, hidden_size)

  def forward(self, inputs: torch.Tensor) -> torch.Tensor:
    """Apply the two linear layers with a ReLU in between.

    Args:
      inputs: Tensor whose last dimension equals ``hidden_size``.

    Returns:
      Tensor of the same shape as ``inputs``.
    """
    hidden = torch.relu(self.linear1(inputs))
    return self.linear2(hidden)


class SubModule2(nn.Module):
  """Second encoder sub-layer: feed-forward wrapped in a residual add-and-norm."""

  def __init__(self):
    super().__init__()
    self.hidden_size = 512
    self.feed_forward = FeedForward()
    self.add_and_norm = AddAndNorm()

  def forward(self, inputs):
    """Run the feed-forward net on ``inputs`` and add-and-normalize with ``inputs``."""
    return self.add_and_norm(inputs, self.feed_forward(inputs))


In [None]:
class Encoder(nn.Module):
  """Single encoder layer: ``SubModule1`` followed by ``SubModule2``."""

  def __init__(self):
    super().__init__()
    self.submodule1 = SubModule1()
    self.submodule2 = SubModule2()

  def forward(self, inputs):
    """Pass ``inputs`` through the first sub-layer, then the second."""
    after_first = self.submodule1(inputs)
    after_second = self.submodule2(after_first)
    return after_second



In [None]:
# Smoke test: feed a random (10, 512) tensor through a freshly built encoder.
a = torch.randn(10,512)
encoder = Encoder()
# Bare expression on the last line so the notebook cell displays the result.
encoder(a)

tensor([[-0.4549,  2.4734,  0.1220,  ...,  0.1129, -0.4797, -0.5451],
        [-1.7322, -0.7907,  0.4058,  ..., -1.0210, -0.5395,  1.2944],
        [ 0.0660,  0.7119, -0.7878,  ..., -0.1319,  0.4728, -0.0316],
        ...,
        [ 0.3984,  1.3432, -1.3832,  ...,  0.4117, -0.9377, -0.1216],
        [-0.0570,  0.4974,  0.6445,  ..., -0.1424, -2.0119,  0.3062],
        [ 0.6127,  0.4205, -0.8019,  ...,  0.2607,  1.3379, -1.5454]],
       grad_fn=<NativeLayerNormBackward0>)