Adding position-wide feed forward layer.
gugarosa committed Jun 29, 2020
1 parent 1fe1bf3 commit 95cf660
Showing 2 changed files with 55 additions and 0 deletions.
2 changes: 2 additions & 0 deletions textformer/models/layers/__init__.py
@@ -2,4 +2,6 @@
"""

from textformer.models.layers.attention import Attention
from textformer.models.layers.multi_head_attention import MultiHeadAttention
from textformer.models.layers.position_wide_forward import PositionWideForward
from textformer.models.layers.residual_attention import ResidualAttention
53 changes: 53 additions & 0 deletions textformer/models/layers/position_wide_forward.py
@@ -0,0 +1,53 @@
import torch.nn as nn
import torch.nn.functional as F


class PositionWideForward(nn.Module):
"""A PositionWideForward class is used to provide a position-wise feed forward layer for a neural network.
References:
A. Vaswani, et al. Attention is all you need. Advances in neural information processing systems (2017).
"""

def __init__(self, n_hidden, n_forward, dropout):
"""Initialization method.
Args:
n_hidden (int): Number of hidden units.
n_forward (int): Number of forward units.
dropout (float): Dropout probability.
"""

# Overriding its parent class
super(PositionWideForward, self).__init__()

# Defining the linear (feed forward) layers
self.fc1 = nn.Linear(n_hidden, n_forward)
self.fc2 = nn.Linear(n_forward, n_hidden)

# Defining the dropout layer
self.drop = nn.Dropout(dropout)

def forward(self, x):
"""Performs a forward pass over the layer.
Args:
x (torch.Tensor): Tensor containing the input states.
Returns:
The feed forward activations.
"""

# Performs the pass over the first linear layer and activates it with ReLU
x = F.relu(self.fc1(x))

# Passes the activations through the dropout layer
x = self.drop(x)

# Passes the result through the second linear layer
x = self.fc2(x)

return x
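
A minimal usage sketch for the new layer (not part of the commit): it assumes the import path exposed by the updated __init__.py above, and the batch size, sequence length and unit counts below are illustrative placeholders.

import torch

from textformer.models.layers import PositionWideForward

# Illustrative hyperparameters: hidden units, feed-forward units and dropout probability
pwff = PositionWideForward(n_hidden=512, n_forward=2048, dropout=0.1)

# Dummy batch of hidden states shaped (batch_size, sequence_length, n_hidden)
x = torch.rand(32, 100, 512)

# The output keeps the input shape, here (32, 100, 512)
y = pwff(x)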
