In [5]:
import torch
import torch.nn as nn

In [None]:
class ResidualModel(nn.Module):
    """Small feed-forward network with a concatenation skip connection.

    The pre-activation output of ``input_layer`` is concatenated with its own
    ReLU activation along the feature axis, passed through ``latent_b_layer``
    (with ReLU) and finally through ``output_layer`` followed by a sigmoid.

    Args:
        in_features: Size of the last dimension of the input. Defaults to 4.
        hidden_features: Output width of ``input_layer``. Defaults to 3.
        latent_features: Output width of ``latent_b_layer``. Defaults to 2.
        out_features: Width of the model output. Defaults to 2.
    """

    def __init__(
        self,
        in_features: int = 4,
        hidden_features: int = 3,
        latent_features: int = 2,
        out_features: int = 2,
    ) -> None:
        super().__init__()
        self.input_layer = nn.Linear(in_features, hidden_features)
        # Input is [input_layer output, relu(input_layer output)] -> 2 * hidden_features.
        self.latent_b_layer = nn.Linear(2 * hidden_features, latent_features)
        self.output_layer = nn.Linear(latent_features, out_features)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the forward pass.

        Args:
            x: Tensor of shape ``(..., in_features)``. Any number of leading
                dimensions is accepted, including none (a single sample).

        Returns:
            Tensor of shape ``(..., out_features)`` with values in (0, 1).
        """
        x_input = self.input_layer(x)                            # (..., hidden)
        x_latent_a = self.relu(x_input)                          # (..., hidden)
        # Concatenate on the feature axis (dim=-1) rather than dim=1 so that
        # unbatched and multi-dimensional batches are handled correctly.
        x_latent_b = torch.cat((x_input, x_latent_a), dim=-1)    # (..., 2 * hidden)
        x_latent_b = self.relu(self.latent_b_layer(x_latent_b))  # (..., latent)
        output = self.sigmoid(self.output_layer(x_latent_b))     # (..., out)
        return output

In [None]:
class DenseNet(nn.Module):
    """Small densely connected feed-forward network.

    Each layer receives the concatenation of the previous layer's input-side
    tensor and its output: the second layer sees ``[x, latent_a]`` and the
    output layer sees ``[latent_a, latent_b]``. Hidden layers use ReLU and the
    output uses a sigmoid.

    Args:
        in_features: Size of the last dimension of the input. Defaults to 4.
        latent_a_features: Output width of ``input_to_latent_a``. Defaults to 3.
        latent_b_features: Output width of ``latent_a_to_latent_b``. Defaults to 3.
        out_features: Width of the model output. Defaults to 2.
    """

    def __init__(
        self,
        in_features: int = 4,
        latent_a_features: int = 3,
        latent_b_features: int = 3,
        out_features: int = 2,
    ) -> None:
        super().__init__()
        self.input_to_latent_a = nn.Linear(in_features, latent_a_features)
        # Input is the concatenation of x and latent_a.
        self.latent_a_to_latent_b = nn.Linear(
            in_features + latent_a_features, latent_b_features
        )
        # Input is the concatenation of latent_a and latent_b.
        self.latent_b_to_output = nn.Linear(
            latent_a_features + latent_b_features, out_features
        )

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the forward pass.

        Args:
            x: Tensor of shape ``(..., in_features)``. Any number of leading
                dimensions is accepted, including none (a single sample).

        Returns:
            Tensor of shape ``(..., out_features)`` with values in (0, 1).
        """
        x_latent_a = self.relu(self.input_to_latent_a(x))               # (..., a)
        # dim=-1 concatenates on the feature axis for any input rank.
        x_concat_a = torch.cat((x, x_latent_a), dim=-1)                 # (..., in + a)
        x_latent_b = self.relu(self.latent_a_to_latent_b(x_concat_a))   # (..., b)
        x_concat_b = torch.cat((x_latent_a, x_latent_b), dim=-1)        # (..., a + b)
        output = self.sigmoid(self.latent_b_to_output(x_concat_b))      # (..., out)
        return output

In [None]:
class AttentionModel(nn.Module):
    """Small feed-forward network with softmax self-gating on hidden activations.

    After each hidden ReLU, the activation is multiplied element-wise by the
    softmax of itself taken over the feature axis. The first gated activation
    is concatenated with the pre-activation output of ``input_layer`` before
    ``latent_b_layer``; the second gated activation feeds ``output_layer``,
    followed by a sigmoid.

    Args:
        in_features: Size of the last dimension of the input. Defaults to 4.
        hidden_features: Output width of ``input_layer``. Defaults to 3.
        latent_features: Output width of ``latent_b_layer``. Defaults to 2.
        out_features: Width of the model output. Defaults to 2.
    """

    def __init__(
        self,
        in_features: int = 4,
        hidden_features: int = 3,
        latent_features: int = 2,
        out_features: int = 2,
    ) -> None:
        super().__init__()
        self.input_layer = nn.Linear(in_features, hidden_features)
        # Input is [input_layer output, gated latent_a] -> 2 * hidden_features.
        self.latent_b_layer = nn.Linear(2 * hidden_features, latent_features)
        self.output_layer = nn.Linear(latent_features, out_features)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        # Normalise over the feature axis; dim=-1 equals dim=1 for 2-D input
        # and stays correct for unbatched or higher-rank input.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the forward pass.

        Args:
            x: Tensor of shape ``(..., in_features)``. Any number of leading
                dimensions is accepted, including none (a single sample).

        Returns:
            Tensor of shape ``(..., out_features)`` with values in (0, 1).
        """
        x_input = self.input_layer(x)                        # (..., hidden)
        x_latent_a = self.relu(x_input)                      # (..., hidden)
        attention_weights_a = self.softmax(x_latent_a)       # (..., hidden)
        x_attention_b = attention_weights_a * x_latent_a     # (..., hidden)

        x_latent_b_input = torch.cat((x_input, x_attention_b), dim=-1)  # (..., 2 * hidden)
        x_latent_b = self.relu(self.latent_b_layer(x_latent_b_input))   # (..., latent)
        attention_weights_b = self.softmax(x_latent_b)                  # (..., latent)
        x_attention_output = attention_weights_b * x_latent_b           # (..., latent)

        output = self.sigmoid(self.output_layer(x_attention_output))    # (..., out)
        return output

In [None]:
# Test runs: push one hand-picked 4-feature sample through a freshly
# initialised instance of each model and print the result.
if __name__ == "__main__":
    # The models are randomly initialised; fix the seed so the printed values
    # are reproducible across kernel restarts.
    torch.manual_seed(0)

    # Inference only, so skip building the autograd graph.
    with torch.no_grad():
        for ModelClass, x_input in zip([ResidualModel, DenseNet, AttentionModel],
                                      [[2, 9, 4, 5], [9, 1, 2, 8], [1, 2, 3, 4]]):
            model = ModelClass()
            # torch.tensor with an explicit dtype replaces the legacy
            # torch.Tensor constructor; the extra list level adds the batch axis.
            x_tensor = torch.tensor([x_input], dtype=torch.float32)  # (1, 4)
            output = model(x_tensor)
            print(output)

tensor([[0.3909, 0.4480]])
tensor([[0.0671, 0.0257]])
tensor([[0.3824, 0.5651]])
