In [1]:
import torch
import torch.nn as nn
from torch.nn import functional as F
from d2l import torch as d2l

In [2]:
# Sooner or later you will need layer that does not exist yet in DL framework, and you need to  build a custom layer

## Layers without parameters
class CenteredLayer(nn.Module):
    def __init__(self):
        super().__init__()
    
    def forward(self, x):
        return x - x.mean()  # Subtracts the mean from its input

In [4]:
layer = CenteredLayer()
layer(torch.tensor([1.0, 2, 3, 4, 5]))

tensor([-2., -1.,  0.,  1.,  2.])

In [5]:
# Using our custom layer 

net = nn.Sequential(nn.LazyLinear(128), CenteredLayer())

y = net(torch.rand(4, 8))
y.mean()



tensor(-3.7253e-09, grad_fn=<MeanBackward0>)

In [7]:
## Layers with Parameters

class MyLinear(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(num_inputs, num_outputs))
        self.bias = nn.Parameter(torch.randn(num_outputs,))

    def forward(self, x):
        linear = torch.matmul(x, self.weight.data) + self.bias.data
        return F.relu(linear)
    
# instantiate the class and accessing the parameters
linear = MyLinear(5, 3)
linear.weight

Parameter containing:
tensor([[-0.7140, -2.0931, -0.7308],
        [ 0.0928, -0.5587, -0.9895],
        [-1.6573,  0.1785,  0.7853],
        [-0.0580,  0.0449, -0.8161],
        [-0.9614,  0.6807, -0.9665]], requires_grad=True)

In [8]:
# construct models using custom layers

net = nn.Sequential(MyLinear(64, 8), MyLinear(8, 1))
net(torch.rand(2, 64))

tensor([[0.],
        [0.]])

In [12]:
## Design a layer that takes an input and computes a tensor reduction
# y_k = \sum_{i, j} W_{ijk} x_i x_j

class TensorReduction(nn.Module):
    def __init__(self, num_in, num_out):
        super().__init__()
        self.weights = nn.Parameter(torch.rand(num_out, num_in, num_in))
        
    def forward(self, x):
        assert x.dim() == 2, "Input tensor should have shape(batch_size, input_size)"
        x_expanded = x.unsqueeze(1).unsqueeze(2) # expanding dimensions for broadcasting
        y = torch.sum(self.weights * x_expanded * x_expanded.permute(0, 2, 1), dim=[2, 2]) # Tensor reduction
        return y

in_size = 3
out_size = 2
batch_size = 8
net = TensorReduction(in_size, out_size)

x = torch.rand((batch_size, in_size))
print("input tensor:", x)

y = layer(x)  # Forward Pass
print("Output tensor:", y)

input tensor: tensor([[0.7086, 0.0318, 0.3474],
        [0.0707, 0.4000, 0.1186],
        [0.4314, 0.6837, 0.6274],
        [0.9733, 0.4104, 0.5377],
        [0.3204, 0.1288, 0.1624],
        [0.8017, 0.5001, 0.9650],
        [0.1058, 0.0189, 0.9028],
        [0.8297, 0.2473, 0.9562]])
Output tensor: tensor([[ 0.2386, -0.4382, -0.1226],
        [-0.3993, -0.0700, -0.3514],
        [-0.0386,  0.2137,  0.1574],
        [ 0.5033, -0.0596,  0.0677],
        [-0.1496, -0.3412, -0.3076],
        [ 0.3317,  0.0301,  0.4950],
        [-0.3642, -0.4511,  0.4328],
        [ 0.3597, -0.2227,  0.4862]])
