In [1]:
import torch
import torch.nn as nn

class AdaptiveLayerNorm(nn.Module):
    """Layer normalization whose scale and shift are predicted from a condition.

    The input is normalized over its last dimension and then modulated as
    ``gamma(condition) * normalized_x + beta(condition)``. Unlike a plain layer
    norm, gamma and beta are not fixed learned vectors: they are produced
    dynamically by two linear projections of the condition tensor.

    Args:
        num_features: Input and output width of the two linear projections,
            i.e. the expected size of the last dimension of ``condition`` (and
            of ``x``, so the element-wise modulation lines up).
        esp: Small constant added to the variance so the denominator can never
            be zero. (The spelling ``esp`` is kept for backward compatibility.)
    """

    def __init__(self, num_features, esp=1e-5):
        super().__init__()
        # Guards against division by zero when the variance is 0.
        self.esp = esp
        # Gamma (scale) and beta (shift) are generated from the condition by
        # linear layers instead of being stored as fixed parameters.
        self.gamma = nn.Linear(num_features, num_features)
        self.beta = nn.Linear(num_features, num_features)

    def forward(self, x, condition):
        """Normalize ``x`` over its last dimension and modulate it by ``condition``.

        Args:
            x: Tensor to normalize.
            condition: Tensor fed to the gamma/beta projections; its projected
                result must broadcast against ``x``.

        Returns:
            ``gamma(condition) * normalized_x + beta(condition)``.
        """
        mean = x.mean(dim=-1, keepdim=True)
        # Layer norm uses the biased (population) variance. torch.var defaults
        # to the unbiased estimator, which rescales the result and yields NaN
        # when the last dimension has a single element.
        var = x.var(dim=-1, unbiased=False, keepdim=True)
        normal_x = (x - mean) / torch.sqrt(var + self.esp)
        gamma = self.gamma(condition)
        beta = self.beta(condition)
        return gamma * normal_x + beta

# Demo: run the module on a random batch of 2 samples with 10 features each.
# Seed the global RNG first so the random inputs and the randomly initialised
# linear layers (and therefore the displayed output) are reproducible on every
# Restart & Run All.
torch.manual_seed(0)
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# so that any later cell referring to it keeps working.
input = torch.randn(2, 10)
condition = torch.randn(2, 10)
adaptive_layer_norm = AdaptiveLayerNorm(10)
output = adaptive_layer_norm(input, condition)
# Sanity check: modulation must not change the shape of the input.
assert output.shape == input.shape
output

tensor([[-0.6471,  0.1634,  0.9821, -0.2451,  0.3260,  0.2721,  0.3165,  0.3182,
          0.3237, -0.2489],
        [ 1.2208,  1.8165, -1.1213, -1.2845, -1.9062, -2.3097, -2.2504,  0.1096,
          0.1361, -1.6753]], grad_fn=<AddBackward0>)