In [17]:
import torch
import numpy as np

# Number of random inputs pushed through each network.
NUMBER_OF_EXPERIMENTS = 200

# The seed is read from stdin, so an integer must be entered when the cell runs.
# The same value seeds both NumPy and PyTorch.
seed = int(input())
np.random.seed(seed)
torch.manual_seed(seed)

class SimpleNet(torch.nn.Module):
    """Three stacked 1-in/1-out bias-free linear layers, each followed by the same activation.

    Every weight is initialised to 1.0, so the forward pass is just the
    activation applied three times to the input.
    """

    def __init__(self, activation):
        """Build the network.

        Args:
            activation: callable applied to the output of each linear layer
                (e.g. an instance of a torch.nn activation module).
        """
        super().__init__()

        self.activation = activation
        self.fc1 = self._make_unit_linear()
        self.fc2 = self._make_unit_linear()
        self.fc3 = self._make_unit_linear()

    @staticmethod
    def _make_unit_linear():
        """Return a bias-free 1->1 linear layer whose single weight equals 1.0."""
        layer = torch.nn.Linear(1, 1, bias=False)
        # torch.nn.init fills the parameter under no_grad; this replaces the
        # legacy `.data.fill_` access, which sidesteps autograd's bookkeeping.
        torch.nn.init.constant_(layer.weight, 1.)
        return layer

    def forward(self, x):
        """Apply linear layer + activation three times and return the result."""
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        return x

    def get_fc1_grad_abs_value(self):
        """Return the element-wise absolute value of the gradient stored on fc1's weight.

        A backward pass must have populated `fc1.weight.grad` beforehand;
        while it is still None, `torch.abs` raises.
        """
        return torch.abs(self.fc1.weight.grad)

def get_fc1_grad_abs_value(net, x):
    """Run one forward/backward pass on `x` and return |gradient| of fc1's weight.

    Args:
        net: module exposing `get_fc1_grad_abs_value()` and `zero_grad()`.
            Calling it on `x` must yield a single-element tensor, because
            `backward()` is invoked without an explicit gradient argument.
        x: input tensor passed to `net`.

    Returns:
        float: absolute value of the fc1 weight gradient for this input alone.
    """
    # Autograd accumulates into `.grad`; clear anything left by earlier
    # backward passes so it is not added to this measurement.
    net.zero_grad()
    # Call the module itself rather than `.forward` so registered hooks run.
    output = net(x)
    output.backward()
    fc1_grad = net.get_fc1_grad_abs_value().item()
    # Leave the network without residual gradients for the next call.
    net.zero_grad()
    return fc1_grad

# Activation classes from torch.nn to compare, each built with its default arguments.
# The dictionary keeps this order, which is also the order of the printed report.
activations = {
    cls_name: getattr(torch.nn, cls_name)()
    for cls_name in (
        'ELU', 'Hardtanh', 'LeakyReLU', 'LogSigmoid', 'PReLU', 'ReLU', 'ReLU6',
        'RReLU', 'SELU', 'CELU', 'Sigmoid', 'Softplus', 'Softshrink', 'Softsign',
        'Tanh', 'Tanhshrink', 'Hardshrink',
    )
}

# Walk over (name, activation object) pairs.
for name, activation in activations.items():
    # Build a fresh network for every activation.
    net = SimpleNet(activation=activation)

    # One batch of standard-normal inputs; each row is fed through the network on its own.
    samples = torch.randn((NUMBER_OF_EXPERIMENTS, 1))
    fc1_grads = [get_fc1_grad_abs_value(net, sample) for sample in samples]
    mean_abs_grad = np.mean(fc1_grads)

    # Report the mean absolute fc1 gradient for this activation.
    print(f"Activation: {name}, Mean abs grad: {mean_abs_grad}")

Activation: ELU, Mean abs grad: 0.4905975059140474
Activation: Hardtanh, Mean abs grad: 0.3392139330675127
Activation: LeakyReLU, Mean abs grad: 0.33811296799097496
Activation: LogSigmoid, Mean abs grad: 0.23797560934035572
Activation: PReLU, Mean abs grad: 0.3511333481586917
Activation: ReLU, Mean abs grad: 0.3519847784793819
Activation: ReLU6, Mean abs grad: 0.42508748886015385
Activation: RReLU, Mean abs grad: 0.387764573182867
Activation: SELU, Mean abs grad: 0.5944356032274664
Activation: CELU, Mean abs grad: 0.48986828326713294
Activation: Sigmoid, Mean abs grad: 0.0071115260634798
Activation: Softplus, Mean abs grad: 0.33270451071934076
Activation: Softshrink, Mean abs grad: 0.24824757158756255
Activation: Softsign, Mean abs grad: 0.06396905900910496
Activation: Tanh, Mean abs grad: 0.17454524965025484
Activation: Tanhshrink, Mean abs grad: 0.027310636688192523
Activation: Hardshrink, Mean abs grad: 0.6592802187800407


In [5]:
import torch
import numpy as np

# Number of random inputs pushed through the network.
NUMBER_OF_EXPERIMENTS = 200

# The seed is read from stdin, so an integer must be entered when the cell runs.
# The same value seeds both NumPy and PyTorch.
seed = int(input())
np.random.seed(seed)
torch.manual_seed(seed)

class SimpleNet(torch.nn.Module):
    """Three stacked single-neuron linear layers (no bias), each followed by the same activation.

    All three weights start at 1.0.
    """

    def __init__(self, activation):
        """Build the network.

        Args:
            activation: callable applied to the output of each linear layer
                (e.g. an instance of a torch.nn activation module).
        """
        super().__init__()

        self.activation = activation
        self.fc1 = self._make_unit_linear()
        self.fc2 = self._make_unit_linear()
        self.fc3 = self._make_unit_linear()

    @staticmethod
    def _make_unit_linear():
        """Return one neuron without bias whose weight is initialised to 1.0."""
        layer = torch.nn.Linear(1, 1, bias=False)
        # Use torch.nn.init (which runs under no_grad) instead of writing
        # through the legacy `.data` attribute.
        torch.nn.init.constant_(layer.weight, 1.)
        return layer

    def forward(self, x):
        """Apply linear layer + activation three times and return the result."""
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        return x

    def get_fc1_grad(self):
        """Return the signed gradient stored on fc1's weight (None until a backward pass fills it)."""
        return self.fc1.weight.grad

# Tanh is the activation examined in this cell.
activation = torch.nn.Tanh()
net = SimpleNet(activation=activation)

# Running total of the signed fc1 gradients.
fc1_grads_sum = 0.0
samples = torch.randn((NUMBER_OF_EXPERIMENTS, 1))
for sample in samples:
    net(sample).backward()
    # Accumulate the raw gradient, not its absolute value.
    fc1_grads_sum += net.get_fc1_grad().item()
    # Reset gradients so the next backward pass starts from scratch.
    net.zero_grad()

# Mean of the signed gradients over all experiments.
mean_fc1_grad = fc1_grads_sum / NUMBER_OF_EXPERIMENTS
print(mean_fc1_grad)
# Note: the absolute value of this mean would be a different quantity; it is deliberately not printed.

-0.010186273460276426
