In [None]:
import torch
import numpy as np

# Fixed seed so that both the torch and numpy random streams are reproducible.
seed = 11
torch.manual_seed(seed)
np.random.seed(seed)

# Number of random inputs drawn per activation when averaging the gradient.
NUMBER_OF_EXPERIMENTS = 200

class SimpleNet(torch.nn.Module):
    """Three stacked 1->1 linear layers without bias, each followed by `activation`.

    Every layer weight is initialised to 1, so the gradient reaching the
    first layer depends only on the input and on the chosen activation.
    """

    def __init__(self, activation):
        """Store `activation` and create the layers `fc1`, `fc2`, `fc3`."""
        super().__init__()

        self.activation = activation
        for layer_name in ("fc1", "fc2", "fc3"):
            layer = torch.nn.Linear(1, 1, bias=False)  # one neuron without bias
            layer.weight.data.fill_(1.)  # init weight with 1
            setattr(self, layer_name, layer)

    def forward(self, x):
        """Pass `x` through fc1 -> fc2 -> fc3, applying the activation after each."""
        for layer in (self.fc1, self.fc2, self.fc3):
            x = self.activation(layer(x))
        return x

    def get_fc1_grad_abs_value(self):
        """Return the element-wise absolute value of the gradient stored on fc1's weight."""
        first_layer_grad = self.fc1.weight.grad
        return torch.abs(first_layer_grad)

def get_fc1_grad_abs_value(net, x):
    """Return the absolute gradient of the network output w.r.t. fc1's weight.

    Runs one forward/backward pass of `net` on `x`, reads the value from
    `net.get_fc1_grad_abs_value()` as a Python number, and then clears the
    gradients of `net` so that consecutive calls do not accumulate.

    Args:
        net: module exposing `get_fc1_grad_abs_value()`.
        x: input tensor for a single experiment.

    Returns:
        The absolute fc1 weight gradient as a Python scalar.
    """
    # Call the module itself rather than `net.forward(x)`: a direct
    # `forward` call silently skips any hooks registered on the module.
    output = net(x)
    # No loss function: pretend we want to minimise the output itself.
    # The output is a scalar here, so backward() needs no explicit gradient.
    output.backward()
    fc1_grad = net.get_fc1_grad_abs_value().item()
    net.zero_grad()
    return fc1_grad

from torch.nn import *

# Activation classes to compare; each one is instantiated with its defaults.
activs = [
    ELU, Hardtanh, LeakyReLU, LogSigmoid, PReLU, ReLU, ReLU6, RReLU, SELU,
    CELU, Sigmoid, Softplus, Softshrink, Softsign, Tanh, Tanhshrink,
    Hardshrink,
]

for activation_cls in activs:
    # Try different activations to get the biggest gradient,
    # e.g. torch.nn.Tanh()
    activation = activation_cls()

    net = SimpleNet(activation=activation)

    # One fc1 gradient magnitude per randomly drawn input.
    samples = torch.randn((NUMBER_OF_EXPERIMENTS, 1))
    fc1_grads = [get_fc1_grad_abs_value(net, sample) for sample in samples]

    # The check is performed automatically by calling the function below
    # (uncomment it if you are solving the task locally):
    print(activation_cls, np.mean(fc1_grads))

<class 'torch.nn.modules.activation.ELU'> 0.46825177246239036
<class 'torch.nn.modules.activation.Hardtanh'> 0.2953543031180743
<class 'torch.nn.modules.activation.LeakyReLU'> 0.3869488415962411
<class 'torch.nn.modules.activation.LogSigmoid'> 0.2267814244981855
<class 'torch.nn.modules.activation.PReLU'> 0.35629074233864233
<class 'torch.nn.modules.activation.ReLU'> 0.39361816555727275
<class 'torch.nn.modules.activation.ReLU6'> 0.3758565194578841
<class 'torch.nn.modules.activation.RReLU'> 0.3941042095749435
<class 'torch.nn.modules.activation.SELU'> 0.5512814357271418
<class 'torch.nn.modules.activation.CELU'> 0.4496136348252185
<class 'torch.nn.modules.activation.Sigmoid'> 0.00696174914824951
<class 'torch.nn.modules.activation.Softplus'> 0.2403151054587215
<class 'torch.nn.modules.activation.Softshrink'> 0.24748901307582855
<class 'torch.nn.modules.activation.Softsign'> 0.06543945850222371
<class 'torch.nn.modules.activation.Tanh'> 0.16056774482713082
<class 'torch.nn.modules.acti