In [10]:
import numpy as np
import torch
from torch import nn

### 1. sigmoid

$$
\begin{aligned}
\sigma(x) &= \frac{1}{1+\exp(-x)} \\
\sigma'(x) &= \frac{\exp(-x)}{\left(1+\exp(-x)\right)^2} \\
&= \frac{1}{1+\exp(-x)} \cdot \frac{\exp(-x)}{1+\exp(-x)} \\
&= \sigma(x) \cdot (1-\sigma(x))
\end{aligned}
$$

In [11]:
# Scalar leaf tensor at x = 1; requires_grad=True asks autograd to track operations on it.
x = torch.tensor(1.0, requires_grad=True)

In [12]:
# Echo the tensor to confirm that requires_grad is set.
x

tensor(1., requires_grad=True)

In [13]:
# Apply the logistic function via the functional API, then echo y to show its grad_fn.
y = torch.sigmoid(x)
y

tensor(0.7311, grad_fn=<SigmoidBackward0>)

In [14]:
# Cross-check sigmoid(x) against the closed form 1 / (1 + e^(-x)) using NumPy.
# The input is read from x (as a Python float) instead of being retyped as a
# literal, so this check cannot drift from the tensor it is meant to verify.
1 / (1 + np.exp(-x.item()))

0.7310585786300049

In [15]:
# Differentiate: backpropagate from y so that autograd stores dy/dx in x.grad.
y.backward()

In [16]:
# Gradient dy/dx computed by autograd at x = 1.
x.grad

tensor(0.1966)

In [17]:
# Analytic derivative sigma(x) * (1 - sigma(x)); it should agree with x.grad from autograd.
torch.sigmoid(x) * (1 - torch.sigmoid(x))

tensor(0.1966, grad_fn=<MulBackward0>)

### 1.1 多元形式

$$
\begin{aligned}
&y: \mathbb{R}^n \to \mathbb{R} \\
&y = \sum_{i} \sigma(x_i) = \sigma(x_1) + \sigma(x_2) + \sigma(x_3) + \cdots \\
&\frac{\partial y}{\partial x_1} = \frac{\partial y}{\partial \sigma(x_1)} \cdot \frac{\partial \sigma(x_1)}{\partial x_1} = 1 \cdot \sigma'(x_1) = \sigma(x_1) \cdot (1 - \sigma(x_1)) \\
&\frac{\partial y}{\partial x_2} = \frac{\partial y}{\partial \sigma(x_2)} \cdot \frac{\partial \sigma(x_2)}{\partial x_2} = 1 \cdot \sigma'(x_2) = \sigma(x_2) \cdot (1 - \sigma(x_2))
\end{aligned}
$$

In [18]:
# Three-element leaf tensor for the multivariate case; echoed to show requires_grad.
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
x

tensor([1., 2., 3.], requires_grad=True)

In [19]:
# Sum the element-wise sigmoids into a single scalar y, then echo it.
y = torch.sigmoid(x).sum()
y

tensor(2.5644, grad_fn=<SumBackward0>)

In [20]:
# Element-wise sigmoid values; their sum is the scalar y computed above.
x.sigmoid()

tensor([0.7311, 0.8808, 0.9526], grad_fn=<SigmoidBackward0>)

In [21]:
# Manual check: add the three sigmoid values as displayed above.
# These inputs are rounded to 4 decimals, so the total can differ from y in the last digit.
0.7311 + 0.8808 + 0.9526

2.5645

In [23]:
# No backward pass has run for this x yet, so .grad is still None (the cell shows no output).
x.grad

In [24]:
# Backpropagate from the scalar y; this fills x.grad with one partial derivative per element.
y.backward()

In [25]:
# Autograd gradient: one entry dy/dx_i for each element of x.
x.grad

tensor([0.1966, 0.1050, 0.0452])

In [26]:
# Element-wise analytic derivative sigma(x_i) * (1 - sigma(x_i)); compare with x.grad.
torch.sigmoid(x) * (1 - torch.sigmoid(x))

tensor([0.1966, 0.1050, 0.0452], grad_fn=<MulBackward0>)

### 1.2 自定义函数

$$
\begin{aligned}
y &= \sigma^2(x) \\
y' &= 2\sigma(x) \cdot \sigma'(x) = 2\sigma(x) \cdot \sigma(x)\,(1 - \sigma(x))
\end{aligned}
$$

In [27]:
# Fresh scalar leaf tensor at x = 2 for the squared-sigmoid example.
x = torch.tensor(2.0, requires_grad=True)

In [28]:
# y = sigma(x)^2, written as an explicit product of two sigmoid evaluations.
y = torch.sigmoid(x) * torch.sigmoid(x)

In [29]:
# Show the value of y = sigma(x)^2 together with its grad_fn.
y

tensor(0.7758, grad_fn=<MulBackward0>)

In [30]:
# backward() has not been called for this x yet, so .grad is None (the cell shows no output).
x.grad

In [31]:
# Backpropagate from y so that autograd stores dy/dx in x.grad.
y.backward()

In [32]:
# Autograd result for the derivative of sigma(x)^2 at x = 2.
x.grad

tensor(0.1850)

In [35]:
# Analytic chain-rule result 2 * sigma(x) * sigma'(x), with sigma'(x) = sigma(x) * (1 - sigma(x));
# compare with x.grad.
2 * torch.sigmoid(x) * (torch.sigmoid(x) * (1 - torch.sigmoid(x)))

tensor(0.1850, grad_fn=<MulBackward0>)