In [7]:
import torch
import numpy as np
from torch import nn

$$\sigma(x)=\frac{1}{1+\exp(-x)}\\
\sigma'(x) = \frac{1}{1+\exp(-x)}\cdot(\frac{\exp(-x)}{1+\exp(-x)})\\
=\sigma(x)\cdot(1-\sigma(x))
$$

# sigmoid

$$
\begin{equation}
\begin{split}
\sigma(x)=&\frac{1}{1+\exp(-x)}\\
\sigma'(x)=&\frac{1}{1+\exp(-x)}\left(\frac{\exp(-x)}{1+\exp(-x)}\right)\\
=&\sigma(x)(1-\sigma(x))
\end{split}
\end{equation}
$$

其中 `\sigma`、`\exp` 表示符号，`\cdot` 表示点乘

而 `\\` 表示分行

`=&` 表示等号的对齐（`&` 标记对齐位置）

`\left(` 配合 `\right)` 表示自动调整大小的左右括号


In [2]:
x = torch.tensor(1.,requires_grad=True)

In [3]:
y = x.sigmoid()

In [4]:
y

tensor(0.7311, grad_fn=<SigmoidBackward0>)

In [5]:
y.backward()

In [6]:
x.grad # derivative stored on the leaf tensor x by y.backward(), i.e. dy/dx

tensor(0.1966)

In [11]:
x.sigmoid()*(1-x.sigmoid())

tensor(0.1966, grad_fn=<MulBackward0>)

In [10]:
## or
# Module-style API: nn.Sigmoid() builds a callable layer that applies the same
# function as Tensor.sigmoid(). The code is left active so that the cell
# actually produces the output recorded beneath it.
sigmoid = nn.Sigmoid()
sigmoid(x)

tensor(0.7311, grad_fn=<SigmoidBackward0>)

# 自定义函数

$$
y = \sigma^2(x)\\
y' = 2\sigma(x)\cdot(\sigma(x)(1-\sigma(x)))
$$

$$
\begin{equation}
\begin{split}
y =&\sigma^2(x)\\
y' =& 2\sigma(x)\cdot(\sigma(x)(1-\sigma(x)))
\end{split}
\end{equation}
$$

In [13]:
x = torch.tensor(2., requires_grad = True)

In [16]:
y = x.sigmoid()*x.sigmoid()

In [17]:
y

tensor(0.7758, grad_fn=<MulBackward0>)

In [18]:
x.grad

In [19]:
y.backward()

In [20]:
x.grad

tensor(0.1850)

In [21]:
2*x.sigmoid()*(x.sigmoid()*(1-x.sigmoid()))

tensor(0.1850, grad_fn=<MulBackward0>)

# softmax

In [22]:
def softmax(vector):
    """Return the softmax of ``vector``, normalised over all of its elements.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged (the common factor cancels in the ratio) but
    prevents ``np.exp`` from overflowing to ``inf`` on large inputs, which
    would otherwise turn the output into ``nan``.

    Parameters
    ----------
    vector : array_like
        Input scores (logits).

    Returns
    -------
    numpy.ndarray
        Array with the same shape as the input whose entries sum to 1.
        An empty input yields an empty array.
    """
    scores = np.asarray(vector)
    if scores.size == 0:
        # max() is undefined on an empty array; exp of nothing is still nothing.
        return np.exp(scores)
    exps = np.exp(scores - scores.max())
    return exps / exps.sum()

In [23]:
softmax(np.array([1.2,3,-1.9,-0.8]))

array([0.13831198, 0.83673869, 0.00623084, 0.01871849])

In [24]:
x = torch.tensor([1.2,3,-1.9,-0.8])
nn.Softmax(dim=0)(x)

tensor([0.1383, 0.8367, 0.0062, 0.0187])

<img src='https://github.com/hitdoggy/-PyTorch/blob/main/1_1.png?raw=true' width='20%'>

$$
s(z_{i})=\frac{e^{z_{i}}}{\sum^{n}_{j=1}e^{z_{j}}}
$$

<img src='https://github.com/hitdoggy/-PyTorch/blob/main/1_2.png?raw=true' width='40%'>

In [8]:
logits = torch.tensor([1.1,2.2,0.2,-2.0])
logits.softmax(dim=0)

tensor([0.2244, 0.6742, 0.0912, 0.0101])

In [9]:
nn.Softmax(dim=0)(logits)

tensor([0.2244, 0.6742, 0.0912, 0.0101])

In [15]:
z = torch.tensor([1.1,2.2,0.2,-2.0],requires_grad=True)
y = nn.Softmax(dim=0)(z)

In [11]:
# backward() with no arguments can only start from a scalar; y is a vector,
# so this call raises. Catch the error and show its message so the
# demonstration does not stop a full "Run All".
try:
    y.backward()
except RuntimeError as err:
    print(f"RuntimeError: {err}")
# The leaf tensor that produced y is z (not x), so its .grad is the one to inspect.
z.grad

RuntimeError: grad can be implicitly created only for scalar outputs

RuntimeError: grad can be implicitly created only for scalar outputs

In [16]:
# Rebuild the leaf tensor and its softmax output for this example.
z = torch.tensor([1.1,2.2,0.2,-2.0],requires_grad=True)
y = nn.Softmax(dim=0)(z)
# Backpropagate from one scalar component of y; a scalar needs no explicit
# gradient argument.
y[0].backward()
z.grad # derivative of y[0] with respect to each element of z (first row of the Jacobian)

tensor([ 0.1741, -0.1513, -0.0205, -0.0023])

In [19]:
z = torch.tensor([1.1,2.2,0.2,-2.0],requires_grad=True)
y = nn.Softmax(dim=0)(z)

In [24]:
list(y)

[tensor(0.2244, grad_fn=<UnbindBackward0>),
 tensor(0.6742, grad_fn=<UnbindBackward0>),
 tensor(0.0912, grad_fn=<UnbindBackward0>),
 tensor(0.0101, grad_fn=<UnbindBackward0>)]

### 自动求雅可比矩阵
`torch.autograd.functional.jacobian(func, inputs, create_graph=False, strict=False, vectorize=False)`

In [44]:
from torch.autograd.functional import jacobian
def func(z1,z2,z3,z4):
    """Softmax of four scalar logits, written out element by element.

    Each argument is exponentiated, the list of exponentials is printed,
    and every exponential is divided by their sum. The four normalised
    values are returned as a tuple, in argument order.
    """
    exps = [torch.exp(logit) for logit in (z1, z2, z3, z4)]
    print(exps)
    total = sum(exps)
    return tuple(term / total for term in exps)
z1, z2, z3, z4 = [torch.tensor(1.1),torch.tensor(2.2),torch.tensor(0.2),torch.tensor(-2.)]
jacobian(func,(z1, z2, z3, z4 ))

[tensor(3.0042, grad_fn=<ExpBackward0>), tensor(9.0250, grad_fn=<ExpBackward0>), tensor(1.2214, grad_fn=<ExpBackward0>), tensor(0.1353, grad_fn=<ExpBackward0>)]


((tensor(0.1741), tensor(-0.1513), tensor(-0.0205), tensor(-0.0023)),
 (tensor(-0.1513), tensor(0.2196), tensor(-0.0615), tensor(-0.0068)),
 (tensor(-0.0205), tensor(-0.0615), tensor(0.0829), tensor(-0.0009)),
 (tensor(-0.0023), tensor(-0.0068), tensor(-0.0009), tensor(0.0100)))