In [44]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

In [45]:
class Net(nn.Module):
    """LeNet-style convolutional network.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three fully connected layers that emit 10 scores per sample. With a
    1-channel 32x32 input the second pooling stage leaves 16 maps of 5x5,
    which is the input width fc1 is built for.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Convolutions with a 5x5 square kernel:
        # 1 input image channel -> 6 feature maps -> 16 feature maps.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Affine layers (y = Wx + b): 16 * 5 * 5 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on a batch `x` and return the fc3 outputs."""
        # Stage 1: conv -> ReLU -> max pooling over a (2, 2) window.
        pooled1 = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # Stage 2: same pattern; for a square window a single number
        # is enough to specify the size.
        pooled2 = F.max_pool2d(F.relu(self.conv2(pooled1)), 2)
        # Collapse every non-batch dimension into one feature axis.
        flat = pooled2.view(-1, self.num_flat_features(pooled2))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of `x` except the first (batch) one."""
        total = 1
        # Axis 0 is the batch dimension, so start counting at axis 1.
        for axis in range(1, x.dim()):
            total *= x.size(axis)
        return total


# Instantiate the network and print its module summary (one line per
# registered layer, as shown in the cell output).
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [88]:
# A single random 1-channel 32x32 image (batch of 1). 32x32 is the spatial
# size that leaves 16 * 5 * 5 features after the two conv(5x5) + 2x2-pool
# stages, matching fc1's input width.
# NOTE(review): `input` shadows the Python builtin of the same name; it is
# kept because a later cell reuses this variable.
input = Variable(torch.randn(1, 1, 32, 32))
# Forward pass; the recorded output is a 1x10 row of scores.
out = net(input)
print(out)

Variable containing:
 0.0599 -0.0662 -0.1226 -0.0449 -0.0744 -0.0416 -0.0804 -0.0735  0.0353  0.1102
[torch.FloatTensor of size 1x10]



In [89]:
# Forward pass on the random image created in the previous cell.
output = net(input)
# Dummy regression target 1..10, for example. It is cast to float and reshaped
# to (1, 10) so that its dtype and shape match `output` exactly: on newer
# PyTorch releases torch.arange with integer arguments yields an integer
# tensor, and a 1-D target would only line up with the 1x10 output through
# broadcasting. The values, and therefore the loss, are unchanged.
target = Variable(torch.arange(1, 11).float().view(1, -1))
criterion = nn.MSELoss()  # mean of the element-wise squared differences

loss = criterion(output, target)
print(loss)

Variable containing:
 38.7005
[torch.FloatTensor of size 1]



In [90]:
net.zero_grad()     # reset the gradient buffers of all parameters before backprop

# Nothing has been backpropagated yet; the recorded output shows None here.
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

# Backpropagate the loss; this is what populates .grad on the parameters.
loss.backward()

# conv1 has 6 output channels, so its bias gradient has 6 entries.
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
None
conv1.bias.grad after backward
Variable containing:
1.00000e-02 *
  5.6923
  0.5129
  5.9297
 -4.0638
 -8.1425
 -4.2958
[torch.FloatTensor of size 6]



In [95]:
# Collect the network's learnable parameters (the layers' weights and biases)
# into a list so they can be indexed and inspected.
paras = list(net.parameters())

In [108]:
# Scratch 2x3 Variable filled with standard-normal samples.
x = Variable(torch.randn(2,3))

In [110]:
# Bare expression: the notebook echoes the current value of x.
x

Variable containing:
-1.0269 -2.4009 -1.0505
-1.0976 -0.7662 -0.1767
[torch.FloatTensor of size 2x3]

In [111]:
# In-place subtraction of 1 on the tensor underlying x, reached through .data
# (a direct edit of the stored values). Not idempotent: every re-run of this
# cell lowers x by 1 again.
x.data.sub_(1)


-2.0269 -3.4009 -2.0505
-2.0976 -1.7662 -1.1767
[torch.FloatTensor of size 2x3]

$e^{i\pi} + 1 = 0$

$$e^x=\sum_{i=0}^\infty \frac{1}{i!}x^i$$

### Make sure the Variable is a leaf: by default only leaf Variables keep `.grad` after `backward()`
### Python `super()` explained: https://www.jianshu.com/p/5120440881fd
### autograd
### Hooks for inspecting gradients of intermediate variables: https://discuss.pytorch.org/t/why-cant-i-see-grad-of-an-intermediate-variable/94