In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class Net(nn.Module):
    
    def __init__(self):
        
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    
    def forward(self, x):
        
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        
        x = x.view((-1, self.num_flat_features(x)))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
        
        
    def num_flat_features(self, x):
        
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [3]:
net = Net()

In [4]:
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [5]:
net.parameters()

<generator object Module.parameters at 0x7f972081dcd0>

In [7]:
params = list(net.parameters())

In [8]:
for p in params:
    print(p.size())

torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([16, 6, 5, 5])
torch.Size([16])
torch.Size([120, 400])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [9]:
x = torch.rand((2, 3))

In [11]:
x.fill_(1)

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [12]:
x

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [15]:
len(params)

10

In [16]:
inputs = torch.randn(size=(1, 1, 32, 32))

In [17]:
inputs

tensor([[[[-1.0251,  0.6438,  0.3357,  ...,  1.5866,  0.8936,  2.9557],
          [-0.0656,  1.0292, -0.6455,  ...,  1.6511,  1.5606, -1.3723],
          [ 2.7659,  1.1102,  0.8882,  ..., -0.9488,  0.2975,  0.3133],
          ...,
          [ 1.1361, -0.9634,  0.8121,  ..., -0.1257,  0.0504,  0.8545],
          [ 2.0184, -0.5523,  0.9145,  ...,  0.0813, -1.8437,  0.5511],
          [ 1.2408,  0.0611,  1.6646,  ..., -1.6698, -0.0533,  0.5696]]]])

In [18]:
out = net(inputs)

In [19]:
out

tensor([[ 0.0276, -0.0534, -0.0149, -0.1558, -0.1555, -0.0222, -0.0537, -0.1661,
          0.1940, -0.0087]], grad_fn=<AddmmBackward>)

In [22]:
net.zero_grad()

In [23]:
out

tensor([[ 0.0276, -0.0534, -0.0149, -0.1558, -0.1555, -0.0222, -0.0537, -0.1661,
          0.1940, -0.0087]], grad_fn=<AddmmBackward>)

In [25]:
out.size()

torch.Size([1, 10])

In [26]:
out.backward(torch.randn(size=(1, 10)))

In [27]:
test_tensor = torch.randn(size=(3, 4))

In [31]:
for item in test_tensor:
    print(item)
    for sub_item in item:
        print(sub_item, end=", ")
    print()

tensor([ 1.4815,  1.1542, -1.4461, -2.4227])
tensor(1.4815), tensor(1.1542), tensor(-1.4461), tensor(-2.4227), 
tensor([ 0.1591, -0.9388, -0.9478, -1.3850])
tensor(0.1591), tensor(-0.9388), tensor(-0.9478), tensor(-1.3850), 
tensor([-1.2354,  1.7608,  0.3578,  1.4433])
tensor(-1.2354), tensor(1.7608), tensor(0.3578), tensor(1.4433), 


In [29]:
test_tensor

tensor([[ 1.4815,  1.1542, -1.4461, -2.4227],
        [ 0.1591, -0.9388, -0.9478, -1.3850],
        [-1.2354,  1.7608,  0.3578,  1.4433]])

In [36]:
torch.randn(size=(1, )).item()

-3.3876800537109375

In [39]:
m = nn.Softmax(dim=1)

In [40]:
m(test_tensor)

tensor([[0.5572, 0.4017, 0.0298, 0.0112],
        [0.5326, 0.1776, 0.1761, 0.1137],
        [0.0247, 0.4941, 0.1215, 0.3597]])

In [41]:
target = torch.randn(size=(10, ))

In [42]:
target

tensor([ 0.8089, -1.3845,  0.5914,  0.7260,  1.0576, -0.9730, -0.7519,  0.3509,
        -1.1140, -0.5048])

In [44]:
target = target.view(size=(1, -1))

In [45]:
target

tensor([[ 0.8089, -1.3845,  0.5914,  0.7260,  1.0576, -0.9730, -0.7519,  0.3509,
         -1.1140, -0.5048]])

In [46]:
criterion = nn.MSELoss()

In [47]:
out.size()

torch.Size([1, 10])

In [48]:
loss = criterion(out, target)

In [49]:
loss

tensor(0.8615, grad_fn=<MseLossBackward>)

In [50]:
loss.grad_fn

<MseLossBackward at 0x7f9720326710>

input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
      -> view -> linear -> relu -> linear -> relu -> linear
      -> MSELoss
      -> loss

In [53]:
loss.grad_fn.next_functions[0][0]

<AddmmBackward at 0x7f971f74f190>

In [54]:
loss.grad_fn.next_functions[0][0].next_functions

((<AccumulateGrad at 0x7f971fdaa610>, 0),
 (<ReluBackward0 at 0x7f971fd60a50>, 0),
 (<TBackward at 0x7f97203373d0>, 0))

In [55]:
net.conv1.bias.grad

tensor([ 0.0506,  0.0419, -0.1564, -0.0033, -0.0191, -0.1177])

In [56]:
net.zero_grad()

In [57]:
net.conv1.bias.grad

tensor([0., 0., 0., 0., 0., 0.])

In [58]:
loss.backward()

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

In [59]:
out = net(inputs)

RuntimeError: size mismatch, m1: [1 x 256], m2: [400 x 120] at /opt/conda/conda-bld/pytorch_1587428190859/work/aten/src/TH/generic/THTensorMath.cpp:41

In [60]:
inputs = torch.randn(size=(1, 1, 32, 32))
out = net(inputs)

In [62]:
loss = criterion(out, target)

In [63]:
net.zero_grad()

In [64]:
loss.backward()

In [65]:
net.conv1.bias.grad

tensor([ 0.0153,  0.0024,  0.0003,  0.0116, -0.0023,  0.0052])

In [68]:
for f in net.parameters():
    print(type(f))

<class 'torch.nn.parameter.Parameter'>
<class 'torch.nn.parameter.Parameter'>
<class 'torch.nn.parameter.Parameter'>
<class 'torch.nn.parameter.Parameter'>
<class 'torch.nn.parameter.Parameter'>
<class 'torch.nn.parameter.Parameter'>
<class 'torch.nn.parameter.Parameter'>
<class 'torch.nn.parameter.Parameter'>
<class 'torch.nn.parameter.Parameter'>
<class 'torch.nn.parameter.Parameter'>


In [69]:
f.data

tensor([ 0.0607, -0.0481,  0.0490, -0.0947, -0.1073, -0.0245, -0.0413, -0.0981,
         0.0986, -0.0475])

In [70]:
type(f.data)

torch.Tensor

In [71]:
f.data.grad

In [72]:
f.grad.data

tensor([-0.1556,  0.2582, -0.1268, -0.1713, -0.2377,  0.1943,  0.1368, -0.1040,
         0.2622,  0.0972])

In [74]:
print(type(net.parameters()))

<class 'generator'>


In [75]:
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

In [76]:
import torch.optim as optim

In [78]:
optimizer = optim.SGD(net.parameters(), lr=0.01)

In [79]:
optimizer.zero_grad()

In [80]:
net.conv1.bias.grad

tensor([0., 0., 0., 0., 0., 0.])

In [81]:
output = net(inputs)

In [82]:
loss = criterion(output, target)

In [83]:
loss.backward()

In [84]:
optimizer.step()