In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class NNet(nn.Module):
    
    def __init__(self):
        super(NNet, self).__init__()
        
        self.c1 = nn.Conv2d(1, 6, kernel_size=(5, 5))
        self.c2 = nn.Conv2d(6, 16, kernel_size=(5, 5))
        
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
    
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.c1(x)), kernel_size=(2, 2))
        x = F.max_pool2d(F.relu(self.c2(x)), kernel_size=(2, 2))
        
        x = x.view(-1, self.num_flat_features(x))
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
        
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


In [3]:
net = NNet()
print(net)

NNet(
  (c1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (c2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [4]:
net.parameters

<bound method Module.parameters of NNet(
  (c1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (c2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)>

In [5]:
input = torch.randn(1, 1, 32, 32)
label = torch.from_numpy(np.array([1.] + 9 * [0.])).view(1, -1)

out = net.forward(input)

In [6]:
out

tensor([[ 0.0466, -0.0656,  0.0167,  0.0031, -0.0467,  0.0159, -0.1037,  0.0023,
          0.0070, -0.0351]], grad_fn=<ThAddmmBackward>)

In [7]:
net.zero_grad()
out.backward(torch.randn(1, 10))

In [8]:
list(net.parameters())[0].grad

tensor([[[[-0.0971, -0.0092, -0.0454, -0.1085, -0.1635],
          [-0.0114, -0.0138, -0.0542, -0.0550, -0.0512],
          [ 0.0150, -0.0768,  0.0836,  0.0355, -0.0902],
          [ 0.0346, -0.0242, -0.0042,  0.0690,  0.1927],
          [ 0.0167,  0.0082,  0.0442,  0.0235,  0.1626]]],


        [[[-0.0148,  0.0803, -0.0722,  0.0754, -0.0263],
          [ 0.0490,  0.1051,  0.0579, -0.0146, -0.0240],
          [ 0.1057,  0.1505, -0.0623, -0.0548,  0.0359],
          [ 0.0485, -0.0473, -0.0758, -0.0769, -0.0777],
          [ 0.1022,  0.1315,  0.1245,  0.0029,  0.0206]]],


        [[[-0.0306, -0.0603, -0.0338,  0.0166, -0.0475],
          [ 0.0497, -0.0710, -0.0209,  0.0493, -0.0171],
          [ 0.0329,  0.1168,  0.0336, -0.0331, -0.0160],
          [-0.0466,  0.0264, -0.0531,  0.0650,  0.0589],
          [-0.0333, -0.0800,  0.0016,  0.0574,  0.0585]]],


        [[[ 0.0456,  0.0789,  0.0251, -0.0210, -0.0265],
          [ 0.0911,  0.0369, -0.0002, -0.0281, -0.1578],
          [-0.0859,

In [15]:
net.zero_grad()
output = net(input)
target = torch.randn(10)  # a dummy target, for example
target = target.view(1, -1)  # make it the same shape as output
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

loss.backward()

print(net.fc3.weight.grad)

tensor(0.5523, grad_fn=<MseLossBackward>)
tensor([[ 0.0000,  0.0091,  0.0023,  0.0000,  0.0000,  0.0020,  0.0030,  0.0000,
          0.0000,  0.0012,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0156,  0.0008,  0.0000,  0.0000,  0.0049,  0.0051,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0003,
          0.0000,  0.0047,  0.0064,  0.0050,  0.0000,  0.0000,  0.0124,  0.0000,
          0.0014,  0.0000,  0.0010,  0.0000,  0.0005,  0.0000,  0.0000,  0.0024,
          0.0076,  0.0000,  0.0000,  0.0055,  0.0127,  0.0000,  0.0000,  0.0123,
          0.0041,  0.0015,  0.0109,  0.0000,  0.0000,  0.0003,  0.0000,  0.0054,
          0.0000,  0.0000,  0.0015,  0.0000,  0.0007,  0.0000,  0.0009,  0.0000,
          0.0127,  0.0000,  0.0000,  0.0086,  0.0000,  0.0008,  0.0000,  0.0083,
          0.0029,  0.0000,  0.0000,  0.0000],
        [ 0.0000, -0.0258, -0.0065,  0.0000,  0.0000, -0.0058, -0.0086,  0.0000,
          0.0000, -0.

In [16]:
import torch.optim as optim

In [17]:
optimizer = optim.SGD(net.parameters(), lr = 0.01, momentum=0.9)

# in your training loop:
optimizer.zero_grad()   # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()    # Does the update