
A typical training procedure for a neural network is as follows:

1) Define a neural network that has some learnable parameters (or weights)

2) Iterate over a dataset of inputs

3) Process each input through the network

4) Compute the loss (how far the output is from being correct)

5) Propagate gradients back into the network’s parameters

6) Update the weights of the network, typically using a simple update rule: `weight = weight - learning_rate * gradient`


# Define the network

In [5]:
import torch
import torch.nn as nn 
import torch.nn.functional as F 

class Net(nn.Module):
    """Small convolutional network: two conv + max-pool stages, then three linear layers.

    The size of ``fc1`` assumes 32x32 single-channel inputs. Each 3x3
    convolution (stride 1, no padding) trims 2 pixels per spatial dimension
    and each 2x2 max-pool halves it (rounding down): 32 -> 30 -> 15 -> 13 -> 6,
    which leaves 16 feature maps of 6x6 to feed into ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # 1 input image channel, 6 output channels, 3x3 square convolution kernel
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 6 * 6, 120)  # 6*6 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Compute the network output for a batch of images.

        Args:
            x: tensor of shape (N, 1, 32, 32).

        Returns:
            Tensor of shape (N, 10) holding the raw output of ``fc3``
            (no activation is applied to the last layer).
        """
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # For a square window, a single number is enough
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten every dimension except the batch dimension. Unlike
        # x.view(-1, num_features), this also works for an empty batch
        # (N == 0), where view() cannot infer the -1 dimension.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of elements per sample in ``x``.

        This is the product of all dimensions of ``x`` except the first
        (batch) one; it is 1 when ``x`` has a single dimension.
        """
        return x.size()[1:].numel()

# Instantiate the model and print its layer summary.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [6]:
# Materialize the model's learnable parameters as a list, then show how many
# there are and the shape of the first one (conv1's weight).
params = list(net.parameters())
print(len(params), params[0].shape, sep='\n')

10
torch.Size([6, 1, 3, 3])


In [7]:
# Feed one random single-channel 32x32 image (batch size 1) through the network.
# NOTE: `input` shadows the Python built-in of the same name; the name is kept
# because later cells refer to it.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[-0.0819, -0.0674,  0.0458, -0.1515,  0.1032,  0.0147,  0.0723,  0.0383,
          0.0041, -0.0308]], grad_fn=<AddmmBackward>)


In [8]:
# Zero the gradient buffers of all parameters, then backpropagate from `out`
# using a random gradient of the same shape as the output:
net.zero_grad()
out.backward(gradient=torch.randn(1, 10))

# Loss Function

In [9]:
# A loss function takes an (output, target) pair and computes a value that
# estimates how far the output is from the target.

output = net(input)
# A dummy target, for example, reshaped to (1, 10) so it matches the output.
target = torch.randn(10).view(1, -1)
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

tensor(2.2791, grad_fn=<MseLossBackward>)


# Backprop

In [12]:
# Zero the gradient buffers of all parameters, then compare conv1's bias
# gradient before and after backpropagating the loss.
net.zero_grad()

print('conv1.bias.grad before backword', net.conv1.bias.grad, sep='\n')

loss.backward()

print('conv1.bias.grad after backward', net.conv1.bias.grad, sep='\n')

conv1.bias.grad before backword
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([ 0.0258, -0.0227,  0.0301,  0.0231, -0.0190, -0.0108])


# Update the weights

In [13]:
import torch.optim as optim

# Create the optimizer: SGD over all of the network's parameters.
optimizer = optim.SGD(net.parameters(), lr=0.01)

# One iteration of the training loop:
optimizer.zero_grad()  # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()  # performs the update