This notebook will be exploring how PyTorch is used to create a simple neural network. Tutorial for this follows from this link (https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html)

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [15]:
class Net(nn.Module):
    # Net using nn.Module to inherit functions
    def __init__(self):
        super(Net, self).__init__() # Super for inheritance
        # 1 input image channel, 6 output channels, 3x3 square conv kernels
        self.conv1 = nn.Conv2d(1, 6, 3) # nn.Conv2d for conv layer
        self.conv2 = nn.Conv2d(6, 16, 3)
        # Apply linear transformation to incoming data y = x*a.T + b
        self.fc1 = nn.Linear(16 * 6 * 6, 120) # 6x6 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)  # Number of Outputs: 10
    
    def forward(self, x):
        # Max Pooling over a (2,2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2) # If window is square, can just input single value
        x = x.view(-1, self.num_flat_features(x)) # Transforms to some number of rows and num_flat_features(x) columns
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        return x
    
    # Function multiply size of input from one layer to the next
    def num_flat_features(self, x):
        size = x.size()[1:] # all dims except batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        
        return num_features

net = Net()
print(net)
            

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [16]:
# Learnable parameters of a model are returned by net.parameters()

params = list(net.parameters())
print(len(params))
print(params[0].size()) # conv1#s weights

10
torch.Size([6, 1, 3, 3])


In [19]:
params[0]

Parameter containing:
tensor([[[[-0.0597, -0.2670,  0.0693],
          [-0.2468, -0.2974, -0.0986],
          [-0.2666, -0.0682, -0.2496]]],


        [[[-0.3012, -0.3180,  0.0383],
          [ 0.0104, -0.1344,  0.2940],
          [ 0.2860,  0.3258, -0.2030]]],


        [[[ 0.1441,  0.2834, -0.0368],
          [-0.0936,  0.0346, -0.2993],
          [-0.2390,  0.1726,  0.1542]]],


        [[[ 0.1103, -0.2230, -0.2753],
          [-0.1738,  0.2659,  0.0048],
          [ 0.1810, -0.0565,  0.2537]]],


        [[[-0.2153, -0.2751, -0.1657],
          [ 0.0751,  0.0516,  0.0401],
          [ 0.2659,  0.1900,  0.2065]]],


        [[[ 0.1854,  0.0511,  0.2777],
          [-0.0159, -0.2373, -0.0024],
          [-0.1387, -0.1050, -0.1392]]]], requires_grad=True)

In [20]:
# Try with random 32x32 input, size of MNIST dataset
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[-0.0425,  0.0506, -0.0261,  0.0432, -0.1079,  0.1634,  0.0515, -0.0571,
         -0.0591,  0.0404]], grad_fn=<AddmmBackward>)


In [21]:
input

tensor([[[[-0.5225, -1.1788, -0.7394,  ..., -1.9270, -0.8059, -0.3042],
          [ 0.8579,  0.7384, -1.0661,  ...,  0.1766, -1.3231,  1.5176],
          [-0.6065, -0.5460,  1.1917,  ...,  0.0115,  0.5774, -0.2858],
          ...,
          [ 1.3118, -0.7563, -0.3109,  ..., -0.1542, -0.3895, -0.9130],
          [ 0.0178,  0.2152,  1.2791,  ...,  0.8881, -0.1599, -1.2267],
          [ 1.0648, -0.2067,  0.8832,  ..., -0.3742,  0.3771,  0.3171]]]])

In [22]:
# Zero the gradient buffers of all parameters and backprops with random gradients.
net.zero_grad()
out.backward(torch.randn(1,10))

# Loss Function

In [24]:
# Compute loss between estimated output and target value
output = net(input)
target = torch.randn(10)
target = target.view(1, -1)
criterion = nn.MSELoss() # loss function MSE

loss = criterion(output, target) # loss between output computed and target values
print(loss)

tensor(0.5390, grad_fn=<MseLossBackward>)


In [26]:
# Calling loss.backwards() computes derivatives w.r.t. the loss
print(loss.grad_fn) # MSELoss
print(loss.grad_fn.next_functions[0][0]) # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU


<MseLossBackward object at 0x7fe74e63e0d0>
<AddmmBackward object at 0x7fe74e63ef10>
<AccumulateGrad object at 0x7fe74e64b950>


# Back Prop

In [27]:
# Clear existing gradients or else the gradients will be accumulated to existing gradients
net.zero_grad() # Zeroes the gradient buffers of all parameters

print('con1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

con1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([ 0.0006, -0.0110,  0.0099, -0.0038, -0.0004,  0.0050])


# Updating weights

In [28]:
# Implement Gradient Descent with this
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

In [30]:
import torch.optim as optim

# Create optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)

# in training loop
optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step() # Does update

SyntaxError: invalid syntax (<ipython-input-30-41580d3ffb41>, line 4)