# 1. Create an NN 

Define a small sample convolutional network that maps a single-channel 32x32 image to 10 output scores.

<img src="https://pytorch.org/tutorials/_images/mnist.png" width="700"/>

[Source](https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html#sphx-glr-beginner-blitz-neural-networks-tutorial-py)



In [1]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small convolutional network: two conv/pool stages followed by three linear layers.

    ``fc1`` is sized for a 16 x 6 x 6 feature map, which is what the two
    conv/pool stages produce from a ``[N, 1, 32, 32]`` input. The network
    returns a ``[N, 10]`` tensor of raw (un-normalised) scores.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: 1 input channel -> 6 -> 16 channels, 3x3 kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)

        # Classifier head: 576 (= 16 * 6 * 6) -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(in_features=16 * 6 * 6, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run the network on ``x`` and return the output of the last linear layer.

        Shape comments below assume ``x`` is ``[N, 1, 32, 32]``.
        """
        # Stage 1: conv -> ReLU -> 2x2 max-pool.
        # [N, 1, 32, 32] -> [N, 6, 30, 30] -> [N, 6, 15, 15]
        activated = F.relu(self.conv1(x))
        pooled = F.max_pool2d(activated, (2, 2))

        # Stage 2: conv -> ReLU -> 2x2 max-pool.
        # [N, 6, 15, 15] -> [N, 16, 13, 13] -> [N, 16, 6, 6]
        activated = F.relu(self.conv2(pooled))
        pooled = F.max_pool2d(activated, 2)

        # Collapse every non-batch dimension into one: [N, 16, 6, 6] -> [N, 576]
        flat = pooled.view(-1, self.num_flat_features(pooled))

        # Fully connected head: [N, 576] -> [N, 120] -> [N, 84] -> [N, 10]
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the number of elements per sample, i.e. the product of all
        dimensions of ``x`` except the first (batch) dimension."""
        # x.shape[1:] is a torch.Size such as [16, 6, 6]; numel() multiplies
        # its entries together (16 * 6 * 6 = 576).
        return x.shape[1:].numel()

In [2]:
# Build one instance of the network; the later cells all reuse this `net`.
net = Net()
# Printing a module shows its registered layers and their configuration.
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


#### Inspect the network's learnable parameters and their shapes

In [3]:
# net.parameters() yields every learnable tensor of the network (a weight and
# a bias for each conv / linear layer); materialise it so it can be reused.
params = list(net.parameters())
# Iterate over the tensors directly instead of indexing by position.
for param in params:
    print(param.size())

torch.Size([6, 1, 3, 3])
torch.Size([6])
torch.Size([16, 6, 3, 3])
torch.Size([16])
torch.Size([120, 576])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


#### Try a forward pass

In [4]:
import torch

# A random batch of 3 single-channel 32x32 inputs.
X = torch.randn(3, 1, 32, 32)
# Calling the module runs Net.forward; y_ holds one row of 10 scores per input.
y_ = net(X)
print(y_)

tensor([[-0.1181, -0.0074,  0.0331, -0.0807,  0.0395,  0.0615,  0.0152,  0.0141,
          0.0665, -0.0082],
        [-0.1121, -0.0290,  0.0348, -0.0921,  0.0466,  0.0634,  0.0045,  0.0332,
          0.0493, -0.0213],
        [-0.1042, -0.0297,  0.0234, -0.0877,  0.0373,  0.0660,  0.0114,  0.0213,
          0.0495, -0.0191]], grad_fn=<AddmmBackward>)


# 2. Loss function

In [9]:
## Suppose batch_size=3
# Forward pass on a fresh random batch to get the prediction y_.
X = torch.randn(3, 1, 32, 32)
y_ = net(X)
# y_.shape = [3, 10]

# Dummy target y with the same shape as y_ (random values, for illustration only).
y = torch.randn(3, 10)

# Compute the loss between prediction and target with nn.MSELoss.
criterion = nn.MSELoss()
loss = criterion(y_, y)
# .item() extracts the loss as a plain Python number for printing.
print(loss.item())

1.108247995376587


# 3. Backpropagation with nn
#### Forward propagation

```
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
      -> view -> linear -> relu -> linear -> relu -> linear
      -> MSELoss
      -> loss
```



In [10]:
# Reset the gradient buffers of every parameter so that the backward pass
# below does not add on top of gradients left over from an earlier run.
net.zero_grad()

# Gradient of conv1's bias before backpropagation.
print('conv1.bias.grad before backward', net.conv1.bias.grad, sep='\n')

# Backpropagate the loss through the network; this fills in .grad.
loss.backward()

# Gradient of conv1's bias after backpropagation.
print('conv1.bias.grad after backward', net.conv1.bias.grad, sep='\n')

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([ 0.0037,  0.0002,  0.0019, -0.0026, -0.0058, -0.0018])


# 4. Optimizers - Update the weights
#### Manual SGD update

```
weight = weight - learning_rate * gradient
```

In [7]:
learning_rate = 0.01

# Plain SGD step: weight = weight - learning_rate * gradient.
# The in-place update is done under no_grad so that autograd does not record
# it as part of the computation graph.
with torch.no_grad():
    for f in net.parameters():
        # A parameter has no gradient until a backward pass has reached it;
        # skip those instead of failing on `None` (optim.SGD does the same).
        if f.grad is None:
            continue
        f.sub_(f.grad * learning_rate)

#### Use torch optim

In [8]:
import torch.optim as optim

# Create an SGD optimizer over all of the network's parameters,
# using the same learning rate as the manual update above.
optimizer = optim.SGD(net.parameters(), lr=0.01)

# ---- One training step ----
# 1. Zero the gradient buffers so gradients from earlier backward passes
#    are not accumulated into this step.
optimizer.zero_grad()

# 2. Forward pass on the batch X.
y_ = net(X)

# 3. Compute the loss against the target y and backpropagate it.
loss = criterion(y_, y)
loss.backward()

# 4. Let the optimizer update the weights from the gradients just computed.
optimizer.step()