<a href="https://colab.research.google.com/github/gimquokka/ML/blob/master/ML/Docs_Tutorial/PyTroch/Neural_Networks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Neural_Networks👍

## Define the network

In [160]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
  """Small convolutional classifier for single-channel images.

  Two (convolution -> ReLU -> 2x2 max-pool) stages are followed by three
  fully connected layers. The 10 scores of the last layer are passed through
  a softmax over dim 1, so `forward` returns one probability row per sample.
  `fc1` is sized for 16 * 7 * 7 features, which is what a 32x32 input
  produces after the two conv/pool stages.
  """

  def __init__(self):
    super().__init__()
    # Feature extractor: 1 -> 8 channels with a 3x3 kernel (stride 1),
    # then 8 -> 16 channels with a 2x2 kernel.
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1)
    self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=2)
    # Classifier head; every layer is an affine map y = Wx + b.
    self.fc1 = nn.Linear(in_features=16 * 7 * 7, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=60)
    self.fc3 = nn.Linear(in_features=60, out_features=10)

  def forward(self, x):
    """Run the network on `x` and return softmax probabilities over 10 classes."""
    # Each stage: convolve, apply ReLU, then max-pool over a 2x2 window.
    stage1 = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)
    stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), kernel_size=2)
    # Collapse all non-batch dimensions into a single feature axis.
    flat_width = self.num_flat_features(stage2)
    hidden = stage2.view(-1, flat_width)
    hidden = F.relu(self.fc1(hidden))
    hidden = F.relu(self.fc2(hidden))
    scores = self.fc3(hidden)
    # Softmax over dim 1 turns each row of scores into values that sum to 1.
    return F.softmax(scores, dim=1)

  def num_flat_features(self, x):
    """Return the product of all dimensions of `x` except the first one."""
    count = 1
    for extent in x.shape[1:]:
      count *= extent
    return count

# Seed PyTorch's global RNG before the weights are drawn, so the random
# initialisation here and the torch.randn calls in later cells produce the
# same values on every "Restart & Run All".
torch.manual_seed(0)

# Build the model and print its layer summary.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(8, 16, kernel_size=(2, 2), stride=(1, 1))
  (fc1): Linear(in_features=784, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (fc3): Linear(in_features=60, out_features=10, bias=True)
)


In [161]:
# Gather the learnable tensors of the network. Each of the five layers
# contributes a weight and a bias, so 10 entries are expected; index 7 is
# the bias of fc2.
params = [p for p in net.parameters()]
print(len(params))
print(params[7].shape)

10
torch.Size([60])


In [162]:
# Sanity-check the spatial size left after both conv/pool stages, using
# stand-alone layers configured like the ones inside Net. The printed shape
# is what determines the in_features of Net.fc1.
conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3)
conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=2)

# One random single-channel 32x32 image.
a = torch.randn(1, 1, 32, 32)

# Stage 1: convolution -> ReLU -> 2x2 max pooling.
a = F.relu(conv1(a))
a = F.max_pool2d(a, kernel_size=2)

# Stage 2: same pattern with the second convolution.
a = F.relu(conv2(a))
a = F.max_pool2d(a, kernel_size=2)

print(a.shape)

torch.Size([1, 16, 7, 7])


In [163]:
# Feed one random 1x32x32 image (batch of 1) through the network.
# NOTE: the name `input` shadows the Python builtin; it is kept because
# later cells read this variable.
input = torch.randn((1, 1, 32, 32))
out = net(input)

# One row of 10 softmax probabilities.
print(out)

tensor([[0.0862, 0.0990, 0.0931, 0.1076, 0.1073, 0.0896, 0.1119, 0.1051, 0.0878,
         0.1124]], grad_fn=<SoftmaxBackward>)


In [164]:
# Reset any accumulated parameter gradients, then backpropagate a random
# upstream gradient. `out` is not a scalar, so backward() needs an explicit
# gradient with the same (1, 10) shape.
net.zero_grad()
out.backward(gradient=torch.randn(1, 10))

## Loss Function

In [165]:
# Mean-squared-error criterion between the prediction and a dummy target.
criterion = nn.MSELoss()

# Fresh forward pass, so the loss gets its own autograd graph.
output = net(input)

# Random stand-in target, reshaped from (10,) to (1, 10) to match `output`.
target = torch.randn(10).view(1, -1)

loss = criterion(output, target)

In [166]:
# Walk backwards through the autograd graph starting at the loss: print the
# loss's grad_fn, then follow the first entry of next_functions three times.
grad_node = loss.grad_fn
print(grad_node)
for hop in range(3):
  grad_node = grad_node.next_functions[0][0]
  print(grad_node)

<MseLossBackward object at 0x7f4b3cfe4b70>
<SoftmaxBackward object at 0x7f4b3cffc278>
<AddmmBackward object at 0x7f4b3cfe4b70>
<AccumulateGrad object at 0x7f4b3cffc278>


## Backprop

In [169]:
# Clear previously accumulated gradients so the effect of this single
# backward pass is visible in isolation.
net.zero_grad()

# Gradient of fc1's bias before backpropagation.
# NOTE(review): recent PyTorch releases reset gradients to None rather than
# to zeros in zero_grad(), so this may print None - confirm for your version.
print('Check fc1.bias.grad before backward', net.fc1.bias.grad, sep='\n')

# Backpropagate the loss; this fills .grad on every parameter that took
# part in computing it.
loss.backward()

# Gradient of fc1's bias after backpropagation.
print('Check fc1.bias.grad after backward', net.fc1.bias.grad, sep='\n')

Check fc1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
Check fc1.bias.grad after backward
tensor([ 0.0000e+00,  1.4832e-03,  4.1624e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  8.6503e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  6.1835e-05,
         1.2013e-03, -2.9132e-03,  0.0000e+00, -1.6802e-03, -2.7058e-03,
        -6.1333e-04, -1.4601e-04,  0.0000e+00,  1.1578e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+

## Update the weights

In [176]:
learning_rate = 0.01

# Manual stochastic-gradient-descent step: weight <- weight - lr * gradient.
# The update runs under torch.no_grad() so the in-place subtraction is not
# recorded by autograd; this replaces the older `.data` access, which
# bypasses autograd's in-place safety checks.
with torch.no_grad():
  for f in net.parameters():
    # Parameters that received no gradient have .grad set to None; skip
    # them instead of failing on the multiplication.
    if f.grad is None:
      continue
    f.sub_(f.grad * learning_rate)


In [179]:
import torch.optim as optim

# Let torch.optim perform the parameter update: plain SGD with lr = 0.01.
optimizer = optim.SGD(params=net.parameters(), lr=0.01)

# One training iteration.
optimizer.zero_grad()             # drop gradients left over from earlier backward passes
output = net(input)               # forward pass
loss = criterion(output, target)  # compare the prediction with the target
loss.backward()                   # populate .grad on the parameters
optimizer.step()                  # update every parameter from its .grad

tensor([ 0.0000e+00,  1.9066e-03,  1.9323e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00, -9.2258e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00, -8.3394e-05,
         7.8156e-04, -2.2930e-03,  0.0000e+00, -1.7706e-03, -2.4603e-03,
        -5.0006e-04, -7.5663e-04,  0.0000e+00,  2.0372e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00, -1.5127e-03,  6.5441e-04,
        -1.2675e-03, -2.4785e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  1.0365e-03,  1.7540e-03,  0.0000e+00,
        -1.6629e-04,  8.3852e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  1.4640e-03,  1.6038e-03,  1.5517e-04,
         1.7324e-03, -4.4343e-04,  6.0556e-04,  6.2550e-05, -6.9656e-05,
         0.0000e+00,  0.0000e+00,  9.3851e-04,  6.0052e-05,  0.0000e+00,
         0.0000e+00,  1.1789e-03,  0.0000e+00, -9.9203e-04, -1.0880e-03,
         1.3282e-03,  0.0000e+00,  4.9679e-04, -1.9