# Define the network

In [2]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

In [3]:
class Net(nn.Module):
# torch.nn.Module - Base class for all neural network modules
# models should also subclass this class
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        # in_channels, out_channels, kernel_size
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation : y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5 , 120)
        # in_features, out_features
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # if the size  is a square you can only specify a single number
        x = x.view(-1, self.num_flat_features(x))
        # fully connected layer에 들어가기 전에 batch에 따라서 feature별로 row로 정리
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
        
    def num_flat_features(self, x):
        size = x.size()[1:] # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
    
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [4]:
# The learnable parameters of a model are returned by net.parameters()
params = list(net.parameters())
print(len(params))
print(params[0].size())

10
torch.Size([6, 1, 5, 5])


In [5]:
# Mnist dataset 32 x 32
input = Variable(torch.randn(1, 1, 32, 32))
out = net(input)
print(out)

Variable containing:
 0.1325 -0.0698 -0.0198  0.1482 -0.0227  0.0806  0.0565 -0.0952 -0.1165  0.0255
[torch.FloatTensor of size 1x10]



In [6]:
net.zero_grad() # set gradients of all model parameters to zero
out.backward(torch.randn(1, 10))
# 1) net - 모델의 그라디언트를 0으로 만들어줌
# 2) input variable를 차원을 정하고 만들어준다음에, net에 통과시켜서 나온 out을
# 3) backward attribute를 통해서 backprob (out 의 그라디언트에 randn(1,10) 랜덤으로 차원에 맞춰 설정)

# Loss Function

In [21]:
# A loss function takes the (output, target) pair of inputs
output = net(input)
target = Variable(torch.arange(1,11)) # a dummy target 1 - 10
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

Variable containing:
 38.5608
[torch.FloatTensor of size 1]



In [9]:
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU
print(loss.grad_fn.next_functions[0][0].next_functions[0][0].next_functions[0][0])

<MseLossBackward object at 0x7fa831b55390>
<AddmmBackward object at 0x7fa831b55320>
<ExpandBackward object at 0x7fa831b55390>
<AccumulateGrad object at 0x7fa831b554e0>


# Backprop

In [27]:
net.zero_grad() # zeroes the gradient buffers of all parameters
# if not, gradients will be accumulated to existing gradients

print('conv1.weight.grad before backward')
# net.conv1.__dict__
print(net.conv1.weight.grad)
loss.backward()

print("conv1.weight.grad after backward")
print(net.conv1.weight.grad)

conv1.weight.grad before backward
Variable containing:
(0 ,0 ,.,.) = 
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0

(1 ,0 ,.,.) = 
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0

(2 ,0 ,.,.) = 
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0

(3 ,0 ,.,.) = 
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0

(4 ,0 ,.,.) = 
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0

(5 ,0 ,.,.) = 
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0
  0  0  0  0  0
[torch.FloatTensor of size 6x1x5x5]



RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

# Update the weights

In [23]:
import torch.optim as optim

In [29]:
# create optimizer
optimizer = optim.SGD(net.parameters(), lr = 0.01)

# in your training loop
optimizer.zero_grad() # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step() # does the update

None
