In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [4]:
data = torch.Tensor([[[[ 1, -1,  0,  0, -1],
                       [ 0,  1, -1,  1,  1],
                       [ 1, -1,  1,  0, -1],
                       [ 1,  1,  0, -1,  0],
                       [-1,  0,  0,  1,  0]]]])
target_onehot = torch.Tensor([[1, 0, 0]])
target = torch.tensor([0])

conv1_weight = torch.Tensor([[[[ 1, -1,  2],
                               [-1, -2,  0],
                               [ 0,  2, -1]]],
                             [[[-2,  1, -1],
                               [ 2,  0,  0],
                               [ 0,  1, -1]]]])

linear1_weight = torch.Tensor([[2, 0, 0, 2, 1, -2, -1, 0, 0, -1, 1, -3, -1, -3, -2, -1, 2, -3],
                               [-2, 3, 2, -2, -3, 0, 2, 2, 1, 3, -3, -3, 0, 1, 3, 2, 3, -2],
                               [-3, 3, 0, 1, -3, 1, 2, 3, -2, 2, 1, 0, 3, 0, -3, -3, -1, -3],
                               [-2, -1, 2, 0, -2, 0, 0, 2, -3, 3, 3, 2, 3, -3, -3, 0, -1, 0]])

linear2_weight = torch.Tensor([[0, 0, -1, 1],
                               [0, 1, 1, -3],
                               [1, -2, -3, 3]])

linear1_bias = torch.Tensor([[2, -1, 4, 0]])

linear2_bias = torch.Tensor([[3, -1, 2]])

conv = relu1 = flatten = linear1 = relu2 = linear2 = softmax = 0

In [5]:
class MyNet(nn.Module):
  def __init__(self):
    super(MyNet, self).__init__()
    self.conv1 = nn.Conv2d(1, 2, kernel_size=3, bias=False)
    self.linear1 = nn.Linear(18, 4)
    self.linear2 = nn.Linear(4, 3)
    self.relu = nn.ReLU()
    self.softmax = nn.Softmax()
    
    self.conv1.weight = nn.Parameter(conv1_weight)
    self.linear1.weight = nn.Parameter(linear1_weight)
    self.linear1.bias = nn.Parameter(linear1_bias)
    self.linear2.weight = nn.Parameter(linear2_weight)
    self.linear2.bias = nn.Parameter(linear2_bias)
  
  def forward(self, data):
    conv    = self.conv1(data)
    relu1   = self.relu(conv)
    flatten = relu1.view(relu1.size(0), -1) # Flatten
    linear1 = self.linear1(flatten)
    relu2   = self.relu(linear1)
    linear2 = self.linear2(relu2)
    softmax = self.softmax(linear2)
    return softmax

In [6]:
model = MyNet()
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

result = model(data)
loss = criterion(torch.log(result), target)

print('Forward Propagation : ')
print('convolutional output :\n', conv)
print('relu activation output :\n', relu1)
print('flatten result :\n', flatten)
print('linear1 output :\n', linear1)
print('relu activation output :\n', relu2)
print('linear2 output :\n', linear2)
print('softmax activation output:\n', softmax)

print('\Loss Calculation')
print('result = ', result)
print('target = ', target)
print("loss = ",   loss)

print('\nBackward Propagation')
print('old linear 2 weight = ', linear2_weight)
print('old linear 1 weight = ', linear1_weight)
print('old convolution weight = ', conv1_weight)

optimizer.zero_grad()
loss.backward()
optimizer.step()

print('\nnew linear 2 grad = ', model.linear2.weight.grad)
print('new linear 2 bias grad = ', model.linear2.bias.grad)
print('new linear 1 grad = ', model.linear1.weight.grad)
print('new linear 1 bias grad = ', model.linear1.bias.grad)
print('new convolution grad = ', model.conv1.weight.grad)

print('\nnew linear 2 weight = ', model.linear2.weight)
print('new linear 2 bias = ', model.linear2.bias)
print('new linear 1 weight = ', model.linear1.weight)
print('new linear 1 bias = ', model.linear1.bias)
print('new convolution weight = ', model.conv1.weight)

Forward Propagation : 
convolutional output :
 0
relu activation output :
 0
flatten result :
 0
linear1 output :
 0
relu activation output :
 0
linear2 output :
 0
softmax activation output:
 0
\Loss Calculation
result =  tensor([[0.1189, 0.8789, 0.0022]], grad_fn=<SoftmaxBackward0>)
target =  tensor([0])
loss =  tensor(2.1291, grad_fn=<NllLossBackward0>)

Backward Propagation
old linear 2 weight =  tensor([[ 0.,  0., -1.,  1.],
        [ 0.,  1.,  1., -3.],
        [ 1., -2., -3.,  3.]])
old linear 1 weight =  tensor([[ 2.,  0.,  0.,  2.,  1., -2., -1.,  0.,  0., -1.,  1., -3., -1., -3.,
         -2., -1.,  2., -3.],
        [-2.,  3.,  2., -2., -3.,  0.,  2.,  2.,  1.,  3., -3., -3.,  0.,  1.,
          3.,  2.,  3., -2.],
        [-3.,  3.,  0.,  1., -3.,  1.,  2.,  3., -2.,  2.,  1.,  0.,  3.,  0.,
         -3., -3., -1., -3.],
        [-2., -1.,  2.,  0., -2.,  0.,  0.,  2., -3.,  3.,  3.,  2.,  3., -3.,
         -3.,  0., -1.,  0.]])
old convolution weight =  tensor([[[[ 1., -1.

