## Compute Gradients Manually Using NumPy

In [1]:
import numpy as np
import torch
# Record the library versions this notebook was executed with.
print('numpy version: {}'.format(np.__version__))
print('pytorch version: {}'.format(torch.__version__))


##########################################
#      Define some helper functions
##########################################
def relu(x):
    """Rectified linear unit: replace every negative entry of ``x`` with zero.

    Implemented as a clip with a lower bound of 0 and an upper bound of
    ``+inf``, so non-negative values pass through unchanged.
    """
    lower_bound, upper_bound = 0, np.inf
    return np.clip(x, lower_bound, upper_bound)


def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    The textbook formula computes ``exp(-x)``, which overflows to ``inf`` (and
    emits a RuntimeWarning) for large negative ``x``. Here only
    ``exp(-abs(x))`` is ever evaluated, which always lies in ``[0, 1]``.

    Args:
        x: a scalar or array-like of numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``x``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    # For x >= 0 this is the textbook form 1 / (1 + e^-x); for x < 0 the
    # algebraically identical e^x / (1 + e^x) is used instead.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # np.where yields a 0-d array for scalar input; indexing with an empty
    # tuple unwraps that to a NumPy scalar and leaves n-d arrays as arrays.
    return result[()]


def l2_loss(x, y):
    """Element-wise squared difference between ``x`` and ``y``.

    No reduction is applied: the result has whatever shape ``x - y`` has, and
    the caller decides whether to sum or average it.
    """
    residual = x - y
    return residual ** 2


def display(x, name):
    """Print the value ``x`` underneath a ``name:`` heading.

    Output layout: an empty line, then the label followed by a colon and a
    space, then the formatted value on the next line behind four spaces.
    """
    layout = '\n{label}: \n    {value}'
    print(layout.format(label=name, value=x))

numpy version: 1.15.1
pytorch version: 1.0.0


In [2]:
#############################
#        Forward
#############################
# Toy network: the 2x2 kernel W slides over the 3x3 input X (stride 1, no
# padding) -> ReLU -> weighted sum with H -> sigmoid -> squared error vs. Y.
X = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
W = np.array([
    [0.1, -0.1],
    [0.2, 0.01],
])
H = np.array([0.05, -0.2, 0.2, 0.5])

Y = np.array([1.])


def _patch_response(image, kernel, row, col):
    """Sum of the 2x2 patch of ``image`` with top-left corner (row, col) times ``kernel``."""
    return (image[row, col] * kernel[0, 0]
            + image[row, col + 1] * kernel[0, 1]
            + image[row + 1, col] * kernel[1, 0]
            + image[row + 1, col + 1] * kernel[1, 1])


# One pre-activation per kernel position. The scalars keep their own names
# because the backward pass reads them individually for the ReLU gate.
A00, A01 = _patch_response(X, W, 0, 0), _patch_response(X, W, 0, 1)
A10, A11 = _patch_response(X, W, 1, 0), _patch_response(X, W, 1, 1)

A = np.array([[A00, A01],
              [A10, A11]])
R = relu(A)
# Flatten the activations row by row so they line up with the entries of H.
B = (R.ravel() * H).sum()
Z = sigmoid(B)
L = l2_loss(Z, Y).sum()

banner = '*' * 80
print(banner)
print('*' + ' ' * 25 + 'Forward (Numpy)')
print(banner)
for value, label in ((A, 'A'), (B, 'B'), (Z, 'Z'), (L, 'L')):
    display(value, label)


#############################
#        Backward
#############################
# Chain rule applied node by node, from the loss back to W[0, 0].
# Every d_<node> below is the derivative of L with respect to that node.
d_L = 1
d_Z = d_L * 2 * (Z - Y)                # L = (Z - Y)^2
sig_B = sigmoid(B)
d_B = d_Z * sig_B * (1 - sig_B)        # sigmoid'(B) = sigmoid(B) * (1 - sigmoid(B))

# B is a plain sum of four terms T_k = R_k * H_k, so every term receives d_B
# unchanged (all four names refer to the same array).
d_T0, d_T1, d_T2, d_T3 = (d_B * 1,) * 4

# dT_k / dH_k = R_k
d_H0, d_H1, d_H2, d_H3 = (
    upstream * activation
    for upstream, activation in zip((d_T0, d_T1, d_T2, d_T3), R.reshape(-1))
)
d_H = np.concatenate([d_H0, d_H1, d_H2, d_H3])

# dT_k / dR_k = H_k
d_R00, d_R01, d_R10, d_R11 = (
    upstream * weight
    for upstream, weight in zip((d_T0, d_T1, d_T2, d_T3), H)
)
d_R = np.concatenate([d_R00, d_R01, d_R10, d_R11])

# ReLU lets the gradient through only where its input was positive.
d_A00, d_A01, d_A10, d_A11 = (
    upstream * (1 if pre_activation > 0 else 0)
    for upstream, pre_activation in zip((d_R00, d_R01, d_R10, d_R11),
                                        (A00, A01, A10, A11))
)
d_A = np.concatenate([d_A00, d_A01, d_A10, d_A11])

# Each A_k is a sum that contains one term U_k = X[...] * W[0, 0]; as a summand
# it receives the gradient of A_k unchanged.
d_U0_00, d_U1_00, d_U2_00, d_U3_00 = (
    upstream * 1 for upstream in (d_A00, d_A01, d_A10, d_A11)
)
d_U_00 = np.concatenate([d_U0_00, d_U1_00, d_U2_00, d_U3_00])

# W[0, 0] multiplies the top-left pixel of each of the four patches.
d_W00 = (d_U0_00 * X[0, 0]
         + d_U1_00 * X[0, 1]
         + d_U2_00 * X[1, 0]
         + d_U3_00 * X[1, 1])

banner = '*' * 80
print(banner)
print('*' + ' ' * 25 + 'Backward (Numpy)')
print(banner)
for grad, label in ((d_Z, 'd_Z'),
                    (d_B, 'd_B'),
                    (d_R, 'd_R'),
                    (d_H, 'd_H'),
                    (d_A, 'd_A'),
                    (d_U_00, 'd_U_00'),
                    (d_W00, 'd_W[0, 0]')):
    display(grad, label)

********************************************************************************
*                         Forward (Numpy)
********************************************************************************

A: 
    [[0.75 0.96]
 [1.38 1.59]]

B: 
    0.9165000000000001

Z: 
    0.7143284201512767

L: 
    0.08160825153326548
********************************************************************************
*                         Backward (Numpy)
********************************************************************************

d_Z: 
    [-0.57134316]

d_B: 
    [-0.11659019]

d_R: 
    [-0.00582951  0.02331804 -0.02331804 -0.05829509]

d_H: 
    [-0.08744264 -0.11192658 -0.16089446 -0.1853784 ]

d_A: 
    [-0.00582951  0.02331804 -0.02331804 -0.05829509]

d_U_00: 
    [-0.00582951  0.02331804 -0.02331804 -0.05829509]

d_W[0, 0]: 
    [-0.34394105]


## PyTorch (for validation purposes)

In [3]:
#########################
#        Forward
#########################
# Same toy network as the NumPy cell, rebuilt from torch tensors so autograd
# can produce reference gradients.
X = torch.tensor([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]], requires_grad=True)
W = torch.tensor([[0.1, -0.1], [0.2, 0.01]], requires_grad=True)
H = torch.tensor([0.05, -0.2, 0.2, 0.5], requires_grad=True)
Y = torch.tensor([1.], requires_grad=False)

# One entry per position of the 2x2 kernel W over X, filled in row-major order.
A = torch.empty(4)
A[0] = (X[:2, :2] * W).sum()
A[1] = (X[:2, 1:] * W).sum()
A[2] = (X[1:, :2] * W).sum()
A[3] = (X[1:, 1:] * W).sum()
# A, R, B and Z are intermediate (non-leaf) tensors; retain_grad() is called on
# each so that their .grad can be read after backward().
A.retain_grad()

R = torch.relu(A)
R.retain_grad()

B = (R * H).sum()
B.retain_grad()

Z = torch.sigmoid(B)
Z.retain_grad()
# Z is 0-d while Y has shape (1,). Reshape Z to Y's shape so mse_loss receives
# input and target of equal size instead of broadcasting them (which triggers
# a size-mismatch warning). The loss value and all gradients are unchanged.
L = torch.nn.functional.mse_loss(Z.reshape(Y.shape), Y)

print('*' * 80)
print('*' + ' ' * 25 + 'Forward (Pytorch)')
print('*' * 80)
display(A, 'A')
display(B, 'B')
display(Z, 'Z')
display(L, 'L')

#########################
#       Backward
#########################
L.backward()

print('*' * 80)
print('*' + ' ' * 25 + 'Backward (Pytorch)')
print('*' * 80)
display(Z.grad.numpy(), 'd_Z')
display(B.grad.numpy(), 'd_B')
display(R.grad.numpy(), 'd_R')
display(H.grad.numpy(), 'd_H')
display(A.grad.numpy(), 'd_A')
display(W.grad.numpy(), 'd_W')

#########################
#      Validation
#########################
# Compare autograd with the hand-derived gradients d_Z, d_B, d_R, d_H, d_A and
# d_W00 left in the namespace by the NumPy cell (run that cell first). The
# tensors here are float32 and the NumPy values float64, hence the tolerance.
for torch_grad, manual_grad, label in ((Z.grad, d_Z, 'd_Z'),
                                       (B.grad, d_B, 'd_B'),
                                       (R.grad, d_R, 'd_R'),
                                       (H.grad, d_H, 'd_H'),
                                       (A.grad, d_A, 'd_A'),
                                       (W.grad[0, 0], d_W00, 'd_W[0, 0]')):
    np.testing.assert_allclose(
        torch_grad.numpy(), manual_grad, rtol=1e-5, atol=1e-8,
        err_msg=f'{label}: autograd and manual gradient disagree')

********************************************************************************
*                         Forward (Pytorch)
********************************************************************************

A: 
    tensor([0.7500, 0.9600, 1.3800, 1.5900], grad_fn=<CopySlices>)

B: 
    0.9164999723434448

Z: 
    0.714328408241272

L: 
    0.08160825818777084
********************************************************************************
*                         Backward (Pytorch)
********************************************************************************

d_Z: 
    -0.571343183517456

d_B: 
    -0.11659019440412521

d_R: 
    [-0.00582951  0.02331804 -0.02331804 -0.0582951 ]

d_H: 
    [-0.08744264 -0.11192659 -0.16089447 -0.18537842]

d_A: 
    [-0.00582951  0.02331804 -0.02331804 -0.0582951 ]

d_W: 
    [[-0.34394106 -0.40806568]
 [-0.5363149  -0.6004395 ]]
