In [1]:
import torch
import numpy as np
# Show the installed PyTorch version alongside the notebook's outputs.
print(f'pytorch version: {torch.__version__}')

pytorch version: 1.0.0


## Using NumPy

### Forward

In [2]:
def np_sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))`` evaluated with NumPy.

    Args:
        x: A scalar or NumPy array; the function is applied elementwise.

    Returns:
        The sigmoid of ``x``, with whatever shape ``np.exp(-x)`` produces.
    """
    # For very negative x, exp(-x) overflows to +inf and the quotient becomes
    # 0.0, which is the correct limit.  Only the spurious overflow warning is
    # silenced here; the computed values are exactly those of the plain formula.
    with np.errstate(over='ignore'):
        return 1 / (1 + np.exp(-x))


def l2_loss(x, y):
    """Elementwise half squared error between ``x`` and ``y``.

    Evaluates ``0.5 * (x - y) ** 2`` without reducing it; callers that want a
    scalar sum the result themselves.
    """
    residual = x - y
    return 0.5 * residual ** 2


# Toy network: a 3x3 input, a single 2x2 kernel slid over it (stride 1, no
# padding), tanh, one scalar weight per activation, sigmoid, summed L2 loss.
x = np.array([
    [1., 2., 3.],
    [4., 5., 6.],
    [7., 8., 9.],
])
w1 = np.array([
    [0.1, -0.1],
    [0.2, 0.001],
])
w2 = np.array([0.0005, -0.2, 0.2, 0.5])
y = np.array([-1., -1., 1., 1.])

# Layer 1: for the 2x2 window of x anchored at (r, c), multiply elementwise by
# w1 and add the four products.  Windows are visited in row-major order.
L_0_0, L_0_1, L_1_0, L_1_1 = [
    x[r, c] * w1[0, 0]
    + x[r, c + 1] * w1[0, 1]
    + x[r + 1, c] * w1[1, 0]
    + x[r + 1, c + 1] * w1[1, 1]
    for r in range(2)
    for c in range(2)
]
L1 = np.array([L_0_0, L_0_1, L_1_0, L_1_1])

print('\nL1', '-' * 40, sep='\n')
print(L1)

# Layer 2: every tanh activation is scaled by its own entry of w2
# (an elementwise product, not a matrix multiplication).
L1A = np.tanh(L1)
L2 = L1A * w2

print('\nL2', '-' * 40, sep='\n')
print(L2)

# Output activation and scalar loss.
L2A = np_sigmoid(L2)
loss = l2_loss(L2A, y).sum()

print('\nloss', '-' * 40, sep='\n')
print(loss)


L1
----------------------------------------
[0.705 0.906 1.308 1.509]

L2
----------------------------------------
[ 3.03765944e-04 -1.43841367e-01  1.72753696e-01  4.53380714e-01]

loss
----------------------------------------
2.3767862745225306


### Backward

In [3]:
# Gradient of the summed loss with respect to the pre-sigmoid values L2:
# (L2A - y) comes from the L2 loss, L2A * (1 - L2A) from the sigmoid.
# L2A and L1A are the activations already computed in the forward cell.
d_L2 = (L2A - y) * L2A * (1 - L2A)

# L2 = L1A * w2 elementwise, so each weight's gradient is its own activation
# times the upstream gradient.
d_w2 = d_L2 * L1A

print('\nd_w2', '-' * 40, sep='\n')
print(d_w2)

# Push the gradient back through w2 and the tanh (derivative 1 - tanh^2).
d_L1 = d_L2 * w2 * (1 - L1A ** 2)

print('\nd_L1', '-' * 40, sep='\n')
print(d_L1)

# w1[0, 0] multiplies the top-left element of each 2x2 window, i.e.
# x[0, 0], x[0, 1], x[1, 0], x[1, 1] for L1[0], L1[1], L1[2], L1[3].
top_left_inputs = (x[0, 0], x[0, 1], x[1, 0], x[1, 1])
d_w_0_0_0, d_w_0_0_1, d_w_0_0_2, d_w_0_0_3 = [
    upstream * window_input
    for upstream, window_input in zip(d_L1, top_left_inputs)
]
d_w_0_0 = d_w_0_0_0 + d_w_0_0_1 + d_w_0_0_2 + d_w_0_0_3

print('\nd_w1[0, 0]', '-' * 40, sep='\n')
print(d_w_0_0)



d_w2
----------------------------------------
[ 0.22783599  0.26189097 -0.09793547 -0.08370645]

d_L1
----------------------------------------
[ 0.0001183  -0.03515696 -0.00575761 -0.00820593]

d_w1[0, 0]
----------------------------------------
-0.1342557202950444


## PyTorch (for validation purposes)

### Forward

In [4]:
# The same inputs, weights and targets as the NumPy cell, rebuilt as tensors so
# that autograd can produce gradients to compare against the hand-derived ones.
x = torch.tensor(
    [[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]],
    requires_grad=True,
)
w1 = torch.tensor(
    [[0.1, -0.1], [0.2, 0.001]],
    requires_grad=True,
)
w2 = torch.tensor([0.0005, -0.2, 0.2, 0.5], requires_grad=True)
y = torch.tensor([-1., -1., 1., 1.], requires_grad=False)

# Layer 1: write each 2x2 window sum into its slot of a preallocated tensor.
# The in-place writes are tracked by autograd.  An alternative is to compute the
# four sums separately and join them with torch.cat.
L1 = torch.empty(4)
L1[0] = torch.sum(x[0:2, 0:2] * w1)
L1[1] = torch.sum(x[0:2, 1:3] * w1)
L1[2] = torch.sum(x[1:3, 0:2] * w1)
L1[3] = torch.sum(x[1:3, 1:3] * w1)
# L1 is an intermediate result; retain_grad() keeps its .grad after backward().
L1.retain_grad()

print('\nL1', '-' * 40, sep='\n')
print(L1)

L1A = L1.tanh()
L1A.retain_grad()

# Layer 2: elementwise scaling of each activation by its own weight.
L2 = L1A * w2
L2.retain_grad()

print('\nL2', '-' * 40, sep='\n')
print(L2)

L2A = L2.sigmoid()
loss = l2_loss(L2A, y).sum()

print('\nloss', '-' * 40, sep='\n')
print(loss)


L1
----------------------------------------
tensor([0.7050, 0.9060, 1.3080, 1.5090], grad_fn=<CopySlices>)

L2
----------------------------------------
tensor([ 3.0377e-04, -1.4384e-01,  1.7275e-01,  4.5338e-01],
       grad_fn=<MulBackward0>)

loss
----------------------------------------
tensor(2.3768, grad_fn=<SumBackward0>)


### Backward

In [5]:
# Backpropagate from the scalar loss.  This fills .grad on the tensors created
# with requires_grad=True and on the intermediates that called retain_grad()
# in the forward cell.
loss.backward()

In [6]:
# Autograd gradients, printed under the same headings as the NumPy results so
# the two sets of numbers can be compared side by side.
print('\nd_w2', '-' * 40, sep='\n')
print(w2.grad)

print('\nd_L1', '-' * 40, sep='\n')
print(L1.grad)

print('\nd_w1[0, 0]', '-' * 40, sep='\n')
print(w1.grad[0, 0])


d_w2
----------------------------------------
tensor([ 0.2278,  0.2619, -0.0979, -0.0837])

d_L1
----------------------------------------
tensor([ 0.0001, -0.0352, -0.0058, -0.0082])

d_w1[0, 0]
----------------------------------------
tensor(-0.1343)
