In [1]:
import torch
import numpy as np

# Forward

In [2]:
# Network input: a single sample stored as a 2x1 float32 column vector.
x = torch.tensor([[1], [-1]], dtype=torch.float32)

In [7]:
# Parameters of the two affine layers. All four are leaf tensors created with
# requires_grad=True so autograd populates their .grad after backward().
w1 = torch.tensor(
    [[2.0, -3.0],
     [0.0, -1.0],
     [-3.0, -2.0]],
    requires_grad=True,
)  # 3x2 weight matrix of the first layer
b1 = torch.tensor([[-1.0], [1.0], [0.0]], requires_grad=True)  # 3x1 bias column
w2 = torch.tensor([1.0, -1.0, 1.0], requires_grad=True)  # length-3 weight vector
b2 = torch.tensor([2.0], requires_grad=True)  # single output bias

In [8]:
# First-layer pre-activation: affine map of the input column vector.
z1 = w1 @ x + b1
# z1 is an intermediate (non-leaf) result, so autograd would normally discard
# its gradient; retain_grad() keeps it available as z1.grad after backward().
z1.retain_grad()
z1

tensor([[ 4.],
        [ 2.],
        [-1.]], grad_fn=<AddBackward0>)

In [9]:
# Hidden activation: element-wise ReLU of the first-layer pre-activation.
x1 = z1.relu()
# Keep the gradient of this intermediate tensor for later inspection.
x1.retain_grad()
x1

tensor([[4.],
        [2.],
        [0.]], grad_fn=<ReluBackward0>)

In [10]:
# Output pre-activation: the 1-D weight vector times the 3x1 hidden column
# gives a one-element tensor, to which the output bias is added.
z2 = w2 @ x1 + b2
# Keep the gradient of this intermediate tensor for later inspection.
z2.retain_grad()
z2

tensor([4.], grad_fn=<AddBackward0>)

In [11]:
# Network output: sigmoid of the output pre-activation.
y = z2.sigmoid()
# Keep the gradient of this intermediate tensor for later inspection.
y.retain_grad()
y

tensor([0.9820], grad_fn=<SigmoidBackward>)

In [13]:
# Loss: squared difference between the network output and the constant 1.
J = (y - 1).square()
# Keep the gradient of the loss tensor itself as well.
J.retain_grad()
J

tensor([0.0003], grad_fn=<PowBackward0>)

In [14]:
# Autograd accumulates into .grad on every backward() call, and
# retain_graph=True allows this cell to be executed more than once. Clear the
# gradients of the parameters and of the retained intermediates first, so that
# re-running the cell reproduces the same values instead of summing them.
for _tensor in (w1, b1, w2, b2, z1, x1, z2, y, J):
    _tensor.grad = None
# Back-propagate dJ/d* through the graph; the graph is kept alive afterwards.
J.backward(retain_graph=True)

# Backward

Every gradient shown below is the derivative of the loss $J$ with respect to the tensor being inspected, $$ \frac{\partial J}{\partial *} $$ where $*$ stands for that tensor.

In [16]:
# dJ/dy = 2 * (y - 1) from the squared loss; dJ/dz2 = dJ/dy * y * (1 - y)
# from the sigmoid derivative.
y.grad, z2.grad

(tensor([-0.0360]), tensor([-0.0006]))

In [17]:
# dJ/dw2 = dJ/dz2 * x1 (one entry per hidden unit); the last entry is zero
# because the third hidden activation is zero.
w2.grad

tensor([-0.0025, -0.0013,  0.0000])

In [18]:
# dJ/dx1 = dJ/dz2 * w2, laid out as a column like x1.
x1.grad

tensor([[-0.0006],
        [ 0.0006],
        [-0.0006]])

In [19]:
# dJ/dz1 equals dJ/dx1 where z1 > 0 and is zero elsewhere (ReLU derivative);
# the third entry is zero because the third pre-activation is negative.
z1.grad

tensor([[-0.0006],
        [ 0.0006],
        [ 0.0000]])

In [20]:
# dJ/dw1 is the outer product of dJ/dz1 (3x1) with the input x (2x1),
# i.e. z1.grad @ x.T, giving a 3x2 matrix shaped like w1.
w1.grad

tensor([[-0.0006,  0.0006],
        [ 0.0006, -0.0006],
        [ 0.0000, -0.0000]])