In [1]:
import numpy as np
import torch
import torch.nn as nn
from IPython.display import Image

In [2]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(2, 2)
        self.s1 = nn.Sigmoid()
        self.fc2 = nn.Linear(2, 2)
        self.s2 = nn.Sigmoid()
        self.fc1.weight = torch.nn.Parameter(torch.Tensor([[0.15, 0.2], [0.250, 0.30]]))
        self.fc1.bias = torch.nn.Parameter(torch.Tensor([0.35]))
        self.fc2.weight = torch.nn.Parameter(torch.Tensor([[0.4, 0.45], [0.5, 0.55]]))
        self.fc2.bias = torch.nn.Parameter(torch.Tensor([0.6]))

    def forward(self, x):
        x = self.fc1(x)
        x = self.s1(x)
        x = self.fc2(x)
        x = self.s2(x)
        return x


net = Net()
print(net)

Net(
  (fc1): Linear(in_features=2, out_features=2, bias=True)
  (s1): Sigmoid()
  (fc2): Linear(in_features=2, out_features=2, bias=True)
  (s2): Sigmoid()
)


In [3]:
# parameters: weight and bias
print(list(net.parameters()))
# input data
weight2 = list(net.parameters())[2]
data = torch.Tensor([0.05, 0.1])

[Parameter containing:
tensor([[0.1500, 0.2000],
        [0.2500, 0.3000]], requires_grad=True), Parameter containing:
tensor([0.3500], requires_grad=True), Parameter containing:
tensor([[0.4000, 0.4500],
        [0.5000, 0.5500]], requires_grad=True), Parameter containing:
tensor([0.6000], requires_grad=True)]


In [4]:
# output of last layer
out = net(data)
target = torch.Tensor([0.01, 0.99])  # a dummy target, for example
criterion = nn.MSELoss()
loss = criterion(out, target);
loss

tensor(0.2984, grad_fn=<MseLossBackward0>)

In [8]:
# A simple hook class that returns the input and output of a layer during forward/backward pass
class Hook:
    def __init__(self, module:nn.Module, backward=False):
        if not backward:
            self.hook = module.register_forward_hook(self.hook_fn)
        else:
            self.hook = module.register_backward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        self.input = input
        self.output = output

    def close(self):
        self.hook.remove()

In [10]:
# register hooks on each layer
hookF = [Hook(layer[1]) for layer in list(net._modules.items())]
hookB = [Hook(layer[1], backward=True) for layer in list(net._modules.items())]
# run a data batch
out = net(data)
# backprop once to get the backward hook results
# out.backward(torch.tensor([1, 1], dtype=torch.float), retain_graph=True)
out.backward(retain_graph=True)
#! loss.backward(retain_graph=True)  # doesn't work with backward hooks, 
#! since it's not a network layer but an aggregated result from the outputs of last layer vs target 

# grads = 

print('***' * 3 + '  Forward Hooks Inputs & Outputs  ' + '***' * 3)
for hook in hookF:
    print(hook.input)
    print(hook.output)
    print('---' * 17)
print('\n')
print('***' * 3 + '  Backward Hooks Inputs & Outputs  ' + '***' * 3)
for hook in hookB:
    print(hook.input)
    print(hook.output)
    print('---' * 17)

RuntimeError: grad can be implicitly created only for scalar outputs