In [1]:
import numpy as np
import torch

from nn import NeuralNetwork, LinearLayer, ActivationFunction, LossFunction

## PyTorch

In [2]:
# Seed torch's global RNG before anything random happens, so the weight
# initialisation below and the later torch.rand calls produce the same values
# on every Restart & Run All.
torch.manual_seed(0)

# Reference model: fully connected 8 -> 16 -> 16 -> 2 network with a ReLU
# after each of the first two linear layers.
tnet = torch.nn.Sequential(
    torch.nn.Linear(8, 16),
    torch.nn.ReLU(),
    torch.nn.Linear(16, 16),
    torch.nn.ReLU(),
    torch.nn.Linear(16, 2),
)
# Mean-squared-error loss with PyTorch's default settings.
tloss_fn = torch.nn.MSELoss()

In [3]:
# Random batch: 10 samples with 8 features each. Gradient tracking is switched
# on for the input so that its gradient can be read back after backward().
tdata = torch.rand((10, 8)).requires_grad_()
# Random regression targets matching the network's 2 outputs.
tlabel = torch.rand((10, 2))

# Forward pass through the PyTorch model, then the loss on its predictions.
tpred = tnet(tdata)
tloss = tloss_fn(tpred, tlabel)

In [4]:
# Backpropagate the loss through the PyTorch graph. Because tdata was created
# with requires_grad=True, this populates tdata.grad.
tloss.backward()
# Gradient of the loss with respect to the input batch, kept as the reference
# value for the comparison at the end of the notebook.
tgrad = tdata.grad

## Our Implementation

In [5]:
# Same 8 -> 16 -> 16 -> 2 layout as the PyTorch model, built from our own
# layer classes.
# NOTE(review): the comparison only makes sense if ActivationFunction() is a
# ReLU and LossFunction() is a mean-squared error — confirm against `nn`.
layer_stack = [
    LinearLayer((8, 16)),
    ActivationFunction(),
    LinearLayer((16, 16)),
    ActivationFunction(),
    LinearLayer((16, 2)),
]
net = NeuralNetwork(layer_stack)
loss_fn = LossFunction()

We force both networks to start from the same weights by copying PyTorch's randomly initialised parameters into our layers.

In [6]:
# Walk both layer sequences in lockstep and copy PyTorch's weights and biases
# into each of our linear layers. Slice assignment writes into the existing
# arrays instead of rebinding the attributes.
for (layer, tlayer) in zip(net.net, tnet):
    if not isinstance(layer, LinearLayer):
        continue  # only linear layers are synchronised
    params = layer._params
    params.weights[:] = tlayer.weight.detach().numpy()
    params.bias[:] = tlayer.bias.detach().numpy()

In [7]:
# Feed our network the exact batch PyTorch saw. detach() is needed because
# tdata tracks gradients and cannot be converted to NumPy directly.
data = tdata.detach().numpy()
pred = net.foward(data)

# Evaluate our loss on the same targets as the PyTorch loss.
label = tlabel.numpy()
loss = loss_fn.foward(pred, label)

In [8]:
# Start backpropagation from whatever loss_fn.backward() returns (presumably
# the gradient of the loss w.r.t. the prediction — confirm against `nn`), then
# push it back through our network.
loss_grad = loss_fn.backward()
grad = net.backward(loss_grad)

## Comparison

In [9]:
# Check our gradient computation by comparing it with PyTorch's autograd
# result for the same input batch.
expected = tgrad.numpy()

# Fail loudly on a mismatch instead of only displaying False. The tolerances
# are set explicitly to match np.allclose's defaults, so the assertion and the
# displayed boolean below always agree.
np.testing.assert_allclose(grad, expected, rtol=1e-5, atol=1e-8, equal_nan=False)

# Displayed result: (within tolerance?, norm of the difference).
np.allclose(grad, expected), np.linalg.norm(grad - expected)

(True, 2.5736320173170234e-09)