In [None]:
%load_ext autoreload
%autoreload 2

import time
import numpy as np
from torch import nn
import torch
from d2l import torch as d2l

import mytorch
from mytorch import nn as mynn

In [None]:
# Problem size: a batch of `batch_size` inputs with `in_channels` channels and
# spatial size h x w, convolved with `out_channels` filters of `kernel_size`.
batch_size = 128
in_channels = 3
out_channels = 2
h = 32
w = 31
kernel_size = (5, 5)

# Seed both RNGs so the layer weights, the inputs and the targets (and hence
# every difference printed further down) are identical on each fresh run.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# pytorch conv2d (reference implementation, created without a bias term)
net = nn.Conv2d(in_channels, out_channels, kernel_size, bias=False)
# Tensor.numpy() shares memory with the tensor it came from; take a copy so
# the two layers own independent weight buffers and an in-place update in one
# implementation cannot silently change the other.
weight = net.weight.detach().numpy().copy()

# mytorch conv2d, loaded with the same weights as the torch layer
# NOTE(review): the torch layer has bias=False; presumably mynn.Conv2d either
# has no bias or initialises it to zero -- confirm against mytorch.
my_net = mynn.Conv2d(in_channels, out_channels, kernel_size)
my_net.W = weight

# Random input X and random regression target Y. Y has the spatial size of a
# stride-1, unpadded convolution output: (h - kh + 1, w - kw + 1).
out_h = h - kernel_size[0] + 1
out_w = w - kernel_size[1] + 1
X = np.random.randn(batch_size, in_channels, h, w)
Y = np.random.randn(batch_size, out_channels, out_h, out_w)

# float32 torch copies of the float64 NumPy arrays
Xt = torch.tensor(X, dtype=torch.float32)
Yt = torch.tensor(Y, dtype=torch.float32)

## Compare `forward()`

In [None]:
# Time the reference PyTorch forward pass. perf_counter() is a monotonic,
# high-resolution clock meant for measuring short intervals; time.time() can
# be coarse and may jump if the system clock is adjusted.
# The autograd graph of torch_out is kept on purpose: the backward comparison
# calls .backward() on a loss built from it.
tic = time.perf_counter()
torch_out = net(Xt)
toc = time.perf_counter()
torch_time = toc - tic

# Time the mytorch forward pass on the same input (as a NumPy array).
tic = time.perf_counter()
my_out = my_net.forward(X)
toc = time.perf_counter()
my_time = toc - tic

# Norm of the element-wise difference between the two outputs.
print('Difference:', np.linalg.norm(my_out - torch_out.detach().numpy()))
print('\nTorch Time:', torch_time, '\nMy Time:', my_time)

## Compare `backward()` and gradients

In [None]:
# --- dLdW: gradient of the MSE loss w.r.t. the convolution weights ---------
# The optimizer is only used to reset the parameter gradients; no step() is
# taken in this cell, so the weights stay as they are.
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.0)
optimizer.zero_grad()
torch_loss_fn = nn.MSELoss()
torch_loss = torch_loss_fn(torch_out, Yt)
# retain_graph=True keeps the graph of torch_out alive so this cell can be
# re-run without recomputing the forward pass.
torch_loss.backward(retain_graph=True)
# Clone the gradient: the second backward() below accumulates into
# net.weight.grad in place, which would otherwise double this reference value.
torch_dLdW = net.weight.grad.detach().clone()

# mytorch backward pass. The loss is seeded with the *torch* forward output,
# so this check does not depend on my_net.forward() being correct.
my_mse_fn = mynn.MSELoss()
my_mse = my_mse_fn.forward(torch_out.detach().numpy(), Yt.detach().numpy())
dLdO = my_mse_fn.backward()
dLdX = my_net.backward(dLdO)
my_dLdW = my_net.dLdW

print('Difference in dLdW:', np.linalg.norm(my_dLdW - torch_dLdW.numpy()))

# --- dLdX: gradient of the MSE loss w.r.t. the input -----------------------
# Build float32 leaf tensors that track gradients directly. Calling .float()
# on a requires_grad tensor would produce a non-leaf copy whose .grad is only
# populated after an extra retain_grad() call.
Xt = torch.tensor(X, dtype=torch.float32, requires_grad=True)
Yt = torch.tensor(Y, dtype=torch.float32, requires_grad=True)
torch_out = net(Xt)
# torch_out is a non-leaf tensor; ask autograd to keep its gradient so the
# gradient w.r.t. the layer output (dLdO) can be read after backward().
torch_out.retain_grad()
torch_loss = torch_loss_fn(torch_out, Yt)
torch_loss.backward(retain_graph=True)
torch_dLdX = Xt.grad.detach()
# Gradient w.r.t. the layer output. Yt.grad would be the gradient w.r.t. the
# target, which for MSE has the opposite sign.
torch_dLdO = torch_out.grad.detach()

# Convert to NumPy explicitly rather than mixing a torch tensor with an
# ndarray in one arithmetic expression.
print('Difference in dLdX:', np.linalg.norm(torch_dLdX.numpy() - dLdX))