In [1]:
import torch
import torch.nn as nn
import numpy as np

import upsampling.layers as L

### Validating that custom layers are equivalent to PyTorch implementations

- Conv2d
- PixelShuffle
- ConvTranspose2d (Deconvolution)

These layers are written for code clarity, not for optimization. Because each layer is written as nested for-loops, it's best to use small values for feature map dimensions, upscaling factors, and kernel sizes.

In [2]:
# Shared configuration for the equivalence checks in this notebook.
H = W = 4 # height/width - note using square feature maps
C = 3     # number of channels - squeeze() below removes every size-1 axis, so these checks only work for C > 1
K = 3     # kernel_size
N = 1     # batch_size
r = 2     # upscaling factor

# Seed the global torch RNG so the torch.randn inputs and the randomly
# initialised nn reference weights are the same on every Restart & Run All.
torch.manual_seed(0)

# ------------------------------------------------------------------------------------- #
# Conv2d: custom layer versus the torch.nn reference
x = torch.randn(N, C, H, W)

# Reference layer first (bias disabled), then the custom layer with the same geometry.
m = nn.Conv2d(
    in_channels=C,
    out_channels=C,
    kernel_size=K,
    bias=False,
    padding=1,
)
n = L.Conv2d(
    in_channels=C,
    out_channels=C,
    kernel_size=K,
    padding=1,
)
# Hand the reference weights to the custom layer so both compute with the same kernel.
n.weight = m.weight

# The reference sees the batched tensor; the custom layer is called on the squeezed one.
conv_expected = m(x).squeeze()
conv_actual = n(x.squeeze())
conv_mse = (conv_expected - conv_actual).pow(2).mean().item()
print(f"Conv2            MSE = {conv_mse:.3f}")

# ------------------------------------------------------------------------------------- #
# PixelShuffle: custom layer versus the torch.nn reference
# Every channel is filled with its own index so the shuffled layout is easy to inspect.
shuffle_channels = C * (r ** 2)
x = torch.zeros(N, shuffle_channels, H, W)
x += torch.arange(shuffle_channels).view(1, shuffle_channels, 1, 1)

m = nn.PixelShuffle(r)
n = L.PixelShuffle(r)

shuffle_expected = m(x).squeeze()
shuffle_actual = n(x.squeeze())
shuffle_mse = (shuffle_expected - shuffle_actual).pow(2).mean().item()
print(f"PixelShuffle     MSE = {shuffle_mse:.3f}")

# ------------------------------------------------------------------------------------- #
# Sub-pixel convolution (Conv2d followed by PixelShuffle): custom versus torch.nn
x = torch.randn(N, C, H, W)

# Reference pipeline on the batched input.
m_layer1 = nn.Conv2d(
    in_channels=C,
    out_channels=C*(r**2),
    kernel_size=K,
    padding=1,
    bias=False,
)
m_layer2 = nn.PixelShuffle(r)
p = m_layer1(x)
p = m_layer2(p).squeeze()

# Custom pipeline on the squeezed input, reusing the reference convolution weights.
n_layer1 = L.Conv2d(
    in_channels=C,
    out_channels=C*(r**2),
    kernel_size=K,
    padding=1,
)
n_layer2 = L.PixelShuffle(r)
n_layer1.weight = m_layer1.weight
q = n_layer1(x.squeeze())
q = n_layer2(q)

subpixel_mse = (p - q).pow(2).mean().item()
print(f"Sub-Pixel Conv   MSE = {subpixel_mse:.3f}")

# ------------------------------------------------------------------------------------- #
# Testing the Deconvolution Operator
# Compares the custom L.Deconvolution layer against nn.ConvTranspose2d built with the
# same kernel size (K*r), stride (r) and padding (r).
x = torch.randn(N, C, H, W)
m = nn.ConvTranspose2d(in_channels=C, out_channels=C, kernel_size=K*r, stride=r, padding=r, bias=False)
n = L.Deconvolution(in_channels=C, out_channels=C, kernel_size=K*r, stride=r, padding=r)
# Share the reference weights with the custom layer (a single assignment is enough;
# re-assigning m.weight to itself was redundant).
n.weight = m.weight

# The reference sees the batched tensor; the custom layer is called on the squeezed one.
print(f"Deconvolution    MSE = {(m(x).squeeze() - n(x.squeeze())).pow(2).mean().item():.3f}")


Conv2            MSE = 0.000
PixelShuffle     MSE = 0.000
Sub-Pixel Conv   MSE = 0.000
Deconvolution    MSE = 0.000


### Showing that a sub-pixel convolution is equivalent to a deconvolution using the WeightShuffle operator

[Shi *et al.* (2016) - Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network](https://arxiv.org/abs/1609.05158)

[Shi *et al.* (2016) - Is the deconvolution layer the same as a convolutional layer?](https://arxiv.org/abs/1609.07009)

Given that the convolution kernel size is 3, the sub-pixel convolution can be transformed into a deconvolution. This allows a hardware designer to separate training from inference - software from hardware - when accelerating upsampling solutions.

In [3]:
# Small feature map: the custom layers are loop-based, so keep the problem tiny.
H = W = 2
K = 3
r = 2

x = torch.randn(N, C, H, W)

# Path 1 - sub-pixel convolution: a K x K convolution producing C*r^2 channels,
# followed by a pixel shuffle with factor r.
convolution = L.Conv2d(
    in_channels=C,
    out_channels=C*(r**2),
    kernel_size=K,
    stride=1,
    padding=1,
)
pixel_shuff = L.PixelShuffle(r)

# Path 2 - a single deconvolution with kernel K*r, stride r and padding r.
deconvolution = L.Deconvolution(
    in_channels=C,
    out_channels=C,
    kernel_size=K*r,
    stride=r,
    padding=r,
)
weight_shuff = L.WeightShuffle(r)

# The deconvolution kernel is obtained by weight-shuffling the convolution kernel.
deconvolution.weight = weight_shuff(convolution.weight)

# Feed the same squeezed input through both paths and compare.
x_squeezed_conv = x.squeeze()
y_conv = pixel_shuff(convolution(x_squeezed_conv))
x_squeezed_deconv = x.squeeze()
y_deconv = deconvolution(x_squeezed_deconv)

equivalence_mse = (y_conv - y_deconv).pow(2).mean().item()
print(f"Sub-pixel Convolution versus Deconvolution    MSE = {equivalence_mse:.3f}")

Sub-pixel Convolution versus Deconvolution    MSE = 0.000


### Showing that the deconvolution operators give identical results

[Zhang *et al.* (2017) - A Design Methodology for Efficient Implementation of Deconvolutional Neural Networks on an FPGA](https://arxiv.org/abs/1705.02583)

[Colbert *et al.* (2021) - A Competitive Edge: Can FPGAs Beat GPUs at DCNN Inference Acceleration in Resource-Limited Edge Computing Applications?](https://arxiv.org/abs/2102.00294)



In [4]:
# ------------------------------------------------------------------------------------- #
# Testing the Standard Deconvolution operator against the Reverse Deconvolution operator
# Both layers are L.Deconvolution with identical geometry; only `algorithm` differs.
x = torch.randn(N, C, H, W)
m = L.Deconvolution(
    in_channels=C, out_channels=C, kernel_size=K*r, stride=r, padding=r, algorithm=L.DeconvolutionAlgorithms.STDD
)
n = L.Deconvolution(
    in_channels=C, out_channels=C, kernel_size=K*r, stride=r, padding=r, algorithm=L.DeconvolutionAlgorithms.REVD
)
# Give the REVD layer the STDD layer's weights (a single assignment is enough;
# re-assigning m.weight to itself was redundant).
n.weight = m.weight

print(f"Deconvolution    MSE = {(n(x.squeeze()) - m(x.squeeze())).pow(2).mean().item():.3f}")

Deconvolution    MSE = 0.000


In [5]:
# ------------------------------------------------------------------------------------- #
# Testing the Standard Deconvolution operator against the Reverse Deconvolution-2 operator
# Both layers are L.Deconvolution with identical geometry; only `algorithm` differs.
x = torch.randn(N, C, H, W)
m = L.Deconvolution(
    in_channels=C, out_channels=C, kernel_size=K*r, stride=r, padding=r, algorithm=L.DeconvolutionAlgorithms.STDD
)
n = L.Deconvolution(
    in_channels=C, out_channels=C, kernel_size=K*r, stride=r, padding=r, algorithm=L.DeconvolutionAlgorithms.REVD2
)
# Give the REVD2 layer the STDD layer's weights (a single assignment is enough;
# re-assigning m.weight to itself was redundant).
n.weight = m.weight

print(f"Deconvolution    MSE = {(n(x.squeeze()) - m(x.squeeze())).pow(2).mean().item():.3f}")

Deconvolution    MSE = 0.000


In [6]:
# Nearest-neighbour upsampling followed by a K x K convolution, compared against a
# single transposed convolution whose kernel is built from the convolution weights.
H = W = 3
K = 3     # convolution kernel size - the "same" padding below assumes K is odd
r = 2
C = 1

# Sizes derived from K and r instead of hard-coded for K = 3.
conv_padding = (K - 1) // 2   # keeps the convolution output at r*H x r*W
deconv_kernel = K + r - 1     # footprint of the K x K kernel swept over an r x r block

x = torch.randn(N, C, H, W)
m = nn.Upsample(scale_factor=r, mode='nearest')
n = nn.Conv2d(in_channels=C, out_channels=C, kernel_size=K, padding=conv_padding, stride=1, bias=False)

# Rotate the kernel by 180 degrees in its spatial plane and swap the channel axes:
# Conv2d stores weights as (out, in, kH, kW) while ConvTranspose2d expects
# (in, out, kH, kW). The swap is a no-op for C = 1.
rotated = torch.rot90(n.weight.data, 2, [2, 3]).transpose(0, 1)

# Accumulate one shifted copy of the rotated kernel per position in the r x r block.
z = torch.zeros(C, C, deconv_kernel, deconv_kernel)
for i in range(r):
    for j in range(r):
        z[:, :, i:i+K, j:j+K] += rotated

n2 = nn.ConvTranspose2d(
    in_channels=C, out_channels=C, kernel_size=deconv_kernel, stride=r, padding=conv_padding, bias=False
)
n2.weight.data = z

print(f"MSE = {(n(m(x)) - n2(x)).pow(2).mean().item():.3f}")

MSE = 0.000


In [None]:
# Impulse response of a transposed convolution whose weights are all ones:
# a single non-zero input pixel shows which output pixels it contributes to.
H = W = 2
K = 3
r = 3
C = 1

# All-zero input with one unit impulse at row 0, column 1.
x = torch.zeros(N, C, H, W)
x[0,0,0,1] = 1
# K + r - 1 equals the previous r + 2 for K = 3.
n = nn.ConvTranspose2d(in_channels=C, out_channels=C, kernel_size=K+r-1, stride=r, padding=1, bias=False)
# Set every weight to 1 explicitly; dividing the tensor by itself gives NaN
# wherever a weight happens to be exactly zero.
nn.init.ones_(n.weight)
n(x).squeeze()

In [None]:
# Display the dimensions of the current input tensor.
x.size()

In [None]:
# Display the layer's kernel rotated by 180 degrees in its spatial plane
# (reversing both spatial axes is the same as two 90-degree rotations).
torch.flip(n.weight.data, dims=[2, 3]).squeeze()

In [None]:
# Display the dimensions of the current input tensor.
x.size()