In [1]:
import torch
import torch.nn as nn
import numpy as np

import upsampling.operators as L

### Validating that custom layers are equivalent to PyTorch implementations

- Conv2d
- PixelShuffle
- ConvTranspose2d (Deconvolution)

These layers are written for code clarity, not for optimization. Because each layer is written as nested for-loops, it's best to use small values for feature map dimensions, upscaling factors, and kernel sizes.

In [2]:
# Seed torch's global RNG so the torch.randn inputs and the default nn layer
# initialisation below are identical on every Restart & Run All.
torch.manual_seed(0)

# Problem sizes are kept tiny on purpose: the custom operators are nested for-loops.
H = W = 4 # height/width - note using square feature maps
C = 3     # number of channels - note that the squeeze() operation below means this check only works for C > 1
K = 3     # kernel_size
N = 1     # batch_size
r = 2     # upscaling factor
P = (K - 1) // 2 # <---- needs to be same-padded to be valid

def p2p_accuracy(source:torch.Tensor, target:torch.Tensor, n_digits:int = 5) -> float:
    """Return the fraction of elements where ``source`` and ``target`` agree.

    The element-wise difference is passed through ``L.round`` with ``n_digits``
    (presumably rounding to that many decimal places - confirm against
    ``upsampling.operators``); an element counts as a match when the rounded
    difference is exactly zero.

    Args:
        source: First tensor to compare.
        target: Second tensor; must be broadcastable against ``source``.
        n_digits: Precision argument forwarded to ``L.round``.

    Returns:
        The matching fraction as a plain Python ``float``, as the annotation
        promises (rather than a 0-dim tensor).
    """
    distance = L.round(source - target, n_digits).abs()
    correct = (distance == 0).float()
    # .item() unwraps the 0-dim mean so callers receive a real float.
    return correct.mean().item()

# ------------------------------------------------------------------------------------- #
# Conv2d: torch reference versus the custom operator, both driven by the same weights
x = torch.randn(N, C, H, W)

conv_kwargs = dict(in_channels=C, out_channels=C, kernel_size=K, padding=P)
m = nn.Conv2d(bias=False, **conv_kwargs)  # torch reference (bias disabled)
n = L.Conv2d(**conv_kwargs)               # custom operator under test
n.weight = m.weight                       # hand the reference weights to the custom layer

expected = m(x).squeeze()  # reference consumes the batched input; squeeze() strips size-1 axes
actual = n(x.squeeze(0))   # custom layer receives the input without its batch axis
acc = p2p_accuracy(expected, actual)
print(f"Conv2            pixel-to-pixel = {acc:.1%}")

# ------------------------------------------------------------------------------------- #
# PixelShuffle: torch reference versus the custom operator
# Every input channel is filled with its own index, so the output shows where
# each channel's values end up after the shuffle.
x = torch.zeros(N, C*(r**2), H, W)

for c, plane in enumerate(x.unbind(dim=1)):
    plane.fill_(c)  # `plane` is a view into x, so this writes channel c in place

m = nn.PixelShuffle(r)  # torch reference
n = L.PixelShuffle(r)   # custom operator under test

expected = m(x).squeeze()
actual = n(x.squeeze(0))  # custom layer receives the input without its batch axis
acc = p2p_accuracy(expected, actual)
print(f"PixelShuffle     pixel-to-pixel = {acc:.1%}")

# ------------------------------------------------------------------------------------- #
# Sub-pixel convolution: torch Conv2d followed by PixelShuffle versus the custom fused operator
x = torch.randn(N, C, H, W)

# Reference pipeline: convolve up to C*r^2 channels, then shuffle them into space
m_layer1 = nn.Conv2d(
    in_channels=C,
    out_channels=C*(r**2),
    kernel_size=K,
    padding=P,
    bias=False,
)
m_layer2 = nn.PixelShuffle(r)
features = m_layer1(x)
p = m_layer2(features).squeeze()

# Custom operator, loaded with the reference convolution weights
n = L.SubPixelConvolution(
    in_channels=C,
    out_channels=C*(r**2),
    kernel_size=K,
    scaling_factor=r,
    padding=P,
)
n.convolution.weight = m_layer1.weight
q = n(x.squeeze(0))  # custom layer receives the input without its batch axis

acc = p2p_accuracy(p, q)

print(f"Sub-Pixel Conv   pixel-to-pixel = {acc:.1%}")

# ------------------------------------------------------------------------------------- #
# Testing the Nearest-Neighbour Resize Convolution Operator
x = torch.randn(N, C, H, W)

# Reference pipeline: nearest-neighbour upsampling followed by a plain convolution
m = nn.Upsample(scale_factor=r, mode='nearest')
n = nn.Conv2d(in_channels=C, out_channels=C, kernel_size=K, padding=P, stride=1, bias=False)
# Drop the batch axis explicitly, like the sibling tests do, rather than leaning on
# broadcasting of a leading size-1 axis inside p2p_accuracy.
p = n(m(x)).squeeze(0)

# Custom operator, loaded with the reference convolution weights
resize_conv = L.ResizeConvolution(scaling_factor=r, in_channels=C, out_channels=C, kernel_size=K, padding=P, stride=1)
resize_conv.convolution.weight = n.weight
q = resize_conv(x.squeeze(0))  # custom layer receives the input without its batch axis
acc = p2p_accuracy(p, q)

# Label typo fixed ("Connv" -> "Conv"); padding keeps the columns aligned with the other rows.
print(f"NN Resize Conv   pixel-to-pixel = {acc:.1%}")

# ------------------------------------------------------------------------------------- #
# Deconvolution: torch ConvTranspose2d versus the custom operator, sharing one weight tensor
x = torch.randn(N, C, H, W)

# Kernel and padding are both scaled by the upscaling factor r
deconv_kwargs = dict(in_channels=C, out_channels=C, kernel_size=K*r, stride=r, padding=P*r)
m = nn.ConvTranspose2d(bias=False, **deconv_kwargs)  # torch reference (bias disabled)
n = L.Deconvolution(**deconv_kwargs)                 # custom operator under test
n.weight = m.weight

expected = m(x).squeeze()
actual = n(x.squeeze(0))  # custom layer receives the input without its batch axis
acc = p2p_accuracy(expected, actual)

print(f"Deconvolution    pixel-to-pixel = {acc:.1%}")

Conv2            pixel-to-pixel = 100.0%
PixelShuffle     pixel-to-pixel = 100.0%
Sub-Pixel Conv   pixel-to-pixel = 100.0%
NN Resize Connv  pixel-to-pixel = 100.0%
Deconvolution    pixel-to-pixel = 100.0%


### Showing that a sub-pixel convolution is equivalent to a deconvolution using the weight shuffle algorithm

[Shi *et al.* (2016) - Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network](https://arxiv.org/abs/1609.05158)

[Shi *et al.* (2016) - Is the deconvolution layer the same as a convolutional layer?](https://arxiv.org/abs/1609.07009)

Given that the convolution kernel size is 3, the sub-pixel convolution can be transformed into a deconvolution. This allows a hardware designer to separate training from inference - software from hardware - when accelerating upsampling solutions.

In [3]:
# Tiny single-channel problem for the equivalence check (N is reused from the first test cell)
C = 1
K = 3
r = 2
H = W = 2
P = (K - 1) // 2 # same-padding; the comparison is only valid under this choice

x = torch.randn(N, C, H, W)
sample = x.squeeze(0)  # the custom operators are fed the input without its batch axis

# Sub-pixel convolution: its convolution weights are the ones that get shuffled below
subpixel_conv = L.SubPixelConvolution(
    in_channels=C,
    out_channels=C*(r**2),
    kernel_size=K,
    scaling_factor=r,
    padding=P,
)

# Deconvolution whose kernel, stride and padding are scaled by r, plus the weight shuffler
deconvolution = L.Deconvolution(
    in_channels=C,
    out_channels=C,
    kernel_size=K*r,
    stride=r,
    padding=P*r,
)
weight_shuff  = L.WeightShuffle(r)

# Rearrange the sub-pixel convolution weights and install them in the deconvolution
shuffled_weight = weight_shuff(subpixel_conv.convolution.weight)
deconvolution.weight = shuffled_weight

# Feed both operators the same input and compare element by element
y_conv   = subpixel_conv(sample)
y_deconv = deconvolution(sample)
acc = p2p_accuracy(y_conv, y_deconv)

print(f"Sub-pixel Convolution versus Deconvolution    pixel-to-pixel = {acc:.1%}")

Sub-pixel Convolution versus Deconvolution    pixel-to-pixel = 100.0%


### Showing that a NN resize convolution is equivalent to a deconvolution using the weight convolution algorithm

[Odena *et al.* (2016) - Deconvolution and Checkerboard Artifacts](https://distill.pub/2016/deconv-checkerboard/)

[Aitken *et al.* (2017) - Checkerboard artifact free sub-pixel convolution: A note on sub-pixel convolution, resize convolution and convolution resize](https://arxiv.org/abs/1707.02937)

Given that the resize convolution uses NN interpolation, the resize convolution can be transformed into a deconvolution. This allows a hardware designer to separate training from inference - software from hardware - when accelerating upsampling solutions.

In [4]:
# Tiny single-channel problem for the equivalence check (N is reused from the first test cell)
C = 1
K = 3
r = 2
H = W = 2
P = (K - 1) // 2 # same-padding; the comparison is only valid under this choice

x = torch.randn(N, C, H, W)
sample = x.squeeze(0)  # the custom operators are fed the input without its batch axis

# Nearest-neighbour resize convolution: its convolution weights are transformed below
resize_conv = L.ResizeConvolution(
    scaling_factor=r,
    in_channels=C,
    out_channels=C,
    kernel_size=K,
    padding=P,
    stride=1,
)

# Deconvolution with a kernel of size r + K - 1 and stride r
deconvolution = L.Deconvolution(
    in_channels=C,
    out_channels=C,
    kernel_size=r + K - 1,
    stride=r,
    padding=P,
)

# Derive the deconvolution weights from the resize-convolution weights
deconv_weight = L.weight_convolution(
    resize_conv.convolution.weight,
    in_channels=C,
    out_channels=C,
    kernel_size=K,
    scaling_factor=r,
)
deconvolution.weight = deconv_weight

# Feed both operators the same input and compare element by element
y_conv   = resize_conv(sample)
y_deconv = deconvolution(sample)
acc = p2p_accuracy(y_conv, y_deconv)

print(f"NN resize convolution versus deconvolution    pixel-to-pixel = {acc:.1%}")

NN resize convolution versus deconvolution    pixel-to-pixel = 100.0%


### Showing that the deconvolution operators give identical results

[Zhang *et al.* (2017) - A Design Methodology for Efficient Implementation of Deconvolutional Neural Networks on an FPGA](https://arxiv.org/abs/1705.02583)

[Colbert *et al.* (2021) - A Competitive Edge: Can FPGAs Beat GPUs at DCNN Inference Acceleration in Resource-Limited Edge Computing Applications?](https://arxiv.org/abs/2102.00294)



In [5]:
# ------------------------------------------------------------------------------------- #
# Testing the Standard Deconvolution algorithm against the Reverse Deconvolution algorithm
# Reuses H, W, C, K, r from the previous cell and N from the first test cell.
x = torch.randn(N, C, H, W)
m = L.Deconvolution(
    in_channels=C, out_channels=C, kernel_size=K*r, stride=r, padding=r, algorithm=L.DeconvolutionAlgorithms.STDD
)
n = L.Deconvolution(
    in_channels=C, out_channels=C, kernel_size=K*r, stride=r, padding=r, algorithm=L.DeconvolutionAlgorithms.REVD
)
# Share the baseline's weights with the layer under test. One assignment is enough;
# re-assigning m.weight to itself added nothing.
n.weight = m.weight

acc = p2p_accuracy(n(x.squeeze(0)), m(x.squeeze(0)))

print(f"Pixel-to-pixel accuracy = {acc:.1%}")

Pixel-to-pixel accuracy = 100.0%


In [6]:
# ------------------------------------------------------------------------------------- #
# Testing the Standard Deconvolution algorithm against the Reverse Deconvolution-2 algorithm
# Reuses H, W, C, K, r and N defined in earlier cells.
x = torch.randn(N, C, H, W)
m = L.Deconvolution(
    in_channels=C, out_channels=C, kernel_size=K*r, stride=r, padding=r, algorithm=L.DeconvolutionAlgorithms.STDD
)
n = L.Deconvolution(
    in_channels=C, out_channels=C, kernel_size=K*r, stride=r, padding=r, algorithm=L.DeconvolutionAlgorithms.REVD2
)
# Share the baseline's weights with the layer under test. One assignment is enough;
# re-assigning m.weight to itself added nothing.
n.weight = m.weight

acc = p2p_accuracy(n(x.squeeze(0)), m(x.squeeze(0)))

print(f"Pixel-to-pixel accuracy = {acc:.1%}")

Pixel-to-pixel accuracy = 100.0%


In [7]:
# ------------------------------------------------------------------------------------- #
# Standard Deconvolution (STDD) versus the Fractionally Strided Convolution algorithm (STRD)
# Reuses H, W, C, K, r and N defined in earlier cells.
x = torch.randn(N, C, H, W)

# Both layers are configured identically; only the algorithm differs
deconv_kwargs = dict(in_channels=C, out_channels=C, kernel_size=K*r, stride=r, padding=r)
m = L.Deconvolution(algorithm=L.DeconvolutionAlgorithms.STDD, **deconv_kwargs)  # baseline
n = L.Deconvolution(algorithm=L.DeconvolutionAlgorithms.STRD, **deconv_kwargs)  # under test
n.weight = m.weight  # same weights on both sides of the comparison

sample = x.squeeze(0)  # the custom operators are fed the input without its batch axis
candidate = n(sample)
baseline = m(sample)
acc = p2p_accuracy(candidate, baseline)

print(f"Pixel-to-pixel accuracy = {acc:.1%}")

Pixel-to-pixel accuracy = 100.0%


In [8]:
# ------------------------------------------------------------------------------------- #
# Standard Deconvolution (STDD) versus Transforming Deconvolution to Convolution (TDC)
# Reuses H, W, C, K, r and N defined in earlier cells.
x = torch.randn(N, C, H, W)

# Both layers are configured identically; only the algorithm differs
deconv_kwargs = dict(in_channels=C, out_channels=C, kernel_size=K*r, stride=r, padding=r)
m = L.Deconvolution(algorithm=L.DeconvolutionAlgorithms.STDD, **deconv_kwargs)  # baseline
n = L.Deconvolution(algorithm=L.DeconvolutionAlgorithms.TDC, **deconv_kwargs)   # under test
n.weight = m.weight  # same weights on both sides of the comparison

sample = x.squeeze(0)  # the custom operators are fed the input without its batch axis
candidate = n(sample)
baseline = m(sample)
acc = p2p_accuracy(candidate, baseline)

print(f"Pixel-to-pixel accuracy = {acc:.1%}")

Pixel-to-pixel accuracy = 100.0%
