<a href="https://colab.research.google.com/github/cressidasuphina/ivy/blob/master/Untitled3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn

In [2]:
class CustomConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size,  weight, bias, stride=1, padding=0):
        super(CustomConv2d, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding


        self.weight = weight #torch.randn(out_channels, in_channels, kernel_size, kernel_size)
        self.bias = bias #torch.zeros(out_channels)

    def forward(self, input):
        batch_size, in_channels, in_height, in_width = input.size()
        out_height = (in_height + 2 * self.padding - self.kernel_size) // self.stride + 1
        out_width = (in_width + 2 * self.padding - self.kernel_size) // self.stride + 1

        output = torch.zeros(batch_size, self.out_channels, out_height, out_width)
        # Iterating over each element in the batch, output channel, and spatial dimensions
        for b in range(batch_size):
            for c_out in range(self.out_channels):
                for h_out in range(out_height):
                    for w_out in range(out_width):
                      # Computing the input slice to apply convolution
                        h_start = h_out * self.stride
                        w_start = w_out * self.stride
                        h_end = h_start + self.kernel_size
                        w_end = w_start + self.kernel_size

                        input_slice = input[b, :, h_start:h_end, w_start:w_end]
                        # Computing the output value using convolution and bias

                        output[b, c_out, h_out, w_out] = torch.sum(input_slice * self.weight[c_out]) + self.bias[c_out]

        return output

    def backward(self, input, grad_output):
        batch_size, in_channels, in_height, in_width = input.size()
        out_channels, _, kernel_height, kernel_width = self.weight.size()
        _, _, out_height, out_width = grad_output.size()

        grad_input = torch.zeros_like(input)
        grad_weight = torch.zeros_like(self.weight)
        grad_bias = torch.zeros_like(self.bias)

        for b in range(batch_size):
            for c_out in range(out_channels):
                for h_out in range(out_height):
                    for w_out in range(out_width):
                        # Computing the input slice corresponding to the current gradient
                        h_start = h_out * self.stride
                        h_start = h_out * self.stride
                        w_start = w_out * self.stride
                        h_end = h_start + kernel_height
                        w_end = w_start + kernel_width

                        input_slice = input[b, :, h_start:h_end, w_start:w_end]
                        # Updating gradients using chain rule and derivatives

                        grad_input[b, :, h_start:h_end, w_start:w_end] += self.weight[c_out] * grad_output[b, c_out, h_out, w_out]
                        grad_weight[c_out] += input_slice * grad_output[b, c_out, h_out, w_out]
                        grad_bias[c_out] += grad_output[b, c_out, h_out, w_out]

        return grad_input, grad_weight, grad_bias

In [3]:
batch_size = 1
input_channels = 3
output_channels = 8
input_height = 28
input_width = 28
kernel_size = 3
output_height = input_height - kernel_size + 1
output_width = input_width - kernel_size + 1


grad_output = torch.randn(batch_size, output_channels, output_height, output_width)
input_data = torch.randn(batch_size, input_channels, input_height, input_width)
input_data.requires_grad = True
# PyTorch conv2d operation using the same inputs
pytorch_conv2d = torch.nn.Conv2d(in_channels=input_channels, out_channels=output_channels, kernel_size=kernel_size)
pytorch_output = pytorch_conv2d(input_data)

weights = pytorch_conv2d.weight
bias = pytorch_conv2d.bias

# Computing the gradient of the loss with respect to the weights using PyTorch's autograd
pytorch_output.backward(grad_output)
pytorch_grad_weight = pytorch_conv2d.weight.grad
pytorch_grad_input = input_data.grad
pytorch_grad_bias = pytorch_conv2d.bias.grad

In [4]:
custom_conv2d = CustomConv2d(in_channels=1, out_channels=8, kernel_size=3, weight=weights, bias=bias ,stride=1, padding=0)

In [5]:
custom_output = custom_conv2d.forward(input_data)
custom_output[0,0,0]

tensor([-0.1085,  0.1187, -0.8726, -1.0744,  0.5482, -0.4275, -0.1119,  0.1941,
        -0.1385,  0.6839, -0.2433,  0.9042, -0.0550,  0.8143, -0.5339, -0.1639,
        -0.1327, -0.5068, -0.7393, -0.1712, -0.1252,  0.3437,  0.1382, -0.0077,
        -0.1278, -0.2230], grad_fn=<SelectBackward0>)

In [6]:
pytorch_output[0,0,0]

tensor([-0.1085,  0.1187, -0.8726, -1.0744,  0.5482, -0.4275, -0.1119,  0.1941,
        -0.1385,  0.6839, -0.2433,  0.9042, -0.0550,  0.8143, -0.5339, -0.1639,
        -0.1327, -0.5068, -0.7393, -0.1712, -0.1252,  0.3437,  0.1382, -0.0077,
        -0.1278, -0.2230], grad_fn=<SelectBackward0>)

In [13]:
#Pytorch Conv2d forward output and my custom conv2d comparison
mse_weights = ((pytorch_output - custom_output) ** 2).mean().item()
mse_weights

4.831173116859886e-15

In [8]:
# Getting my custom Conv2d grad_input, grad_weight, grad_bias and its comparisons
grad_input, grad_weight, grad_bias = custom_conv2d.backward(input_data, grad_output)

In [9]:
mse_input = ((pytorch_grad_input - grad_input) ** 2).mean().item()
mse_input

2.600986273419443e-14

In [10]:
mse_bias = ((pytorch_grad_bias - grad_bias) ** 2).mean().item()
mse_bias

1.1181100489920937e-10

In [11]:
mse_weights = ((pytorch_grad_weight - grad_weight) ** 2).mean().item()
mse_weights

1.9024425090830022e-10

In [130]:
def conv_transpose2d_basic(input, weight, bias, stride=1, padding=0, output_padding=0, dilation=1):

    batch_size, in_channels, in_height, in_width = input.size()
    _, out_channels, kernel_height, kernel_width = weight.size()


    # Calculating output dimensions
    out_height = (in_height - 1 + 2 * padding) * stride - 2 * padding + dilation * (kernel_height - 1) + output_padding + 1
    out_width = (in_width - 1 + 2 * padding) * stride - 2 * padding + dilation * (kernel_width - 1) + output_padding + 1



    output = torch.zeros(batch_size, out_channels, out_height, out_width)

    # Iterating over each batch and channel
    for b in range(batch_size):
        for c_out in range(out_channels):
            for h_out in range(out_height):
                for w_out in range(out_width):
                    # Computing the receptive field in the input
                    h_start = h_out * stride - padding + dilation * (kernel_height - 1)
                    w_start = w_out * stride - padding + dilation * (kernel_width - 1)
                    h_end = h_start + dilation * (kernel_height - 1) + 1
                    w_end = w_start + dilation * (kernel_width - 1) + 1

                    if h_start < 0 or w_start < 0 or h_end > in_height or w_end > in_width:
                        continue

                    # Computing the sum of products for this output location
                    for c_in in range(in_channels):
                        output[b, c_out, h_out, w_out] += torch.sum(
                            input[b, c_in, h_start:h_end:dilation, w_start:w_end:dilation] * weight[c_in]
                        )

                    # Adding bias if provided
                    if bias is not None:
                        output[b, c_out, h_out, w_out] += bias[c_out]

    return output

# Example usage
batch_size = 1
input_channels = 3
output_channels = 8
kernel_size = 3
input_height = 28
input_width = 28




input_data = torch.randn(batch_size, input_channels, input_height, input_width)

input_data.requires_grad = True

#PyTorch ConvTranspose2d operation
#torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)
pytorch_conv_transpose2d = nn.ConvTranspose2d(in_channels=input_channels, out_channels=output_channels, kernel_size=kernel_size)
pytorch_output = pytorch_conv_transpose2d(input_data)


weights = pytorch_conv_transpose2d.weight
bias = pytorch_conv_transpose2d.bias

# Custom ConvTranspose2d operation
output = conv_transpose2d_basic(input_data, weights, bias)
#My custom ConvTranspose2d forward operation and Pytorch ConvTranspose2d operation comparison
mse = ((output - pytorch_output) ** 2).mean().item()
print("MSE:", mse)

MSE: 0.44191673398017883


In [132]:
def conv_transpose2d_basic_backward(output_grad, input, weight, bias, stride=1, padding=0, output_padding=0, dilation=1):
    batch_size, out_channels, out_height, out_width = output_grad.size()
    in_channels, _, kernel_height, kernel_width = weight.size()

    grad_input = torch.zeros_like(input)
    grad_weight = torch.zeros_like(weight)
    grad_bias = torch.zeros_like(bias) if bias is not None else None

    for b in range(batch_size):
        for c_out in range(out_channels):
            for h_out in range(out_height):
                for w_out in range(out_width):
                    h_start = h_out * stride - padding + dilation * (kernel_height - 1)
                    w_start = w_out * stride - padding + dilation * (kernel_width - 1)
                    h_end = h_start + dilation * (kernel_height - 1) + 1
                    w_end = w_start + dilation * (kernel_width - 1) + 1
                    #checking if the computed receptive field is within valid bounds to prevent indexing errors, if the condition is not met, the continue statement skips thatt iteration of the loop
                    if h_start < 0 or w_start < 0 or h_end > input.size(2) or w_end > input.size(3):
                        continue
                    #Calculating gradients for input, weight, and bias
                    for c_in in range(in_channels):
                        grad_input[b, c_in, h_start:h_end:dilation, w_start:w_end:dilation] += (
                            output_grad[b, c_out, h_out, w_out] * weight[c_in, c_out]
                        )
                        grad_weight[c_in, c_out] += (
                            output_grad[b, c_out, h_out, w_out]
                            * input[b, c_in, h_start:h_end:dilation, w_start:w_end:dilation]
                        )

                    if grad_bias is not None:
                        grad_bias[c_out] += output_grad[b, c_out, h_out, w_out]

    return grad_input, grad_weight, grad_bias

output_height = 30 #input_height - kernel_size + 1
output_width = 30 #input_width - kernel_size + 1

grad_output = torch.randn(batch_size, output_channels, output_height, output_width)

# Compute the gradient of the loss with respect to the weights using PyTorch's autograd
pytorch_output.backward(grad_output)
pytorch_grad_weight = pytorch_conv_transpose2d.weight.grad
pytorch_grad_input = input_data.grad
pytorch_grad_bias = pytorch_conv_transpose2d.bias.grad


grad_input, grad_weight, grad_bias = conv_transpose2d_basic_backward(grad_output, input_data, weights, bias )

# Calculating mse between PyTorch gradients and custom gradients
mse_input = ((pytorch_grad_input - grad_input) ** 2).mean().item()
mse_input

0.5641183257102966