In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

def conv2d_to_matmul(input, conv_layer):
    # Extract Conv2D parameters
    in_channels = conv_layer.in_channels
    out_channels = conv_layer.out_channels
    kernel_size = conv_layer.kernel_size
    stride = conv_layer.stride
    padding = conv_layer.padding
    dilation = conv_layer.dilation

    # Unfold (im2col) the input tensor
    input_unf = F.unfold(input, kernel_size=kernel_size, dilation=dilation, padding=padding, stride=stride)

    # Reshape the weight tensor of the conv layer
    weight = conv_layer.weight.view(out_channels, -1)

    # Perform matrix multiplication
    output_unf = weight @ input_unf

    # Reshape the output to the correct dimensions
    output_height = (input.size(2) + 2*padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) // stride[0] + 1
    output_width = (input.size(3) + 2*padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) // stride[1] + 1
    output = output_unf.view(1, out_channels, output_height, output_width)

    return output

# Example usage
input = torch.randn(1, 3, 5, 5)  # Batch size 1, 3 channels, 5x5 image
conv_layer = nn.Conv2d(in_channels=3, out_channels=2, kernel_size=3, stride=1, padding=1)

# Get the output using the conv2d_to_matmul function
output = conv2d_to_matmul(input, conv_layer)

# Verify against the standard convolution
conv_output = conv_layer(input)

print("Output from matrix multiplication:")
print(output)

print("Output from standard convolution:")
print(conv_output)

# Verify that the outputs are the same
print("Difference between outputs:", torch.abs(output - conv_output).max())


Output from matrix multiplication:
tensor([[[[-0.2798, -0.0576,  0.4760, -0.0022,  0.5965],
          [-0.3831, -0.6861,  0.1310, -0.2115,  0.6901],
          [ 0.1581, -0.3168, -0.4913, -0.9078, -0.0365],
          [-0.1768, -0.2058, -1.0859, -0.3986, -0.3868],
          [-0.3603, -0.2312, -1.2773, -0.3161,  0.3108]],

         [[ 0.1639,  0.0842,  0.9165, -0.2983, -0.2242],
          [-0.0229, -0.0566, -0.1032, -0.2204, -0.1244],
          [ 0.2391,  0.2545,  0.6743, -0.2300, -0.2058],
          [-0.1515,  0.7881,  0.5873, -0.8179, -0.1022],
          [ 0.1383, -0.2363, -0.0521, -0.9489, -0.5987]]]],
       grad_fn=<ViewBackward0>)
Output from standard convolution:
tensor([[[[-0.4578, -0.2355,  0.2980, -0.1801,  0.4185],
          [-0.5611, -0.8640, -0.0470, -0.3895,  0.5121],
          [-0.0199, -0.4947, -0.6693, -1.0858, -0.2144],
          [-0.3547, -0.3838, -1.2639, -0.5765, -0.5648],
          [-0.5383, -0.4092, -1.4553, -0.4941,  0.1328]],

         [[ 0.2126,  0.1329,  0.9652,

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

def conv2d_to_matmul(input, conv_layer):
    # Extract Conv2D parameters
    in_channels = conv_layer.in_channels
    out_channels = conv_layer.out_channels
    kernel_size = conv_layer.kernel_size
    stride = conv_layer.stride
    padding = conv_layer.padding
    dilation = conv_layer.dilation

    # Unfold (im2col) the input tensor
    input_unf = F.unfold(input, kernel_size=kernel_size, dilation=dilation, padding=padding, stride=stride)

    # Reshape the weight tensor of the conv layer
    weight = conv_layer.weight.view(out_channels, -1)

    # Perform matrix multiplication
    output_unf = weight @ input_unf

    # Add the bias
    if conv_layer.bias is not None:
        output_unf += conv_layer.bias.unsqueeze(1)

    # Reshape the output to the correct dimensions
    output_height = (input.size(2) + 2*padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) // stride[0] + 1
    output_width = (input.size(3) + 2*padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) // stride[1] + 1
    output = output_unf.view(1, out_channels, output_height, output_width)

    return output

# Example usage
input = torch.randn(1, 3, 5, 5)  # Batch size 1, 3 channels, 5x5 image
conv_layer = nn.Conv2d(in_channels=3, out_channels=2, kernel_size=3, stride=1, padding=1)

# Get the output using the conv2d_to_matmul function
output = conv2d_to_matmul(input, conv_layer)

# Verify against the standard convolution
conv_output = conv_layer(input)

print("Output from matrix multiplication:")
print(output)

print("Output from standard convolution:")
print(conv_output)

# Verify that the outputs are the same
print("Difference between outputs:", torch.abs(output - conv_output).max())


Output from matrix multiplication:
tensor([[[[ 0.0647, -0.8771,  0.1172, -0.5919, -0.2884],
          [-0.3678,  0.0288, -0.7642,  0.3269,  0.8414],
          [-0.6333,  0.6534, -0.2398, -0.1189,  0.4273],
          [ 0.0820, -0.0814,  0.0694, -0.1249, -0.5725],
          [ 0.0334, -0.4556, -0.1662,  0.8488, -0.5382]],

         [[ 0.1186,  0.5653, -0.8192,  0.3583,  0.0349],
          [ 0.4989,  0.4936,  1.6190,  0.3322,  0.5158],
          [ 0.5491, -0.8562,  0.6179, -0.2699, -0.4139],
          [-0.1246,  0.0294,  0.9565, -0.3862,  0.1130],
          [ 0.3713,  0.5061, -0.0591,  0.4057,  0.5807]]]],
       grad_fn=<ViewBackward0>)
Output from standard convolution:
tensor([[[[ 0.0647, -0.8771,  0.1172, -0.5919, -0.2884],
          [-0.3678,  0.0288, -0.7642,  0.3269,  0.8414],
          [-0.6333,  0.6534, -0.2398, -0.1189,  0.4273],
          [ 0.0820, -0.0814,  0.0694, -0.1249, -0.5725],
          [ 0.0334, -0.4556, -0.1662,  0.8488, -0.5382]],

         [[ 0.1186,  0.5653, -0.8192,

In [31]:
import torch
import torch.nn as nn
import torch.nn.functional as F

def conv2d_to_matmul(input, conv_layer):


    out_channels = conv_layer.out_channels
    kernel_size = conv_layer.kernel_size
    stride = conv_layer.stride
    padding = conv_layer.padding
    dilation = conv_layer.dilation

 
    input_unf = F.unfold(input, kernel_size=kernel_size, dilation=dilation, padding=padding, stride=stride)

    weight = conv_layer.weight.view(out_channels, -1)


    output_unf = weight @ input_unf

    if conv_layer.bias is not None:
        output_unf += conv_layer.bias.unsqueeze(1)

    output_height = (input.size(2) + 2*padding[0] - dilation[0] * (kernel_size[0] - 1) - 1) // stride[0] + 1
    output_width = (input.size(3) + 2*padding[1] - dilation[1] * (kernel_size[1] - 1) - 1) // stride[1] + 1
    output = output_unf.view(1, out_channels, output_height, output_width)

    return output


with torch.no_grad():
    input = torch.randn(1, 3, 55, 55)  
    conv_layer = nn.Conv2d(in_channels=3, out_channels=20, kernel_size=3, stride=1, padding=1)

    output = conv2d_to_matmul(input, conv_layer)


    conv_output = conv_layer(input)

print("Output from matrix multiplication:")
print(output)

print("Output from standard convolution:")
print(conv_output)


print("Difference between outputs:", torch.abs(output - conv_output).max())

Output from matrix multiplication:
tensor([[[[-4.0380e-01, -3.1406e-01, -4.7838e-01,  ..., -1.5355e-01,
            4.0880e-01,  2.2182e-01],
          [ 4.4625e-01,  5.9604e-01, -4.1285e-01,  ...,  1.6947e-01,
           -6.5281e-03, -8.1054e-01],
          [ 3.7832e-01, -1.7156e-01, -4.7183e-02,  ...,  4.3828e-01,
           -2.5419e-01,  1.3473e+00],
          ...,
          [-5.4286e-01,  1.8729e-03,  1.5082e+00,  ..., -4.5478e-01,
           -4.7381e-01,  3.9000e-01],
          [ 4.4608e-01, -9.0825e-01, -7.6615e-01,  ..., -3.3419e-01,
           -7.8931e-01,  7.2388e-02],
          [-3.6955e-01,  3.7082e-01,  2.9245e-01,  ...,  6.5912e-01,
            2.9895e-01,  2.3225e-01]],

         [[ 4.0400e-01, -1.4922e-01,  4.4431e-01,  ...,  4.1689e-01,
            2.9981e-01, -7.6605e-01],
          [ 8.3809e-01,  5.0024e-01,  1.0052e+00,  ...,  2.5175e-02,
           -6.3276e-02,  5.1493e-01],
          [-3.4004e-01,  1.3410e+00,  5.6880e-01,  ...,  1.9813e-01,
            4.7307e-01,