# Why a convolution layer on a 1x1 input is identical to a linear layer

A code example and an explanation follow below.

In [1]:
import torch
import torch.nn as nn

# Fix the RNG state so every run draws the same numbers
torch.manual_seed(42)

# A single "pixel" with three channels, laid out as (N, C, H, W) = (1, 3, 1, 1)
input_tensor = torch.randn((1, 3, 1, 1))
input_tensor

tensor([[[[0.3367]],

         [[0.1288]],

         [[0.2345]]]])

## Convolution Layer
Now, let's create a 1x1 convolution layer:

In [2]:
# Pointwise (1x1-kernel) convolution mapping 3 input channels to 2 output channels
conv_layer = nn.Conv2d(3, 2, kernel_size=1)

# Forward pass: with a 1x1 kernel every output channel is a weighted sum of
# the input channels at the same position, plus a bias
conv_output = conv_layer(input_tensor)

print("Convolution output shape:", conv_output.shape)
print("Convolution output:", conv_output)

Convolution output shape: torch.Size([1, 2, 1, 1])
Convolution output: tensor([[[[0.5129]],

         [[0.0281]]]], grad_fn=<ConvolutionBackward0>)


## Linear Layer
Next, we'll create an equivalent linear layer:

In [3]:
# Conv2d weight layout is (out_channels, in_channels, kernel_height, kernel_width)
conv_layer.weight.shape

torch.Size([2, 3, 1, 1])

In [4]:
# Build a fully connected layer with the same fan-in / fan-out as the conv (3 -> 2)
linear_layer = nn.Linear(3, 2)

# Overwrite the freshly initialised parameters with the conv layer's, so both
# layers compute the same function. The conv kernel has shape (2, 3, 1, 1);
# dropping its trailing 1x1 spatial dims leaves the (out_features, in_features)
# matrix that nn.Linear stores as its weight.
with torch.no_grad():
    linear_layer.bias.copy_(conv_layer.bias)
    linear_layer.weight.copy_(conv_layer.weight.view(2, 3))

# Collapse (1, 3, 1, 1) -> (1, 3): a batch holding one 3-feature vector
linear_input = input_tensor.view(1, 3)

# Forward pass through the linear layer
linear_output = linear_layer(linear_input)

print("Linear output shape:", linear_output.shape)
print("Linear output:", linear_output)


Linear output shape: torch.Size([1, 2])
Linear output: tensor([[0.5129, 0.0281]], grad_fn=<AddmmBackward0>)


In [5]:
# The same affine map written out by hand: y = x @ W^T + b
linear_input @ linear_layer.weight.T + linear_layer.bias

tensor([[0.5129, 0.0281]], grad_fn=<AddBackward0>)

In [6]:
# Compare the outputs shape-for-shape.
# A bare squeeze() would also drop the size-1 batch dimension, leaving a (2,)
# tensor that only matches the (1, 2) linear output through broadcasting.
# Viewing the (1, 2, 1, 1) conv output with the linear output's exact shape
# makes the comparison explicit.
print("Are the outputs equal?", torch.allclose(conv_output.view(linear_output.shape), linear_output))


Are the outputs equal? True


This code demonstrates that:

- The 1x1 convolution operates on a 4D tensor (batch, channels, height, width), while the linear layer operates on a 2D tensor (batch, features).
- When we reshape the input and use the same weights and biases, the outputs are identical.
- The only difference is in the shape of the output: the convolution preserves the 4D structure, while the linear layer produces a 2D output.

This equivalence is why 1x1 convolutions are often used in neural network architectures for dimensionality reduction or feature mixing across channels, effectively acting as learned linear transformations applied to each spatial location independently.

In [7]:
import numpy as np

# Seed NumPy's global generator so the draws below are repeatable
np.random.seed(42)

# 1. Input in (batch_size, channels, height, width) layout;
#    a 1x1 "image" means height == width == 1
input_data = np.random.random_sample((1, 3, 1, 1))

# 2. Kernel bank in (output_channels, input_channels, kernel_height, kernel_width)
#    layout; a 1x1 kernel means kernel_height == kernel_width == 1
weights = np.random.random_sample((2, 3, 1, 1))

# 3. One bias term per output channel: shape (output_channels,)
bias = np.random.random_sample(2)

# 4. Perform 1x1 convolution
def conv_1x1(input_data, weights, bias):
    """Apply a 1x1 (pointwise) convolution using plain NumPy.

    A 1x1 convolution is a linear map over the channel axis, applied
    independently at every spatial location. It therefore works for any
    batch size and any height/width, not only for a single 1x1 input.

    Args:
        input_data: array of shape (batch, in_channels, height, width).
        weights: array of shape (out_channels, in_channels, 1, 1).
        bias: array of shape (out_channels,), added to every output position.

    Returns:
        Array of shape (batch, out_channels, height, width).

    Raises:
        ValueError: if ``input_data`` is not 4-D, the kernel is not 1x1, or
            the input and kernel disagree on the number of input channels.
    """
    input_data = np.asarray(input_data)
    weights = np.asarray(weights)
    bias = np.asarray(bias)

    if input_data.ndim != 4:
        raise ValueError(
            f"input_data must be 4-D (batch, channels, height, width), got shape {input_data.shape}"
        )
    if weights.ndim != 4 or weights.shape[2:] != (1, 1):
        raise ValueError(
            f"weights must have shape (out_channels, in_channels, 1, 1), got {weights.shape}"
        )

    out_channels, in_channels = weights.shape[:2]
    if input_data.shape[1] != in_channels:
        raise ValueError(
            f"input has {input_data.shape[1]} channels but weights expect {in_channels}"
        )

    # Drop the trailing 1x1 kernel dims: (out, in, 1, 1) -> (out, in)
    weights_flat = weights.reshape(out_channels, in_channels)

    # Contract over the channel axis at every (batch, row, col) position
    output = np.einsum("oc,nchw->nohw", weights_flat, input_data)

    # Broadcast the per-channel bias over the batch and spatial dimensions
    return output + bias.reshape(1, -1, 1, 1)

In [8]:
# View the input as a flat vector of its 3 channel values: (1, 3, 1, 1) -> (3,)
input_data.reshape(-1)

array([0.37454012, 0.95071431, 0.73199394])

In [9]:
# 5. Compute the output
output = conv_1x1(input_data, weights, bias)

# 6. Verify with manual calculation: at the single 1x1 position, each output
#    channel is the dot product of the input channels with that channel's
#    kernel weights, plus that channel's bias. The channel count is read from
#    `weights` rather than hard-coded so the check tracks the kernel shape.
manual_output = np.zeros((1, weights.shape[0], 1, 1))
for i in range(weights.shape[0]):  # For each output channel
    manual_output[0, i, 0, 0] = np.sum(input_data[0, :, 0, 0] * weights[i, :, 0, 0]) + bias[i]

print("\nManual calculation output:\n", manual_output)
print("Are they equal?", np.allclose(output, manual_output))


Manual calculation output:
 [[[[1.19481039]]

  [[1.30583774]]]]
Are they equal? True


In [10]:
# Show the hand-computed reference values on their own
manual_output

array([[[[1.19481039]],

        [[1.30583774]]]])

In [11]:
# flatten() gives the same 1-D vector of the 3 channel values as reshape(-1) above
input_data.flatten() 

array([0.37454012, 0.95071431, 0.73199394])

In [12]:
# Kernel layout: (output_channels, input_channels, kernel_height, kernel_width)
weights.shape

(2, 3, 1, 1)

In [13]:

# Reseed so the freshly initialised weights below are reproducible
torch.manual_seed(42)

# A 3x3 convolution this time (3 input channels -> 2 output channels).
# Note: this rebinds the name `conv_layer` to a new layer.
conv_layer = nn.Conv2d(3, 2, kernel_size=3, padding=1)

# The weight tensor is laid out as (out_channels, in_channels, kH, kW)
print("Shape of conv_layer.weight:", conv_layer.weight.shape)

# "Kernels" is simply another name for the layer's weight parameter
kernels = conv_layer.weight

print("\nKernels (weights) of the convolutional layer:")
# Iterating over the first dimension yields one (in_channels, kH, kW) kernel
# per output channel; the empty string adds the blank separator line
for i, kernel in enumerate(kernels):
    print(f"Kernel for output channel {i}:", kernel, "", sep="\n")

# Sanity check: the kernels and the layer's weights hold identical values
print("Are kernels the same as weights?", torch.equal(kernels, conv_layer.weight))


Shape of conv_layer.weight: torch.Size([2, 3, 3, 3])

Kernels (weights) of the convolutional layer:
Kernel for output channel 0:
tensor([[[ 0.1471,  0.1597, -0.0451],
         [ 0.1768, -0.0422,  0.0388],
         [-0.0937,  0.1130,  0.1697]],

        [[-0.1412,  0.1673,  0.0360],
         [ 0.1422,  0.0261,  0.0928],
         [-0.0272,  0.1484,  0.0284]],

        [[-0.0898,  0.0491, -0.0887],
         [-0.0226, -0.0782,  0.1277],
         [-0.1519, -0.0887, -0.0543]]], grad_fn=<UnbindBackward0>)

Kernel for output channel 1:
tensor([[[-0.1157,  0.0182, -0.1901],
         [ 0.1738, -0.1635,  0.1486],
         [ 0.0320, -0.0625,  0.1189]],

        [[ 0.0300,  0.1555,  0.0210],
         [-0.0607,  0.0517, -0.0522],
         [ 0.0810,  0.1718,  0.1112]],

        [[-0.0841,  0.1111,  0.0344],
         [ 0.0977, -0.1173, -0.1905],
         [-0.0744, -0.1476,  0.1579]]], grad_fn=<UnbindBackward0>)

Are kernels the same as weights? True


## What are Pointwise (1x1) convolutional layers?

Pointwise convolutions, also known as 1x1 convolutions, are a special type of convolutional layer that operates on each pixel independently across all channels. They are widely used in deep learning architectures for various purposes, including dimensionality reduction, feature combination, and computational efficiency. Let's explore pointwise convolutions and their applications using PyTorch examples.

### Pointwise Convolutions Explained

A pointwise convolution uses a 1x1 kernel that iterates through every single point in the input tensor. It effectively performs a linear combination of the input channels at each spatial location, allowing for cross-channel information mixing without affecting the spatial dimensions.

In [14]:
# Feature map in (batch_size, in_channels, height, width) layout
input_tensor = torch.randn((1, 64, 32, 32))

# 1x1 convolution that widens the channel dimension from 64 to 128
pointwise_conv = nn.Conv2d(64, 128, kernel_size=1)

# The kernel covers a single pixel, so height and width pass through unchanged
output = pointwise_conv(input_tensor)

print(f"Input shape: {input_tensor.shape}")
print(f"Output shape: {output.shape}")


Input shape: torch.Size([1, 64, 32, 32])
Output shape: torch.Size([1, 128, 32, 32])


In this example, we create a pointwise convolution layer that transforms a tensor with 64 input channels into one with 128 output channels while maintaining the spatial dimensions (32x32).

### Applications of Pointwise Convolutions
Pointwise convolutions are used in various ways by practitioners in deep learning:

1. Dimensionality Reduction

Pointwise convolutions can efficiently reduce the number of channels in a feature map, decreasing computational complexity:

In [17]:
# Squeeze 256 channels down to 64 with a 1x1 convolution; the 32x32 spatial
# grid is left untouched
dim_reduction = nn.Conv2d(256, 64, kernel_size=1)
reduced_features = dim_reduction(torch.randn((1, 256, 32, 32)))
print(f"Reduced features shape: {reduced_features.shape}")


Reduced features shape: torch.Size([1, 64, 32, 32])


2. Network-in-Network Architecture

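Pointwise convolutions are key components in Network-in-Network architectures: stacking 1x1 convolutions, each followed by a non-linear activation such as ReLU, adds extra non-linear layers that act on every pixel independently: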

In [18]:
class NetworkInNetwork(nn.Module):
    """Minimal Network-in-Network style block.

    A 3x3 convolution (3 -> 64 channels, padding 1) is followed by two 1x1
    convolutions (64 -> 64 channels); a ReLU is applied after every layer.
    """

    def __init__(self):
        super().__init__()
        # Spatial feature extractor
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        # Per-pixel channel mixers (1x1 kernels)
        self.pointwise1 = nn.Conv2d(64, 64, kernel_size=1)
        self.pointwise2 = nn.Conv2d(64, 64, kernel_size=1)

    def forward(self, x):
        """Run ``x`` through conv1, pointwise1 and pointwise2, each followed by ReLU."""
        for layer in (self.conv1, self.pointwise1, self.pointwise2):
            x = torch.relu(layer(x))
        return x

# Instantiate the block and push one random 3-channel 32x32 input through it
nin_model = NetworkInNetwork()
output = nin_model(torch.randn((1, 3, 32, 32)))
print(f"NIN output shape: {output.shape}")


NIN output shape: torch.Size([1, 64, 32, 32])


Pointwise convolutions are versatile and computationally efficient, making them a popular choice in modern deep learning architectures. They allow for flexible channel manipulation without affecting spatial dimensions, enabling the creation of more compact and efficient models.