In [1]:
import numpy as np

# A simple 5x5 single-channel image
input_array = np.array([
    [0, 0, 0, 10, 10],
    [0, 0, 0, 10, 10],
    [0, 0, 0, 10, 10],
    [0, 0, 0, 10, 10],
    [0, 0, 0, 10, 10]
])

# A 3x3 kernel for vertical edge detection
kernel = np.array([
    [1, 0, -1],
    [1, 0, -1],
    [1, 0, -1]
])

To keep the output size the same as the input when using a 3x3 kernel, our first step is to add a 1-pixel border of padding


In [2]:
padded_array = np.pad(input_array, pad_width=1, mode='constant', constant_values=0)

print("Padded Array:\n", padded_array)

Padded Array:
 [[ 0  0  0  0  0  0  0]
 [ 0  0  0  0 10 10  0]
 [ 0  0  0  0 10 10  0]
 [ 0  0  0  0 10 10  0]
 [ 0  0  0  0 10 10  0]
 [ 0  0  0  0 10 10  0]
 [ 0  0  0  0  0  0  0]]


In [15]:
# Get the top-left 3x3 patch
patch = padded_array[0:3, 0:3]

# Perform the operation
output_pixel = np.sum(patch * kernel)

print("First Patch:\n", patch)
print("\nKernel:\n", kernel)
print("\nFirst Output Pixel Value:", output_pixel)

First Patch:
 [[0 0 0]
 [0 0 0]
 [0 0 0]]

Kernel:
 [[ 1  0 -1]
 [ 1  0 -1]
 [ 1  0 -1]]

First Output Pixel Value: 0


In [16]:
output_array = np.zeros_like(input_array, dtype=np.float32)

for y in range(input_array.shape[1]):
    for x in range(input_array.shape[0]):
        current_patch = padded_array[y : y + 3 , x : x + 3]
        
        output_array[y,x] = np.sum(current_patch * kernel)
        
print("Output Array:\n", output_array)
        

Output Array:
 [[  0.   0. -20. -20.  20.]
 [  0.   0. -30. -30.  30.]
 [  0.   0. -30. -30.  30.]
 [  0.   0. -30. -30.  30.]
 [  0.   0. -20. -20.  20.]]


# Making our custom Conv2d class

In [17]:
class Conv2d:
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        
        kernel_shape = (out_channels, in_channels, kernel_size, kernel_size)
        self.kernel = np.random.randn(*kernel_shape) * 0.01
        self.bias = np.zeros(out_channels)

In [20]:
import numpy as np

# We'll use our PlainConv2d class from before
class PlainConv2d:
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        kernel_shape = (out_channels, in_channels, kernel_size, kernel_size)
        self.kernel = np.random.randn(*kernel_shape) * 0.01
        self.bias = np.zeros(out_channels)
    # (It also has a forward method we wrote)

# Now, our network class
class SimpleNet:
    def __init__(self):
        self.conv1 = PlainConv2d(1, 8, 3)
        self.conv2 = PlainConv2d(8, 16, 3)

    def parameters(self):
        params = []
        # We loop through all attributes of this class
        for attr in vars(self).values():
            # If an attribute is a layer, get its parameters
            if isinstance(attr, PlainConv2d):
                params.append(attr.kernel)
                params.append(attr.bias)
        return params

# Let's test it
model = SimpleNet()
param_list = model.parameters()
print(f"We manually found {len(param_list)} parameter arrays.")

We manually found 4 parameter arrays.


In [21]:
import numpy as np

class PlainConv2d:
    """The basic layer with its own parameters."""
    def __init__(self, in_c, out_c, k_size):
        kernel_shape = (out_c, in_c, k_size, k_size)
        self.kernel = np.random.randn(*kernel_shape) * 0.01
        self.bias = np.zeros(out_c)

    def parameters(self):
        # A layer just returns its own parameters in a list.
        return [self.kernel, self.bias]

class ResidualBlock:
    """A module that contains other modules (layers)."""
    def __init__(self):
        self.conv1 = PlainConv2d(8, 8, 3)
        self.conv2 = PlainConv2d(8, 8, 3)

    def parameters(self):
        params = []
        # This is the key: it calls .parameters() on its children.
        params.extend(self.conv1.parameters())
        params.extend(self.conv2.parameters())
        return params

class DeeperNet:
    """The top-level model, which contains other modules."""
    def __init__(self):
        self.block1 = ResidualBlock()
        self.block2 = ResidualBlock()

    def parameters(self):
        params = []
        # It also calls .parameters() on its children.
        params.extend(self.block1.parameters())
        params.extend(self.block2.parameters())
        return params

# --- Let's test it ---
deep_model = DeeperNet()
final_params = deep_model.parameters()
print(f"The recursive method found {len(final_params)} parameter arrays.")
# Output should be 8: (2 params/conv) * (2 convs/block) * (2 blocks)

The recursive method found 8 parameter arrays.


In [22]:
import numpy as np

# --- We'll use the same building blocks from before ---

class PlainConv2d:
    """The basic layer with its own parameters."""
    def __init__(self, in_c, out_c, k_size):
        kernel_shape = (out_c, in_c, k_size, k_size)
        self.kernel = np.random.randn(*kernel_shape) * 0.01
        self.bias = np.zeros(out_c)

    def parameters(self):
        return [self.kernel, self.bias]

class ResidualBlock:
    """A module that contains other modules (layers)."""
    def __init__(self):
        self.conv1 = PlainConv2d(8, 8, 3)
        self.conv2 = PlainConv2d(8, 8, 3)

    def parameters(self):
        params = []
        params.extend(self.conv1.parameters())
        params.extend(self.conv2.parameters())
        return params

# --- Here is our new, deeper network ---

class VeryDeepNet:
    """A deeper model containing a list of sub-modules."""
    def __init__(self):
        # We create a list of 10 residual blocks
        self.blocks = [ResidualBlock() for _ in range(10)]

    def parameters(self):
        params = []
        # We loop through our list of blocks
        for block in self.blocks:
            # And recursively get the parameters from each one
            params.extend(block.parameters())
        return params

# --- Let's test it ---
deep_model = VeryDeepNet()
final_params = deep_model.parameters()
print(f"The recursive method found {len(final_params)} parameter arrays in the 10-block network.")

The recursive method found 40 parameter arrays in the 10-block network.


In [23]:
import numpy as np
import torch
import torch.nn as nn
import time

class NumpyConv2d:
    """A Conv2d layer built from scratch using only NumPy."""
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        # Store hyperparameters
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        # Initialize learnable parameters (weights and bias)
        kernel_shape = (out_channels, in_channels, kernel_size, kernel_size)
        self.kernel = np.random.randn(*kernel_shape) * 0.01
        self.bias = np.random.randn(out_channels) * 0.01

    def forward(self, x):
        """Performs the forward pass of the convolution."""
        # Get input dimensions
        batch_size, in_channels, in_h, in_w = x.shape

        # Pad the input
        padded_x = np.pad(x, ((0,0), (0,0), (self.padding, self.padding), (self.padding, self.padding)))

        # Calculate output dimensions
        out_h = (in_h + 2 * self.padding - self.kernel_size) // self.stride + 1
        out_w = (in_w + 2 * self.padding - self.kernel_size) // self.stride + 1

        # Create empty output tensor
        output = np.zeros((batch_size, self.out_channels, out_h, out_w))

        # --- The Main Convolution Loop ---
        for i in range(batch_size):           # Loop over each image in the batch
            for c_out in range(self.out_channels): # Loop over each output channel (filter)
                for y in range(out_h):         # Loop over the vertical dimension
                    for x in range(out_w):     # Loop over the horizontal dimension
                        # Find the corners of the current slice
                        y_start = y * self.stride
                        y_end = y_start + self.kernel_size
                        x_start = x * self.stride
                        x_end = x_start + self.kernel_size

                        # Get the 3D patch from the padded input
                        patch = padded_x[i, :, y_start:y_end, x_start:x_end]
                        
                        # Get the 3D kernel for this output channel
                        kernel_slice = self.kernel[c_out, :, :, :]

                        # Perform the element-wise multiplication and sum, then add bias
                        output[i, c_out, y, x] = np.sum(patch * kernel_slice) + self.bias[c_out]

        return output

In [26]:
# --- Setup ---
batch_size, in_c, h, w = 11, 7, 30, 30
out_c, k, s, p = 17, 7, 3, 2

# Create random input data
numpy_input = np.random.randn(batch_size, in_c, h, w)
torch_input = torch.from_numpy(numpy_input).float()

# 1. Our NumPy Layer
numpy_layer = NumpyConv2d(in_c, out_c, k, s, p)
numpy_output = numpy_layer.forward(numpy_input)

# 2. The Real PyTorch Layer
torch_layer = nn.Conv2d(in_c, out_c, k, s, p)
# IMPORTANT: We copy our NumPy weights into the PyTorch layer
torch_layer.weight.data = torch.from_numpy(numpy_layer.kernel).float()
torch_layer.bias.data = torch.from_numpy(numpy_layer.bias).float()
torch_output = torch_layer(torch_input)

# --- Verification ---
# Convert torch output to numpy to compare
torch_output_np = torch_output.detach().numpy()
# Check if the outputs are almost identical (to handle tiny floating point differences)
if np.allclose(numpy_output, torch_output_np):
    print("✅ Success! The outputs are identical.")
else:
    print("❌ Failure! The outputs do not match.")

# --- Performance Test ---
print("\n--- Speed Comparison ---")
# Time our NumPy implementation
start_time = time.time()
numpy_layer.forward(numpy_input)
numpy_time = time.time() - start_time
print(f"Our NumPy Conv: {numpy_time:.4f} seconds")

# Time the PyTorch implementation
start_time = time.time()
torch_layer(torch_input)
torch_time = time.time() - start_time
print(f"PyTorch nn.Conv2d: {torch_time:.6f} seconds")

# Calculate how much faster PyTorch is
if torch_time > 0:
    print(f"\nPyTorch is roughly {int(numpy_time / torch_time)}x faster!")

❌ Failure! The outputs do not match.

--- Speed Comparison ---
Our NumPy Conv: 0.0670 seconds
PyTorch nn.Conv2d: 0.001159 seconds

PyTorch is roughly 57x faster!
