# CNNs from Scratch

- Code based on [GitHub: TheIndependentCode/Neural-Network](https://github.com/TheIndependentCode/Neural-Network)
    - Also see [YouTube (TheIndependentCode): Neural Networks](https://youtube.com/playlist?list=PLQ4osgQ7WN6PGnvt6tzLAVAEMsL3LBqpm&si=nYp4J9dIP13Bcaxs)
        - Accompanies GitHub repo

- Following [YouTube (AI with Frank): \[Tutorial\] Convolutional layers implementations under the hood](https://youtu.be/-Y4ST8eNySI?si=1L7VuFRZPsFxkPnt)

### Setup

In [77]:
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Hyperparameters: input is (N, C_in, H_in, W_in); the layer uses a square KxK kernel
# with stride S and C_out output channels. For a batched run, set N = 2 instead.
N = 1
C_in = 3
H_in = W_in = 4
K, S, C_out = 2, 1, 1
seed = 1337

# Seed both RNGs: numpy drives the input data, torch drives the layer's weight init
np.random.seed(seed)
torch.random.manual_seed(seed)

# Input is drawn in numpy (uniform [0, 1), float32) and copied into a torch tensor.
# To go the other way round, draw it in torch first (e.g. torch.rand(N, C_in, H_in, W_in))
# and take x_pt.numpy().
x_np = np.random.rand(N, C_in, H_in, W_in).astype(np.float32)
x_pt = torch.tensor(x_np)

# Let's cheat for comparison's sake: build the reference nn layer and steal its
# (detached, cloned) parameters so every implementation below uses identical weights
conv = nn.Conv2d(C_in, C_out, kernel_size=K, stride=S, bias=True)
weight_pt, bias_pt = (p.detach().clone() for p in (conv.weight, conv.bias))
weight_np, bias_np = weight_pt.numpy(), bias_pt.numpy()

print(f"x.shape: (N, C_in, H_in, W_in) = {tuple(x_pt.shape)}")
print(f"weight.shape: (C_out, C_in, K, K) = {tuple(conv.weight.shape)}")
print(f"bias.shape: (C_out,) = {tuple(conv.bias.shape)}")

# Output spatial size (used below), no padding:
#   H_out = floor((H_in - K) / S) + 1   (same for W_out)
#   e.g. 4x4, K=2, S=1 -> floor((4-2)/1) + 1 -> 3x3
#   e.g. 4x4, K=3, S=1 -> floor((4-3)/1) + 1 -> 2x2
#   e.g. 4x4, K=2, S=3 -> floor((4-2)/3) + 1 -> 1x1
# Integer floor division is the same floor() for these int operands.
H_out = (H_in - K) // S + 1
W_out = (W_in - K) // S + 1
print(f"Calculated (H_out, W_out) = {H_out, W_out}")

x.shape: (N, C_in, H_in, W_in) = (1, 3, 4, 4)
weight.shape: (C_out, C_in, K, K) = (1, 3, 2, 2)
bias.shape: (C_out,) = (1,)
Calculated (H_out, W_out) = (3, 3)


In [None]:
# Reference result: run the nn.Conv2d module itself (inference only, so no autograd graph)
with torch.no_grad():
    out_pt = conv(x_pt)

# The module's output must have the spatial size computed by hand in the setup cell
print(f"out.shape: (N, C_out, H_out, W_out) = {tuple(out_pt.shape)}")
assert tuple(out_pt.shape) == (N, C_out, H_out, W_out)

out.shape: (N, C_out, H_out, W_out) = (1, 1, 3, 3)


In [None]:
# PyTorch functional: the same convolution through F.conv2d, fed the weights and bias
# that were cloned from the module
with torch.no_grad():
    out_pt_func = F.conv2d(input=x_pt, weight=weight_pt, bias=bias_pt, stride=S)

# Compare with a floating-point tolerance, as every other comparison in this notebook does.
# torch.allclose returns a plain bool, so this prints "True" rather than "tensor(True)".
print(f"Allclose: {torch.allclose(out_pt, out_pt_func)}")

Allclose: True


In [101]:
# Naive loop
#   x.shape: (N, C_in, H_in, W_in), e.g. (1, 3, 4, 4)
#   weight.shape: (C_out, C_in, K, K), e.g. (1, 3, 2, 2)
#   bias.shape: (C_out,), e.g. (1,)
#   out.shape: (N, C_out, H_out, W_out), e.g. (1, 1, 3, 3)

# Rem: H_out = floor((H_in - K) / S) + 1   (same for W_out)
#   e.g. 4x4, K=2, S=1 -> floor((4-2)/1) + 1 -> 3x3
# Output location i reads the input window that starts at i*S (rows and columns alike)

# Slide the kernel over the image: each output value is the dot product of the flattened
# (C_in, K, K) kernel with the flattened (C_in, K, K) input window, plus the channel's bias
out_loop_np = np.zeros((N, C_out, H_out, W_out), dtype=np.float32)
for n in range(N):
    for c_out in range(C_out):
        # The kernel does not depend on the spatial position, so flatten it once per channel
        kernel = weight_np[c_out].flatten()  # (C_in*K*K,)
        for y_out in range(H_out):
            y_in = y_out * S
            for x_out in range(W_out):
                x_in = x_out * S
                patch = x_np[n, :, y_in:y_in+K, x_in:x_in+K].flatten()  # (C_in*K*K,)
                out_loop_np[n, c_out, y_out, x_out] = patch @ kernel + bias_np[c_out]

print(f"Allclose: {np.allclose(out_pt.numpy(), out_loop_np)}")

Allclose: True


In [None]:
def conv2d_im2col(
    x: np.ndarray | torch.Tensor, 
    weight: np.ndarray | torch.Tensor, 
    bias: np.ndarray | torch.Tensor, 
    stride: int, 
    backend: str = 'numpy') -> np.ndarray | torch.Tensor:
    # - Compute X_out = X @ W  
    #   - Need output for every dot product
    #   - e.g. output size = N * C_out * H_out * W_out
    # - W: rows = out channels, kernels = kernel patches
    #   - Shape (C_out, C_in*K*K)
    #   - Only requries view/reshape of original weights, no copy
    # - X: rows = image patches, cols = output spatial locations
    #   - Shape (C_in*K*K, N*H_out*W_out)
    # - X_out: W @ X
    #   - Shape (C_out, N*H_out*W_out)
    #   - Permute/reshape to (N,C_out,H_out,W_out)
    assert backend in ['numpy', 'torch']
    
    # Get shapes
    N, C_in, H_in, W_in = x.shape
    C_out, C_in, K, _K = weight.shape
    assert bias.shape == (C_out,)
    
    H_out = math.floor((H_in - K) / S) + 1
    W_out = math.floor((W_in - K) / S) + 1
    
    # Setup (use 'zeros' instead of 'empty' for debugging)
    if backend == 'numpy':
        X = np.empty((C_in * K * K, N * H_out * W_out), dtype=np.float32)
        W = weight.reshape((C_out, C_in * K * K))
        assert all([isinstance(a, np.ndarray) for a in [x, weight, bias]]), \
            "Must pass in numpy arrays for backend == 'numpy'"
        assert W.base is not None, "Expected a view for W not a copy"  # pyright: ignore[reportAttributeAccessIssue]
    elif backend == 'torch':
        X = torch.empty((C_in * K * K, N * H_out * W_out), dtype=torch.float32)
        W = weight.view((C_out, C_in * K * K))        
        assert all([isinstance(t, torch.Tensor) for t in [x, weight, bias]]), \
            "Must pass in torch tensors for backend == 'torch'"

    # im2col
    for n in range(N):
        for y_out in range(H_out):
            y_in = y_out * stride
            for x_out in range(W_out):
                x_in = x_out * stride   
                col = (n * H_out * W_out) + (y_out * W_out) + x_out
                img_patch = x[n, :, y_in:y_in+K, x_in:x_in+K].flatten()  # patch, (1,C_in,K,K)
                X[:, col] = img_patch  # pyright: ignore[reportArgumentType]
                if isinstance(X, np.ndarray):  # backend == 'numpy'
                    assert X[:, col].base is not None, "Expected a view for X[i] not a copy"

    if backend == 'numpy':
        assert isinstance(X, np.ndarray) and isinstance(W, np.ndarray)
        
        # Make contiguous; probably not necessary (numpy should do it) but good to be explicit
        X = np.ascontiguousarray(X)

        # (C_out, C_in*K*K) @ (C_in*K*K, N*H_out*W_out) -> (C_out, N*H_out*W_out)
        out_im2col = W @ X + bias.reshape(C_out, 1)  # Explicit reshape (not required)
        out_im2col = out_im2col.reshape(C_out, N, H_out, W_out)
        assert out_im2col.base is not None, "Expected a view for out_im2col_mat.reshape"

        out_im2col = np.ascontiguousarray(np.transpose(out_im2col, (1, 0, 2, 3)))        
        
    elif backend == 'torch':
        assert isinstance(X, torch.Tensor) and isinstance(W, torch.Tensor)
        X = X.contiguous()
        
        # (C_out, C_in*K*K) @ (C_in*K*K, N*H_out*W_out) -> (C_out, N*H_out*W_out)
        out_im2col = W @ X + bias.view(C_out, 1)  # Explicit view (not required)  # pyright: ignore[reportArgumentType]
        
        # (C_out, N*H_out*W_out) -> (C_out, N, H_out, W_out) -> (N, C_out, H_out, W_out)
        out_im2col = out_im2col.view(C_out, N, H_out, W_out).permute(1, 0, 2, 3).contiguous()
        
    return out_im2col
        
# Check both backends of the im2col implementation against the nn.Conv2d reference output
out_im2col_np = conv2d_im2col(x=x_np, weight=weight_np, bias=bias_np, stride=S, backend='numpy')
print(f"Allclose numpy: {np.allclose(out_pt.numpy(), out_im2col_np)}")

out_im2col_pt = conv2d_im2col(x=x_pt, weight=weight_pt, bias=bias_pt, stride=S, backend='torch')
# Label this line with the backend it actually reports on
print(f"Allclose torch: {torch.allclose(out_pt, out_im2col_pt)}")  # pyright: ignore[reportArgumentType]

Allclose numpy: True
Allclose numpy: True
