# Building Models

Illustration: CNN - Convolutional Neural Network

course: https://learn.deeplearning.ai/courses/pytorch-fundamentals/

lesson: https://learn.deeplearning.ai/specializations/pytorch-for-deep-learning-professional-certificate/lesson/wrr1egt/cnns---part-1%3A-filters%2C-patterns%2C-and-feature-maps 

neural network concepts
- computational graphs (static vs dynamic - GREAT): https://learn.deeplearning.ai/specializations/pytorch-for-deep-learning-professional-certificate/lesson/deeone4/dynamic-graphs 
- convolutional filters (cnn)
- feature maps (cnn): output of a filtered image
- convolve (cnn) - slide the filters over the image
- the input image in a cnn is three-dimensional (color channels, height, width); channels = 3 for RGB, 1 for grayscale
- pooling (cnn) - reduces the size of the feature map to lower the computational cost
- regularization: dropout and batch normalization

resources
- training neural networks (stanford vid - GREAT explainer vid for intuition): https://www.youtube.com/watch?v=wEoyxE0GP2M
- cnn basics (cs231n stanford - GREAT VISUAL EXPLANATIONS for intuition)* : https://cs231n.github.io/convolutional-networks/
- convnets walkthrough by Karpathy: https://www.youtube.com/watch?v=u6aEYuemt0M 
- wavenet to EEG brain data: https://arxiv.org/html/2510.15947v1 

learning rates tuning
File: "Screenshot 2026-01-14 at 2.55.35 PM.png"

In [None]:
# imports

import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# Model

class SimpleCNN(nn.Module):
    """Two-block convolutional classifier written out layer by layer.

    Architecture:
        (Conv 3x3 -> ReLU -> MaxPool 2x2) x 2 -> Flatten
        -> Linear(128) -> ReLU -> Dropout(0.5) -> Linear(num_classes)

    The defaults reproduce the original hard-coded network: grayscale
    28x28 images and 10 classification categories. The network expects a
    fixed, square input size; for variable image sizes, use adaptive
    pooling instead (ask LLM for illustration).

    Args:
        in_channels: channels of the input image - 1 for grayscale, 3 if RGB.
        num_classes: output neurons - num of categories for classification.
        image_size: height and width (in pixels) of the square input image.

    Raises:
        ValueError: if ``image_size`` is too small to survive two 2x2 poolings.
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10, image_size: int = 28):
        # defines model architecture
        super().__init__()

        conv1_features = 32     # num of feature maps produced by the first conv layer
        conv2_features = 64     # num of feature maps produced by the second conv layer
        hidden_units = 128      # width of the fully connected hidden layer

        # the 3x3 convolutions with padding=1 keep height/width unchanged, and each
        # 2x2 max pool halves them (rounding down): 28 -> 14 -> 7 for the default size
        feature_map_size = image_size // 4
        if feature_map_size < 1:
            raise ValueError(
                f"image_size must be at least 4 to survive two 2x2 poolings, got {image_size}"
            )

        # first convolutional block
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,    # in channel: 1 for grayscale value, 3 if RGB
            out_channels=conv1_features,    # out channels are num of features (num of feature maps)
            kernel_size=3,  # filter size - kernel size of 3 means 3x3
            padding=1   # padding so a filter can be centered near edges or corners of the input image
        )
        self.relu1 = nn.ReLU()  # activation layer
        # pooling layer - for max pooling, only the max value of each 2x2 feature map
        # window is kept; the stride is left at its default (not passed explicitly)
        self.pool1 = nn.MaxPool2d(kernel_size=2)

        # second convolutional block
        self.conv2 = nn.Conv2d(
            in_channels=conv1_features,     # matches the out_channels of the previous conv layer
            out_channels=conv2_features,
            kernel_size=3,
            padding=1
        )
        self.relu2 = nn.ReLU()  # activation layer
        self.pool2 = nn.MaxPool2d(kernel_size=2)    # pooling layer

        # flatten layer
        # before feeding into final fully connected layers
        self.flatten = nn.Flatten()     # flattens each sample's feature maps into a vector

        # -- fully connected layers --
        # flattened size: num of feature maps * feature map height * feature map width
        flattened_size = conv2_features * feature_map_size * feature_map_size
        self.fc1 = nn.Linear(flattened_size, hidden_units)
        self.relu3 = nn.ReLU()
        # regularization: dropout randomly drops a percentage of neurons during
        # training to reduce overfitting
        self.dropout3 = nn.Dropout(0.5)

        # output layer
        # classification layer
        self.fc2_output = nn.Linear(
            hidden_units,   # input size from output of previous layer
            num_classes     # output neurons - num of categories for classification
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Define the data flow through the model layers.

        Args:
            x: batch of images; the layer configuration implies shape
                (batch, in_channels, image_size, image_size).

        Returns:
            Raw class scores of shape (batch, num_classes); no softmax is applied.
        """
        # first conv block
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)

        # second conv block
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)

        # flattening layer
        x = self.flatten(x)

        # fully connected layer 1
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.dropout3(x)

        # output layer - fully connected layer 2
        x = self.fc2_output(x)

        return x
    
# create model instance
# instantiates the explicit, layer-by-layer CNN defined above and binds it to `model`
model = SimpleCNN()
# printing the module lists its layers - quick sanity check of the architecture
print(model)


In [10]:
# Model: Modularize for maintainability and reusability
# refactor explicit architecture above into modular blocks
# rely on nn.Sequential

class SimpleCNNModular(nn.Module):
    """CNN classifier assembled from reusable blocks via ``nn.Sequential``.

    ``features`` stacks two ``ConvBlock`` modules (in_channels -> 32 -> 64
    feature maps), ``flatten`` turns the feature maps into a vector per
    sample, and ``classifier`` maps that vector to ``num_classes`` scores
    through a 128-unit hidden layer with ReLU and dropout.

    The defaults reproduce the original hard-coded network: grayscale
    28x28 images and 10 classification categories. The network expects a
    fixed, square input size; for variable image sizes, use adaptive
    pooling instead (ask LLM for illustration).

    Args:
        in_channels: channels of the input image - 1 for grayscale, 3 if RGB.
        num_classes: num of categories for classification (output neurons).
        image_size: height and width (in pixels) of the square input image.
        verbose: when True (default), ``forward`` prints the tensor shape
            after each stage - useful for tracing shape mismatches; set it
            to False to silence the output, e.g. inside a training loop.

    Raises:
        ValueError: if ``image_size`` is too small to survive two 2x2 poolings.
    """

    def __init__(
        self,
        in_channels: int = 1,
        num_classes: int = 10,
        image_size: int = 28,
        *,
        verbose: bool = True,
    ):
        """
        define nn architecture with modular blocks
        """
        super().__init__()

        # each ConvBlock keeps height/width in its convolution (3x3, padding=1) and
        # halves them in its 2x2 max pool: 28 -> 14 -> 7 for the default size
        feature_map_size = image_size // 4
        if feature_map_size < 1:
            raise ValueError(
                f"image_size must be at least 4 to survive two 2x2 poolings, got {image_size}"
            )

        self.verbose = verbose

        self.features = nn.Sequential(
            ConvBlock(in_channels, 32),
            ConvBlock(32, 64),
        )

        self.flatten = nn.Flatten()

        # num of feature maps * feature map height * feature map width
        flattened_size = 64 * feature_map_size * feature_map_size
        self.classifier = nn.Sequential(
            nn.Linear(flattened_size, 128),     # layer to process flattened layer - fully connected layer
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes),        # output layer - fully connected layer
        )

    def _trace(self, message: str) -> None:
        # prints a shape-tracing message only when verbose output is enabled
        if self.verbose:
            print(message)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the forward pass, optionally printing the shape after each stage.

        Args:
            x: batch of images; the layer configuration implies shape
                (batch, in_channels, image_size, image_size).

        Returns:
            Raw class scores of shape (batch, num_classes); no softmax is applied.
        """
        self._trace("forward pass")

        self._trace(f"NN shape upon input: {x.shape}")

        x = self.features(x)
        self._trace(f"NN shape after features extraction: {x.shape}")

        x = self.flatten(x)
        self._trace(f"NN shape after flattening: {x.shape}")

        x = self.classifier(x)
        self._trace(f"NN shape after classification: {x.shape}")

        return x


class ConvBlock(nn.Module):
    """Reusable feature-extraction unit: 3x3 convolution -> ReLU -> 2x2 max pool.

    The convolution uses padding=1, and the pooling layer uses a 2x2 window
    with stride 2. The three layers are stored, in that order, in ``self.block``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        convolution = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        # a batch normalization layer could be inserted right after the convolution:
        #   nn.BatchNorm2d(out_channels)
        # it stabilizes activations by normalizing each mini batch per feature channel
        # (calculate mean and variance per batch, then normalize the batch)
        activation = nn.ReLU()
        downsample = nn.MaxPool2d(2, stride=2)

        self.block = nn.Sequential(convolution, activation, downsample)

    def forward(self, x):
        # delegate to the sequential pipeline: conv -> activation -> pooling
        pooled_features = self.block(x)
        return pooled_features
    
# initialize model
# rebinds `model` to the modular CNN, so the cells below operate on this instance
model = SimpleCNNModular()
# uncomment to list the nested module structure
# print(model)

In [11]:
# Model Inspection + Debugging
# more advanced techniques in exercise 7 file "pytorch-fundamentals-7-model-debugging.ipynb"

# reading a linear layer's weight shape: torch.Size([output_neurons_size, input_neurons_size])
# => think of an m x n matrix: m = output neurons size, n = input neurons size

# Q: how many parameters does the model have?
# add up the element count of every parameter tensor
total_params = sum(tensor.numel() for tensor in model.parameters())
print(f"Total num of parameters: {total_params}")

# Q: how can I see the shape of each layer's parameters?
# named_parameters() yields (layer name, parameter tensor) pairs
for name, param in model.named_parameters():
    print(f"{name}: {param.shape}")


# Q: how can I inspect nested blocks (modularized blocks such as the conv blocks built with nn.Sequential())?
# named_children() only yields the top-level modules (direct children of the root module)
for name, module in model.named_children():
    print(name, module)

# named_modules() yields every module, including nested sub-modules
for name, module in model.named_modules():
    if not name:
        # the root model itself is yielded with an empty name - skip it
        continue
    print(name, module)


# [Troubleshooting] RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x2048 and 1024x512)
# => shape mismatch between the input to a linear layer and that layer's weights
# print the name and shape of the layers and compare them with the shapes in the error message;
# also trace shapes with debugging statements inside the model's forward pass
for name, param in model.named_parameters():
    print(f"{name}: {param.shape}")



Total num of parameters: 421642
features.0.block.0.weight: torch.Size([32, 1, 3, 3])
features.0.block.0.bias: torch.Size([32])
features.1.block.0.weight: torch.Size([64, 32, 3, 3])
features.1.block.0.bias: torch.Size([64])
classifier.0.weight: torch.Size([128, 3136])
classifier.0.bias: torch.Size([128])
classifier.3.weight: torch.Size([10, 128])
classifier.3.bias: torch.Size([10])
features Sequential(
  (0): ConvBlock(
    (block): Sequential(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
  )
  (1): ConvBlock(
    (block): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
  )
)
flatten Flatten(start_dim=1, end_dim=-1)
classifier Sequential(
  (0): Linear(in_features=3136, out_features=128, bias=True)
 