In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.models import SqueezeNet

In [2]:
import helper_utils 

In [3]:
# Per-sample preprocessing: ToTensor converts each image into a torch tensor.
transform = transforms.ToTensor()

# Fetch the dataset through the project helper (the download log below shows
# Fashion-MNIST files being retrieved) and attach the transform to it.
dataset = helper_utils.get_dataset()
dataset.transform = transform

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./dataset\FashionMNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 26.4M/26.4M [00:05<00:00, 4.76MB/s]


Extracting ./dataset\FashionMNIST\raw\train-images-idx3-ubyte.gz to ./dataset\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./dataset\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 29.5k/29.5k [00:00<00:00, 149kB/s]


Extracting ./dataset\FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./dataset\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./dataset\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 4.42M/4.42M [00:01<00:00, 2.31MB/s]


Extracting ./dataset\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./dataset\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./dataset\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 5.15k/5.15k [00:00<?, ?B/s]

Extracting ./dataset\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./dataset\FashionMNIST\raw






In [4]:
# Number of samples delivered per iteration of the loader.
batch_size = 64

# shuffle=False keeps samples in dataset order, so the first batch is the same
# on every run.
dataloader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [7]:
# Grab only the first batch from the loader; it is reused below as a fixed
# sample input for the forward-pass experiments.
img_batch, label_batch = next(iter(dataloader))
print("Batch shape:", img_batch.shape)  # Should be [batch_size, 1, 28, 28]

Batch shape: torch.Size([64, 1, 28, 28])


In [None]:
class SimpleCNN(nn.Module):
    """Minimal CNN classifier: one conv/ReLU/max-pool block and two linear layers.

    Input:  tensor of shape (N, 1, 28, 28).
    Output: tensor of shape (N, 10), one raw score (logit) per class.
    """

    def __init__(self):
        super().__init__()
        # Convolutional Block
        # A 3x3 kernel with padding=1 preserves the 28x28 spatial size;
        # the 2x2 max-pool with stride 2 then halves it to 14x14.
        self.conv = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully Connected Block
        # For 28x28 inputs the conv+pool output is 32x14x14, i.e. 6272
        # features per sample once flattened.
        self.fc1 = nn.Linear(32 * 14 * 14, 128)
        self.relu_fc = nn.ReLU()
        self.fc2 = nn.Linear(128, 10)  # 10 output classes

    def forward(self, x):
        """Run the network on a batch ``x`` of shape (N, 1, 28, 28).

        Returns:
            Tensor of shape (N, 10) with the unnormalized class scores.
        """
        x = self.pool(self.relu(self.conv(x)))
        # nn.Linear operates on the last dimension only, so the
        # (N, 32, 14, 14) feature map must be collapsed to (N, 6272) first.
        # Without this step fc1 fails with a shape-mismatch error.
        x = torch.flatten(x, start_dim=1)
        x = self.relu_fc(self.fc1(x))
        x = self.fc2(x)
        return x

In [16]:
# Build a fresh model and try a single forward pass on the sample batch.
simple_cnn = SimpleCNN()

try:
    output = simple_cnn(img_batch)
except Exception as err:
    # Report the failure in red (ANSI escape codes) instead of aborting the cell.
    print(f"\033[91mError during forward pass: {err}\033[0m")

In [19]:
class SimpleCNNDebug(SimpleCNN):
    """SimpleCNN variant whose forward pass prints every intermediate shape.

    All layers are inherited from SimpleCNN; only ``forward`` is overridden so
    that parameter shapes and activation shapes are reported step by step.
    """

    def __init__(self):
        # Layers are created by SimpleCNN.__init__; nothing extra is defined here.
        super().__init__()

    @staticmethod
    def _report_params(label, layer):
        """Print ``label`` followed by the weight and bias shapes of ``layer``."""
        print(label, layer.weight.shape, layer.bias.shape)

    def forward(self, x):
        """Run the network on ``x``, printing shapes along the way.

        Returns the output of ``fc2``.
        """
        separator = "==="

        # --- convolution + ReLU ---
        print("Input shape:", x.shape)
        self._report_params(
            " (Layer components) Conv layer parameters (weights, biases):",
            self.conv,
        )
        conv_out = self.relu(self.conv(x))
        print(separator)

        # --- pooling and flattening ---
        print("(Activation) After convolution and ReLU:", conv_out.shape)
        pooled = self.pool(conv_out)
        print("(Activation) After pooling:", pooled.shape)
        # Keep dim 0 (batch) and merge the remaining dims so fc1 receives 2-D input.
        flat = torch.flatten(pooled, start_dim=1)
        print(f"new shape after being flattened: {flat.shape}")

        # --- first linear layer + ReLU ---
        self._report_params(
            "(Layer components) Linear layer fc1 parameters (weights, biases):",
            self.fc1,
        )
        hidden = self.relu_fc(self.fc1(flat))
        print(separator)

        # --- output layer ---
        print("(Activation) After fc1 and ReLU:", hidden.shape)
        self._report_params(
            "(Layer components) Linear layer fc2 parameters (weights, biases):",
            self.fc2,
        )
        logits = self.fc2(hidden)
        print(separator)

        print("(Activation) After fc2 (output):", logits.shape)
        return logits

In [20]:
# Build the shape-printing model and run it once on the sample batch.
simple_cnn_debug = SimpleCNNDebug()

try:
    output_debug = simple_cnn_debug(img_batch)
except Exception as err:
    # Report the failure in red (ANSI escape codes) instead of aborting the cell.
    print(f"\033[91mError during forward pass in debug model: {err}\033[0m")

Input shape: torch.Size([64, 1, 28, 28])
 (Layer components) Conv layer parameters (weights, biases): torch.Size([32, 1, 3, 3]) torch.Size([32])
===
(Activation) After convolution and ReLU: torch.Size([64, 32, 28, 28])
(Activation) After pooling: torch.Size([64, 32, 14, 14])
new shape after being flattened: torch.Size([64, 6272])
(Layer components) Linear layer fc1 parameters (weights, biases): torch.Size([128, 6272]) torch.Size([128])
===
(Activation) After fc1 and ReLU: torch.Size([64, 128])
(Layer components) Linear layer fc2 parameters (weights, biases): torch.Size([10, 128]) torch.Size([10])
===
(Activation) After fc2 (output): torch.Size([64, 10])
