In [1]:
import torch
from torch import nn
import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt

In [2]:
# Fix the RNG seed so the random "images" below are reproducible
torch.manual_seed(42)

# Stand-in for a CIFAR-10 batch: 32 random images laid out as
# [batch_size, color_channels, height, width]
images = torch.randn(32, 3, 32, 32)
test_image = images[0]  # first image of the batch -> [color_channels, height, width]

# Report the batch shape, the single-image shape, and the raw pixel values
print(
    f"Image batch shape: {images.shape} -> [batch_size, color_channels, height, width]",
    f"Single image shape: {test_image.shape} -> [color_channels, height, width]",
    f"Single image pixel values:\n{test_image}",
    sep="\n",
)

Image batch shape: torch.Size([32, 3, 32, 32]) -> [batch_size, color_channels, height, width]
Single image shape: torch.Size([3, 32, 32]) -> [color_channels, height, width]
Single image pixel values:
tensor([[[ 1.9269e+00,  1.4873e+00,  9.0072e-01,  ...,  4.1759e-02,
          -2.5158e-01,  8.5986e-01],
         [-1.3847e+00, -8.7124e-01, -2.2337e-01,  ...,  1.8446e+00,
          -1.1845e+00,  1.3835e+00],
         [ 1.4451e+00,  8.5641e-01,  2.2181e+00,  ..., -8.2777e-01,
           1.3347e+00,  4.8354e-01],
         ...,
         [ 5.1823e-02, -3.2848e-01, -2.2472e+00,  ...,  1.4557e+00,
          -3.4610e-01, -2.6338e-01],
         [-4.4770e-01, -7.2882e-01, -1.6066e-01,  ...,  5.4047e-01,
           4.3507e-01, -2.2717e+00],
         [-1.3386e-01, -5.8557e-02,  1.2574e-01,  ...,  1.1085e+00,
           5.5442e-01,  1.5818e+00]],

        [[-1.2248e+00,  9.6289e-01, -1.5785e+00,  ...,  7.8247e-01,
          -6.4659e-02, -2.2984e-04],
         [ 6.8309e-01,  1.0637e-01,  3.5032e-01, 

https://poloclub.github.io/cnn-explainer/

in_channels (int) - Number of channels in the input image.

out_channels (int) - Number of channels produced by the convolution.

kernel_size (int or tuple) - Size of the convolving kernel/filter.

stride (int or tuple, optional) - How big of a step the convolving kernel takes at a time. Default: 1.

padding (int, tuple, str) - Padding added to all four sides of input. Default: 0.

In [3]:
torch.manual_seed(42)

# Build a single convolution layer and feed one image through it.
# (Experiment: tweak any of the arguments below and watch the output change.)
conv_layer = nn.Conv2d(
    in_channels=3,   # matches the 3 color channels of test_image
    out_channels=10,
    kernel_size=3,
    stride=1,
    padding=0,
)

# Forward pass; as the cell's last expression, the result is displayed
conv_layer(test_image)

tensor([[[ 1.1667e+00,  1.4325e-01,  1.6771e-02,  ...,  3.1925e-02,
          -7.1931e-02,  3.7874e-01],
         [ 9.5715e-01,  3.2787e-01,  2.6548e-01,  ..., -3.0249e-01,
          -1.8824e-01, -3.8694e-01],
         [ 2.7237e-02,  4.1275e-01,  1.0648e+00,  ..., -5.8316e-01,
          -7.7496e-01,  7.0928e-01],
         ...,
         [-7.4959e-01, -5.7313e-01,  2.3699e-01,  ...,  8.5081e-01,
           2.3852e-01, -4.1881e-01],
         [-5.5988e-01,  9.8861e-02, -8.2409e-01,  ..., -4.4532e-02,
           2.9962e-01, -2.9472e-02],
         [ 1.2873e-01, -7.5458e-01, -4.0582e-01,  ..., -4.7212e-02,
           1.3111e+00,  6.3414e-01]],

        [[ 3.3298e-01,  3.3936e-01, -6.4400e-01,  ...,  1.4035e-01,
          -6.6302e-01, -5.8043e-03],
         [ 5.9086e-01, -1.1782e+00,  5.4756e-01,  ..., -5.9479e-01,
          -5.3999e-01, -4.8962e-01],
         [-1.6372e+00,  1.5448e-01,  2.5152e-01,  ..., -6.4175e-01,
          -1.3647e-01, -3.4338e-01],
         ...,
         [-6.1613e-01, -1

In [4]:
# Shape of the conv layer's output for the single (unbatched) image
conv_layer(test_image).shape

torch.Size([10, 30, 30])

In [5]:
# Shape of the input image, for comparison with the conv layer's output shape
test_image.shape

torch.Size([3, 32, 32])

In [6]:
# As you can see, the conv layer shrinks the height and width (32x32 -> 30x30) while increasing the channels (3 -> 10), just like the conv layers in TinyVGG

In [7]:
# Different Conv2d settings can produce a different output height/width
torch.manual_seed(42)

# Second conv layer: larger kernel and a bigger stride than conv_layer
conv_layer_2 = nn.Conv2d(
    in_channels=3,       # same number of color channels as our input image
    out_channels=10,
    kernel_size=(5, 5),  # (height, width) tuple; a plain int 5 gives the same square kernel
    stride=2,
    padding=0,
)

# Feed the single image through conv_layer_2 and inspect the resulting shape
conv_layer_2(test_image).shape

torch.Size([10, 14, 14])

In [8]:
# As you can see, kernel_size=5 with stride=2 brings the height and width down from 32 to 14


# conv2d does not always decrease the width and height of image 
# it depends on *padding*, *stride* & *kernel_size* 

```python
conv_layer = nn.Conv2d(
    in_channels=3,
    out_channels=10,
    kernel_size=3,
    stride=1,
    padding=0  # no padding: a 32x32 input comes out as 30x30
)
```

---

**Conv2D çıktısının boyutu matematiksel olarak nasıl hesaplanır?**

Her eksen için formül:

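$$\text{output} = \left\lfloor \frac{W - K + 2P}{S} \right\rfloor + 1$$

Örnek: W=32, K=5, P=0, S=2 için ⌊27/2⌋ + 1 = 14 (bölme sonucu aşağı yuvarlanır).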

- **W**: input genişlik (32)
- **K**: kernel size (3)
- **P**: padding (0 veya 1 olabilir)
- **S**: stride (1)

---

In [9]:
# let's see what happens in a max pooling layer (nn.MaxPool2d)

In [10]:
# Show how unsqueeze(dim=0) prepends a batch dimension to the single image
print(
    f"Test image original shape: {test_image.shape}",
    f"Test image with unsqueezed dimension: {test_image.unsqueeze(dim=0).shape}",
    sep="\n",
)

# Sample nn.MaxPool2d() layer with kernel_size=2
max_pool_layer = nn.MaxPool2d(kernel_size=2)

# Step 1: send the batched image through conv_layer only
batched_image = test_image.unsqueeze(dim=0)
test_image_through_conv = conv_layer(batched_image)
print(f"Shape after going through conv_layer(): {test_image_through_conv.shape}")

# Step 2: send the conv output through the max pool layer
test_image_through_conv_and_max_pool = max_pool_layer(test_image_through_conv)
print(f"Shape after going through conv_layer() and max_pool_layer(): {test_image_through_conv_and_max_pool.shape}")

Test image original shape: torch.Size([3, 32, 32])
Test image with unsqueezed dimension: torch.Size([1, 3, 32, 32])
Shape after going through conv_layer(): torch.Size([1, 10, 30, 30])
Shape after going through conv_layer() and max_pool_layer(): torch.Size([1, 10, 15, 15])


In [11]:
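# so in TinyVGG we have two blocks with the same layout:
# block1 -> conv2d + relu + conv2d + relu + maxpool2d
# block2 -> conv2d + relu + conv2d + relu + maxpool2d
# (the shape walk-through below leaves out the ReLUs, since ReLU does not change tensor shapes)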

In [12]:
# Random stand-in for a single image (batch of 1): [batch, channels, height, width]
x = torch.randn(1, 3, 32, 32)
print("Input:", x.shape)

# 1. BLOCK: two 3x3 convs with padding=1 (height/width unchanged), then a 2x2 max pool
conv1 = nn.Conv2d(in_channels=3, out_channels=10, kernel_size=3, stride=1, padding=1)
conv2 = nn.Conv2d(in_channels=10, out_channels=10, kernel_size=3, stride=1, padding=1)
pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

# 2. BLOCK: same layout as block 1, operating on the 10-channel feature map
conv3 = nn.Conv2d(in_channels=10, out_channels=10, kernel_size=3, stride=1, padding=1)
conv4 = nn.Conv2d(in_channels=10, out_channels=10, kernel_size=3, stride=1, padding=1)
pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

# Push x through every layer in order, reporting the shape after each step
for stage_label, stage in (
    ("After Conv1:", conv1),
    ("After Conv2:", conv2),
    ("After MaxPool1:", pool1),
    ("After Conv3:", conv3),
    ("After Conv4:", conv4),
    ("After MaxPool2:", pool2),
):
    x = stage(x)
    print(stage_label, x.shape)

Input: torch.Size([1, 3, 32, 32])
After Conv1: torch.Size([1, 10, 32, 32])
After Conv2: torch.Size([1, 10, 32, 32])
After MaxPool1: torch.Size([1, 10, 16, 16])
After Conv3: torch.Size([1, 10, 16, 16])
After Conv4: torch.Size([1, 10, 16, 16])
After MaxPool2: torch.Size([1, 10, 8, 8])


In [13]:
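# As you can see, the two max-pool layers halve 32x32 twice (32 -> 16 -> 8), so the feature map
# reaching the classifier is 8x8 per channel; that's why in_features is hidden_units*8*8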

#        self.classifier = nn.Sequential(
#            nn.Flatten(),
#            nn.Linear(in_features=hidden_units*8*8, 
#                      out_features=output_shape)
#        )