
# Lenet:

Lenet 5 is considered as the first architecture for Convolutional Neural Networks, which are used to identify handwritten digits in the zip codes in the US. It was introduced in the paper, “Gradient-Based Learning Applied To Document Recognition.”



<img src="https://miro.medium.com/max/1400/0*VjXufJUfN2q3B-j8.jpg" alt="alt" width="50%"/>



In [1]:
import torch
import torch.nn as nn

class Lenet(nn.Module):
  def __init__(self):
    super(Lenet,self).__init__()
    self.tanh = nn.Tanh()      
    self.pool = nn.AvgPool2d(kernel_size=(2,2),stride=(2,2))
    self.conv1 = nn.Conv2d(in_channels=1,out_channels=6,kernel_size=(5,5),stride=(1,1))
    self.conv2 = nn.Conv2d(in_channels=6,out_channels=16,kernel_size=(5,5),stride=(1,1))
    self.conv3 = nn.Conv2d(in_channels=16,out_channels=120,kernel_size=(5,5),stride=(1,1))
    self.linear1 = nn.Linear(120,84)
    self.linear2 = nn.Linear(84,10)

  def forward(self,x):
    x = self.tanh(self.conv1(x))
    x = self.pool(x)
    x = self.tanh(self.conv2(x))
    x = self.pool(x)
    x = self.tanh(self.conv3(x))
    x = x.reshape(x.shape[0],-1)
    x = self.tanh(self.linear1(x))
    x = self.linear2(x)
    return x

model = Lenet()

model

Lenet(
  (tanh): Tanh()
  (pool): AvgPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0)
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(16, 120, kernel_size=(5, 5), stride=(1, 1))
  (linear1): Linear(in_features=120, out_features=84, bias=True)
  (linear2): Linear(in_features=84, out_features=10, bias=True)
)

In [2]:
import torch

def count_parameters(model):
    return sum(p.numel() for p in model.parameters()) / 1e6

# Example usage:

num_params = count_parameters(model)
print(f"Number of parameters in the model: {num_params} M")


Number of parameters in the model: 0.061706 M


In [3]:
input = torch.rand(10,1,32,32)

model(input).shape

torch.Size([10, 10])

In [6]:
import torch 
import torch.nn as nn

class AlexNet(nn.Module):
    def __init__(self, in_channels=3, classes=1000):
        super().__init__()
        self.c1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=0)
        self.c2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2)
        self.c3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.c4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.c5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1)

        self.fc1 = nn.Linear(6*6*256, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, classes)

        self.localnorm = nn.LocalResponseNorm(size=5)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(0.5)

      

    def forward(self, x):
        # x shape: [batch, 3, 227, 227]
        x = self.relu(self.c1(x))
        # x shape: [batch, 96, 55, 55]
        x = self.maxpool(self.localnorm(x))
        # x shape: [batch, 96, 27, 27]
        x = self.relu(self.c2(x))
        # x shape: [batch, 256, 27, 27]
        x = self.maxpool(self.localnorm(x))
        # x shape: [batch, 256, 13, 13]
        x = self.relu(self.c3(x))
        # x shape: [batch, 384, 13, 13]
        x = self.relu(self.c4(x))
        # x shape: [batch, 384, 13, 13]
        x = self.maxpool(self.relu(self.c5(x)))
        # x shape: [batch, 256, 6, 6]
        x = torch.flatten(x,1)
        # x shape: [batch, 256*6*6]
        x = self.relu(self.dropout(self.fc1(x)))
        # x shape: [batch, 4096]
        x = self.relu(self.dropout(self.fc2(x)))
        # x shape: [batch, 4096]
        x = self.fc3(x)
        # x shape: [batch, classes]
        return x


model = AlexNet()

model

AlexNet(
  (c1): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
  (c2): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (c3): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (c4): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (c5): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=9216, out_features=4096, bias=True)
  (fc2): Linear(in_features=4096, out_features=4096, bias=True)
  (fc3): Linear(in_features=4096, out_features=1000, bias=True)
  (localnorm): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=1.0)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu): ReLU(inplace=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [7]:
import torch

def count_parameters(model):
    return sum(p.numel() for p in model.parameters()) / 1e6

# Example usage:

num_params = count_parameters(model)
print(f"Number of parameters in the model: {num_params} M")


Number of parameters in the model: 62.378344 M


In [9]:
input = torch.rand(1,3,227,227)

model(input).shape

torch.Size([1, 1000])

# VGG:
<img src="https://pytorch.org/assets/images/vgg.png" alt="alt" width="50%"/>

In [10]:
import torch
import torch.nn as nn


configs = { 
        "VGG11" : [1,1,2,2,2], # 1x64, 1x128, 2x256, 2x512, 2x512
        "VGG13" : [2,2,2,2,2], # 2x64, 2x128, 2x256, 2x512, 2x512
        "VGG16" : [2,2,3,3,3], # 2x64, 2x128, 3x256, 3x512, 3x512
        "VGG19" : [2,2,4,4,4]  # 2x64, 2x128, 4x256, 4x512, 4x512
    }

class Block(nn.Module):
    def __init__(self, in_channels, out_channels, no_layers):
        super().__init__()
        self.layers = [nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), nn.ReLU()]
        for i in range(no_layers-1):
            self.layers.append(nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1))
            self.layers.append(nn.ReLU())

        self.layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.seq = nn.Sequential(*self.layers)


    def forward(self, x):
        return self.seq(x)

class VGG(nn.Module):
    def __init__(
        self, 
        model_size : str,
        in_channels : int = 3, 
        classes : int = 1000
        ):
        super().__init__()
   
        config = configs[model_size]
        channels = [in_channels,64,128,256,512,512]
        self.blocks = nn.ModuleList([])
        for i in range(len(config)):
            self.blocks.append(Block(channels[i], channels[i+1], config[i]))
        
        self.fc1 = nn.Linear(7*7*512, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        for block in self.blocks:
            x = block(x)

        x = torch.flatten(x,1)
        x = self.relu(self.dropout(self.fc1(x)))
        x = self.relu(self.dropout(self.fc2(x)))
        x = self.fc3(x)

        return x



In [11]:
model = VGG("VGG16")
model

VGG(
  (blocks): ModuleList(
    (0): Block(
      (seq): Sequential(
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU()
        (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (3): ReLU()
        (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
    )
    (1): Block(
      (seq): Sequential(
        (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU()
        (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (3): ReLU()
        (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
    )
    (2): Block(
      (seq): Sequential(
        (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU()
        (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (3): ReLU()
        (4): Conv2d(256, 256, kernel_size=(3

In [12]:
import torch

def count_parameters(model):
    return sum(p.numel() for p in model.parameters()) / 1e6

# Example usage:

num_params = count_parameters(model)
print(f"Number of parameters in the model: {num_params} M")


Number of parameters in the model: 138.357544 M


In [8]:
input = torch.rand(1,3,224,224)

model(input).shape

torch.Size([1, 1000])

In [25]:
import torch
import torch.nn as nn

# Define the Residual Block
class ResidualBlock(nn.Module):
    expansion = 1  # Adding expansion factor to match the ResNet class

    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.downsample = None
        if stride != 1 or in_channels != out_channels * self.expansion:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * self.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * self.expansion),
            )

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(identity)

        out += identity
        out = self.relu(out)

        return out

# Define the ResNet architecture
class ResNet50(nn.Module):
    def __init__(self, block, layers, num_classes=1000):
        super(ResNet, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        layers = []
        layers.append(block(self.in_channels, out_channels, stride))
        self.in_channels = out_channels * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

# Create an instance of the ResNet model
model = ResNet50(ResidualBlock, [2, 2, 2, 2])

model


TypeError: super(type, obj): obj must be an instance or subtype of type

In [17]:
import torch

def count_parameters(model):
    return sum(p.numel() for p in model.parameters()) / 1e6

# Example usage:

num_params = count_parameters(model)
print(f"Number of parameters in the model: {num_params} M")


Number of parameters in the model: 11.689512 M


# Efficientnet b0

In [20]:
import torch
import torch.nn as nn
import torch.nn.functional as F

def conv_1x1_bn(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )

def conv_3x3_bn(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, padding=1, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )

class MBConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, expansion, stride):
        super(MBConvBlock, self).__init__()
        self.stride = stride
        self.expansion = expansion
        hidden_dim = round(in_channels * expansion)
        self.use_res_connect = self.stride == 1 and in_channels == out_channels

        if expansion != 1:
            self.expand_conv = conv_1x1_bn(in_channels, hidden_dim, 1)
        self.depthwise_conv = conv_3x3_bn(hidden_dim, hidden_dim, stride)
        self.pointwise_conv = nn.Conv2d(hidden_dim, out_channels, 1, 1, 0, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        identity = x
        if self.expansion != 1:
            x = self.expand_conv(x)
        x = self.depthwise_conv(x)
        x = self.pointwise_conv(x)
        x = self.bn(x)
        if self.use_res_connect:
            x += identity
        return x

class EfficientNet(nn.Module):
    def __init__(self, num_classes=1000):
        super(EfficientNet, self).__init__()
        # Assuming B0 base model, width_coefficient, depth_coefficient, resolution, dropout_rate = 1.0, 1.0, 224, 0.2
        self.stem = conv_3x3_bn(3, 32, 2)  # initial stem conv
        
        # Example of EfficientNet block configuration
        self.blocks = nn.Sequential(
            MBConvBlock(32, 16, expansion=1, stride=1),
            MBConvBlock(16, 24, expansion=6, stride=2),
            # Add more MBConvBlocks as per the EfficientNet architecture
        )
        
        self.head = nn.Sequential(  # Head to produce final classifications
            conv_1x1_bn(24, 1280, 1),  # Example final conv to 1280 channels
            nn.AdaptiveAvgPool2d(1),  # Global average pooling
            nn.Flatten(),
            nn.Dropout(0.2),
            nn.Linear(1280, num_classes)
        )

    def forward(self, x):
        x = self.stem(x)
        x = self.blocks(x)
        x = self.head(x)
        return x

# Create an instance of EfficientNet
model = EfficientNet(num_classes=1000)
print(model)


EfficientNet(
  (stem): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU6(inplace=True)
  )
  (blocks): Sequential(
    (0): MBConvBlock(
      (depthwise_conv): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (pointwise_conv): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): MBConvBlock(
      (expand_conv): Sequential(
        (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
 

In [26]:
import torch
import torch.nn as nn
import pandas as pd

def count_parameters(model):
    return sum(p.numel() for p in model.parameters()) / 1e6

models = {
    "Lenet": Lenet(),
    "AlexNet": AlexNet(),
    "VGG16": VGG("VGG16"),
    "ResNet50": ResNet(ResidualBlock, [2, 2, 2, 2]),
    "EfficientNet": EfficientNet(num_classes=1000)
}

parameters = {}
for model_name, model in models.items():
    num_params = count_parameters(model)
    parameters[model_name] = num_params

parameters

# change parameter to pandas dataframe

df = pd.DataFrame(parameters.items(), columns=["Model", "Number of Parameters (M)"])
df

Unnamed: 0,Model,Number of Parameters (M)
0,Lenet,0.061706
1,AlexNet,62.378344
2,VGG16,138.357544
3,ResNet50,11.689512
4,EfficientNet,1.412248
