In [1]:
from pathlib import Path
import sys
import pandas as pd
from PIL import Image
import io
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from torch.optim import lr_scheduler
import time
from sklearn.model_selection import train_test_split
import torchvision 
from torchvision import transforms
import pickle
from scipy import stats
import numpy as np
from torchvision.models import efficientnet_b3
import torch.nn.functional as F
import math
import pandas as pd
import numpy as np
import scipy.stats as stats
from thop import profile

In [2]:
# Baseline
class SqueezeExcitation(nn.Module):
    def __init__(self, in_channels, reduced_dim):
        super(SqueezeExcitation, self).__init__()
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels, reduced_dim, 1),
            nn.SiLU(),
            nn.Conv2d(reduced_dim, in_channels, 1),
            nn.Sigmoid(),
        )
    def forward(self, x):
        return x * self.se(x)
class EfficientNetB3(nn.Module):
  def __init__(self, num_classes, stages):
    super(EfficientNetB3, self).__init__()
    self.model = efficientnet_b3(weights="IMAGENET1K_V1", progress=True)
    self.model.classifier = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True), 
        nn.Linear(in_features=1536, out_features=num_classes, bias=True)
    )
    for param in self.model.parameters():
        param.requires_grad = False
    for param in self.model.classifier.parameters():
        param.requires_grad = True
    # qse in stage 2
    if 2 in stages:
        self.model.features[1][0].block[1] = SqueezeExcitation(40,10)
        self.model.features[1][1].block[1] = SqueezeExcitation(24,6)
    # qse in stage 3
    if 3 in stages:
        self.model.features[2][0].block[2] = SqueezeExcitation(144,6)
        for i in range(1,3):
          self.model.features[2][1].block[2] = SqueezeExcitation(192,8)
    # qse in stage 4
    if 4 in stages:
      self.model.features[3][0].block[2] = SqueezeExcitation(192,8)
      for i in range(1,3):  
        self.model.features[3][i].block[2] = SqueezeExcitation(288,12)
    # qse in stage 5
    if 5 in stages:
      self.model.features[4][0].block[2] = SqueezeExcitation(288,12)
      for i in range(1,5):
        self.model.features[4][i].block[2] = SqueezeExcitation(576,24)
    # qse in stage 6
    if 6 in stages:
      self.model.features[5][0].block[2] = SqueezeExcitation(576,24)
      for i in range(1,5):
        self.model.features[5][i].block[2] = SqueezeExcitation(816,34)
    # qse in stage 7
    if 7 in stages:
      self.model.features[6][0].block[2] = SqueezeExcitation(816,34)
      for i in range(1,6):
        self.model.features[6][i].block[2] = SqueezeExcitation(1392,58)
    # qse in stage 8
    if 8 in stages:
      self.model.features[7][0].block[2] = SqueezeExcitation(1392,58)
      self.model.features[7][1].block[2] = SqueezeExcitation(2304,96)
  def forward(self, x):
    return self.model(x)

In [3]:
# QAM
class QAM(nn.Module):
    def __init__(self, in_channels, reduction_dim):
        super(QAM, self).__init__()
        self.fc = nn.Sequential(
            nn.AdaptiveAvgPool2d((2, 2)),
            nn.Conv2d(in_channels, reduction_dim, kernel_size=1, stride=1),
            nn.SiLU(),
            nn.Conv2d(reduction_dim, in_channels, kernel_size=1, stride=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        residual = x
        b, c, h, w = x.size()
        
        x = self.fc(x)

        residual[:, :, :h//2, :w//2].mul_(x[:, :, 0:1, 0:1])  # Top-left
        residual[:, :, :h//2, w//2:].mul_(x[:, :, 0:1, 1:2])  # Top-right
        residual[:, :, h//2:, :w//2].mul_(x[:, :, 1:2, 0:1])  # Bottom-left
        residual[:, :, h//2:, w//2:].mul_(x[:, :, 1:2, 1:2])  # Bottom-right

        return residual
class EfficientNetB3QAM(nn.Module):
  def __init__(self, num_classes, stages):
    super(EfficientNetB3QAM, self).__init__()
    self.model = efficientnet_b3(weights="IMAGENET1K_V1", progress=True)
    self.model.classifier = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True), 
        nn.Linear(in_features=1536, out_features=num_classes, bias=True)
    )
    for param in self.model.parameters():
        param.requires_grad = False
    for param in self.model.classifier.parameters():
        param.requires_grad = True
    # qse in stage 2
    if 2 in stages:
        self.model.features[1][0].block[1] = QAM(40,10)
        self.model.features[1][1].block[1] = QAM(24,6)
    # qse in stage 3
    if 3 in stages:
        self.model.features[2][0].block[2] = QAM(144,6)
        for i in range(1,3):
          self.model.features[2][1].block[2] = QAM(192,8)
    # qse in stage 4
    if 4 in stages:
      self.model.features[3][0].block[2] = QAM(192,8)
      for i in range(1,3):  
        self.model.features[3][i].block[2] = QAM(288,12)
    # qse in stage 5
    if 5 in stages:
      self.model.features[4][0].block[2] = QAM(288,12)
      for i in range(1,5):
        self.model.features[4][i].block[2] = QAM(576,24)
    # qse in stage 6
    if 6 in stages:
      self.model.features[5][0].block[2] = QAM(576,24)
      for i in range(1,5):
        self.model.features[5][i].block[2] = QAM(816,34)
    # qse in stage 7
    if 7 in stages:
      self.model.features[6][0].block[2] = QAM(816,34)
      for i in range(1,6):
        self.model.features[6][i].block[2] = QAM(1392,58)
    # qse in stage 8
    if 8 in stages:
      self.model.features[7][0].block[2] = QAM(1392,58)
      self.model.features[7][1].block[2] = QAM(2304,96)
  def forward(self, x):
    return self.model(x)

In [4]:
# HAM
class HAM(nn.Module):
    def __init__(self, in_channels, reduction_dim):
        super(HAM, self).__init__()
        self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.quadrant_avgpool = nn.AdaptiveAvgPool2d((2, 2))
        self.fc = nn.Sequential(
            nn.Conv2d(in_channels, reduction_dim, 1),
            nn.SiLU(),
            nn.Conv2d(reduction_dim, in_channels, 1),
        )
        self.scale_activation = nn.Sigmoid()

    def forward(self, x):
        residual = x
        b, c, h, w = x.size()
        
        # Global Average Pooling (GAP) and Quadrant Average Pooling (QAP)
        gap_out = self.fc(self.global_avgpool(x))
        qap_out = self.fc(self.quadrant_avgpool(x))
        
        # Applying element-wise multiplication using scaling activations
        x = self.scale_activation(self.scale_activation(gap_out) * qap_out)
        
        # Apply quadrant-specific scaling using in-place multiplication (mul_)
        residual[:, :, 0:h//2, 0:w//2].mul_(x[:, :, 0:1, 0:1])  # Top-left
        residual[:, :, 0:h//2, w//2:].mul_(x[:, :, 0:1, 1:2])  # Top-right
        residual[:, :, h//2:, 0:w//2].mul_(x[:, :, 1:2, 0:1])  # Bottom-left
        residual[:, :, h//2:, w//2:].mul_(x[:, :, 1:2, 1:2])  # Bottom-right

        return residual

class EfficientNetB3HAM(nn.Module):
  def __init__(self, num_classes, stages):
    super(EfficientNetB3HAM, self).__init__()
    self.model = efficientnet_b3(weights="IMAGENET1K_V1", progress=True)
    self.model.classifier = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True),  # Dropout layer with 30% dropout rate
        nn.Linear(in_features=1536, out_features=num_classes, bias=True)  # Adjust in_features to match EfficientNetB3 output
    )
    for param in self.model.parameters():
        param.requires_grad = False
    for param in self.model.classifier.parameters():
        param.requires_grad = True
    # qse in stage 2
    if 2 in stages:
        self.model.features[1][0].block[1] = HAM(40,10)
        self.model.features[1][1].block[1] = HAM(24,6)
    # qse in stage 3
    if 3 in stages:
        self.model.features[2][0].block[2] = HAM(144,6)
        for i in range(1,3):
          self.model.features[2][1].block[2] = HAM(192,8)
    # qse in stage 4
    if 4 in stages:
      self.model.features[3][0].block[2] = HAM(192,8)
      for i in range(1,3):  
        self.model.features[3][i].block[2] = HAM(288,12)
    # qse in stage 5
    if 5 in stages:
      self.model.features[4][0].block[2] = HAM(288,12)
      for i in range(1,5):
        self.model.features[4][i].block[2] = HAM(576,24)
    # qse in stage 6
    if 6 in stages:
      self.model.features[5][0].block[2] = HAM(576,24)
      for i in range(1,5):
        self.model.features[5][i].block[2] = HAM(816,34)
    # qse in stage 7
    if 7 in stages:
      self.model.features[6][0].block[2] = HAM(816,34)
      for i in range(1,6):
        self.model.features[6][i].block[2] = HAM(1392,58)
    # qse in stage 8
    if 8 in stages:
      self.model.features[7][0].block[2] = HAM(1392,58)
      self.model.features[7][1].block[2] = HAM(2304,96)
  def forward(self, x):
    return self.model(x)

In [5]:
# credit: https://github.com/Peachypie98/CBAM/blob/main/cbam.py
class CAM(nn.Module):
    def __init__(self, channels, r=16):
        super(CAM, self).__init__()
        self.channels = channels
        self.r = r
        self.linear = nn.Sequential(
            nn.Linear(in_features=self.channels, out_features=self.channels//16, bias=True),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=self.channels//16, out_features=self.channels, bias=True)
        )

    def forward(self, x):
        max = F.adaptive_max_pool2d(x, output_size=1)
        avg = F.adaptive_avg_pool2d(x, output_size=1)
        b, c, _, _ = x.size()
        linear_max = self.linear(max.view(b,c)).view(b, c, 1, 1)
        linear_avg = self.linear(avg.view(b,c)).view(b, c, 1, 1)
        output = linear_max + linear_avg
        return torch.sigmoid(output) * x
class EfficientNetB3CAM(nn.Module):
  def __init__(self, num_classes, stages):
    super(EfficientNetB3CAM, self).__init__()
    self.model = efficientnet_b3(weights="IMAGENET1K_V1", progress=True)
    self.model.classifier = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True),  # Dropout layer with 30% dropout rate
        nn.Linear(in_features=1536, out_features=num_classes, bias=True)  # Adjust in_features to match EfficientNetB3 output
    )
    for param in self.model.parameters():
        param.requires_grad = False
    for param in self.model.classifier.parameters():
        param.requires_grad = True
    # CBAM in stage 2
    if 2 in stages:
        self.model.features[1][0].block[1] = CAM(40,10)
        self.model.features[1][1].block[1] = CAM(24,6)
    # qse in stage 3
    if 3 in stages:
        self.model.features[2][0].block[2] = CAM(144,6)
        for i in range(1,3):
          self.model.features[2][1].block[2] = CAM(192,8)
    # qse in stage 4
    if 4 in stages:
      self.model.features[3][0].block[2] = CAM(192,8)
      for i in range(1,3):  
        self.model.features[3][i].block[2] = CAM(288,12)
    # qse in stage 5
    if 5 in stages:
      self.model.features[4][0].block[2] = CAM(288,12)
      for i in range(1,5):
        self.model.features[4][i].block[2] = CAM(576,24)
    # qse in stage 6
    if 6 in stages:
      self.model.features[5][0].block[2] = CAM(576,24)
      for i in range(1,5):
        self.model.features[5][i].block[2] = CAM(816,34)
    # qse in stage 7
    if 7 in stages:
      self.model.features[6][0].block[2] = CAM(816,34)
      for i in range(1,6):
        self.model.features[6][i].block[2] = CAM(1392,58)
    # qse in stage 8
    if 8 in stages:
      self.model.features[7][0].block[2] = CAM(1392,58)
      self.model.features[7][1].block[2] = CAM(2304,96)
  def forward(self, x):
    return self.model(x)

In [6]:
class ECA(nn.Module):
    def __init__(self, channels, b=1, gamma=2):
        super(ECA, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.channels = channels
        self.b = b
        self.gamma = gamma
        self.conv = nn.Conv1d(1, 1, kernel_size=self.kernel_size(), padding=(self.kernel_size() - 1) // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def kernel_size(self):
        k = int(abs((math.log2(self.channels)/self.gamma)+ self.b/self.gamma))
        out = k if k % 2 else k+1
        return out

    def forward(self, x):
        y = self.avg_pool(x)
        y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1)
        y = self.sigmoid(y)
        return x * y.expand_as(x)
class EfficientNetB3ECA(nn.Module):
  def __init__(self, num_classes, stages):
    super(EfficientNetB3ECA, self).__init__()
    self.model = efficientnet_b3(weights="IMAGENET1K_V1", progress=True)
    self.model.classifier = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True), 
        nn.Linear(in_features=1536, out_features=num_classes, bias=True)
    )
    for param in self.model.parameters():
        param.requires_grad = False
    for param in self.model.classifier.parameters():
        param.requires_grad = True
    # qse in stage 2
    if 2 in stages:
        self.model.features[1][0].block[1] = ECA(40)
        self.model.features[1][1].block[1] = ECA(24)
    # qse in stage 3
    if 3 in stages:
        self.model.features[2][0].block[2] = ECA(144)
        for i in range(1,3):
          self.model.features[2][1].block[2] = ECA(192)
    # qse in stage 4
    if 4 in stages:
      self.model.features[3][0].block[2] = ECA(192)
      for i in range(1,3):  
        self.model.features[3][i].block[2] = ECA(288)
    # qse in stage 5
    if 5 in stages:
      self.model.features[4][0].block[2] = ECA(288)
      for i in range(1,5):
        self.model.features[4][i].block[2] = ECA(576)
    # qse in stage 6
    if 6 in stages:
      self.model.features[5][0].block[2] = ECA(576)
      for i in range(1,5):
        self.model.features[5][i].block[2] = ECA(816)
    # qse in stage 7
    if 7 in stages:
      self.model.features[6][0].block[2] = ECA(816)
      for i in range(1,6):
        self.model.features[6][i].block[2] = ECA(1392)
    # qse in stage 8
    if 8 in stages:
      self.model.features[7][0].block[2] = ECA(1392)
      self.model.features[7][1].block[2] = ECA(2304)
  def forward(self, x):
    return self.model(x)

In [8]:
class SqueezeExcitation(nn.Module):
    def __init__(self, in_channels, reduced_dim):
        super(SqueezeExcitation, self).__init__()
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels, reduced_dim, 1),
            nn.SiLU(),
            nn.Conv2d(reduced_dim, in_channels, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return x * self.se(x)


# remove the max pooling idk why it is affecting the accuracy of both the cam and the sam
class SAM(nn.Module):
    def __init__(self, bias=True):
        super(SAM, self).__init__()
        self.bias = bias
        self.conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=7, stride=1, padding=3, dilation=1, bias=self.bias)

    def forward(self, x):
        avg_pool = torch.mean(x, 1, keepdim=True)
        output = self.conv(avg_pool)
        output = torch.sigmoid(output) * x  
        return output


class SESAM(nn.Module):
    def __init__(self, in_channels, reduced_dim):
        super(SESAM, self).__init__()
        self.se = SqueezeExcitation(in_channels, reduced_dim)
        self.sam = SAM()

    def forward(self, x):
        x = self.se(x)
        x = self.sam(x)
        return x

class EfficientNetB3SESAM(nn.Module):
  def __init__(self, num_classes, stages):
    super(EfficientNetB3SESAM, self).__init__()
    self.model = efficientnet_b3(weights="IMAGENET1K_V1", progress=True)
    self.model.classifier = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True), 
        nn.Linear(in_features=1536, out_features=num_classes, bias=True)
    )
    for param in self.model.parameters():
        param.requires_grad = False
    for param in self.model.classifier.parameters():
        param.requires_grad = True
    # qse in stage 2
    if 2 in stages:
        self.model.features[1][0].block[1] = SESAM(40,10)
        self.model.features[1][1].block[1] = SESAM(24,6)
    # qse in stage 3
    if 3 in stages:
        self.model.features[2][0].block[2] = SESAM(144,6)
        for i in range(1,3):
          self.model.features[2][1].block[2] = SESAM(192,8)
    # qse in stage 4
    if 4 in stages:
      self.model.features[3][0].block[2] = SESAM(192,8)
      for i in range(1,3):  
        self.model.features[3][i].block[2] = SESAM(288,12)
    # qse in stage 5
    if 5 in stages:
      self.model.features[4][0].block[2] = SESAM(288,12)
      for i in range(1,5):
        self.model.features[4][i].block[2] = SESAM(576,24)
    # qse in stage 6
    if 6 in stages:
      self.model.features[5][0].block[2] = SESAM(576,24)
      for i in range(1,5):
        self.model.features[5][i].block[2] = SESAM(816,34)
    # qse in stage 7
    if 7 in stages:
      self.model.features[6][0].block[2] = SESAM(816,34)
      for i in range(1,6):
        self.model.features[6][i].block[2] = SESAM(1392,58)
    # qse in stage 8
    if 8 in stages:
      self.model.features[7][0].block[2] = SESAM(1392,58)
      self.model.features[7][1].block[2] = SESAM(2304,96)
  def forward(self, x):
    return self.model(x)

In [9]:
def margin_of_error(data, confidence=0.95):
    data = np.array(data)
    n = len(data)
    if n < 2:
        raise ValueError("At least two data points are required to calculate margin of error.")
    
    mean = np.mean(data)
    std_dev = np.std(data, ddof=1)  # Use ddof=1 for sample standard deviation
    z_score = stats.norm.ppf(1 - (1 - confidence) / 2)  # 1.96 for 95% CI
    
    moe = z_score * (std_dev / np.sqrt(n))
    return mean, moe

In [18]:
num_classes = 1000
stages = [2,3,4,5,6,7,8]
model_baseline = EfficientNetB3(num_classes, stages).eval().cuda()
model_qam = EfficientNetB3QAM(num_classes, stages).eval().cuda()
model_ham = EfficientNetB3HAM(num_classes, stages).eval().cuda()
model_cam = EfficientNetB3CAM(num_classes, stages).eval().cuda()
model_eca = EfficientNetB3ECA(num_classes, stages).eval().cuda()
model_sesam = EfficientNetB3SESAM(num_classes, stages).eval().cuda()
models = {"baseline": model_baseline,
         "qam": model_qam,
         "ham": model_ham,
         "cam": model_cam,
         "eca": model_eca,
         "sesam": model_sesam   
}

In [19]:
batch_size=1
input_single = torch.randn(1, 3, 256, 256).cuda()
input_batch = torch.randn(batch_size, 3, 224, 224).cuda()
df = pd.DataFrame(columns=["Model", "FLOPs", "Parameters", "Throughput", "Margin of Error"])
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
throughputs = {"baseline":[], "qam":[], "ham":[], "cam":[], "eca":[], "sesam":[]}
for index, (key, value) in enumerate(models.items()):
    flops, params = profile(value, inputs=(input_single,))
    print(flops)
    for i in range(100):
        start = time.time()
        with torch.no_grad():
            output = value(input_batch)
        end = time.time()
        throughputs[key].append(batch_size/(end-start))
    mean, moe = margin_of_error(throughputs[key])
    df.loc[len(df)] = {"Model": key, "FLOPs": flops, "Parameters": params, "Throughput": mean, "Margin of Error":moe}

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
1330719104.0
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.

In [20]:
df

Unnamed: 0,Model,FLOPs,Parameters,Throughput,Margin of Error
0,baseline,1330719000.0,12233232.0,102.421952,0.218486
1,qam,1336346000.0,12233232.0,81.549878,0.43877
2,ham,1343814000.0,12233232.0,63.544629,0.199166
3,cam,1328818000.0,13160479.0,82.972892,0.129246
4,eca,1328962000.0,10356899.0,109.155222,0.607856
5,sesam,1333027000.0,12234482.0,85.150951,0.150591
