<a href="https://colab.research.google.com/github/fuat-arslan/Pytorch_Scratch/blob/main/CV/EfficentNets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://arxiv.org/abs/1905.11946

In [1]:
import torch
import torch.nn as nn
from math import ceil
from collections import namedtuple



Base Network Values

In [12]:
# Per-stage configuration of the baseline network (one entry per MBConv stage).
LAYER = namedtuple(
    'Layer',
    ['expand_ratio', 'channels', 'stride', 'kernel_size', 'num_layers'],
)

bl0 = LAYER(expand_ratio=1, channels=16, stride=1, kernel_size=3, num_layers=1)
bl1 = LAYER(expand_ratio=6, channels=24, stride=2, kernel_size=3, num_layers=2)
bl2 = LAYER(expand_ratio=6, channels=40, stride=2, kernel_size=5, num_layers=2)
bl3 = LAYER(expand_ratio=6, channels=80, stride=2, kernel_size=3, num_layers=3)
bl4 = LAYER(expand_ratio=6, channels=112, stride=1, kernel_size=5, num_layers=3)
bl5 = LAYER(expand_ratio=6, channels=192, stride=2, kernel_size=5, num_layers=4)
bl6 = LAYER(expand_ratio=6, channels=320, stride=1, kernel_size=3, num_layers=1)

# Stages in the order they are stacked when the feature extractor is built.
base = [bl0, bl1, bl2, bl3, bl4, bl5, bl6]

Phi values for scaling

In [11]:
# One preset per model variant: compound-scaling exponent, input resolution,
# and the dropout rate applied before the classifier.
ModelType = namedtuple('ModelType', ['phi_value', 'resolution', 'drop_rate'])

b0 = ModelType(phi_value=0, resolution=224, drop_rate=0.2)
b1 = ModelType(phi_value=0.5, resolution=240, drop_rate=0.2)
b2 = ModelType(phi_value=1, resolution=260, drop_rate=0.3)
b3 = ModelType(phi_value=2, resolution=300, drop_rate=0.3)
b4 = ModelType(phi_value=3, resolution=380, drop_rate=0.4)
b5 = ModelType(phi_value=4, resolution=456, drop_rate=0.4)
b6 = ModelType(phi_value=5, resolution=528, drop_rate=0.5)
b7 = ModelType(phi_value=6, resolution=600, drop_rate=0.5)

In [17]:
class PrimitiveCNN(nn.Module):
    """Basic building block: bias-free Conv2d, then BatchNorm2d, then SiLU.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Padding applied by the convolution.
        groups: Convolution groups (set it to the channel count for a
            depthwise convolution).
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, groups=1):
        super().__init__()
        # The convolution carries no bias because BatchNorm adds its own shift.
        self.cnn = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(num_features=out_channels)
        self.activation = nn.SiLU()  # SiLU (swish) is the activation the paper uses.

    def forward(self, x):
        """Apply convolution, batch normalisation and SiLU, in that order."""
        convolved = self.cnn(x)
        normalised = self.bn(convolved)
        return self.activation(normalised)

In [18]:
class SqueezeExcitation(nn.Module):
    """Squeeze-and-Excitation gate (https://arxiv.org/abs/1709.01507).

    Works like attention over channels: each channel of the input is
    rescaled by a weight in (0, 1) computed from the globally pooled input.

    Args:
        in_channels: Number of channels of the input (and of the output).
        reduced_dim: Number of channels in the bottleneck between the two
            1x1 convolutions.
    """

    def __init__(self, in_channels, reduced_dim):
        super().__init__()
        gate_layers = [
            nn.AdaptiveAvgPool2d(1),                              # C x H x W -> C x 1 x 1
            nn.Conv2d(in_channels, reduced_dim, kernel_size=1),   # squeeze to the bottleneck
            nn.SiLU(),
            nn.Conv2d(reduced_dim, in_channels, kernel_size=1),   # expand back to C channels
            nn.Sigmoid(),                                         # per-channel weight in (0, 1)
        ]
        self.main = nn.Sequential(*gate_layers)

    def forward(self, x):
        """Return ``x`` scaled channel-wise by its attention weights."""
        channel_weights = self.main(x)
        return x * channel_weights

In [8]:
class MBConv(nn.Module):
    """Mobile inverted bottleneck block with squeeze-and-excitation.

    Pipeline: optional 1x1 expansion -> depthwise conv -> squeeze-excitation
    -> 1x1 projection + BatchNorm. When the block keeps both the channel
    count and the resolution, the input is added back as a residual and the
    transformed branch is regularised with stochastic depth.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels of the output feature map.
        kernel_size: Kernel size of the depthwise convolution.
        stride: Stride of the depthwise convolution.
        padding: Padding of the depthwise convolution.
        expand_ratio: Width multiplier of the hidden (expanded) representation.
        reduction: Divisor applied to ``in_channels`` to size the
            squeeze-excitation bottleneck.
        stochastic_depth: Survival probability of the residual branch during
            training; must lie in (0, 1].

    Raises:
        ValueError: If ``stochastic_depth`` is outside (0, 1].
    """

    def __init__(self, in_channels, out_channels, kernel_size,
                 stride, padding, expand_ratio, reduction=2, stochastic_depth=0.8):
        super().__init__()
        # A survival probability of 0 would divide by zero in stochastic_depth_f.
        if not 0 < stochastic_depth <= 1:
            raise ValueError(
                f"stochastic_depth must be in (0, 1], got {stochastic_depth}"
            )
        # Use the caller's value (it used to be overwritten with a literal 0.8).
        self.stochastic_depth = stochastic_depth

        # The skip connection is only shape-compatible when nothing is resized.
        self.residual_avaliable = in_channels == out_channels and stride == 1

        hidden_dim = in_channels * expand_ratio

        self.expandable = in_channels != hidden_dim
        # Keep at least one channel in the squeeze-excitation bottleneck.
        reduced_dim = max(1, int(in_channels / reduction))

        if self.expandable:
            # Pointwise expansion. It must not reuse the block's stride:
            # doing so downsampled twice (here and in the depthwise conv).
            self.expander = PrimitiveCNN(in_channels, hidden_dim,
                                         kernel_size=1, stride=1, padding=0)

        self.main = nn.Sequential(
            PrimitiveCNN(hidden_dim, hidden_dim,
                         kernel_size, stride,
                         padding, groups=hidden_dim),  # Depthwise conv

            SqueezeExcitation(hidden_dim, reduced_dim),
            nn.Conv2d(hidden_dim, out_channels, 1, bias=False),  # Pointwise projection
            nn.BatchNorm2d(out_channels)
        )

    def stochastic_depth_f(self, x):
        """Randomly drop whole samples of ``x`` while training.

        Each sample survives with probability ``self.stochastic_depth``;
        survivors are divided by that probability so the expected value is
        unchanged. In eval mode ``x`` is returned untouched.
        """
        if not self.training:
            return x

        # One keep/drop decision per sample, broadcast over C, H and W.
        keep_mask = torch.rand(x.shape[0], 1, 1, 1, device=x.device) < self.stochastic_depth

        return torch.div(x, self.stochastic_depth) * keep_mask

    def forward(self, X):
        """Run the block on ``X``, adding the residual when shapes allow it."""
        x = self.expander(X) if self.expandable else X

        if self.residual_avaliable:
            return self.stochastic_depth_f(self.main(x)) + X
        return self.main(x)
        

In [13]:
class EfficientNet(nn.Module):
    """EfficientNet classifier assembled from the module-level ``base`` table.

    Args:
        ver_tuple: ``(phi, resolution, drop_rate)`` triple describing the
            model variant (for example one of the ``ModelType`` presets).
        num_classes: Size of the output logits vector.
    """

    def __init__(self, ver_tuple, num_classes):
        super().__init__()

        depth_mult, width_mult, dropout_p = self.param_extractor(ver_tuple)
        head_channels = ceil(1280 * width_mult)

        self.pool = nn.AdaptiveAvgPool2d(1)
        self.features = self.create_features(depth_mult, width_mult, head_channels)
        self.fc = nn.Sequential(
            nn.Dropout(dropout_p),
            nn.Linear(head_channels, num_classes),
        )

    def param_extractor(self, ver_tuple, alpha=1.2, beta=1.1):
        """Turn a variant triple into ``(depth_mult, width_mult, drop_rate)``.

        The depth multiplier is ``alpha ** phi`` and the width multiplier is
        ``beta ** phi``; the resolution entry of the triple is not used here.
        """
        phi, _resolution, drop_rate = ver_tuple
        return alpha ** phi, beta ** phi, drop_rate

    def create_features(self, depth_multiplier, width__multiplier, last_c):
        """Build the convolutional trunk: stem, scaled MBConv stages, 1x1 head."""
        stem_channels = int(32 * width__multiplier)
        blocks = [PrimitiveCNN(3, stem_channels, 3, stride=2, padding=1)]
        prev_channels = stem_channels

        for ratio, stage_channels, stage_stride, ksize, repeats in base:
            # Scale the stage width, then round up to a multiple of 4.
            scaled_channels = 4 * ceil(int(stage_channels * width__multiplier) / 4)
            scaled_repeats = ceil(repeats * depth_multiplier)

            for idx in range(scaled_repeats):
                # Only the first block of a stage uses the stage's stride.
                block_stride = stage_stride if idx == 0 else 1
                blocks.append(
                    MBConv(
                        prev_channels,
                        scaled_channels,
                        expand_ratio=ratio,
                        stride=block_stride,
                        kernel_size=ksize,
                        padding=ksize // 2,
                    )
                )
                prev_channels = scaled_channels

        blocks.append(
            PrimitiveCNN(prev_channels, last_c, kernel_size=1, stride=1, padding=0)
        )

        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return the class logits for the image batch ``x``."""
        pooled = self.pool(self.features(x))
        flattened = pooled.view(pooled.shape[0], -1)
        return self.fc(flattened)
                

In [14]:
def test():
    """Smoke-test EfficientNet by printing the logits shape for a random batch.

    Builds the ``b0`` variant, feeds it a random batch at that variant's
    input resolution and prints the output shape, which should be
    ``(num_ex, num_class)``. Runs on CUDA when available, otherwise on CPU.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    version_tuple = b0
    _, res, _ = version_tuple
    num_ex = 4
    num_class = 10
    # Allocate the batch on the model's device; a CPU batch fed to a CUDA
    # model raises a device-mismatch error.
    x = torch.randn((num_ex, 3, res, res), device=device)
    model = EfficientNet(version_tuple, num_class).to(device)
    print(model(x).shape)

In [19]:
# Run the smoke test; it prints the shape of the model output.
test()

torch.Size([4, 10])
