In [None]:
import torch
import torch.nn as nn
import torchvision.ops as ops
from math import ceil
from torchsummary import summary
from torch.utils.tensorboard import SummaryWriter

In [None]:
class conv_block(nn.Module):
    """Convolution followed by batch normalisation and a SiLU activation.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the convolution.
        kernel_size: convolution kernel size.
        padding: padding handed to the convolution.
        stride: stride handed to the convolution.
        groups: number of convolution groups (defaults to 1).

    Note the argument order here is ``kernel_size, padding, stride`` while
    ``nn.Conv2d`` takes ``kernel_size, stride, padding``; keywords are used
    below so the two cannot be mixed up.
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding, stride, groups=1):
        super().__init__()
        # The convolution carries no bias term; BatchNorm2d is applied right after it.
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.silu = nn.SiLU()

    def forward(self, x):
        """Apply conv -> batch norm -> SiLU to ``x`` and return the result."""
        convolved = self.conv(x)
        normalised = self.bn(convolved)
        return self.silu(normalised)

In [None]:
class SqueezeEndExcitation(nn.Module):
    """Channel gating: rescale each channel of the input by a learned weight.

    The input is average-pooled to 1x1 per channel, passed through a 1x1
    convolution down to ``reduced_dim`` channels, a ReLU, a 1x1 convolution
    back up to ``in_channels`` and a sigmoid. The resulting per-channel
    values multiply the original input.

    Args:
        in_channels: channels of the input (and of the output).
        reduced_dim: channels of the bottleneck between the two 1x1 convs.
    """

    def __init__(self, in_channels, reduced_dim):
        super().__init__()
        # Layers are created in the same order they run in.
        squeeze = nn.AdaptiveAvgPool2d(1)
        reduce = nn.Conv2d(in_channels, reduced_dim, kernel_size=1)
        activation = nn.ReLU()
        restore = nn.Conv2d(reduced_dim, in_channels, kernel_size=1)
        gate = nn.Sigmoid()
        self.se = nn.Sequential(squeeze, reduce, activation, restore, gate)

    def forward(self, x):
        """Return ``x`` multiplied by its per-channel gate values."""
        channel_weights = self.se(x)
        return x * channel_weights

In [None]:
class InvertedResidualBlock(nn.Module):
    """Expand -> depthwise conv -> squeeze-excitation -> 1x1 projection block.

    A skip connection is added when the block keeps both the channel count
    and the spatial size (``in_channels == out_channels`` and ``stride == 1``).
    On that path the residual branch goes through stochastic depth.

    Args:
        in_channels: channels of the block input.
        out_channels: channels of the block output.
        kernel_size: kernel size of the depthwise convolution.
        stride: stride of the depthwise convolution.
        padding: padding of the depthwise convolution.
        expand_ratio: multiplier applied to ``in_channels`` to obtain the
            width of the expanded (hidden) representation.
        reduction: divisor applied to ``in_channels`` to obtain the
            squeeze-excitation bottleneck width.
        survival_prob: probability that the residual branch is KEPT during
            training; the drop probability handed to
            ``ops.stochastic_depth`` is ``1 - survival_prob``.

    Raises:
        ValueError: if ``survival_prob`` is outside ``[0, 1]``.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride,
        padding,
        expand_ratio,
        reduction=4,
        survival_prob=0.8
    ):
        super(InvertedResidualBlock, self).__init__()
        if not 0.0 <= survival_prob <= 1.0:
            raise ValueError(
                f"survival_prob must be in [0, 1], got {survival_prob}"
            )

        hidden_dim = in_channels * expand_ratio
        # Clamp to 1 so a very narrow input never yields a zero-channel bottleneck.
        reduced_dim = max(1, int(in_channels / reduction))
        self.use_residual = in_channels == out_channels and stride == 1
        self.survival_prob = survival_prob

        # NOTE: the expansion is a 3x3 conv and is applied for every
        # expand_ratio (including 1); kept as-is so existing weights still load.
        self.expand_conv = conv_block(in_channels, hidden_dim, kernel_size=3, padding=1, stride=1)
        self.conv = nn.Sequential(
            # Depthwise convolution: one group per hidden channel.
            conv_block(
                hidden_dim,
                hidden_dim,
                kernel_size,
                padding,
                stride,
                groups=hidden_dim
            ),
            SqueezeEndExcitation(hidden_dim, reduced_dim),
            # Projection back to out_channels; no activation after the BatchNorm.
            nn.Conv2d(hidden_dim, out_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels)
        )

    def forward(self, inputs):
        """Run the block on ``inputs`` and return the resulting feature map."""
        x = self.conv(self.expand_conv(inputs))

        if not self.use_residual:
            return x

        # torchvision's `p` is the probability of ZEROING the branch, and its
        # `training` flag defaults to True, so both must be passed explicitly:
        # otherwise the branch would be dropped with probability 0.8 and would
        # keep being dropped in eval mode.
        x = ops.stochastic_depth(
            x,
            p=1.0 - self.survival_prob,
            mode='batch',
            training=self.training,
        )
        return x + inputs



In [None]:
# Baseline stage layout, one row per stage. Each row is
#   [expand_ratio, channels, repeats, stride, kernel_size]
# `channels` and `repeats` are the unscaled values; EfficientNet scales them
# by the width and depth factors of the chosen version.
base_model = [
    [1, 16, 1, 1, 3],
    [6, 24, 2, 2, 3],
    [6, 40, 2, 2, 5],
    [6, 80, 3, 2, 3],
    [6, 112, 3, 1, 5],
    [6, 192, 4, 2, 5],
    [6, 320, 1, 1, 3],
]

# Per-version settings: version -> (phi_value, resolution, drop_rate).
# phi is the compound-scaling exponent (depth = alpha ** phi, width = beta ** phi),
# resolution is the square input size, drop_rate is the classifier dropout.
phi_values = dict(
    b0=(0, 224, 0.2),
    b1=(0.5, 240, 0.2),
    b2=(1, 260, 0.3),
    b3=(2, 300, 0.3),
    b4=(3, 380, 0.4),
    b5=(4, 456, 0.4),
    b6=(5, 528, 0.5),
    b7=(6, 600, 0.5),
)

class EfficientNet(nn.Module):
    """EfficientNet classifier assembled from ``base_model`` and ``phi_values``.

    Args:
        version: key into ``phi_values`` (e.g. ``"b0"``) selecting the
            scaling exponent and the classifier dropout rate.
        num_classes: size of the output of the final linear layer.
    """

    def __init__(self, version, num_classes):
        super().__init__()
        depth_factor, width_factor, dropout_rate = self.calculate_factors(version)
        last_channels = ceil(1280 * width_factor)

        # Attributes are assigned in this order on purpose: it fixes the order
        # in which sub-modules are registered and their weights initialised.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.features_extractor = self.create_features(depth_factor, width_factor, last_channels)
        head_layers = [
            nn.Dropout(dropout_rate),
            nn.Linear(last_channels, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def calculate_factors(self, version, alpha=1.2, beta=1.1):
        """Return ``(depth_factor, width_factor, dropout_rate)`` for ``version``.

        ``depth_factor`` is ``alpha ** phi`` and ``width_factor`` is
        ``beta ** phi``; the resolution stored in ``phi_values`` is not used here.
        """
        phi, _resolution, dropout_rate = phi_values[version]
        return alpha ** phi, beta ** phi, dropout_rate

    def create_features(self, depth_factor, width_factor, last_channels):
        """Build the convolutional backbone as a single ``nn.Sequential``.

        The backbone is a stride-2 stem conv, then every stage of
        ``base_model`` with its channels scaled by ``width_factor`` and its
        repeat count scaled by ``depth_factor`` (both rounded up), then a 1x1
        conv to ``last_channels``.
        """
        stem_channels = ceil(32 * width_factor)
        layers = [conv_block(3, stem_channels, kernel_size=3, padding=1, stride=2)]
        prev_channels = stem_channels

        for expand_ratio, base_channels, base_repeats, stage_stride, kernel_size in base_model:
            stage_channels = ceil(base_channels * width_factor)
            num_blocks = ceil(base_repeats * depth_factor)

            for block_idx in range(num_blocks):
                # Only the first block of a stage uses the stage stride.
                block_stride = stage_stride if block_idx == 0 else 1
                layers.append(
                    InvertedResidualBlock(
                        prev_channels,
                        stage_channels,
                        kernel_size,
                        stride=block_stride,
                        padding=kernel_size // 2,
                        expand_ratio=expand_ratio,
                    )
                )
                prev_channels = stage_channels

        layers.append(
            conv_block(prev_channels, last_channels, kernel_size=1, padding=0, stride=1)
        )
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the classifier output for the input batch ``x``."""
        pooled = self.avgpool(self.features_extractor(x))
        # Collapse everything after the batch dimension into one axis.
        flattened = pooled.reshape(pooled.shape[0], -1)
        return self.classifier(flattened)


In [None]:

# Smoke test: build one EfficientNet variant, log its graph to TensorBoard,
# print a layer summary and check the output shape on a random batch.
device = "cuda" if torch.cuda.is_available() else "cpu"
version = "b7"
phi, res, drop_rate = phi_values[version]
num_examples, num_classes = 4, 10

# Allocate the random batch directly on the target device (no host copy).
x = torch.randn((num_examples, 3, res, res), device=device)
model = EfficientNet(
    version=version,
    num_classes=num_classes,
).to(device)

# Inference mode for the checks below: dropout is disabled and BatchNorm
# uses (and does not update) its running statistics.
model.eval()

# The context manager closes the writer, flushing the event file to disk.
with SummaryWriter() as writer:
    writer.add_graph(model, x)

summary(model, (3, res, res))

# Only the shape is needed, so skip building the autograd graph.
with torch.no_grad():
    print(model(x).shape)  # (num_examples, num_classes)

In [None]:
# Drop this notebook's reference to the network, then ask PyTorch to release
# the CUDA memory it is caching (see the torch.cuda.empty_cache documentation).
# NOTE(review): the input batch `x` created in the previous cell is still
# referenced, so its device memory is not freed here — confirm that is intended.
# NOTE(review): re-running this cell raises NameError because `model` is gone.
del model
torch.cuda.empty_cache()