<a href="https://colab.research.google.com/github/aju22/EfficientNet/blob/main/EfficientNet_from_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import tensorflow as tf
import tensorflow.keras.layers as layers
import math

# Model Architecture

EfficientNet is a convolutional neural network architecture and scaling method that uniformly scales depth, width, and resolution using a single compound coefficient. Unlike conventional practice, which scales these factors arbitrarily, the EfficientNet scaling method scales network width, depth, and resolution together with a set of fixed scaling coefficients.

![](https://production-media.paperswithcode.com/methods/Screen_Shot_2020-06-06_at_10.45.54_PM.png)

In [13]:
# Baseline (B0) stage table, one row per stage of inverted residual blocks.
# Columns: expand_ratio, channels, repeats, stride, kernel_size
base_model = [
    [1,  16, 1, 1, 3],
    [6,  24, 2, 2, 3],
    [6,  40, 2, 2, 5],
    [6,  80, 3, 2, 3],
    [6, 112, 3, 1, 5],
    [6, 192, 4, 2, 5],
    [6, 320, 1, 1, 3],
]

# Per-variant settings, keyed by version name.
# Each value is (phi_value, resolution, drop_rate):
#   phi_value  - compound coefficient; EfficientNet.calculate_factors turns it
#                into depth = alpha ** phi and width = beta ** phi
#   resolution - input image side length for that variant
#   drop_rate  - dropout rate used by the classifier
phi_values = dict(
    b0=(0, 224, 0.2),
    b1=(0.5, 240, 0.2),
    b2=(1, 260, 0.3),
    b3=(2, 300, 0.3),
    b4=(3, 380, 0.4),
    b5=(4, 456, 0.4),
    b6=(5, 528, 0.5),
    b7=(6, 600, 0.5),
)

In [14]:
class CNNBlock(layers.Layer):
  """Convolution followed by batch normalisation and a SiLU activation.

  Args:
    out_channels: Number of filters produced by the convolution.
    groups: Number of channel groups for the convolution. 1 (the default)
      is an ordinary convolution.
    **kwargs: Forwarded unchanged to `layers.Conv2D` (for example
      `kernel_size`, `strides`, `padding`).
  """

  def __init__(self, out_channels, groups = 1, **kwargs):
    super().__init__()

    # No convolution bias: the batch norm that follows adds its own offset.
    self.conv = layers.Conv2D(out_channels, use_bias = False, groups = groups, **kwargs)
    self.bn = layers.BatchNormalization()
    self.silu = layers.Activation(tf.nn.silu)

  def call(self, x, training = False):
    """Apply conv -> batch norm -> SiLU to `x`.

    Args:
      x: Input feature map.
      training: Whether the layer runs in training mode. It selects between
        batch statistics and moving averages in the batch normalisation.

    Returns:
      The activated feature map.
    """
    x = self.conv(x)
    # Batch norm is the only sub-layer here whose behaviour depends on the
    # mode, so the flag is handed to it explicitly instead of relying on
    # implicit propagation.
    x = self.bn(x, training = training)

    return self.silu(x)

## Squeeze-Excitation Mechanism

A squeeze-and-excitation block lets the network model the dependencies between channels using global information pooled from the whole feature map. It uses this information to recalibrate (re-weight) the filter outputs channel by channel, which leads to performance gains.

![](https://i.ytimg.com/vi/FUiUfD7bdqw/maxresdefault.jpg)

In [15]:
class SqueezeExcitation(layers.Layer):
  """Channel gate: rescales every channel of the input by a weight in (0, 1).

  Args:
    in_channels: Number of channels of the gate output; it must match the
      channel count of the input for the multiplication in `call`.
    reduced_dim: Number of channels of the intermediate 1x1 convolution.
  """

  def __init__(self, in_channels, reduced_dim):
    super().__init__()

    # Squeeze: average over the spatial axes, keeping them as size-1 dims so
    # the result still broadcasts against the input.
    squeeze = layers.GlobalAveragePooling2D(data_format='channels_last', keepdims = True)
    # Excite: 1x1 conv to reduced_dim, SiLU, 1x1 conv back to in_channels,
    # then a sigmoid to obtain the per-channel weights.
    bottleneck = layers.Conv2D(reduced_dim, kernel_size= 1, padding ='same')
    bottleneck_act = layers.Activation(tf.nn.silu)
    restore = layers.Conv2D(in_channels, kernel_size= 1, padding = 'same')
    gate_act = layers.Activation(tf.keras.activations.sigmoid)

    self.SE = tf.keras.Sequential([squeeze, bottleneck, bottleneck_act, restore, gate_act])

  def call(self, x):
    """Return `x` multiplied by its per-channel gate."""
    gate = self.SE(x)
    return x * gate

## Inverted Residual Blocks

An Inverted Residual Block, sometimes called an MBConv Block, is a type of residual block used for image models that uses an inverted structure for efficiency reasons. It was originally proposed for the MobileNetV2 CNN architecture. It has since been reused for several mobile-optimized CNNs.

![](https://miro.medium.com/max/571/1*hq62bMEMgvBgU_f8cW7jbw.png)

In [16]:
class InvertedResidualBlock(layers.Layer):
  def __init__(self, in_channels, out_channels, expand_ratio, **kwargs):
    super().__init__()

    self.survival_prob = 0.8 #survival_prob
    self.use_residual = ((in_channels == out_channels) and (kwargs['strides'] == 1))
    

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.strides = kwargs['strides']


    hidden_dim = in_channels*expand_ratio

    reduced_dim = int(in_channels / 4) #reduction

    self.conv = tf.keras.Sequential([
                                     CNNBlock(hidden_dim, groups = hidden_dim, **kwargs),
                                     SqueezeExcitation(hidden_dim, reduced_dim),
                                     layers.Conv2D(out_channels, strides = 1, kernel_size = 1, padding='same', use_bias = False),
                                     layers.BatchNormalization()                                    
                                    ])
    


  def stochastic_depth(self, x, training):
    
    if not training:
      return x

    binary_tensor = tf.where((tf.random.uniform((x.shape[0], 1, 1, 1)) < self.survival_prob), 0, 1)
    return x * binary_tensor

  def call(self, x, training = False):

    if self.use_residual:

      return self.stochastic_depth(self.conv(x), training = training) + x
    
    else:

      return self.conv(x)

In [17]:
class EfficientNet(tf.keras.Model):
  def __init__(self, version, num_classes):

    super().__init__()
    width_factor, depth_factor, dropout_rate = self.calculate_factors(version)
    last_channels = math.ceil(1280*width_factor)
    self.features = self.create_features(width_factor, depth_factor, last_channels)

    self.pool = layers.GlobalAveragePooling2D(data_format='channels_last', keepdims = True)
    self.flatten = layers.Flatten()
    
    self.classifier = tf.keras.Sequential([
                                           layers.Dropout(dropout_rate),
                                           layers.Dense(num_classes, activation = tf.keras.activations.sigmoid)
                                          ])


  def calculate_factors(self, version, alpha = 1.2, beta = 1.1):

    phi, res, drop_rate = phi_values[version]
    
    depth_factor = alpha**phi
    width_factor = beta**phi

    return width_factor, depth_factor, drop_rate


  def create_features(self, width_factor, depth_factor, last_channels):
    channels = int(32*width_factor)
    features = [CNNBlock(channels, kernel_size = 3, strides = 2, padding ='same')]
    in_channels = channels

    for expand_ratio, channels, repeats, stride, kernel_size in base_model:

      out_channels = 4 * math.ceil(int(channels*width_factor)/4)
      layer_repeats = math.ceil(repeats*depth_factor)

      for layer in range(layer_repeats):

        features.append(
            InvertedResidualBlock(in_channels, 
                                  out_channels, 
                                  expand_ratio = expand_ratio,
                                  strides = stride if layer == 0 else 1, 
                                  kernel_size = kernel_size, padding = 'same' )
                                 )
        
        in_channels = out_channels
      
      features.append(
          CNNBlock(last_channels, kernel_size = 1, strides = 1, padding ='same')
      )

      return tf.keras.Sequential(features)

  def call(self, x):

    x = self.pool(self.features(x))

    x = self.flatten(x)

    x = self.classifier(x)

    return x    

# Building and Testing the Model

In [18]:
version = 'b0'
num_classes = 10

In [19]:
phi, res, drop_rate = phi_values[version]

In [20]:
model = EfficientNet(
    version = version,
    num_classes = num_classes
)

In [21]:
x = tf.random.normal((3, res, res, 3))

In [22]:
print(model(x).shape)

(3, 10)
