# MobileNetV2: Inverted Residuals and Linear Bottlenecks

### Imports

In [1]:
import tensorflow as tf

2025-03-17 14:25:55.724435: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Define the Model

#### Inverted Residual Block

<div align="center">

  <img alt="Bottleneck Block" src="./assets/bottleneck_block.png" width=800 height=250/>
  <br/>
  <figcaption>Figure 1: Bottleneck Block</figcaption>

</div>

In [5]:
def inverted_residual_block(inputs, expansion, n_filters, strides):
    """
    Inverted Residual Block

    Widens the input with a 1x1 convolution, filters it with a 3x3 depth-wise
    convolution and projects it to `n_filters` channels with a 1x1 convolution
    that has no activation (linear bottleneck). When `strides == 1` and the
    input already has `n_filters` channels, the block input is added to the
    projection output (residual shortcut).

    Args:
        inputs (tensor): input tensor; the channel count is read from its last axis
        expansion (int): expand ratio
        n_filters (int): number of filters
        strides (int): strides

    Returns:
        tensor: block output with `n_filters` channels in its last axis
    """
    in_channels = inputs.shape[-1]
    expanded_channels = in_channels * expansion

    # expansion layer (1x1 conv)
    x = tf.keras.layers.Conv2D(expanded_channels, kernel_size=1, padding="same", use_bias=False)(
        inputs
    )
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.ReLU(6.0)(x)

    # depth-wise convolution (3x3)
    x = tf.keras.layers.DepthwiseConv2D(
        kernel_size=(3, 3), padding="same", strides=strides, use_bias=False
    )(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.ReLU(6.0)(x)

    # linear projection (1x1 conv) -- deliberately no activation after the batch norm
    x = tf.keras.layers.Conv2D(n_filters, kernel_size=(1, 1), padding="same", use_bias=False)(x)
    x = tf.keras.layers.BatchNormalization()(x)

    # residual connection only if stride==1 and input/output channels match.
    # The shortcut has to carry the block *input*; adding `x` to itself would
    # merely double the projection output.
    if strides == 1 and in_channels == n_filters:
        x = tf.keras.layers.Add()([inputs, x])

    return x


### MobileNetV2

In [3]:
def mobilenet_v2(input_shape, n_classes):
    """
    Build the MobileNetV2 classification network.

    The network is a strided 3x3 stem convolution, a sequence of inverted
    residual stages, a 1x1 convolution to 1280 channels, global average
    pooling and a softmax classifier.

    Args:
        input_shape (tuple): input tensor shape (H, W, C)
        n_classes: number of classes

    Returns:
        tf.keras.Model: model named "MobileNetV2" whose output has `n_classes`
        softmax probabilities per input image
    """
    inputs = tf.keras.layers.Input(shape=input_shape)

    # initial convolutional layer
    x = tf.keras.layers.Conv2D(
        32, kernel_size=(3, 3), padding="same", strides=(2, 2), use_bias=False
    )(inputs)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.ReLU(6.0)(x)

    # bottleneck stages (expansion, n_filters, n_repeats, strides), following
    # Table 2 of the MobileNetV2 paper
    config = [
        (1, 16, 1, 1),
        (6, 24, 2, 2),
        (6, 32, 3, 2),
        (6, 64, 4, 2),
        (6, 96, 3, 1),
        (6, 160, 3, 2),
        (6, 320, 1, 1),
    ]

    for expansion, n_filters, n_repeats, strides in config:
        for block_idx in range(n_repeats):
            # Only the first block of a stage applies the stage stride; the
            # repeated blocks keep resolution and channel count, which is what
            # lets their residual shortcut apply.
            block_strides = strides if block_idx == 0 else 1
            x = inverted_residual_block(
                x, expansion=expansion, n_filters=n_filters, strides=block_strides
            )

    # final convolutional layer
    x = tf.keras.layers.Conv2D(1280, kernel_size=(1, 1), padding="same", use_bias=False)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.ReLU(6.0)(x)

    # global average pooling and fc.
    # Global pooling does not hard-code the 7x7 feature map that only a
    # 224x224 input produces, so other input sizes work as well; for 224x224
    # it yields the same values as a 7x7 average pool followed by a flatten.
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(n_classes, activation="softmax")(x)

    model = tf.keras.Model(inputs=inputs, outputs=x, name="MobileNetV2")

    return model

#### Model Summary

In [6]:
# Instantiate the network for 224x224 RGB inputs with a 100-way classifier,
# then print its layer-by-layer summary.
model = mobilenet_v2(
    n_classes=100,
    input_shape=(224, 224, 3),
)
model.summary()