# **MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications**

Howard, A. G., Zhu, M., Chen, B., Kalenichenko, D., Wang, W., Weyand, T., ... & Adam, H. (2017). Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861.

In [None]:
import tensorflow as tf
tf.__version__

'2.4.1'

In [None]:
def DepthwiseSeparableConv2D_BN_ReLU(
    x, 
    i,                     ## just for naming
    down_sampling = False,
    up_filtering  = False,
):
    """Apply one depthwise separable convolution block to `x`.

    The block is a 3x3 depthwise convolution followed by a 1x1 pointwise
    convolution; each of the two is followed by batch normalization and ReLU.

    Args:
        x: Input Keras tensor; its last axis is read as the channel count.
        i: Index used only to build the layer names
            (`Conv_DW_3x3_{i}` and `Conv_1x1_{i}`).
        down_sampling: If True, the depthwise convolution uses stride 2,
            otherwise stride 1.
        up_filtering: If True, the pointwise convolution outputs twice the
            input channel count, otherwise the channel count is kept.

    Returns:
        The output tensor of the final ReLU.
    """
    ## Depthwise step: one 3x3 filter per input channel. Only this step changes
    ## the spatial resolution (through its stride). The dedicated
    ## DepthwiseConv2D layer is used instead of a grouped Conv2D
    ## (groups == channels), which is not supported by every backend.
    x = tf.keras.layers.DepthwiseConv2D(
        kernel_size = 3, 
        strides = 2 if down_sampling else 1, 
        padding = "same", 
        depth_multiplier = 1,
        name = f"Conv_DW_3x3_{i}")(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.ReLU()(x)

    ## Pointwise step: a 1x1 convolution that mixes channels and, when
    ## up_filtering is set, doubles the number of channels.
    out_channels = x.shape[-1] * (2 if up_filtering else 1)
    x = tf.keras.layers.Conv2D(
        out_channels,
        kernel_size = 1, 
        strides = 1, 
        padding = "same",
        name = f"Conv_1x1_{i}")(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.ReLU()(x)

    return x

In [None]:
IMAGE_SIZE = [224, 224, 3] ## Cropped ImageNet test size.

def MobileNet_224(
    model_name  = "MobileNet",
    input_shape = IMAGE_SIZE,
    num_classes = 1_000,
    alpha       = 1., ## Width Multiplier; typical settings: [1, 0.75, 0.5, 0.25]
):
    """Build a MobileNet classifier as a `tf.keras.Model`.

    Args:
        model_name: Prefix of the resulting model's name.
        input_shape: `[height, width, channels]` of the input images; height
            and width must be equal.
        num_classes: Number of units of the final fully connected layer.
        alpha: Width multiplier. The stem uses `int(32 * alpha)` filters and
            every later channel count is derived from it by doubling.

    Returns:
        A `tf.keras.Model` named `f"{model_name}_{alpha:.2f}_{rho:.2f}"`, where
        `rho` is the input side length divided by `IMAGE_SIZE[0]`. Its output
        is the softmax over `num_classes` with shape `(batch, num_classes)`.

    Raises:
        AssertionError: If `input_shape` is not square.
    """
    assert input_shape[0] == input_shape[1], \
        "input_shape must be square (height == width)"
    ## Resolution multiplier relative to the base 224x224 input; only used in
    ## the model name.
    rho = input_shape[0] / IMAGE_SIZE[0]

    x = model_input = tf.keras.layers.Input(shape = input_shape, dtype = tf.dtypes.float32)

    ## Entry flow: full 3x3 convolution followed, like every other
    ## convolution in the network, by batchnorm and ReLU.
    original_filter_size = int(32 * alpha)
    x = tf.keras.layers.Conv2D(original_filter_size, 3, strides = 2,
                               padding = "same", name = "Conv_3x3")(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.ReLU()(x)

    ## Per-block schedule for the 13 depthwise separable blocks: whether the
    ## block halves the resolution and whether it doubles the channel count.
    down_samplings = [False] + [True, False] * 2 + [True] + [False] * 5 + [True, False]
    up_filterings  = [True]  + [True, False] * 2 + [True] + [False] * 5 + [True, False]

    ## Middle flow.
    for i, (down_sampling, up_filtering) in enumerate(zip(down_samplings, up_filterings)):
        x = DepthwiseSeparableConv2D_BN_ReLU(x, i, down_sampling, up_filtering)

    ## Exit flow. Global average pooling collapses the spatial axes, so the
    ## classifier receives (batch, channels) and the model outputs
    ## (batch, num_classes) rather than (batch, 1, 1, num_classes).
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(num_classes, name = "Fully_Connected")(x)

    model_output = x = tf.keras.layers.Softmax()(x)

    return tf.keras.Model(
        inputs = model_input,
        outputs = model_output,
        name = f"{model_name}_{alpha:.2f}_{rho:.2f}")

In [None]:
## Build a reduced variant (width multiplier 0.75, 160x160 input) and list
## its layers.
tmp = MobileNet_224(
    model_name = "tmp",
    input_shape = [160, 160, 3],
    alpha = .75,
)
tmp.summary()

Model: "tmp_0.75_0.71"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_11 (InputLayer)        [(None, 160, 160, 3)]     0         
_________________________________________________________________
Conv_3x3 (Conv2D)            (None, 80, 80, 24)        672       
_________________________________________________________________
Conv_DW_3x3_0 (Conv2D)       (None, 80, 80, 24)        240       
_________________________________________________________________
batch_normalization_234 (Bat (None, 80, 80, 24)        96        
_________________________________________________________________
re_lu_234 (ReLU)             (None, 80, 80, 24)        0         
_________________________________________________________________
Conv_1x1_0 (Conv2D)          (None, 80, 80, 48)        1200      
_________________________________________________________________
batch_normalization_235 (Bat (None, 80, 80, 48)      

In [None]:
import collections

## Parameter count per layer family, keyed by the name fragment that
## identifies the family. Layers whose name contains none of the keys
## (e.g. the auto-named batch normalization layers) are not counted.
param_count_dict = collections.OrderedDict(
    (family, 0)
    for family in ("Conv_1x1", "Conv_DW_3x3", "Conv_3x3", "Fully_Connected"))

for layer in tmp.layers:
    for key in param_count_dict:
        if key in layer.name:
            param_count_dict[key] += layer.count_params()
            ## A layer belongs to a single family: stop at the first match.
            break

## Turn the raw counts into fractions of the counted total.
total = sum(param_count_dict.values())
param_count_dict = collections.OrderedDict(
    (key, count / total) for key, count in param_count_dict.items())

## Report each family's share as a percentage.
for key, value in param_count_dict.items():
    print(f"{key}: {value * 100:.2f}")

Conv_1x1: 74.52
Conv_DW_3x3: 1.18
Conv_3x3: 0.02
Fully_Connected: 24.28


In [None]:
## Remove the name bound to the temporary model built above.
del tmp