# MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications

### Imports

In [1]:
import tensorflow as tf
from sklearn.model_selection import train_test_split

2025-03-16 14:29:26.175679: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Prepare the Dataset

In [2]:
# Load the CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Normalize images to the [0, 1] range. Cast to float32 first: dividing the raw
# integer pixel arrays by a Python float would otherwise promote them to float64
# and double the memory footprint for no benefit.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# One-hot encode labels (10 classes)
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Hold out 20% of the training data for validation; the fixed seed keeps the split reproducible
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42)

In [3]:
def preprocess_dataset(image, label, target_size=(224, 224)):
    """
    Resize an image to the requested spatial size; the label passes through.

    Args:
        image (tensor): image tensor accepted by `tf.image.resize`
        label (tensor): label associated with `image`, returned unchanged
        target_size (tuple): output size as (height, width), defaults to (224, 224)

    Returns:
        tuple: (resized image, label)
    """
    image = tf.image.resize(image, target_size)
    return image, label

In [None]:
BATCH_SIZE = 64
SHUFFLE_BUFFER_SIZE = 1000

train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# Shuffle before resizing so the shuffle buffer holds the small source images
# rather than the much larger 224x224 ones produced by preprocess_dataset.
train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE, reshuffle_each_iteration=True)
train_dataset = train_dataset.map(preprocess_dataset, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.batch(BATCH_SIZE)
# The dataset repeats indefinitely, so model.fit must be given steps_per_epoch.
train_dataset = train_dataset.repeat()
# AUTOTUNE lets tf.data choose the prefetch depth (it is the named form of -1).
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

In [None]:
validation_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
validation_dataset = validation_dataset.map(preprocess_dataset, num_parallel_calls=tf.data.AUTOTUNE)
validation_dataset = validation_dataset.batch(BATCH_SIZE)
# The dataset repeats indefinitely, so model.fit must be given validation_steps.
validation_dataset = validation_dataset.repeat()
# Overlap input preparation with model execution, as the training pipeline does.
validation_dataset = validation_dataset.prefetch(tf.data.AUTOTUNE)

### MobileNetV1

#### Depth-wise Separable Convolution


<div align="center">

  <img alt="Depthwise conv" src="./assets/depthwise_conv.png" width=800 height=250/>
  <br/>
  <figcaption>Figure 1: Depth-wise Separable Convolution</figcaption>

</div>

In [1]:
def depthwise_separable_conv(inputs, filters, alpha, s=1):
    """
    Depth-wise separable convolution block: a 3x3 depth-wise convolution followed
    by a 1x1 point-wise convolution, each with batch normalization and ReLU.

    Args:
        inputs (tensor): input tensor
        filters (int): number of output channels before width scaling
        alpha (float): width multiplier; the block outputs int(filters * alpha) channels
        s (int): stride of the depth-wise convolution (applied to both spatial dims)

    Returns:
        tensor: output of the point-wise convolution after batch norm and ReLU
    """
    # depth-wise: one 3x3 filter per input channel.
    # The bias is dropped because the batch normalization that follows has its
    # own learnable offset, which makes a convolution bias redundant.
    x = tf.keras.layers.DepthwiseConv2D(
        (3, 3), padding="same", strides=(s, s), use_bias=False
    )(inputs)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)

    # point-wise: 1x1 convolution that mixes channels and sets the output width
    x = tf.keras.layers.Conv2D(
        int(filters * alpha), kernel_size=(1, 1), padding="same", use_bias=False
    )(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)

    return x

#### MobileNetV1 Model

In [6]:
def mobilenet_v1(input_shape, n_classes, alpha=1.0, rho=1.0):
    """
    Build the MobileNetV1 classifier as a Keras functional model.

    Args:
        input_shape (tuple): input shape (H, W, C)
        n_classes (int): number of classes
        alpha (float): width multiplier (scales channels)
        rho (float): resolution factor (scales input size)

    Returns:
        tf.keras.Model: model named "MobileNetV1" with a softmax output of size n_classes
    """

    # scale input shape by rho
    input_shape = (int(input_shape[0] * rho), int(input_shape[1] * rho), input_shape[2])
    inputs = tf.keras.layers.Input(shape=input_shape)

    # initial normal convolution (no bias: the following batch norm supplies the offset)
    x = tf.keras.layers.Conv2D(
        int(32 * alpha), (3, 3), strides=(2, 2), padding="same", use_bias=False
    )(inputs)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)

    # depth-wise separable blocks, listed as (filters, stride) in network order
    block_specs = (
        [(64, 1), (128, 2), (128, 1), (256, 2), (256, 1), (512, 2)]
        + [(512, 1)] * 5
        + [(1024, 2), (1024, 1)]
    )
    for filters, stride in block_specs:
        x = depthwise_separable_conv(x, filters, alpha, s=stride)

    # global average pooling and fc
    # GlobalAveragePooling2D averages over whatever spatial size remains, so the
    # model also builds when rho shrinks the final feature map below 7x7 (a fixed
    # 7x7 pool is only "global" for 224x224 inputs). Its output is already
    # (batch, channels), so no Flatten is needed.
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(n_classes, activation="softmax")(x)

    model = tf.keras.Model(inputs=inputs, outputs=x, name="MobileNetV1")

    return model

In [7]:
# Baseline configuration: unscaled width and resolution, 224x224 three-channel input,
# ten output classes.
n_classes = 10
input_shape = (224, 224, 3)

model = mobilenet_v1(
    input_shape=input_shape,
    n_classes=n_classes,
    alpha=1.0,
    rho=1.0,
)
model.summary()

### Compile the Model

In [8]:
model.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
    # The labels are one-hot encoded with to_categorical, so the non-sparse loss
    # is required; the sparse variant expects integer class ids instead.
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

### Train the Model

In [None]:
# Number of passes over the training split; adjust to the available compute budget.
EPOCHS = 10

# Both datasets repeat indefinitely, so the number of batches per pass has to be
# given explicitly. Round up so the final partial batch is included.
steps_per_epoch = (len(x_train) + BATCH_SIZE - 1) // BATCH_SIZE
validation_steps = (len(x_val) + BATCH_SIZE - 1) // BATCH_SIZE

history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_dataset,
    validation_steps=validation_steps,
)