In [1]:
##Import the necessary libraries
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import (
    Input,
    DepthwiseConv2D,
    Conv2D,
    BatchNormalization,
    ReLU,
    GlobalAveragePooling2D,
    Flatten,
    Dense,
    Dropout,
    Activation,
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model

from keras import backend as K

from keras.models import Model
from keras.layers import (
    DepthwiseConv2D,
    Conv2D,
    BatchNormalization,
    AveragePooling2D,
    Dense,
    Activation,
    Flatten,
    Reshape,
    Add,
    Dropout,
    Input,
)
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
from tensorflow.keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
import numpy as np


def depthwise_block(inputs, stride, kernel_sz=(3, 3), pad="same"):
    """
    Depthwise convolution stage: DepthwiseConv2D -> BatchNormalization -> ReLU.

    Inputs:
        inputs:    tensor fed to the depthwise convolution
        stride:    strides of the kernel, forwarded to DepthwiseConv2D
        kernel_sz: size of the filtering kernel (defaults to (3, 3))
        pad:       padding mode forwarded to DepthwiseConv2D (defaults to "same")
    Return:
        the activated output of the depthwise stage
    """

    # Build the three layers first, then chain them on the input.
    depthwise = DepthwiseConv2D(
        kernel_size=kernel_sz,
        strides=stride,
        depth_multiplier=1,
        padding=pad,
    )
    normalize = BatchNormalization()
    relu = Activation(activation="relu")

    return relu(normalize(depthwise(inputs)))


def pointwise_block(
    inputs,
    num_filters,
    alpha,
    kernel_sz=(1, 1),
    stride=(1, 1),
    pad="same",
    dropout=False,
    dropout_perc=0.1,
):
    """
    Function that performs the pointwise convolution
    (Conv2D -> BatchNormalization -> ReLU, optionally followed by Dropout)
    Inputs:
        inputs:       tensor fed to the pointwise convolution
        num_filters:  number of filters before the width multiplier is applied
        alpha:        width multiplier; the convolution uses int(num_filters * alpha) filters
        kernel_sz:    a tuple that indicates the size of the filtering kernel
        stride:       a tuple that indicates the strides of the kernel
        pad:          padding mode forwarded to Conv2D
        dropout:      determine if dropout will be performed
        dropout_perc: dropout rate passed to the Dropout layer
    Return:
        x: the result of the pointwise convolution
    """

    # Number of filters based on width multiplier reported in the original paper
    n_fil = int(num_filters * alpha)

    # `stride` is forwarded here; it used to be accepted but silently ignored.
    x = Conv2D(
        filters=n_fil,
        kernel_size=kernel_sz,
        strides=stride,
        padding=pad,
    )(inputs)
    x = BatchNormalization()(x)
    x = Activation(activation="relu")(x)

    if dropout:
        x = Dropout(dropout_perc)(x)

    return x


def MobileNetV1(
    input_shape,
    num_units,
    filters=32,
    kernel_sz=(3, 3),
    stride=(2, 2),
    alp=1,
    ro=1,
    dropout_perc=0.1,
):
    """
    Build a MobileNetV1-style classifier out of depthwise/pointwise blocks.

    Inputs:
        input_shape:  3-tuple; the first two entries are scaled by `ro`, the third is kept as is
        num_units:    number of units of the final softmax layer
        filters:      number of filters of the initial regular convolution (not scaled by `alp`)
        kernel_sz:    kernel size of the initial regular convolution
        stride:       strides of the initial regular convolution
        alp:          width multiplier forwarded to every pointwise block
        ro:           resolution multiplier applied to the first two entries of `input_shape`
        dropout_perc: dropout rate used after the initial convolution and in every pointwise block
    Return:
        model: the (uncompiled) Model mapping the input to the softmax output
    """
    # (depthwise stride, pointwise filters before width scaling), in network order.
    # The (1, 1)/512 block is repeated five times in the middle of the network.
    separable_blocks = (
        [
            ((1, 1), 64),
            ((2, 2), 128),
            ((1, 1), 128),
            ((2, 2), 256),
            ((1, 1), 256),
            ((2, 2), 512),
        ]
        + [((1, 1), 512)] * 5
        + [
            ((2, 2), 1024),
            ((1, 1), 1024),
        ]
    )

    # Apply the resolution multiplier to the first two dimensions only
    input_shape = (int(input_shape[0] * ro), int(input_shape[1] * ro), input_shape[2])

    inputs = Input(shape=input_shape)

    # Regular convolution
    x = Conv2D(filters=filters, kernel_size=kernel_sz, strides=stride)(inputs)
    x = BatchNormalization()(x)
    x = Activation(activation="relu")(x)
    x = Dropout(dropout_perc)(x)

    # Depthwise-pointwise blocks
    for dw_stride, pw_filters in separable_blocks:
        x = depthwise_block(x, kernel_sz=(3, 3), stride=dw_stride)
        x = pointwise_block(
            x,
            num_filters=pw_filters,
            alpha=alp,
            stride=(1, 1),
            dropout=True,
            dropout_perc=dropout_perc,
        )

    # Pooling layer: global pooling averages over whatever spatial size is left,
    # so no explicit pool size has to be derived from `ro`.
    x = tf.keras.layers.GlobalAveragePooling2D()(x)

    # Kept so the layer graph matches the previous version of this function
    x = Flatten()(x)

    # Fully connected layer
    x = Dense(units=1024, activation="relu")(x)

    # Softmax layer
    output = Dense(num_units, activation="softmax")(x)

    model = Model(inputs, output)

    return model


### Training on CIFAR-10

In [2]:
# Fetch CIFAR-10 and unpack the two (images, labels) splits
train_split, test_split = keras.datasets.cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [3]:
from tensorflow.keras.utils import to_categorical

# Class count is stated explicitly so both splits always get the same width.
NUM_CLASSES = 10

# Only encode labels that are not one-hot yet: the cell overwrites y_train / y_test,
# so without the guard a second run would encode the one-hot matrix again.
if y_train.shape[-1] != NUM_CLASSES:
    y_train = to_categorical(y_train, num_classes=NUM_CLASSES, dtype="uint8")
if y_test.shape[-1] != NUM_CLASSES:
    y_test = to_categorical(y_test, num_classes=NUM_CLASSES, dtype="uint8")

Results for various values of the width multiplier $α$

$α = 1$

In [4]:
# Width multiplier (alpha) and resolution multiplier (ro) for this run
alpha = 1
ro = 1

model_v1 = MobileNetV1(
    (32, 32, 3),
    num_units=10,
    alp=alpha,
    ro=ro,
    dropout_perc=0.2,
)

optimizer = Adam(learning_rate=0.001)

model_v1.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Learning-rate schedule driven by the validation loss.
# NOTE: patience equals the number of epochs below, so the schedule cannot
# trigger in this run unless `epochs` is raised or `patience` lowered.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.7,
    patience=10,
    verbose=1,
    min_lr=0.0001,
)

# The test split doubles as validation data; the callback is now actually
# handed to fit() instead of being created and left unused.
history_train = model_v1.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_test, y_test),
    callbacks=[reduce_lr],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


$α = 0.75$

In [6]:
# Width multiplier (alpha) and resolution multiplier (ro) for this run
alpha = 0.75
ro = 1

model_v1 = MobileNetV1(
    (32, 32, 3),
    num_units=10,
    alp=alpha,
    ro=ro,
    dropout_perc=0.2,
)

optimizer = Adam(learning_rate=0.001)

model_v1.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Learning-rate schedule driven by the validation loss.
# NOTE: patience equals the number of epochs below, so the schedule cannot
# trigger in this run unless `epochs` is raised or `patience` lowered.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.7,
    patience=10,
    verbose=1,
    min_lr=0.0001,
)

# The test split doubles as validation data; the callback is now actually
# handed to fit() instead of being created and left unused.
history_train = model_v1.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_test, y_test),
    callbacks=[reduce_lr],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


$α = 0.5$

In [7]:
# Width multiplier (alpha) and resolution multiplier (ro) for this run
alpha = 0.5
ro = 1

model_v1 = MobileNetV1(
    (32, 32, 3),
    num_units=10,
    alp=alpha,
    ro=ro,
    dropout_perc=0.2,
)

optimizer = Adam(learning_rate=0.001)

model_v1.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Learning-rate schedule driven by the validation loss.
# NOTE: patience equals the number of epochs below, so the schedule cannot
# trigger in this run unless `epochs` is raised or `patience` lowered.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.7,
    patience=10,
    verbose=1,
    min_lr=0.0001,
)

# The test split doubles as validation data; the callback is now actually
# handed to fit() instead of being created and left unused.
history_train = model_v1.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_test, y_test),
    callbacks=[reduce_lr],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


$α = 0.25$

In [8]:
# Width multiplier (alpha) and resolution multiplier (ro) for this run
alpha = 0.25
ro = 1

model_v1 = MobileNetV1(
    (32, 32, 3),
    num_units=10,
    alp=alpha,
    ro=ro,
    dropout_perc=0.2,
)

optimizer = Adam(learning_rate=0.001)

model_v1.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Learning-rate schedule driven by the validation loss.
# NOTE: patience equals the number of epochs below, so the schedule cannot
# trigger in this run unless `epochs` is raised or `patience` lowered.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.7,
    patience=10,
    verbose=1,
    min_lr=0.0001,
)

# The test split doubles as validation data; the callback is now actually
# handed to fit() instead of being created and left unused.
history_train = model_v1.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_test, y_test),
    callbacks=[reduce_lr],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
