In [None]:
import keras
import datetime
import numpy as np
import os
import tensorflow as tf
import matplotlib.pyplot as plt
from tqdm import tqdm
import math
import h5py

from tensorflow.keras import Model, Sequential

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, Input, Activation, Concatenate, Convolution2D, GlobalAveragePooling2D, Layer, Add, MaxPool2D

from keras.callbacks import TensorBoard

from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import plot_model
from tensorflow.keras import regularizers

import warnings
warnings.simplefilter('ignore')


We set up the training and validation data generators, which read the images from disk and apply the crop and mean-subtraction preprocessing.

In [None]:
# Root folder of the dataset; it holds one sub-directory per split.
_dataset_root = '../imagenet2/imagenet2'
train_dir = f"{_dataset_root}/train"
val_dir = f"{_dataset_root}/val"

# Mini-batch size handed to the directory iterators below.
batch_size = 64
def random_crop_preprocessing(img, crop_size=227):
    """Take a random square crop of an image and subtract a per-channel mean.

    Args:
        img: Rank-3 NumPy array indexed as (height, width, channels). The
            channel axis must broadcast against the 3-element mean.
        crop_size: Side length of the square crop in pixels (default 227).

    Returns:
        Array of shape (crop_size, crop_size, channels) holding the cropped
        pixels minus the per-channel mean.

    Raises:
        ValueError: If the image is smaller than ``crop_size`` along its
            height or width.
    """
    h, w, _ = img.shape
    if h < crop_size or w < crop_size:
        # Fail with an explicit message rather than the opaque "low >= high"
        # error np.random.randint would raise for a negative range.
        raise ValueError(
            f"image of size {h}x{w} is smaller than the "
            f"{crop_size}x{crop_size} crop"
        )

    # randint's upper bound is exclusive, hence the +1. When the image is
    # already exactly crop_size on a side (e.g. a loader that resizes to
    # 227x227) the only possible offset is 0 and the crop is a no-op.
    top = np.random.randint(0, h - crop_size + 1)
    left = np.random.randint(0, w - crop_size + 1)
    cropped = img[top:top + crop_size, left:left + crop_size, :]

    # NOTE(review): these values look like the Caffe ImageNet channel means in
    # BGR order, whereas the images here presumably arrive as RGB -- confirm
    # the intended channel order.
    mean = np.array([104, 117, 123], dtype=np.float32)
    return cropped - mean

# Training-time generator: random horizontal flips plus the crop /
# mean-subtraction hook defined above.
_train_aug_options = dict(
    rescale=1.,  # a factor of 1 leaves pixel values unscaled
    horizontal_flip=True,
    preprocessing_function=random_crop_preprocessing,
)
train_datagen = ImageDataGenerator(**_train_aug_options)


def center_crop_preprocessing(img, crop_size=227):
    """Take the central square crop of an image and subtract a per-channel mean.

    Args:
        img: Rank-3 NumPy array indexed as (height, width, channels). The
            channel axis must broadcast against the 3-element mean.
        crop_size: Side length of the square crop in pixels (default 227).

    Returns:
        Array of shape (crop_size, crop_size, channels) holding the centred
        crop minus the per-channel mean.

    Raises:
        ValueError: If the image is smaller than ``crop_size`` along its
            height or width.
    """
    h, w, _ = img.shape
    if h < crop_size or w < crop_size:
        # Without this guard the offsets below become negative and the slice
        # silently returns a wrong (much smaller) region of the image.
        raise ValueError(
            f"image of size {h}x{w} is smaller than the "
            f"{crop_size}x{crop_size} crop"
        )

    # Split the surplus rows/columns evenly; any odd pixel goes to the
    # bottom/right side because of the floor division.
    top = (h - crop_size) // 2
    left = (w - crop_size) // 2
    cropped = img[top:top + crop_size, left:left + crop_size, :]

    # NOTE(review): these values look like the Caffe ImageNet channel means in
    # BGR order, whereas the images here presumably arrive as RGB -- confirm
    # the intended channel order.
    mean = np.array([104, 117, 123], dtype=np.float32)
    return cropped - mean

# Validation-time generator: no augmentation, only the deterministic centre
# crop / mean-subtraction hook defined above.
_val_options = dict(
    rescale=1.,  # a factor of 1 leaves pixel values unscaled
    preprocessing_function=center_crop_preprocessing,
)
val_datagen = ImageDataGenerator(**_val_options)


# Options shared by every directory iterator: images are resized to 227x227,
# grouped into mini-batches and paired with one-hot ('categorical') labels.
_flow_options = dict(
    target_size=(227, 227),
    batch_size=batch_size,
    class_mode='categorical',
)

# Iterator over the training split.
train_generator = train_datagen.flow_from_directory(train_dir, **_flow_options)

# Iterator over the validation split, used during training.
val_generator = val_datagen.flow_from_directory(val_dir, **_flow_options)

# Second validation iterator with shuffling disabled, so that predictions can
# later be matched against its `classes` attribute.
val_generator1 = val_datagen.flow_from_directory(
    val_dir, shuffle=False, **_flow_options
)

Found 9559 images belonging to 10 classes.
Found 3963 images belonging to 10 classes.
Found 3963 images belonging to 10 classes.


We define the SqueezeNet model with complex bypass connections

In [None]:
# Convolution / pooling window sizes used by the network builders below.
ONE, THREE, SEVEN = (1, 1), (3, 3), (7, 7)


def fire_module(x, squeeze_filters, expand_filters, layer_name_prefix="fire"):
    """Build a Fire module: a 1x1 squeeze conv feeding two parallel expand convs.

    Args:
        x: Input tensor.
        squeeze_filters: Number of filters in the 1x1 squeeze convolution.
        expand_filters: Expand width; each of the two expand branches gets
            ``expand_filters // 2`` filters.
        layer_name_prefix: Prefix used to name the four layers
            (``_squeeze``, ``_expand1x1``, ``_expand3x3``, ``_concat``).

    Returns:
        The concatenation of the 1x1 and 3x3 expand branches, in that order.
    """
    branch_filters = expand_filters // 2

    squeezed = Conv2D(
        filters=squeeze_filters,
        kernel_size=ONE,
        padding='valid',
        activation='relu',
        name=f"{layer_name_prefix}_squeeze",
    )(x)

    # The 3x3 branch uses 'same' padding so both branches keep the spatial
    # size of the squeezed tensor and can be concatenated.
    branches = [
        Conv2D(
            filters=branch_filters,
            kernel_size=kernel,
            padding=pad,
            activation='relu',
            name=f"{layer_name_prefix}_expand{tag}",
        )(squeezed)
        for tag, kernel, pad in (("1x1", ONE, 'valid'), ("3x3", THREE, 'same'))
    ]

    return Concatenate(name=f"{layer_name_prefix}_concat")(branches)

def conv1x1_skip(source, out_channels, name="skip_1x1"):
    """Project ``source`` to ``out_channels`` channels with a 1x1 convolution.

    The convolution has stride 1, 'valid' padding and no activation.

    Args:
        source: Input tensor.
        out_channels: Number of filters of the 1x1 convolution.
        name: Name given to the convolution layer.

    Returns:
        The output tensor of the 1x1 convolution.
    """
    projection = Conv2D(
        filters=out_channels,
        kernel_size=ONE,
        strides=(1, 1),
        padding='valid',
        activation=None,
        name=name,
    )
    return projection(source)

def add_bypass(source, target, force_1x1=False, out_channels=None, name="bypass"):
    """Add a bypass (skip) connection from ``source`` onto ``target``.

    Args:
        source: Tensor the bypass starts from.
        target: Tensor the bypass is added to.
        force_1x1: When true, always route ``source`` through a 1x1
            convolution (layer name ``<name>_1x1``) before the addition.
        out_channels: Filter count of the forced 1x1 convolution; defaults to
            the last dimension of ``target``. Ignored when ``force_1x1`` is
            false.
        name: Name of the ``Add`` layer and prefix of the convolution's name.

    Returns:
        The element-wise sum of the (possibly projected) source and the target.
    """
    src_shape, tgt_shape = source.shape, target.shape

    if force_1x1:
        width = tgt_shape[-1] if out_channels is None else out_channels
        shortcut = conv1x1_skip(source, out_channels=width, name=f"{name}_1x1")
    elif src_shape == tgt_shape:
        # Shapes already agree: plain identity bypass.
        shortcut = source
    else:
        # Shapes differ: project the source to the target's channel count.
        shortcut = conv1x1_skip(source, out_channels=tgt_shape[-1],
                                name=f"{name}_1x1auto")

    return Add(name=name)([shortcut, target])

def build_squeezenet_complex_bypass(input_shape=(224,224,3), n_classes=10):
    """Assemble the SqueezeNet variant with bypass connections.

    The network is a 7x7 stem convolution, eight Fire modules (fire2..fire9)
    interleaved with three max-pooling layers, and a classifier head made of
    dropout, a 1x1 convolution, global average pooling and softmax. After
    every Fire module a bypass is added; bypasses alternate between a forced
    1x1 projection and an automatic one (identity when shapes match).

    Args:
        input_shape: Shape of one input image, without the batch dimension.
        n_classes: Number of output classes (filters of ``conv10``).

    Returns:
        An uncompiled Keras ``Model`` named
        ``SqueezeNet_ComplexBypass_Figure2``.
    """
    def downsample(tensor, layer_name):
        # 3x3 max-pooling with stride 2, shared by the three pooling stages.
        return MaxPooling2D(pool_size=THREE, strides=2, name=layer_name)(tensor)

    image = Input(shape=input_shape, name="input")

    # --- Stem -------------------------------------------------------------
    stem = Conv2D(filters=96, kernel_size=SEVEN, strides=2, padding='same',
                  activation='relu', name='conv1')(image)
    pool1 = downsample(stem, 'maxpool1')

    # --- Stage 1: fire2..fire4 ---------------------------------------------
    fire2 = fire_module(pool1, squeeze_filters=16, expand_filters=128,
                        layer_name_prefix="fire2")
    merged2 = add_bypass(pool1, fire2, force_1x1=True, out_channels=128,
                         name="bypass_mp1_f3")

    fire3 = fire_module(merged2, squeeze_filters=16, expand_filters=128,
                        layer_name_prefix="fire3")
    merged3 = add_bypass(fire2, fire3, force_1x1=False, name="bypass_f2_f4_in")

    fire4 = fire_module(merged3, squeeze_filters=32, expand_filters=256,
                        layer_name_prefix="fire4")
    merged4 = add_bypass(fire3, fire4, force_1x1=True, out_channels=256,
                         name="bypass_f3_mp2")
    pool2 = downsample(merged4, 'maxpool2')

    # --- Stage 2: fire5..fire8 ---------------------------------------------
    fire5 = fire_module(pool2, squeeze_filters=32, expand_filters=256,
                        layer_name_prefix="fire5")
    merged5 = add_bypass(pool2, fire5, force_1x1=False, name="bypass_mp2_f6_in")

    fire6 = fire_module(merged5, squeeze_filters=48, expand_filters=384,
                        layer_name_prefix="fire6")
    merged6 = add_bypass(fire5, fire6, force_1x1=True, out_channels=384,
                         name="bypass_f5_f7_in")

    fire7 = fire_module(merged6, squeeze_filters=48, expand_filters=384,
                        layer_name_prefix="fire7")
    merged7 = add_bypass(fire6, fire7, force_1x1=False, name="bypass_f6_f8_in")

    fire8 = fire_module(merged7, squeeze_filters=64, expand_filters=512,
                        layer_name_prefix="fire8")
    merged8 = add_bypass(fire7, fire8, force_1x1=True, out_channels=512,
                         name="bypass_f7_mp3")
    pool3 = downsample(merged8, 'maxpool3')

    # --- Stage 3: fire9 ----------------------------------------------------
    fire9 = fire_module(pool3, squeeze_filters=64, expand_filters=512,
                        layer_name_prefix="fire9")
    merged9 = add_bypass(pool3, fire9, force_1x1=False,
                         name="bypass_mp3_conv10_in")

    # --- Classifier head ---------------------------------------------------
    regularised = Dropout(0.5, name="dropout")(merged9)
    class_maps = Conv2D(filters=n_classes, kernel_size=ONE, padding='valid',
                        activation='relu', name='conv10')(regularised)
    pooled = GlobalAveragePooling2D(name='global_avgpool')(class_maps)
    probabilities = Activation('softmax', name='softmax')(pooled)

    return Model(inputs=image, outputs=probabilities,
                 name="SqueezeNet_ComplexBypass_Figure2")




We define the training parameters

In [None]:
# Optimisation hyper-parameters.
base_lr = 3e-4        # initial learning rate, also the scale of the LR schedule
total_epochs = 50     # length of the training run and of the LR schedule
batch_size = 64

# Whole batches per epoch; the trailing partial batch is not counted.
steps_per_epoch = train_generator.samples // batch_size

def poly_lr_scheduler(epoch, lr):
    """Return the learning rate for ``epoch`` under a linear decay.

    The rate goes from ``base_lr`` at epoch 0 down to zero at
    ``total_epochs``.

    Args:
        epoch: Index of the epoch the rate is requested for.
        lr: Current learning rate; accepted for the scheduler callback's
            signature but not used.

    Returns:
        ``base_lr * (1 - epoch / total_epochs)``.
    """
    return base_lr * (1.0 - epoch / total_epochs)

# Callbacks used during training.
# Applies poly_lr_scheduler at the start of every epoch.
lr_callback = LearningRateScheduler(poly_lr_scheduler)
# Writes TensorBoard event files under ./logs.
tensorboard_callback = TensorBoard(log_dir='./logs')

# Saves the model after every epoch, one file per epoch.
# (An earlier, identically configured ModelCheckpoint with a different file
# name used to be created here and was immediately overwritten by this one;
# that dead definition has been removed.)
checkpoint_callback = ModelCheckpoint(
    filepath="SqueezeNet_bypass_epoch_{epoch:02d}.h5",
    save_freq='epoch',
    verbose=1
)

In [None]:
# The data generators resize every image to 227x227 (their target_size), so
# the model is declared with that same input shape instead of 224x224, which
# did not match the batches it was actually fed.
model_complex = build_squeezenet_complex_bypass(input_shape=(227, 227, 3), n_classes=10)
model_complex.summary()

Model: "SqueezeNet_ComplexBypass_Figure2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input (InputLayer)             [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1 (Conv2D)                 (None, 112, 112, 96  14208       ['input[0][0]']                  
                                )                                                                 
                                                                                                  
 maxpool1 (MaxPooling2D)        (None, 55, 55, 96)   0           ['conv1[0][0]']                  
                                                                   

We train the SqueezeNet model with complex bypass

In [None]:

# Adam starts at base_lr; the LearningRateScheduler callback then sets the
# rate at every epoch.
model_complex.compile(
    optimizer=tf.keras.optimizers.Adam(base_lr),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Keyword options of the training run, gathered in one place.
_fit_options = dict(
    epochs=total_epochs,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_generator,
    # Whole validation batches only; the trailing partial batch is skipped.
    validation_steps=val_generator.samples // batch_size,
    callbacks=[lr_callback, checkpoint_callback, tensorboard_callback],
    verbose=1,
)

# The returned History object holds the per-epoch training/validation metrics.
sq_bypass_history = model_complex.fit(train_generator, **_fit_options)

Epoch 1/50
Epoch 1: saving model to SqueezeNet_bypass_epoch_01.h5
Epoch 2/50
Epoch 2: saving model to SqueezeNet_bypass_epoch_02.h5
Epoch 3/50
Epoch 3: saving model to SqueezeNet_bypass_epoch_03.h5
Epoch 4/50
Epoch 4: saving model to SqueezeNet_bypass_epoch_04.h5
Epoch 5/50
Epoch 5: saving model to SqueezeNet_bypass_epoch_05.h5
Epoch 6/50
Epoch 6: saving model to SqueezeNet_bypass_epoch_06.h5
Epoch 7/50
Epoch 7: saving model to SqueezeNet_bypass_epoch_07.h5
Epoch 8/50
Epoch 8: saving model to SqueezeNet_bypass_epoch_08.h5
Epoch 9/50
Epoch 9: saving model to SqueezeNet_bypass_epoch_09.h5
Epoch 10/50
Epoch 10: saving model to SqueezeNet_bypass_epoch_10.h5
Epoch 11/50
Epoch 11: saving model to SqueezeNet_bypass_epoch_11.h5
Epoch 12/50
Epoch 12: saving model to SqueezeNet_bypass_epoch_12.h5
Epoch 13/50
Epoch 13: saving model to SqueezeNet_bypass_epoch_13.h5
Epoch 14/50
Epoch 14: saving model to SqueezeNet_bypass_epoch_14.h5
Epoch 15/50
Epoch 15: saving model to SqueezeNet_bypass_epoch_15.h

In [None]:
# Print the layer-by-layer summary of model_complex again, now that the
# training cell has run.
model_complex.summary()

Model: "SqueezeNet_ComplexBypass_Figure2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input (InputLayer)             [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1 (Conv2D)                 (None, 112, 112, 96  14208       ['input[0][0]']                  
                                )                                                                 
                                                                                                  
 maxpool1 (MaxPooling2D)        (None, 55, 55, 96)   0           ['conv1[0][0]']                  
                                                                   

In [None]:
# Save the model's weights to an HDF5 file; the next cell loads them back
# into a freshly built copy of the network.
model_complex.save_weights('Squeezenet_complex_bypass.h5')

In [None]:
# Rebuild the architecture and restore the saved weights. The input shape is
# set to 227x227 to match the images produced by the validation generator
# (target_size=(227, 227)) rather than the mismatching 224x224.
model_complex_load = build_squeezenet_complex_bypass(input_shape=(227, 227, 3), n_classes=10)
model_complex_load.load_weights('Squeezenet_complex_bypass.h5')

We verify the top-1 and top-5 accuracy on the validation set

In [None]:
# Rewind the unshuffled iterator so that the prediction order matches
# `classes` even if the iterator was consumed before (e.g. on a cell re-run).
val_generator1.reset()

# len(iterator) is the number of batches needed to cover every sample,
# including the final partial batch. Using `samples // batch_size` instead
# would silently drop the last images, which all belong to the last class
# because the iterator is not shuffled.
val_steps = len(val_generator1)

predictions = model_complex_load.predict(val_generator1, steps=val_steps)

# Integer class index of every validation image, in iteration order.
true_labels = val_generator1.classes
assert len(predictions) == len(true_labels), (
    "number of predictions does not match number of validation labels"
)

num_classes = len(val_generator1.class_indices)
true_labels_one_hot = tf.keras.utils.to_categorical(true_labels, num_classes)

# Stateful Keras metrics: feed all labels/predictions once, then read the result.
top1_metric = tf.keras.metrics.CategoricalAccuracy()
top5_metric = tf.keras.metrics.TopKCategoricalAccuracy(k=5)
top1_metric.update_state(true_labels_one_hot, predictions)
top5_metric.update_state(true_labels_one_hot, predictions)

print("Top-1 Accuracy on validation set:", top1_metric.result().numpy())
print("Top-5 Accuracy on validation set:", top5_metric.result().numpy())

Top-1 Accuracy on validation set: 0.8145492
Top-5 Accuracy on validation set: 0.977459
