In [None]:
import keras
import datetime
import numpy as np
import os
import tensorflow as tf
import matplotlib.pyplot as plt
from tqdm import tqdm
import math
import h5py

from tensorflow.keras import Model, Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, Input, Layer, Concatenate, Add, MaxPool2D, GlobalAveragePooling2D, Activation, Convolution2D

from keras.callbacks import TensorBoard
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import regularizers
from tensorflow.keras.utils import plot_model

import warnings
warnings.simplefilter('ignore')


We set up data generators that load the training and validation images from disk and apply the crop and mean-subtraction preprocessing.

In [None]:
# Dataset layout: one root folder holding a `train` and a `val` sub-directory.
_data_root = '../imagenet2/imagenet2'
train_dir = _data_root + '/train'
val_dir = _data_root + '/val'

# Mini-batch size handed to every directory iterator in this cell.
batch_size = 64


def random_crop_preprocessing(img, crop_size=227, mean=(104, 117, 123)):
    """Cut a randomly positioned square patch out of ``img`` and subtract a per-channel mean.

    Args:
        img: Rank-3 array indexed as (row, column, channel).
        crop_size: Side length of the square patch, in pixels.
        mean: Values subtracted from the patch; broadcast along the last axis.
            NOTE(review): 104/117/123 is the order usually quoted for BGR
            ImageNet means, whereas Keras image loaders yield RGB by default.
            Confirm the intended channel order before changing it, because any
            saved weights were trained with this exact value.

    Returns:
        The mean-subtracted patch with ``crop_size`` rows and columns.

    Raises:
        ValueError: If ``img`` has fewer than ``crop_size`` rows or columns.
    """
    h, w, _ = img.shape
    if h < crop_size or w < crop_size:
        raise ValueError(
            f"image of size {h}x{w} is smaller than the requested crop of {crop_size}x{crop_size}"
        )

    # The upper bound of randint is exclusive, so the largest offset still keeps
    # the patch inside the image. When the image is already crop_size x crop_size
    # the only possible offset is 0 and the whole image is returned.
    top = np.random.randint(0, h - crop_size + 1)
    left = np.random.randint(0, w - crop_size + 1)
    cropped = img[top:top + crop_size, left:left + crop_size, :]

    return cropped - np.asarray(mean, dtype=np.float32)


# Training-time augmentation: random horizontal flips plus the crop / mean-subtraction
# function defined above. A rescale factor of 1.0 leaves pixel magnitudes as loaded.
_train_augmentation = {
    'rescale': 1.,
    'horizontal_flip': True,
    'preprocessing_function': random_crop_preprocessing,
}
train_datagen = ImageDataGenerator(**_train_augmentation)

# NOTE(review): per the Keras docs the preprocessing function receives images that
# were already resized to target_size. With target_size equal to the 227-pixel crop
# size, the "random crop" always selects the whole image -- confirm this is intended.
_train_flow_options = {
    'target_size': (227, 227),
    'batch_size': batch_size,
    'class_mode': 'categorical',
}
train_generator = train_datagen.flow_from_directory(train_dir, **_train_flow_options)

def center_crop_preprocessing(img, crop_size=227, mean=(104, 117, 123)):
    """Cut the central square patch out of ``img`` and subtract a per-channel mean.

    Args:
        img: Rank-3 array indexed as (row, column, channel).
        crop_size: Side length of the square patch, in pixels.
        mean: Values subtracted from the patch; broadcast along the last axis.
            NOTE(review): 104/117/123 is the order usually quoted for BGR
            ImageNet means, whereas Keras image loaders yield RGB by default.
            Confirm the intended channel order before changing it, because any
            saved weights were trained with this exact value.

    Returns:
        The mean-subtracted patch with ``crop_size`` rows and columns.

    Raises:
        ValueError: If ``img`` has fewer than ``crop_size`` rows or columns.
            Without this check the offsets below would go negative and the
            slice would silently return a wrong (smaller, off-centre) region.
    """
    h, w, _ = img.shape
    if h < crop_size or w < crop_size:
        raise ValueError(
            f"image of size {h}x{w} is smaller than the requested crop of {crop_size}x{crop_size}"
        )

    # Integer division puts any odd leftover pixel on the bottom/right side.
    top = (h - crop_size) // 2
    left = (w - crop_size) // 2
    cropped = img[top:top + crop_size, left:left + crop_size, :]

    return cropped - np.asarray(mean, dtype=np.float32)

# Validation pipeline: no flipping, only the centre crop / mean-subtraction function.
# A rescale factor of 1.0 leaves pixel magnitudes as loaded.
val_datagen = ImageDataGenerator(
    preprocessing_function=center_crop_preprocessing,
    rescale=1.,
)

# Options common to both validation iterators.
_val_flow_options = {
    'target_size': (227, 227),
    'batch_size': batch_size,
    'class_mode': 'categorical',
}

# Iterator with the default ordering, passed as validation data during training.
val_generator = val_datagen.flow_from_directory(val_dir, **_val_flow_options)

# Un-shuffled iterator: predictions come back in file order, so they can be
# matched against `val_generator1.classes` after training.
val_generator1 = val_datagen.flow_from_directory(val_dir, shuffle=False, **_val_flow_options)

Found 9559 images belonging to 10 classes.
Found 3963 images belonging to 10 classes.
Found 3963 images belonging to 10 classes.


We define the SqueezeNet architecture, parameterised so that the squeeze ratio can be varied.

In [None]:
# Convolution / pooling window sizes referenced by name in the model code below.
one, three, seven = (1, 1), (3, 3), (7, 7)

# Height, width and channel count of the images produced by the generators.
input_shape = (227, 227, 3)
class FireModule(object):
    """Builder for one fire module: a 1x1 "squeeze" convolution feeding two parallel
    "expand" convolutions (1x1 and 3x3) whose outputs are concatenated."""

    def __init__(self, layer_number: int, activation: str, kernel_initializer: str, sq=0.125, p3=0.5) -> None:
        """Store the configuration for a single fire module.

        Args:
            layer_number: Zero-based index of the module. It determines both the
                module width and the ``fire_<layer_number + 2>`` layer names.
            activation: Activation passed to every convolution in the module.
            kernel_initializer: Initializer passed to every convolution.
            sq: Squeeze ratio -- squeeze filters as a fraction of the module width.
            p3: Fraction of the module width produced by the 3x3 expand branch.
        """
        self.layer_number = layer_number
        self.activation = activation
        self.kernel_initializer = kernel_initializer
        self.sq = sq
        self.p3 = p3

    def build_module(self, fire_input: Layer) -> Layer:
        """Apply the fire module to ``fire_input`` and return the concatenated output.

        Args:
            fire_input: Tensor the squeeze convolution is applied to.

        Returns:
            Concatenation of the 1x1 and 3x3 expand branches.
        """
        # Module width grows by 128 channels every two modules: 128, 128, 256, 256, ...
        output_size = 128 * (1 + (self.layer_number // 2))

        # Keep at least one squeeze filter so a very small ratio cannot request zero.
        squeeze_1x1_filters = max(1, int(output_size * self.sq))
        # Give the 1x1 branch whatever the 3x3 branch leaves over, so the two
        # always sum to output_size. Truncating both independently could lose a
        # channel (e.g. p3=0.3 gave 89 + 38 = 127 instead of 128).
        expand_3x3_filters = int(output_size * self.p3)
        expand_1x1_filters = output_size - expand_3x3_filters

        prefix = f'fire_{self.layer_number+2}'
        shared = dict(strides=1, activation=self.activation, kernel_initializer=self.kernel_initializer)

        squeeze_1x1 = Conv2D(name=f'{prefix}_squeeze_1x1', filters=squeeze_1x1_filters,
                             kernel_size=one, padding='valid', **shared)(fire_input)
        expand_1x1 = Conv2D(name=f'{prefix}_expand_1x1', filters=expand_1x1_filters,
                            kernel_size=one, padding='valid', **shared)(squeeze_1x1)
        # 'same' padding keeps the 3x3 branch spatially aligned with the 1x1 branch
        # so the two can be concatenated along the channel axis.
        expand_3x3 = Conv2D(name=f'{prefix}_expand_3x3', filters=expand_3x3_filters,
                            kernel_size=three, padding='same', **shared)(squeeze_1x1)

        return Concatenate(name=prefix)([expand_1x1, expand_3x3])
class SqueezeNet(object):
    """Factory for SqueezeNet-style Keras models built from ``FireModule`` blocks.

    Two variants are available: "vanilla" (plain chain of fire modules) and
    "bypass" (element-wise ``Add`` skip connections around fire modules 3, 5, 7
    and 9). The most recently built model is kept in ``self.model``.
    """

    def __init__(self, activation: str='relu', kernel_initializer: str='glorot_uniform', squeeze_ratio=0.125, p3=0.5) -> None:
        """Store the settings shared by every layer of the network.

        Args:
            activation: Activation used by all convolutions.
            kernel_initializer: Initializer used by all convolutions.
            squeeze_ratio: Squeeze ratio forwarded to every fire module.
            p3: Fraction of 3x3 expand filters forwarded to every fire module.
        """
        self.activation = activation
        self.kernel_initializer = kernel_initializer
        self.sq = squeeze_ratio
        self.p3 = p3

    def _fire(self, layer_number: int, fire_input):
        """Build fire module ``layer_number`` on top of ``fire_input`` with this network's settings."""
        return FireModule(layer_number=layer_number, activation=self.activation,
                          kernel_initializer=self.kernel_initializer,
                          sq=self.sq, p3=self.p3).build_module(fire_input)

    def _build(self, input_shape: tuple, n_classes: int, bypass: bool) -> None:
        """Assemble the network and store it in ``self.model``.

        Layer names and creation order are the same for both variants; the
        bypass variant only inserts the four ``Bypass_*`` Add layers.
        """
        inp = Input(shape=input_shape, name='Input')

        conv_1 = Conv2D(name="Conv_1",
            filters=96, kernel_size=seven, strides=2, padding='same',
            activation=self.activation, kernel_initializer=self.kernel_initializer)(inp)
        maxpool_1 = MaxPool2D(name="MaxPool_1", pool_size=three, strides=2)(conv_1)

        fire_2 = self._fire(0, maxpool_1)
        fire_3 = self._fire(1, fire_2)
        if bypass:
            fire_3 = Add(name="Bypass_1")([fire_2, fire_3])
        fire_4 = self._fire(2, fire_3)

        maxpool_2 = MaxPool2D(name="MaxPool_2", pool_size=three, strides=2)(fire_4)

        fire_5 = self._fire(3, maxpool_2)
        if bypass:
            fire_5 = Add(name="Bypass_2")([maxpool_2, fire_5])
        fire_6 = self._fire(4, fire_5)
        fire_7 = self._fire(5, fire_6)
        if bypass:
            fire_7 = Add(name="Bypass_3")([fire_6, fire_7])
        fire_8 = self._fire(6, fire_7)

        maxpool_3 = MaxPool2D(name="MaxPool_3", pool_size=three, strides=2)(fire_8)

        fire_9 = self._fire(7, maxpool_3)
        if bypass:
            fire_9 = Add(name="Bypass_4")([maxpool_3, fire_9])

        dropout = Dropout(0.5, name="Dropout")(fire_9)

        # One 1x1 filter per class. This was hard-coded to 10, which silently
        # ignored the n_classes argument.
        conv_10 = Conv2D(name="Conv_10",
            filters=n_classes, kernel_size=one, strides=1, padding='valid',
            activation=self.activation, kernel_initializer=self.kernel_initializer)(dropout)
        gap_11 = GlobalAveragePooling2D()(conv_10)
        out = Activation('softmax', name='softmax')(gap_11)

        self.model = Model(inputs=inp, outputs=out)

    def vanilla_model(self, input_shape: tuple=(224, 224, 3), n_classes: int=1000) -> None:
        """Build the variant without skip connections and store it in ``self.model``."""
        self._build(input_shape, n_classes, bypass=False)

    def bypass_model(self, input_shape: tuple=(224, 224, 3), n_classes: int=1000) -> None:
        """Build the variant with ``Add`` skip connections and store it in ``self.model``."""
        self._build(input_shape, n_classes, bypass=True)

    def build_model(self, input_shape: tuple=(224, 224, 3), n_classes: int=1000, choice: str='vanilla') -> Model:
        """Build and return the requested variant.

        Args:
            input_shape: Shape of one input image.
            n_classes: Number of output classes (filters of the final convolution).
            choice: ``"vanilla"`` selects the plain network; any other value
                selects the bypass network.

        Returns:
            The constructed (uncompiled) Keras model, also available as ``self.model``.
        """
        if choice == "vanilla":
            self.vanilla_model(input_shape, n_classes)
        else:
            self.bypass_model(input_shape, n_classes)

        return self.model

We set up the training parameters

In [None]:
# Optimiser / schedule hyper-parameters.
base_lr = 3e-4        # learning rate at epoch 0
total_epochs = 50     # length of each training run
batch_size = 64       # same value as used when the generators were created

# Whole batches per epoch; a trailing partial batch is not counted.
steps_per_epoch = train_generator.samples // batch_size

def poly_lr_scheduler(epoch, lr):
    """Return the learning rate for ``epoch``: ``base_lr`` scaled by the fraction of training left.

    The rate falls linearly from ``base_lr`` at epoch 0 towards zero at
    ``total_epochs``. The incoming ``lr`` is accepted for the callback
    signature but not used.
    """
    remaining_fraction = 1.0 - (epoch / total_epochs)
    return base_lr * remaining_fraction

# Callbacks created once here and referenced by the training cell.
lr_callback = LearningRateScheduler(poly_lr_scheduler)
tensorboard_callback = TensorBoard(log_dir='./logs')

# Default checkpoint writer: one file per epoch. The training loop further down
# rebinds `checkpoint_callback` to a per-ratio writer before each fit.
_checkpoint_options = {
    'filepath': "squeezenet_bypass_checkpoint_epoch_{epoch:02d}.h5",
    'save_freq': 'epoch',
    'verbose': 1,
}
checkpoint_callback = ModelCheckpoint(**_checkpoint_options)

We train the models and save the weights

In [None]:
# Trained models and their fit histories, keyed by squeeze ratio.
model_sq_ratio = {}
sq_ratio_history = {}
sr_list = [0.125, 0.25, 0.5, 0.75, 1]

for r in sr_list:
    print('squeeze ratio is', r)

    # Fresh bypass network for this squeeze ratio.
    sqnet = SqueezeNet(squeeze_ratio=r)
    model = sqnet.build_model(input_shape=input_shape, n_classes=10, choice='bypass')
    model.compile(optimizer=tf.keras.optimizers.Adam(base_lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Per-ratio checkpoint files. The doubled braces leave `{epoch:02d}` in the
    # path for Keras to fill in after `.format(r)` has inserted the ratio.
    checkpoint_callback = ModelCheckpoint(
        filepath="SqueezeNet_sq_ratio_{}_epoch_{{epoch:02d}}.h5".format(r),
        save_freq='epoch',
        verbose=1
    )
    # Per-ratio TensorBoard directory so the curves of the different runs are
    # not written on top of each other in a single './logs' folder.
    run_tensorboard_callback = TensorBoard(log_dir=f'./logs/sq_ratio_{r}')

    training_history = model.fit(
        train_generator,
        epochs=total_epochs,
        steps_per_epoch=steps_per_epoch,
        validation_data=val_generator,
        validation_steps=val_generator.samples // batch_size,
        callbacks=[lr_callback, checkpoint_callback, run_tensorboard_callback],
        verbose=1
    )
    model.save_weights(f'Squeeze_net_ratio_trial_{r}.h5')

    # Final evaluation on the un-shuffled validation iterator. Using the
    # iterator's own length covers the last partial batch too; flooring
    # samples // batch_size dropped it from the reported accuracy.
    val_generator1.reset()
    val_steps = len(val_generator1)
    predictions = model.predict(val_generator1, steps=val_steps)

    # With shuffle=False the predictions follow the order of `.classes`.
    true_labels = val_generator1.classes
    num_classes = len(val_generator1.class_indices)
    true_labels_one_hot = tf.keras.utils.to_categorical(true_labels, num_classes)

    top1_metric = tf.keras.metrics.CategoricalAccuracy()
    top5_metric = tf.keras.metrics.TopKCategoricalAccuracy(k=5)
    top1_metric.update_state(true_labels_one_hot, predictions)
    top5_metric.update_state(true_labels_one_hot, predictions)

    print(f"Top-1 Accuracy of squeezenet_{r}", top1_metric.result().numpy())
    print(f"Top-5 Accuracy of squeezenet_{r}", top5_metric.result().numpy())

    model_sq_ratio[r] = model
    sq_ratio_history[r] = training_history
    print('save model', r)


squeeze ratio is 0.125
Epoch 1/50
Epoch 1: saving model to SqueezeNet_sq_ratio_0.125_epoch_01.h5
Epoch 2/50
Epoch 2: saving model to SqueezeNet_sq_ratio_0.125_epoch_02.h5
Epoch 3/50
Epoch 3: saving model to SqueezeNet_sq_ratio_0.125_epoch_03.h5
Epoch 4/50
Epoch 4: saving model to SqueezeNet_sq_ratio_0.125_epoch_04.h5
Epoch 5/50
Epoch 5: saving model to SqueezeNet_sq_ratio_0.125_epoch_05.h5
Epoch 6/50
Epoch 6: saving model to SqueezeNet_sq_ratio_0.125_epoch_06.h5
Epoch 7/50
Epoch 7: saving model to SqueezeNet_sq_ratio_0.125_epoch_07.h5
Epoch 8/50
Epoch 8: saving model to SqueezeNet_sq_ratio_0.125_epoch_08.h5
Epoch 9/50
Epoch 9: saving model to SqueezeNet_sq_ratio_0.125_epoch_09.h5
Epoch 10/50
Epoch 10: saving model to SqueezeNet_sq_ratio_0.125_epoch_10.h5
Epoch 11/50
Epoch 11: saving model to SqueezeNet_sq_ratio_0.125_epoch_11.h5
Epoch 12/50
Epoch 12: saving model to SqueezeNet_sq_ratio_0.125_epoch_12.h5
Epoch 13/50
Epoch 13: saving model to SqueezeNet_sq_ratio_0.125_epoch_13.h5
Epoch 1

KeyboardInterrupt: 

We verify the accuracy of the saved weights on the validation set.

In [None]:
# Rebuild each network, load the weights saved by the training cell and report
# top-1 / top-5 accuracy on the validation set.
for r in [0.125, 0.25, 0.5, 0.75]:
    print("\n")
    print('squeeze ratio is', r)

    sqnet = SqueezeNet(squeeze_ratio=r)
    model = sqnet.build_model(input_shape=input_shape, n_classes=10, choice='bypass')
    model.load_weights(f'Squeeze_net_ratio_trial_{r}.h5')

    # Using the iterator's own length covers the last partial batch too;
    # flooring samples // batch_size dropped it from the reported accuracy.
    val_generator1.reset()
    val_steps = len(val_generator1)
    predictions = model.predict(val_generator1, steps=val_steps)

    # The iterator was created with shuffle=False, so the predictions follow
    # the order of `.classes`.
    true_labels = val_generator1.classes
    num_classes = len(val_generator1.class_indices)
    true_labels_one_hot = tf.keras.utils.to_categorical(true_labels, num_classes)

    top1_metric = tf.keras.metrics.CategoricalAccuracy()
    top5_metric = tf.keras.metrics.TopKCategoricalAccuracy(k=5)
    top1_metric.update_state(true_labels_one_hot, predictions)
    top5_metric.update_state(true_labels_one_hot, predictions)

    print(f"Top-1 Accuracy of squeezenet_{r}", top1_metric.result().numpy())
    print(f"Top-5 Accuracy of squeezenet_{r}", top5_metric.result().numpy())




squeeze ratio is 0.125
Top-1 Accuracy of squeezenet_0.125 0.8122439
Top-5 Accuracy of squeezenet_0.125 0.9836066


squeeze ratio is 0.25
Top-1 Accuracy of squeezenet_0.25 0.81813526
Top-5 Accuracy of squeezenet_0.25 0.98181355


squeeze ratio is 0.5
Top-1 Accuracy of squeezenet_0.5 0.8335041
Top-5 Accuracy of squeezenet_0.5 0.9833504


squeeze ratio is 0.75
Top-1 Accuracy of squeezenet_0.75 0.82428277
Top-5 Accuracy of squeezenet_0.75 0.9833504
