In [None]:
import keras
import datetime
import numpy as np
import os
import tensorflow as tf
import matplotlib.pyplot as plt
from tqdm import tqdm
import math
import h5py

from tensorflow.keras import Model, Sequential

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, Input, Activation, Concatenate, Convolution2D, GlobalAveragePooling2D, Layer, Add, MaxPool2D

from keras.callbacks import TensorBoard

from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import plot_model
from tensorflow.keras import regularizers


import warnings
warnings.simplefilter('ignore')

We set up the data generators that load the training and validation images

In [None]:
# Directories holding the training and validation images, given relative to
# the notebook's working directory.
train_dir = '../imagenet2/imagenet2/train'
val_dir = '../imagenet2/imagenet2/val'
# Number of images per batch; passed to every flow_from_directory call below.
batch_size=64
def random_crop_preprocessing(img, crop_size=227):
    """Take a random square crop of an image and subtract a per-channel mean.

    Args:
        img: NumPy array indexed as (height, width, channels). The mean
            below has three entries, so three channels are expected.
        crop_size: Side length of the square crop. Defaults to 227, the
            value that was previously hard-coded.

    Returns:
        A float array of shape (crop_size, crop_size, channels) holding the
        cropped pixels minus the per-channel mean.

    Raises:
        ValueError: If the image is smaller than ``crop_size`` in either
            dimension.

    Note:
        When the image is already ``crop_size`` x ``crop_size`` (as happens
        when the generators load images with ``target_size=(227, 227)``),
        the only valid offset is 0 and the crop is the identity.
    """
    h, w, _ = img.shape
    # Fail with a readable message instead of NumPy's "low >= high" error.
    if h < crop_size or w < crop_size:
        raise ValueError(
            f"image of size {h}x{w} is smaller than the requested "
            f"{crop_size}x{crop_size} crop"
        )
    # randint's upper bound is exclusive, hence the +1 so that the last
    # valid offset (h - crop_size) can be drawn.
    top = np.random.randint(0, h - crop_size + 1)
    left = np.random.randint(0, w - crop_size + 1)
    cropped = img[top:top + crop_size, left:left + crop_size, :]
    # NOTE(review): 104/117/123 looks like the Caffe ImageNet mean in BGR
    # order, while Keras image loaders yield RGB by default - confirm the
    # channel order is intended.
    mean = np.array([104, 117, 123], dtype=np.float32)
    return cropped - mean

# Training-time pipeline: random horizontal flips plus the crop/mean-subtraction
# hook defined above. rescale=1.0 multiplies pixels by one, i.e. leaves them unscaled.
train_datagen = ImageDataGenerator(
    preprocessing_function=random_crop_preprocessing,
    horizontal_flip=True,
    rescale=1.0,
)


def center_crop_preprocessing(img, crop_size=227):
    """Take the centred square crop of an image and subtract a per-channel mean.

    Args:
        img: NumPy array indexed as (height, width, channels). The mean
            below has three entries, so three channels are expected.
        crop_size: Side length of the square crop. Defaults to 227, the
            value that was previously hard-coded.

    Returns:
        A float array of shape (crop_size, crop_size, channels) holding the
        central pixels minus the per-channel mean.

    Raises:
        ValueError: If the image is smaller than ``crop_size`` in either
            dimension. Without this check the offsets become negative and
            the slice silently returns a small region near the bottom/right
            edge instead of a centred crop.
    """
    h, w, _ = img.shape
    if h < crop_size or w < crop_size:
        raise ValueError(
            f"image of size {h}x{w} is smaller than the requested "
            f"{crop_size}x{crop_size} crop"
        )
    # Split the surplus rows/columns evenly; any odd pixel goes to the
    # bottom/right because of the floor division.
    top = (h - crop_size) // 2
    left = (w - crop_size) // 2
    cropped = img[top:top + crop_size, left:left + crop_size, :]
    # NOTE(review): 104/117/123 looks like the Caffe ImageNet mean in BGR
    # order, while Keras image loaders yield RGB by default - confirm the
    # channel order is intended.
    mean = np.array([104, 117, 123], dtype=np.float32)
    return cropped - mean

# Validation-time pipeline: no augmentation, only the deterministic centre
# crop / mean-subtraction hook. rescale=1.0 leaves pixel values unscaled.
val_datagen = ImageDataGenerator(
    preprocessing_function=center_crop_preprocessing,
    rescale=1.0,
)

# Training batches, one-hot labels inferred from the sub-directory names.
train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(227, 227),
)

# Validation batches in the generator's default (shuffled) order; used as
# validation_data during training.
val_generator = val_datagen.flow_from_directory(
    directory=val_dir,
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(227, 227),
)

# Same validation images in a fixed order (shuffle=False) so that predictions
# can be lined up with the generator's `classes` array after training.
val_generator1 = val_datagen.flow_from_directory(
    directory=val_dir,
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(227, 227),
    shuffle=False,
)

Found 9559 images belonging to 10 classes.
Found 3963 images belonging to 10 classes.
Found 3963 images belonging to 10 classes.


We define the SqueezeNet architecture

In [None]:
# Square kernel / pooling window sizes, referenced by name in the layer
# definitions of FireModule and SqueezeNet.
one = (1, 1)
two = (2, 2)
three = (3, 3)
five = (5, 5)
seven = (7, 7)
thirteen = (13, 13)

# Shape of a single input image passed to the model builder:
# 227 x 227 pixels with 3 channels (channels last).
input_shape = (227, 227, 3)
class FireModule(object):
    """Builder for one fire module: a 1x1 squeeze conv feeding 1x1 and 3x3 expand convs.

    Filter counts grow with ``layer_number``: every two consecutive modules
    share a width, starting at 16 squeeze / 64 + 64 expand filters for
    modules 0 and 1 and increasing by the same amounts every second module.
    """

    def __init__(self, layer_number: int, activation: str, kernel_initializer: str) -> None:
        """Store the zero-based module index and the settings shared by its conv layers."""
        self.layer_number = layer_number
        self.activation = activation
        self.kernel_initializer = kernel_initializer

    def build_module(self, fire_input: Layer) -> Layer:
        """Attach the fire module to ``fire_input`` and return the concatenated output.

        Layers are named ``fire_<layer_number + 2>_...``, so module 0 becomes
        ``fire_2``.
        """
        # Width multiplier: 1 for modules 0-1, 2 for modules 2-3, and so on.
        stage = 1 + (self.layer_number // 2)
        squeeze_filters = 16 * stage
        # Each expand branch contributes half of the module's output channels.
        expand_filters = (128 * stage) // 2

        prefix = f'fire_{self.layer_number+2}'
        shared = dict(
            strides=1,
            activation=self.activation,
            kernel_initializer=self.kernel_initializer,
        )

        squeezed = Conv2D(
            filters=squeeze_filters, kernel_size=one, padding='valid',
            name=f'{prefix}_squeeze_1x1', **shared)(fire_input)
        branch_1x1 = Conv2D(
            filters=expand_filters, kernel_size=one, padding='valid',
            name=f'{prefix}_expand_1x1', **shared)(squeezed)
        # 'same' padding keeps the 3x3 branch the same spatial size as the
        # 1x1 branch so the two can be concatenated.
        branch_3x3 = Conv2D(
            filters=expand_filters, kernel_size=three, padding='same',
            name=f'{prefix}_expand_3x3', **shared)(squeezed)

        return Concatenate(name=prefix)([branch_1x1, branch_3x3])
class SqueezeNet(object):
    """Factory for SqueezeNet-style Keras models assembled from ``FireModule`` blocks.

    Two variants are available: a plain feed-forward stack
    (``vanilla_model``) and one with element-wise ``Add`` skip connections
    around fire modules 3, 5, 7 and 9 (``bypass_model``). The most recently
    built model is stored in ``self.model``.
    """

    def __init__(self, activation: str='relu', kernel_initializer: str='glorot_uniform') -> None:
        """Store the activation and kernel initializer used by every conv layer."""
        self.activation = activation
        self.kernel_initializer = kernel_initializer

    def _fire(self, layer_number: int, x):
        """Append the fire module with index ``layer_number`` (named ``fire_<n+2>``) to ``x``."""
        return FireModule(
            layer_number=layer_number,
            activation=self.activation,
            kernel_initializer=self.kernel_initializer,
        ).build_module(x)

    def _stem(self, input_shape: tuple):
        """Create the input, the 7x7 stride-2 ``Conv_1`` and ``MaxPool_1``.

        Returns the input tensor and the pooled feature map.
        """
        inp = Input(shape=input_shape, name='Input')
        conv_1 = Conv2D(
            name="Conv_1", filters=96, kernel_size=seven, strides=2, padding='same',
            activation=self.activation, kernel_initializer=self.kernel_initializer)(inp)
        maxpool_1 = MaxPool2D(name="MaxPool_1", pool_size=three, strides=2)(conv_1)
        return inp, maxpool_1

    def _head(self, features, n_classes: int):
        """Dropout -> 1x1 conv with one filter per class -> global average pool -> softmax."""
        dropout = Dropout(0.5, name="Dropout")(features)
        # One output channel per class; this was hard-coded to 10 before, so
        # the n_classes argument had no effect.
        conv_10 = Conv2D(
            name="Conv_10", filters=n_classes, kernel_size=one, strides=1, padding='valid',
            activation=self.activation, kernel_initializer=self.kernel_initializer)(dropout)
        gap_11 = GlobalAveragePooling2D()(conv_10)
        return Activation('softmax', name='softmax')(gap_11)

    def vanilla_model(self, input_shape: tuple=(224, 224, 3), n_classes: int=1000) -> None:
        """Build the plain variant (no skip connections) and store it in ``self.model``.

        Args:
            input_shape: Shape of one input image, passed to ``Input``.
            n_classes: Number of output classes (filters of ``Conv_10``).
        """
        inp, maxpool_1 = self._stem(input_shape)

        fire_2 = self._fire(0, maxpool_1)
        fire_3 = self._fire(1, fire_2)
        fire_4 = self._fire(2, fire_3)

        maxpool_2 = MaxPool2D(name="MaxPool_2", pool_size=three, strides=2)(fire_4)

        fire_5 = self._fire(3, maxpool_2)
        fire_6 = self._fire(4, fire_5)
        fire_7 = self._fire(5, fire_6)
        fire_8 = self._fire(6, fire_7)

        maxpool_3 = MaxPool2D(name="MaxPool_3", pool_size=three, strides=2)(fire_8)

        fire_9 = self._fire(7, maxpool_3)

        out = self._head(fire_9, n_classes)
        self.model = Model(inputs=inp, outputs=out)

    def bypass_model(self, input_shape: tuple=(224, 224, 3), n_classes: int=1000) -> None:
        """Build the variant with ``Add`` skip connections and store it in ``self.model``.

        Each bypass adds a fire module's input to its output; this is only
        done where both have the same number of channels (fire 3, 5, 7, 9).

        Args:
            input_shape: Shape of one input image, passed to ``Input``.
            n_classes: Number of output classes (filters of ``Conv_10``).
        """
        inp, maxpool_1 = self._stem(input_shape)

        fire_2 = self._fire(0, maxpool_1)
        fire_3 = self._fire(1, fire_2)
        bypass_1 = Add(name="Bypass_1")([fire_2, fire_3])
        fire_4 = self._fire(2, bypass_1)

        maxpool_2 = MaxPool2D(name="MaxPool_2", pool_size=three, strides=2)(fire_4)

        fire_5 = self._fire(3, maxpool_2)
        bypass_2 = Add(name="Bypass_2")([maxpool_2, fire_5])
        fire_6 = self._fire(4, bypass_2)
        fire_7 = self._fire(5, fire_6)
        bypass_3 = Add(name="Bypass_3")([fire_6, fire_7])
        fire_8 = self._fire(6, bypass_3)

        maxpool_3 = MaxPool2D(name="MaxPool_3", pool_size=three, strides=2)(fire_8)

        fire_9 = self._fire(7, maxpool_3)
        bypass_4 = Add(name="Bypass_4")([maxpool_3, fire_9])

        out = self._head(bypass_4, n_classes)
        self.model = Model(inputs=inp, outputs=out)

    def build_model(self, input_shape: tuple=(224, 224, 3), n_classes: int=1000, choice: str='vanilla') -> Model:
        """Build and return the requested variant.

        Args:
            input_shape: Shape of one input image.
            n_classes: Number of output classes.
            choice: ``"vanilla"`` selects the plain network; any other value
                selects the bypass network.

        Returns:
            The freshly built Keras ``Model`` (also kept in ``self.model``).
        """
        if choice == "vanilla":
            self.vanilla_model(input_shape, n_classes)
        else:
            self.bypass_model(input_shape, n_classes)

        return self.model

We set training parameters

In [None]:
# Initial learning rate; the scheduler defined below scales it down each epoch.
base_lr=0.0003
batch_size=64
# Floor division: a final partial batch is not counted as a step.
steps_per_epoch = train_generator.samples // batch_size
total_epochs = 60

def poly_lr_scheduler(epoch, lr):
    """Return the learning rate for ``epoch`` under a linear decay schedule.

    The rate starts at the module-level ``base_lr`` for epoch 0 and shrinks
    in proportion to the fraction of ``total_epochs`` still remaining. The
    incoming ``lr`` argument is ignored; it is only present because the
    function is used as a ``LearningRateScheduler`` callback.
    """
    remaining_fraction = 1.0 - (epoch / total_epochs)
    return base_lr * remaining_fraction

# Apply the decay schedule at the start of every epoch.
lr_callback = LearningRateScheduler(poly_lr_scheduler)
# Write TensorBoard event files under ./logs.
tensorboard_callback = TensorBoard(log_dir='./logs')
# Save a checkpoint after every epoch; {epoch:02d} is filled in with the
# 1-based epoch number (e.g. SqueezeNet_epoch_01.h5).
checkpoint_callback = ModelCheckpoint(
    filepath="SqueezeNet_epoch_{epoch:02d}.h5",
    save_freq='epoch',
    verbose=1
)





We train the model

In [None]:
# Build the plain (no-bypass) network for the 10-class dataset and compile it
# with Adam starting at base_lr; the scheduler callback overrides the rate
# every epoch.
sqnet = SqueezeNet()
model_sq = sqnet.build_model(choice="vanilla", n_classes=10, input_shape=input_shape)
model_sq.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(base_lr),
)

# Train, validating on the shuffled validation generator after each epoch.
# Both step counts use floor division, so a final partial batch is not
# counted as a step.
sq_history = model_sq.fit(
    train_generator,
    validation_data=val_generator,
    validation_steps=val_generator.samples // batch_size,
    steps_per_epoch=steps_per_epoch,
    epochs=total_epochs,
    callbacks=[lr_callback, checkpoint_callback, tensorboard_callback],
    verbose=1,
)

Epoch 1/60
Epoch 1: saving model to SqueezeNet_epoch_01.h5
Epoch 2/60
Epoch 2: saving model to SqueezeNet_epoch_02.h5
Epoch 3/60
Epoch 3: saving model to SqueezeNet_epoch_03.h5
Epoch 4/60
Epoch 4: saving model to SqueezeNet_epoch_04.h5
Epoch 5/60
Epoch 5: saving model to SqueezeNet_epoch_05.h5
Epoch 6/60
Epoch 6: saving model to SqueezeNet_epoch_06.h5
Epoch 7/60
Epoch 7: saving model to SqueezeNet_epoch_07.h5
Epoch 8/60
Epoch 8: saving model to SqueezeNet_epoch_08.h5
Epoch 9/60
Epoch 9: saving model to SqueezeNet_epoch_09.h5
Epoch 10/60
Epoch 10: saving model to SqueezeNet_epoch_10.h5
Epoch 11/60
Epoch 11: saving model to SqueezeNet_epoch_11.h5
Epoch 12/60
Epoch 12: saving model to SqueezeNet_epoch_12.h5
Epoch 13/60
Epoch 13: saving model to SqueezeNet_epoch_13.h5
Epoch 14/60
Epoch 14: saving model to SqueezeNet_epoch_14.h5
Epoch 15/60
Epoch 15: saving model to SqueezeNet_epoch_15.h5
Epoch 16/60
Epoch 16: saving model to SqueezeNet_epoch_16.h5
Epoch 17/60
Epoch 17: saving model to Sque

In [None]:
# Persist only the trained weights (not the architecture) of the final model.
model_sq.save_weights('Squeeze_net.h5')

In [None]:
# Rebuild the same vanilla architecture and restore the saved weights into it.
# The architecture must match the one the weights were saved from.
# NOTE(review): the "bp" in model_bp_load suggests "bypass", but the vanilla
# variant is built here - confirm the name is just a leftover.
sqnet = SqueezeNet()
model_bp_load = sqnet.build_model(input_shape=input_shape, n_classes=10, choice="vanilla")
model_bp_load.load_weights('Squeeze_net.h5')

We verify the accuracy

In [None]:
# Round the step count up so the final partial batch is evaluated too; floor
# division would silently leave the last (samples % batch_size) images out of
# the reported accuracy.
val_steps = math.ceil(val_generator1.samples / batch_size)

# val_generator1 was created with shuffle=False, so predictions come back in
# the same order as val_generator1.classes.
predictions = model_bp_load.predict(val_generator1, steps=val_steps)

true_labels = val_generator1.classes
assert len(predictions) == len(true_labels), "predictions and labels are misaligned"

num_classes = len(val_generator1.class_indices)
true_labels_one_hot = tf.keras.utils.to_categorical(true_labels, num_classes)

top1_metric = tf.keras.metrics.CategoricalAccuracy()
top5_metric = tf.keras.metrics.TopKCategoricalAccuracy(k=5)
top1_metric.update_state(true_labels_one_hot, predictions)
top5_metric.update_state(true_labels_one_hot, predictions)

print("Top-1 Accuracy on validation set:", top1_metric.result().numpy())
print("Top-5 Accuracy on validation set:", top5_metric.result().numpy())

Top-1 Accuracy on validation set: 0.77612704
Top-5 Accuracy on validation set: 0.9707992


In [None]:
# Print the layer-by-layer table (output shapes, parameter counts, connections)
# of the trained model.
model_sq.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Input (InputLayer)             [(None, 227, 227, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv_1 (Conv2D)                (None, 114, 114, 96  14208       ['Input[0][0]']                  
                                )                                                                 
                                                                                                  
 MaxPool_1 (MaxPooling2D)       (None, 56, 56, 96)   0           ['Conv_1[0][0]']                 
                                                                                              