# Understanding Sequential, Functional and Model Subclassing
This notebook is for training and understanding purposes only. All algorithms and credits go to pyimagesearch.com, specifically https://www.pyimagesearch.com/2019/10/28/3-ways-to-create-a-keras-model-with-tensorflow-2-0-sequential-functional-and-model-subclassing/ and Adrian Rosebrock (a wonderful source of inspiration for Computer Vision and Deep Learning).

As this notebook is for training and understanding purposes, the code is typed out by hand rather than downloaded right away, in order to build "muscle memory". Author-readable comments will appear from time to time.

With Keras, there are three ways to build a model. They are:
<br>
1. **Sequential** is the simplest method to build a model - as its name suggests, you add layer after layer. However, it lacks the ability to build complex models (e.g. sharing the output of a layer, concatenating two layers, multiple inputs, etc.). Ex. LeNet
<br>
2. **Functional** allows exactly such applications - multiple inputs and multiple outputs, complex architectures. Ex. ResNet
<br>
3. **Model Subclassing** allows even more flexibility, specifically a custom forward-pass path (e.g. a forward pass that skips a layer).

In [1]:
# import the necessary packages
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import concatenate
from tensorflow.keras.optimizers import Adam

### Sequential

In [None]:
def shallownet_sequential(width, height, depth, classes):
    """Build a one-convolution classifier with the Keras Sequential API.

    The network is CONV(32 filters, 3x3, "same") => RELU => FLATTEN =>
    DENSE(classes) => SOFTMAX.

    Args:
        width: Input image width in pixels.
        height: Input image height in pixels.
        depth: Number of input channels.
        classes: Number of units in the final Dense (softmax) layer.

    Returns:
        The constructed, uncompiled Sequential model.
    """
    # "channels last" ordering: (rows, cols, channels)
    input_dims = (height, width, depth)

    # The whole stack is handed to Sequential in one go instead of calling
    # .add() layer by layer; the layer order is what defines the network.
    return Sequential([
        # the first (and only) CONV => RELU layer
        Conv2D(32, (3, 3), padding="same", input_shape=input_dims),
        Activation("relu"),
        # softmax classifier
        Flatten(),
        Dense(classes),
        Activation("softmax"),
    ])

### Functional

In [2]:
def minigooglenet_function(width, height, depth, classes):
    """Build the "minigooglenet" model with the Keras Functional API.

    Args:
        width: Input image width in pixels.
        height: Input image height in pixels.
        depth: Number of input channels.
        classes: Number of units in the final Dense (softmax) layer.

    Returns:
        The constructed, uncompiled ``Model`` named "minigooglenet".
    """
    # "channels last" ordering, so the channel axis is the last one; it is
    # the axis used for batch normalization and for concatenating branches.
    channel_axis = -1

    def conv_bn_relu(tensor, filters, kernel, strides, padding="same"):
        # CONV => BN => RELU applied to the previous layer's output.
        # padding="same" keeps the spatial size of the input (at stride 1).
        tensor = Conv2D(filters, kernel, strides=strides, padding=padding)(tensor)
        tensor = BatchNormalization(axis=channel_axis)(tensor)
        return Activation("relu")(tensor)

    def inception(tensor, filters_1x1, filters_3x3):
        # Two parallel branches (1x1 and 3x3 convolutions) over the same
        # input, joined along the channel axis.
        branch_1x1 = conv_bn_relu(tensor, filters_1x1, (1, 1), (1, 1))
        branch_3x3 = conv_bn_relu(tensor, filters_3x3, (3, 3), (1, 1))
        return concatenate([branch_1x1, branch_3x3], axis=channel_axis)

    def downsample(tensor, filters):
        # padding="valid" with a (2, 2) stride shrinks the spatial size; the
        # convolution branch and the max-pool branch are then concatenated.
        conv_branch = conv_bn_relu(tensor, filters, (3, 3), (2, 2), padding="valid")
        pool_branch = MaxPooling2D((3, 3), strides=(2, 2))(tensor)
        return concatenate([conv_branch, pool_branch], axis=channel_axis)

    # The Functional API starts from an explicit Input layer; every later
    # layer is called on the output of the one before it.
    inputs = Input(shape=(height, width, depth))
    net = conv_bn_relu(inputs, 96, (3, 3), (1, 1))

    # stage 1: two inception modules followed by a downsample
    for n_1x1, n_3x3 in ((32, 32), (32, 32)):
        net = inception(net, n_1x1, n_3x3)
    net = downsample(net, 80)

    # stage 2: four inception modules followed by a downsample
    for n_1x1, n_3x3 in ((112, 48), (96, 64), (80, 80), (48, 96)):
        net = inception(net, n_1x1, n_3x3)
    net = downsample(net, 96)

    # stage 3: two inception modules, then average pooling and dropout
    for n_1x1, n_3x3 in ((176, 160), (176, 160)):
        net = inception(net, n_1x1, n_3x3)
    # NOTE(review): the fixed 7x7 pool presumably targets 32x32 inputs
    # (e.g. CIFAR-10) - confirm before using other input sizes.
    net = AveragePooling2D((7, 7))(net)
    net = Dropout(0.5)(net)

    # softmax classifier
    net = Flatten()(net)
    net = Dense(classes)(net)
    outputs = Activation("softmax")(net)

    # tie the input and output tensors together into a model
    return Model(inputs, outputs, name="minigooglenet")
    
    

### Model Subclassing

In [None]:
# use a class and inheritance method to build complex model
class MiniVGGNetModel(Model):
    """MiniVGGNet built by subclassing ``Model``.

    Layers are created once in ``__init__`` and wired together in ``call``,
    which defines the forward pass.

    Args:
        classes: Number of units in the final Dense (softmax) layer.
        chanDim: Axis handed to the BatchNormalization layers that follow the
            convolutions; -1 corresponds to "channels last" ordering.
    """

    def __init__(self, classes, chanDim=-1):
        super().__init__()

        # first (CONV => RELU => BN) * 2 => POOL layer set
        self.conv1A = Conv2D(32, (3, 3), padding="same")
        self.act1A = Activation("relu")
        self.bn1A = BatchNormalization(axis=chanDim)
        self.conv1B = Conv2D(32, (3, 3), padding="same")
        self.act1B = Activation("relu")
        self.bn1B = BatchNormalization(axis=chanDim)
        self.pool1 = MaxPooling2D(pool_size=(2, 2))

        # second (CONV => RELU => BN) * 2 => POOL layer set
        self.conv2A = Conv2D(32, (3, 3), padding="same")
        self.act2A = Activation("relu")
        self.bn2A = BatchNormalization(axis=chanDim)
        self.conv2B = Conv2D(32, (3, 3), padding="same")
        self.act2B = Activation("relu")
        self.bn2B = BatchNormalization(axis=chanDim)
        self.pool2 = MaxPooling2D(pool_size=(2, 2))

        # fully-connected layer set (this BN uses the default axis)
        self.flatten = Flatten()
        self.dense3 = Dense(512)
        self.act3 = Activation("relu")
        self.bn3 = BatchNormalization()
        self.do3 = Dropout(0.5)

        # softmax classifier
        self.dense4 = Dense(classes)
        self.softmax = Activation("softmax")

    def call(self, inputs):
        """Run the forward pass and return the softmax output.

        Each layer is called on the previous layer's output, in the same way
        as with the Functional API.
        """
        # first (CONV => RELU => BN) * 2 => POOL layer set
        x = self.conv1A(inputs)
        x = self.act1A(x)
        x = self.bn1A(x)
        x = self.conv1B(x)
        x = self.act1B(x)
        x = self.bn1B(x)
        x = self.pool1(x)

        # second (CONV => RELU => BN) * 2 => POOL layer set
        x = self.conv2A(x)
        x = self.act2A(x)
        x = self.bn2A(x)
        x = self.conv2B(x)
        x = self.act2B(x)
        x = self.bn2B(x)
        x = self.pool2(x)

        # FC layer set
        x = self.flatten(x)
        x = self.dense3(x)
        x = self.act3(x)
        x = self.bn3(x)
        x = self.do3(x)

        # softmax classifier
        x = self.dense4(x)
        x = self.softmax(x)

        # The return must live inside call(); at class-body indentation it is
        # a SyntaxError and the forward pass would have no output.
        return x

In [None]:
# Training hyper-parameters: number of epochs, learning rate and batch size.
EPOCHS = 20
LR = 1e-2
BS = 64

# Instantiate the model (assume the MNIST dataset: 28x28 images, 1 channel,
# 10 classes).
model = shallownet_sequential(28, 28, 1, 10)

# Introduce an optimizer with no weight decay. The learning rate constant
# defined above is LR, and `learning_rate` is the supported keyword (`lr` is
# the deprecated alias).
opt = Adam(learning_rate=LR)

# Compile everything. The loss is given by its string identifier, so no
# extra import is needed for it.
model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["acc"])

# If building with GradientTape instead, there is no need to call fit(): you
# define the step function for the forward pass yourself. In that step
# function you compute the loss, compute the gradients and apply them, i.e.
#   for each epoch/batch: predictions = model(input_x), loss = ...,
#   gradients = ..., apply gradients = ...
# Validation is then not automatic either and has to be coded by hand.

# NOTE(review): trainX, trainY, testX and testY are not defined in this
# notebook; they are expected to be loaded beforehand. Because the loss is
# categorical cross-entropy, the labels are presumably one-hot encoded -
# confirm against the data-loading code.
model.fit(
    trainX,
    trainY,
    validation_data=(testX, testY),
    epochs=EPOCHS,
    batch_size=BS,
)  # any other custom configuration goes here

# If the data comes from a generator object, pass the generator straight to
# model.fit(); Model.fit_generator is deprecated, and calling it without
# arguments (as this cell used to) raises an error.