Reference: https://d2l.ai/chapter_convolutional-modern/googlenet.html

<img src="images/inception_block.png">

In [11]:
import tensorflow as tf

# input.shape: (b, w, h, c)
# output.shape: (b, w, h, c1+c2[1]+c3[1]+c4)

class Inception(tf.keras.Model):
    """Inception block: four parallel paths joined along the channel axis.

    Paths (all preserve the spatial size of the input):
      1. 1x1 convolution.
      2. 1x1 convolution followed by a 3x3 convolution.
      3. 1x1 convolution followed by a 5x5 convolution.
      4. 3x3 max-pooling (stride 1) followed by a 1x1 convolution.

    Args:
        c1: number of output channels of path 1.
        c2: pair `(c2[0], c2[1])` -- channels of the 1x1 convolution and of
            the 3x3 convolution on path 2.
        c3: pair `(c3[0], c3[1])` -- channels of the 1x1 convolution and of
            the 5x5 convolution on path 3.
        c4: number of output channels of path 4.

    Input shape:  (b, w, h, c)
    Output shape: (b, w, h, c1 + c2[1] + c3[1] + c4)
    """

    def __init__(self, c1, c2, c3, c4):
        super().__init__()
        # Path 1: a single 1x1 convolution.
        self.p1_1 = tf.keras.layers.Conv2D(c1, 1, activation='relu')
        # Path 2: 1x1 convolution, then 3x3 convolution.
        self.p2_1 = tf.keras.layers.Conv2D(c2[0], 1, activation='relu')
        self.p2_2 = tf.keras.layers.Conv2D(c2[1], 3, padding='same', activation='relu')
        # Path 3: 1x1 convolution, then 5x5 convolution.
        self.p3_1 = tf.keras.layers.Conv2D(c3[0], 1, activation='relu')
        self.p3_2 = tf.keras.layers.Conv2D(c3[1], 5, padding='same', activation='relu')
        # Path 4: 3x3 max-pooling with stride 1, then 1x1 convolution.
        self.p4_1 = tf.keras.layers.MaxPool2D(3, 1, padding='same')
        self.p4_2 = tf.keras.layers.Conv2D(c4, 1, activation='relu')
        # Created once here rather than inside `call`, so a fresh layer object
        # is not instantiated on every forward pass.
        self.concat = tf.keras.layers.Concatenate()

    def call(self, x):                # x.shape: (b, w, h, c)
        p1 = self.p1_1(x)             # p1.shape: (b, w, h, c1)
        p2 = self.p2_2(self.p2_1(x))  # p2_1(x).shape: (b, w, h, c2[0]), p2.shape: (b, w, h, c2[1])
        p3 = self.p3_2(self.p3_1(x))  # p3_1(x).shape: (b, w, h, c3[0]), p3.shape: (b, w, h, c3[1])
        # Max-pooling does not change the channel count, so p4_1(x) keeps `c` channels.
        p4 = self.p4_2(self.p4_1(x))  # p4_1(x).shape: (b, w, h, c), p4.shape: (b, w, h, c4)
        # Concatenate on the last (channel) axis.
        return self.concat([p1, p2, p3, p4])  # output shape: (b, w, h, c1+c2[1]+c3[1]+c4)

<img src="images/googlenet.png">

In [23]:
def b1():
    """Build block 1: a 7x7 stride-2 convolution, then 3x3 stride-2 max-pooling.

    Shapes: (b, w, h, c) -> (b, w/2, h/2, 64) -> (b, w/4, h/4, 64).
    """
    conv = tf.keras.layers.Conv2D(
        64, 7, strides=2, padding='same', activation='relu')             # (b, w/2, h/2, 64)
    pool = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')  # (b, w/4, h/4, 64)
    return tf.keras.models.Sequential([conv, pool])

def b2():
    """Build block 2: 1x1 conv (64), 3x3 conv (192), then 3x3 stride-2 max-pooling.

    Shapes: (b, w, h, c) -> (b, w, h, 64) -> (b, w, h, 192) -> (b, w/2, h/2, 192).
    """
    reduce_conv = tf.keras.layers.Conv2D(64, 1, activation='relu')                   # (b, w, h, 64)
    main_conv = tf.keras.layers.Conv2D(192, 3, padding='same', activation='relu')    # (b, w, h, 192)
    pool = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')         # (b, w/2, h/2, 192)
    return tf.keras.Sequential([reduce_conv, main_conv, pool])

def b3():
    """Build block 3: two Inception blocks, then 3x3 stride-2 max-pooling.

    Shapes: (b, w, h, c) -> (b, w, h, 256) -> (b, w, h, 480) -> (b, w/2, h/2, 480).
    """
    first = Inception(64, (96, 128), (16, 32), 32)       # channels: 64+128+32+32=256
    second = Inception(128, (128, 192), (32, 96), 64)    # channels: 128+192+96+64=480
    pool = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')  # halves w and h
    return tf.keras.models.Sequential([first, second, pool])

def b4():
    """Build block 4: five Inception blocks, then 3x3 stride-2 max-pooling.

    Shapes: (b, w, h, c) -> ... -> (b, w, h, 832) -> (b, w/2, h/2, 832).
    """
    # One (c1, c2, c3, c4) tuple per Inception block, in forward order.
    inception_args = [
        (192, (96, 208), (16, 48), 64),      # channels: 192+208+48+64=512
        (160, (112, 224), (24, 64), 64),     # channels: 160+224+64+64=512
        (128, (128, 256), (24, 64), 64),     # channels: 128+256+64+64=512
        (112, (144, 288), (32, 64), 64),     # channels: 112+288+64+64=528
        (256, (160, 320), (32, 128), 128),   # channels: 256+320+128+128=832
    ]
    stack = [Inception(*args) for args in inception_args]
    stack.append(
        tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same'))  # (b, w/2, h/2, 832)
    return tf.keras.Sequential(stack)

def b5():
    """Build block 5: two Inception blocks, then global average pooling.

    Shapes: (b, w, h, c) -> (b, w, h, 832) -> (b, w, h, 1024) -> (b, 1024).
    """
    first = Inception(256, (160, 320), (32, 128), 128)    # channels: 256+320+128+128=832
    second = Inception(384, (192, 384), (48, 128), 128)   # channels: 384+384+128+128=1024
    # Global average pooling already returns a 2-D (b, 1024) tensor, so no
    # extra Flatten layer is added after it.
    pool = tf.keras.layers.GlobalAvgPool2D()
    return tf.keras.Sequential([first, second, pool])

def net():
    """Build the full GoogLeNet: blocks b1-b5 followed by a 10-unit Dense layer.

    This is deliberately a function rather than a prebuilt model: it is meant
    to be passed to `d2l.train_ch6()`, so that model building/compiling
    happens within `strategy.scope()` and can utilize the available CPU/GPU
    devices.
    """
    blocks = [b1(), b2(), b3(), b4(), b5()]
    head = tf.keras.layers.Dense(10)
    return tf.keras.Sequential([*blocks, head])

In [24]:
# Feed a random batch of five 96x96 three-channel inputs through the network,
# one top-level layer at a time, printing the output shape after each stage.
X = tf.random.uniform(shape=(5, 96, 96, 3))
for stage in net().layers:
    X = stage(X)
    print(type(stage).__name__, 'output shape:\t', X.shape)

Sequential output shape:	 (5, 24, 24, 64)
Sequential output shape:	 (5, 12, 12, 192)
Sequential output shape:	 (5, 6, 6, 480)
Sequential output shape:	 (5, 3, 3, 832)
Sequential output shape:	 (5, 1024)
Dense output shape:	 (5, 10)
