## Lenet-5 Example

In [8]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D

In [None]:
# Imagine we start with an image that is:
# (width, height, colour channels) = (256, 256, 3)

# Formula for new size after convolution/pooling:
# size = [(old-size - kernel + 2*padding)/stride]+1

In [12]:
# Build a small LeNet-style CNN layer by layer, working out the output shape
# by hand before each layer is added.
model = Sequential()

# Zero-pad 3 pixels on every side; the channel count is untouched:
# (256+3+3, 256+3+3, 3) = (262, 262, 3)
model.add(ZeroPadding2D((3, 3), input_shape=(256, 256, 3)))

# Each of the 32 filters spans ALL input channels: it holds one 3x3 kernel
# per colour channel (3*3*3 weights + 1 bias per filter). The per-channel
# convolutions are performed separately and then summed to give a single
# feature map, so the output depth equals the number of filters.

# size = (262-3)/1 + 1 = 260, so:
# output_shape = (batchsize, width, height, filters/channels)
# output_shape = batchsize, 260, 260, number-of-filters=32
# output_shape = (None, 260, 260, 32)
model.add(Conv2D(32, (3,3), activation='relu'))

# size = (260-3)/1 + 1 = 258
# output_shape = (None, 258, 258, 64)
model.add(Conv2D(64, (3,3), activation='relu'))

# size = (258-2)/2 + 1 = 129
# output_shape = (None, 129, 129, 64)
# Note that pooling is 2D (spatial only) and not across filters, so the
# depth remains at 64
model.add(MaxPooling2D(pool_size=(2, 2)))

# No change to output shape (randomly zeroes 25% of the activations
# during training)
model.add(Dropout(0.25))

# This doesn't affect batch_size so: 129*129*64=1065024
# output_shape = (None, 1065024)
model.add(Flatten())

# The number of units in a Dense layer goes directly into output_shape:
# output_shape = (None, 128)

model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))

# output_shape = (None, 10)
# A LeNet-style classifier picks exactly one of the 10 classes, so the head
# uses softmax to produce a probability distribution over them. (sigmoid
# scores every class independently and only suits multi-label problems.)
model.add(Dense(10, activation='softmax'))

In [13]:
# Test our reasoning: print every layer's output shape and parameter count
# so they can be compared with the shapes worked out by hand in the comments
# of the model-building cell.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
zero_padding2d_4 (ZeroPaddin (None, 262, 262, 3)       0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 260, 260, 32)      896       
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 258, 258, 64)      18496     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 129, 129, 64)      0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 129, 129, 64)      0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 1065024)           0         
_________________________________________________________________
dense_7 (Dense)              (None, 128)               136323200 
__________

## Resnet-50 example

In [20]:
from keras.applications.resnet50 import ResNet50
# Instantiate ResNet-50 with the 'imagenet' weights for 224x224 RGB inputs.
# NOTE: this rebinds `model`, replacing the Sequential network built earlier.
model = ResNet50(
    input_shape=(224, 224, 3),
    weights='imagenet',
)

In [21]:
# Print the layer-by-layer table (output shape, parameter count, inbound
# connections) for the ResNet-50 model.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_4 (InputLayer)             (None, 224, 224, 3)   0                                            
____________________________________________________________________________________________________
zero_padding2d_8 (ZeroPadding2D) (None, 230, 230, 3)   0           input_4[0][0]                    
____________________________________________________________________________________________________
conv1 (Conv2D)                   (None, 112, 112, 64)  9472        zero_padding2d_8[0][0]           
____________________________________________________________________________________________________
bn_conv1 (BatchNormalization)    (None, 112, 112, 64)  256         conv1[0][0]                      
___________________________________________________________________________________________

In [None]:
# Looking at first chunk

# Image comes in as (224, 224, 3)
# 2D Padding buffers it to (230, 230, 3)
#x = ZeroPadding2D((3, 3))(img_input)

# size = (230-7)/2 + 1 = 112.5, which is rounded down to 112, so:
# output_shape = (batchsize, width, height, filters/channels)
# output_shape = batchsize, 112, 112, number-of-filters=64
# output_shape = (None, 112, 112, 64)
#x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x)

# This doesn't affect dimensions
#x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
#x = Activation('relu')(x)

# size = (112-3)/2 + 1 = 55.5, which is rounded down to 55, so:
# output_shape = (None, 55, 55, 64)
#x = MaxPooling2D((3, 3), strides=(2, 2))(x)

# If you follow this through to end you will see:
#activation_196 (Activation)      (None, 7, 7, 2048)    0           add_64[0][0]                     
#____________________________________________________________________________________________________
#avg_pool (AveragePooling2D)      (None, 1, 1, 2048)    0           activation_196[0][0]             
#____________________________________________________________________________________________________
#flatten_9 (Flatten)              (None, 2048)          0           avg_pool[0][0]     

In [None]:
# BTW, the deal with the non-integers ... imagine:

# 1D kernel = 3
# 1D stride = 2
# 1D input = (a,b,c,d,e,f)

# Formula for new size = (6-3)/2 + 1 = 2.5
# We say this becomes 2 because:

# First convolution: (a,b,c)
# Second convolution: (c,d,e)
# Third convolution: (e,f,none) => the window is incomplete, so chop it off

# So we could say (to be more accurate):
#size = floor([(old-size - kernel + 2*padding)/stride]+1)

```
Taken from here: https://github.com/fchollet/keras/blob/master/keras/applications/resnet50.py

x = ZeroPadding2D((3, 3))(img_input)
x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x)
x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
x = Activation('relu')(x)
x = MaxPooling2D((3, 3), strides=(2, 2))(x)

x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')

x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')

x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

x = AveragePooling2D((7, 7), name='avg_pool')(x)
```