# Summary

- [Introduction](#Introduction)
- [Self-implementation](#Self-implementation)
- [Comparison with Keras Implementation](#Comparison-with-Keras-Implementation)
- [References](#References)

In [1]:
import numpy as np
from keras.layers import Input, Conv2D, ZeroPadding2D, BatchNormalization, Activation, MaxPool2D, GlobalAveragePooling2D, Flatten, Dense, Add
from keras.models import Model
from keras.preprocessing import image
from keras.utils.data_utils import get_file
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions

# Shape of one input image as (height, width, channels).
INPUT_SIZE = (224, 224, 3)
# Download URL of the pretrained ResNet-50 weights file.
# (A stray trailing single quote inside the string literal was removed; it made the URL invalid.)
WEIGHTS_PATH = "https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5"

Using TensorFlow backend.


# Introduction 

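ResNet-50 gets its name from its __50 weighted layers__: the initial 7×7 convolution, 48 convolutions inside the 16 bottleneck residual blocks (3 per block), and the final fully-connected layer. Each of the 16 blocks is wrapped by one skip connection; the 4 projection convolutions placed on shortcuts are not counted among the 50.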

<img src="images/resnet.png" width="1200">


# Self-implementation 

### Identity Block 

<img src="images/resnet_idblock2.png" width="800">

<img src="images/resnet_idblock3.png" width="800">

In [2]:
def identity_block(input_tensor, kernel_size, filters, stage, block):
    """Bottleneck residual block whose shortcut has no convolution.

    The main path is 1x1 conv -> BN -> ReLU -> `kernel_size` conv ('same'
    padding) -> BN -> ReLU -> 1x1 conv -> BN. Its output is added to
    `input_tensor` itself and passed through a final ReLU.

    Args:
        input_tensor: tensor fed to the block and reused unchanged as the
            shortcut; it is added to the output of the last convolution
            (which has `filters[2]` channels).
        kernel_size: kernel size of the middle convolution, forwarded
            unchanged to `Conv2D`.
        filters: sequence of three filter counts, one per convolution of the
            main path, in order.
        stage: stage label, converted with `str` and used in layer names.
        block: block label (a string such as 'b'), used in layer names.

    Returns:
        The output tensor of the block.
    """
    n_reduce, n_middle, n_expand = filters
    channel_axis = 3  # batch normalization is applied along axis 3

    # Layer names look like 'res<stage><block>_branch2a' / 'bn<stage><block>_branch2a'.
    conv_prefix = ''.join(('res', str(stage), block, '_branch'))
    bn_prefix = ''.join(('bn', str(stage), block, '_branch'))

    # First 1x1 convolution.
    main_path = Conv2D(n_reduce, kernel_size=(1, 1), name=conv_prefix + '2a')(input_tensor)
    main_path = BatchNormalization(axis=channel_axis, name=bn_prefix + '2a')(main_path)
    main_path = Activation('relu')(main_path)

    # Middle convolution; 'same' padding keeps the spatial size.
    main_path = Conv2D(n_middle, kernel_size, padding='same', name=conv_prefix + '2b')(main_path)
    main_path = BatchNormalization(axis=channel_axis, name=bn_prefix + '2b')(main_path)
    main_path = Activation('relu')(main_path)

    # Last 1x1 convolution; no ReLU before the addition.
    main_path = Conv2D(n_expand, (1, 1), name=conv_prefix + '2c')(main_path)
    main_path = BatchNormalization(axis=channel_axis, name=bn_prefix + '2c')(main_path)

    # Skip connection: add the untouched input, then apply the final ReLU.
    merged = Add()([main_path, input_tensor])
    return Activation('relu')(merged)

### Convolutional Block 

<img src="images/resnet_convblock.png" width="800">

In [3]:
def convolutional_block(input_tensor, kernel_size, stride, filters, stage, block):
    """Bottleneck residual block with a 1x1 convolution on the shortcut.

    Main path: 1x1 conv (strided) -> BN -> ReLU -> `kernel_size` conv ('same'
    padding) -> BN -> ReLU -> 1x1 conv -> BN.
    Shortcut:  1x1 conv (strided, `filters[2]` filters) -> BN.
    Both paths are added and passed through a final ReLU.

    Args:
        input_tensor: tensor fed to both the main path and the shortcut.
        kernel_size: kernel size of the middle convolution. Forwarded to
            `Conv2D` unchanged, so an int or a tuple is accepted, the same
            way `identity_block` handles it.
        stride: stride of the first main-path convolution and of the shortcut
            convolution. Forwarded to `Conv2D` unchanged (int or tuple).
        filters: sequence of three filter counts, one per convolution of the
            main path, in order; the last one is also used for the shortcut.
        stage: stage label, converted with `str` and used in layer names.
        block: block label (a string such as 'a'), used in layer names.

    Returns:
        The output tensor of the block.
    """
    f1, f2, f3 = filters
    bn_axis = 3  # batch normalization is applied along axis 3

    # Layer names look like 'res<stage><block>_branch2a' / 'bn<stage><block>_branch2a'.
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    # Main path, first 1x1 convolution: this is where the stride is applied.
    x = Conv2D(f1, kernel_size=(1, 1), strides=stride, name=conv_name_base + '2a')(input_tensor)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = Activation('relu')(x)

    # Main path, middle convolution; 'same' padding keeps the spatial size.
    x = Conv2D(f2, kernel_size=kernel_size, padding='same', name=conv_name_base + '2b')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = Activation('relu')(x)

    # Main path, last 1x1 convolution; no ReLU before the addition.
    x = Conv2D(f3, kernel_size=(1, 1), name=conv_name_base + '2c')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

    # Shortcut: same stride and same number of filters as the end of the main
    # path, so that both tensors can be added.
    x_shortcut = Conv2D(f3, kernel_size=(1, 1), strides=stride, name=conv_name_base + '1')(input_tensor)
    x_shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(x_shortcut)

    x = Add()([x, x_shortcut])
    x = Activation('relu')(x)

    return x

### ResNet Model 

In [4]:
def myResNet50(input_shape=INPUT_SIZE, classes=1000):
    """Assemble the ResNet-50 classifier from `convolutional_block` and `identity_block`.

    Layout: zero-padding, 7x7/2 convolution + BN + ReLU, zero-padding, 3x3/2
    max-pooling, then four stages of bottleneck blocks (3, 4, 6 and 3 blocks),
    global average pooling and a softmax dense layer.

    Args:
        input_shape: shape of one input sample, passed to `Input`.
        classes: number of units of the final softmax layer; also used in that
            layer's name ('fc<classes>').

    Returns:
        A `Model` named 'myResNet50'.
    """
    # One row per stage:
    # (stage number, stride of the first block, bottleneck filters, block letters).
    # The first letter of each stage is a convolutional block, the rest are identity blocks.
    stage_specs = (
        (2, 1, [64, 64, 256], 'abc'),
        (3, 2, [128, 128, 512], 'abcd'),
        (4, 2, [256, 256, 1024], 'abcdef'),
        (5, 2, [512, 512, 2048], 'abc'),
    )

    x_input = Input(input_shape, name='input_1')

    # Stage 1: padded 7x7 convolution followed by padded 3x3 max-pooling.
    net = ZeroPadding2D(padding=(3, 3), name='conv1_pad')(x_input)
    net = Conv2D(filters=64, kernel_size=(7, 7), strides=(2, 2), name='conv1')(net)
    net = BatchNormalization(axis=3, name='bn_conv1')(net)
    net = Activation('relu')(net)
    net = ZeroPadding2D(padding=(1, 1), name='pool1_pad')(net)
    net = MaxPool2D(pool_size=(3, 3), strides=(2, 2), name='max_pooling2d_1')(net)

    # Stages 2-5.
    for stage, first_stride, stage_filters, letters in stage_specs:
        net = convolutional_block(net, kernel_size=3, stride=first_stride,
                                  filters=stage_filters, stage=stage, block=letters[0])
        for letter in letters[1:]:
            net = identity_block(net, kernel_size=3, filters=stage_filters,
                                 stage=stage, block=letter)

    # Classification head.
    net = GlobalAveragePooling2D(name='avg_pool')(net)
    net = Dense(units=classes, activation='softmax', name='fc' + str(classes))(net)

    return Model(inputs=x_input, outputs=net, name='myResNet50')

In [5]:
# Build the hand-written network with its default arguments and list its layers.
model = myResNet50(input_shape=INPUT_SIZE, classes=1000)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [6]:
# Fetch the weights file from WEIGHTS_PATH (stored in the 'models' cache
# sub-directory, checked against the given hash) and load it into the model.
weights_path = get_file(
    fname='resnet50_weights_tf_dim_ordering_tf_kernels.h5',
    origin=WEIGHTS_PATH,
    cache_subdir='models',
    file_hash='a7b3fe01876f51b976af0dea6bc144eb',
)
model.load_weights(weights_path)

In [7]:
# Load the sample picture resized to the first two dimensions of INPUT_SIZE,
# convert it to an array and prepend a batch axis.
img = image.load_img('data/cat.jpeg', target_size=INPUT_SIZE[:2])
x = np.expand_dims(image.img_to_array(img), axis=0)

# Score the preprocessed batch and print the three top-ranked labels.
preds = model.predict(preprocess_input(x))
print('Predicted:', decode_predictions(preds, top=3)[0])

Predicted: [('n02123597', 'Siamese_cat', 0.9539053), ('n03742115', 'medicine_chest', 0.017175093), ('n04070727', 'refrigerator', 0.01453087)]


# Comparison with Keras Implementation 

In [8]:
# Reference implementation shipped with Keras; note that this rebinds `model`.
# `weights='imagenet'` is the default of `ResNet50` and is only spelled out here.
model = ResNet50(weights='imagenet')
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [9]:
# Same picture as before, now scored by whatever network `model` currently
# refers to: resize, convert to an array and prepend a batch axis.
img = image.load_img('data/cat.jpeg', target_size=INPUT_SIZE[:2])
x = np.expand_dims(image.img_to_array(img), axis=0)

# Score the preprocessed batch and print the three top-ranked labels.
preds = model.predict(preprocess_input(x))
print('Predicted:', decode_predictions(preds, top=3)[0])

Predicted: [('n02123597', 'Siamese_cat', 0.9539053), ('n03742115', 'medicine_chest', 0.017175093), ('n04070727', 'refrigerator', 0.01453087)]


# References 

- [Original Paper](https://arxiv.org/abs/1512.03385)
- [Keras Implementation](https://github.com/keras-team/keras/blob/master/keras/applications/resnet50.py)