In [2]:
from tensorflow.python.keras import backend
from tensorflow.python.keras.applications import imagenet_utils
from tensorflow.python.keras.engine import training
from tensorflow.python.keras.layers import VersionAwareLayers
from tensorflow.keras import layers
import tensorflow as tf
from tensorflow.python.keras.utils import data_utils
from tensorflow.python.keras.utils import layer_utils
from tensorflow.python.lib.io import file_io
from tensorflow.python.util.tf_export import keras_export
from tensorflow.keras import datasets
from tensorflow.keras import optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import numpy as np

In [3]:
# Fetch CIFAR-10 through the Keras datasets helper, then cast both image arrays
# to float32 and run them through the ResNet50 input preprocessing.
(x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data()
x_train, x_test = (
    tf.keras.applications.resnet50.preprocess_input(images.astype(np.float32))
    for images in (x_train, x_test)
)

In [99]:
def eval_model(model, x_train, y_train, x_test, y_test):
    """Evaluate `model` on the training split, then on the test split.

    Each result of `model.evaluate` is printed under a short heading.
    Nothing is returned.

    Args:
      model: object exposing `evaluate(x, y)`.
      x_train, y_train: inputs and targets of the training split.
      x_test, y_test: inputs and targets of the test split.
    """
    train_result = model.evaluate(x_train, y_train)
    print('train data\n', train_result, '\n')

    test_result = model.evaluate(x_test, y_test)
    print('test data\n', test_result)

In [67]:
# model.save_weights('resnet.h5')

In [71]:
# Restore previously saved weights into `model` from the 'checkpoints1/' path.
# NOTE(review): no visible cell above this one defines `model`, so this line
# appears to rely on a later cell having been executed first; on a fresh kernel
# run top-to-bottom it would raise NameError — confirm the intended cell order.
# NOTE(review): the ModelCheckpoint callback later in the notebook writes to
# 'checkpoints/', not 'checkpoints1/' — confirm this is the intended directory.
model.load_weights('checkpoints1/')

In [71]:
# Module-level slot for the Keras layers namespace used by block1() below.
# ResNet() rebinds it through `global layers` before any layer is created.
# NOTE(review): this assignment also replaces the `layers` module imported from
# tensorflow.keras in the import cell.
layers = None


def ResNet(stack_fn,
           preact,
           use_bias,
           model_name='resnet',
           include_top=True,
           weights='imagenet',
           input_tensor=None,
           input_shape=None,
           pooling=None,
           classes=1000,
           classifier_activation='softmax',
           **kwargs):
    """Instantiates the ResNet, ResNetV2, and ResNeXt architecture.
    Args:
      stack_fn: a function that returns output tensor for the
        stacked residual blocks.
      preact: whether to use pre-activation or not
        (True for ResNetV2, False for ResNet and ResNeXt).
      use_bias: whether to use biases for convolutional layers or not
        (True for ResNet and ResNetV2, False for ResNeXt).
      model_name: string, model name. Also selects which pre-trained
        weight file is fetched when `weights='imagenet'`.
      include_top: whether to include the fully-connected
        layer at the top of the network.
      weights: one of `None` (random initialization),
        'imagenet' (pre-training on ImageNet),
        or the path to the weights file to be loaded.
      input_tensor: optional Keras tensor
        (i.e. output of `layers.Input()`)
        to use as image input for the model.
      input_shape: optional shape tuple, only to be specified
        if `include_top` is False (otherwise the input shape
        has to be `(224, 224, 3)` (with `channels_last` data format)
        or `(3, 224, 224)` (with `channels_first` data format).
        It should have exactly 3 inputs channels.
      pooling: optional pooling mode for feature extraction
        when `include_top` is `False`.
        - `None` means that the output of the model will be
            the 4D tensor output of the
            last convolutional layer.
        - `avg` means that global average pooling
            will be applied to the output of the
            last convolutional layer, and thus
            the output of the model will be a 2D tensor.
        - `max` means that global max pooling will
            be applied.
      classes: optional number of classes to classify images
        into, only to be specified if `include_top` is True, and
        if no `weights` argument is specified.
      classifier_activation: A `str` or callable. The activation function to use
        on the "top" layer. Ignored unless `include_top=True`. Set
        `classifier_activation=None` to return the logits of the "top" layer.
        When loading pretrained weights, `classifier_activation` can only
        be `None` or `"softmax"`.
      **kwargs: For backwards compatibility only. A `layers` entry, if
        present, is used as the layers namespace instead of
        `VersionAwareLayers()`.
    Returns:
      A `keras.Model` instance.
    Raises:
      ValueError: if `weights='imagenet'` is combined with
        `include_top=True` and `classes != 1000`.
    """
    global layers
    import warnings

    if 'layers' in kwargs:
        layers = kwargs.pop('layers')
    else:
        layers = VersionAwareLayers()

    # The ImageNet classifier head has exactly 1000 outputs; fail early with a
    # clear message instead of a shape mismatch while loading the weights.
    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
                         ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=224,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    x = layers.ZeroPadding2D(
        padding=((3, 3), (3, 3)), name='conv1_pad')(img_input)
    x = layers.Conv2D(64, 7, strides=2, use_bias=use_bias, name='conv1_conv')(x)

    if not preact:
        x = layers.BatchNormalization(
            axis=bn_axis, epsilon=1.001e-5, name='conv1_bn')(x)
        x = layers.Activation('relu', name='conv1_relu')(x)

    x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x)
    x = layers.MaxPooling2D(3, strides=2, name='pool1_pool')(x)

    x = stack_fn(x)

    if preact:
        x = layers.BatchNormalization(
            axis=bn_axis, epsilon=1.001e-5, name='post_bn')(x)
        x = layers.Activation('relu', name='post_relu')(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(classes, activation=classifier_activation,
                         name='predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D(name='max_pool')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=model_name)

    # Load weights. Previously `weights` was only used for input validation,
    # so `weights='imagenet'` silently produced a randomly initialised model.
    pretrained_names = ('resnet50', 'resnet101', 'resnet152',
                        'resnet50v2', 'resnet101v2', 'resnet152v2',
                        'resnext50', 'resnext101')
    if weights == 'imagenet':
        if model_name in pretrained_names:
            top_suffix = '' if include_top else '_notop'
            file_name = (model_name + '_weights_tf_dim_ordering_tf_kernels' +
                         top_suffix + '.h5')
            # No `file_hash` is passed, so the download is not checksum-verified.
            weights_path = data_utils.get_file(
                file_name,
                'https://storage.googleapis.com/tensorflow/'
                'keras-applications/resnet/' + file_name,
                cache_subdir='models')
            model.load_weights(weights_path)
        else:
            # Keep building the model (as before), but say that nothing was loaded.
            warnings.warn(
                'No ImageNet weights are published for model_name=%r; '
                'the model keeps its random initialization.' % (model_name,))
    elif weights is not None:
        model.load_weights(weights)

    return model


def block1(x, filters, kernel_size=3, stride=1, conv_shortcut=True, name=None):
    """A bottleneck residual block (1x1 -> kxk -> 1x1 convolutions plus shortcut).

    Relies on the module-level `layers` namespace, which `ResNet()` binds
    before calling the stack function.

    Args:
      x: input tensor.
      filters: integer, filters of the bottleneck layers; the block outputs
        `4 * filters` channels.
      kernel_size: default 3, kernel size of the middle convolution.
      stride: default 1, stride of the first convolution (and of the
        convolutional shortcut, when used).
      conv_shortcut: default True, use a 1x1 convolution + batch norm on the
        shortcut branch; otherwise the input is added unchanged.
      name: string, block label used as prefix for every layer name.
    Returns:
      Output tensor for the residual block.
    """
    # Follow the active data format, exactly as ResNet() does for its own
    # batch-normalization layers, instead of assuming channels_last.
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    if conv_shortcut:
        shortcut = layers.Conv2D(
            4 * filters, 1, strides=stride, name=name + '_0_conv')(x)
        shortcut = layers.BatchNormalization(
            axis=bn_axis, epsilon=1.001e-5, name=name + '_0_bn')(shortcut)
    else:
        shortcut = x

    # 1x1 convolution reducing to `filters` channels (carries the stride).
    x = layers.Conv2D(filters, 1, strides=stride, name=name + '_1_conv')(x)
    x = layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')(x)
    x = layers.Activation('relu', name=name + '_1_relu')(x)

    # Conv2D 3x3
    x = layers.Conv2D(
        filters, kernel_size, padding='SAME', name=name + '_2_conv')(x)
    x = layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=name + '_2_bn')(x)
    x = layers.Activation('relu', name=name + '_2_relu')(x)

    # 1x1 convolution expanding to 4 * filters channels; no ReLU before the add.
    x = layers.Conv2D(4 * filters, 1, name=name + '_3_conv')(x)
    x = layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=name + '_3_bn')(x)

    x = layers.Add(name=name + '_add')([shortcut, x])
    x = layers.Activation('relu', name=name + '_out')(x)
    return x


def stack1(x, filters, blocks, stride1=2, name=None):
    """Chain `blocks` residual blocks built by `block1`.

    Args:
      x: input tensor.
      filters: integer, filters of the bottleneck layer in a block.
      blocks: integer, number of blocks in the stack.
      stride1: default 2, stride passed to the first block only.
      name: string, stack label; blocks are labelled `<name>_block<i>`.
    Returns:
      Output tensor of the last block.
    """
    # Only the first block receives the stride; it keeps block1's default
    # convolutional shortcut.
    out = block1(x, filters, stride=stride1, name=name + '_block1')

    # Remaining blocks add their input unchanged on the shortcut branch.
    block_index = 2
    while block_index <= blocks:
        out = block1(out, filters, conv_shortcut=False,
                     name='{}_block{}'.format(name, block_index))
        block_index += 1
    return out


def ResNet50(include_top=True,
             weights='imagenet',
             input_tensor=None,
             input_shape=None,
             pooling=None,
             classes=1000,
             **kwargs):
    """Builds a ResNet50 by handing a four-stage stack function to `ResNet`.

    All arguments are forwarded unchanged to `ResNet`, which is called with
    `preact=False`, `use_bias=True` and `model_name='resnet50'`.
    """

    def stack_fn(x):
        # conv2 keeps the resolution (stride1=1); later stages use stack1's default.
        x = stack1(x, 64, 3, stride1=1, name='conv2')
        for width, depth, label in ((128, 4, 'conv3'),
                                    (256, 6, 'conv4'),
                                    (512, 3, 'conv5')):
            x = stack1(x, width, depth, name=label)
        return x

    return ResNet(
        stack_fn,
        preact=False,
        use_bias=True,
        model_name='resnet50',
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        **kwargs)



In [97]:
# Resets the module-level layers namespace; ResNet() in this cell rebinds it
# through `global layers` before block1() creates any layer.
# NOTE(review): this cell redefines ResNet, block1, stack1 and ResNet50 from the
# previous cell; whichever cell ran last provides the active definitions.
layers = None


def ResNet(stack_fn,
           preact,
           use_bias,
           model_name='resnet',
           include_top=True,
           weights='imagenet',
           input_tensor=None,
           input_shape=None,
           pooling=None,
           classes=1000,
           classifier_activation='softmax',
           **kwargs):
    """Builds a headless (feature-extractor) ResNet backbone.

    This is a trimmed variant of the Keras `ResNet` builder: it always returns
    the output of `stack_fn` with no pooling or classification layer, and it
    assumes `channels_last` data (`bn_axis = 3`).

    Args:
      stack_fn: a function that returns output tensor for the
        stacked residual blocks.
      preact: accepted for signature compatibility; not used here (the
        post-`conv1` batch norm and ReLU are always applied).
      use_bias: whether the first convolution (`conv1_conv`) uses a bias.
      model_name: string, model name. Also selects which pre-trained
        weight file is fetched when `weights='imagenet'`.
      include_top: accepted for signature compatibility; not used here,
        no top is ever built.
      weights: one of `None` (random initialization),
        'imagenet' (pre-training on ImageNet, "no top" weight file),
        or the path to the weights file to be loaded.
      input_tensor: accepted for signature compatibility; not used here.
      input_shape: shape tuple passed straight to `layers.Input`; it is not
        validated.
      pooling: accepted for signature compatibility; not used here.
      classes: accepted for signature compatibility; not used here.
      classifier_activation: accepted for signature compatibility; not
        used here.
      **kwargs: For backwards compatibility only. A `layers` entry, if
        present, is used as the layers namespace instead of
        `VersionAwareLayers()`.
    Returns:
      A `keras.Model` instance mapping the image input to the output of
      `stack_fn`.
    """
    global layers
    import warnings

    if 'layers' in kwargs:
        layers = kwargs.pop('layers')
    else:
        layers = VersionAwareLayers()

    img_input = layers.Input(shape=input_shape)

    bn_axis = 3

    x = layers.ZeroPadding2D(
        padding=((3, 3), (3, 3)), name='conv1_pad')(img_input)
    x = layers.Conv2D(64, 7, strides=2, use_bias=use_bias, name='conv1_conv')(x)

    x = layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name='conv1_bn')(x)
    x = layers.Activation('relu', name='conv1_relu')(x)

    x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x)
    x = layers.MaxPooling2D(3, strides=2, name='pool1_pool')(x)

    x = stack_fn(x)

    inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=model_name)

    # Load weights. Previously `weights` was never read, so
    # `weights='imagenet'` silently produced a randomly initialised model.
    # Only the non-pre-activation variants are listed because `preact` is
    # not honoured by this builder.
    pretrained_names = ('resnet50', 'resnet101', 'resnet152')
    if weights == 'imagenet':
        if model_name in pretrained_names:
            # This builder never creates a top, so always use the "notop" file.
            file_name = (model_name +
                         '_weights_tf_dim_ordering_tf_kernels_notop.h5')
            # No `file_hash` is passed, so the download is not checksum-verified.
            weights_path = data_utils.get_file(
                file_name,
                'https://storage.googleapis.com/tensorflow/'
                'keras-applications/resnet/' + file_name,
                cache_subdir='models')
            model.load_weights(weights_path)
        else:
            # Keep building the model (as before), but say that nothing was loaded.
            warnings.warn(
                'No ImageNet weights are available for model_name=%r in this '
                'builder; the model keeps its random initialization.'
                % (model_name,))
    elif weights is not None:
        model.load_weights(weights)

    return model


def block1(x, filters, kernel_size=3, stride=1, conv_shortcut=True, name=None):
    """Bottleneck residual block: 1x1, kxk and 1x1 convolutions plus a shortcut.

    Uses the module-level `layers` namespace. Batch normalization is applied
    on axis 3.

    Args:
      x: input tensor.
      filters: integer, filters of the first two convolutions; the block
        outputs `4 * filters` channels.
      kernel_size: default 3, kernel size of the middle convolution.
      stride: default 1, stride of the first convolution and of the
        convolutional shortcut.
      conv_shortcut: default True, project the shortcut with a 1x1
        convolution and batch norm; otherwise add the input unchanged.
      name: string, prefix for every layer name in the block.
    Returns:
      Output tensor of the block.
    """
    channel_axis = 3
    bn_epsilon = 1.001e-5

    def batch_norm(suffix, tensor):
        return layers.BatchNormalization(
            axis=channel_axis, epsilon=bn_epsilon, name=name + suffix)(tensor)

    def relu(suffix, tensor):
        return layers.Activation('relu', name=name + suffix)(tensor)

    # Shortcut branch.
    residual = x
    if conv_shortcut:
        projected = layers.Conv2D(
            4 * filters, 1, strides=stride, name=name + '_0_conv')(x)
        residual = batch_norm('_0_bn', projected)

    # Main branch, stage 1: strided 1x1 convolution.
    out = layers.Conv2D(filters, 1, strides=stride, name=name + '_1_conv')(x)
    out = relu('_1_relu', batch_norm('_1_bn', out))

    # Main branch, stage 2: kxk convolution with 'SAME' padding.
    out = layers.Conv2D(
        filters, kernel_size, padding='SAME', name=name + '_2_conv')(out)
    out = relu('_2_relu', batch_norm('_2_bn', out))

    # Main branch, stage 3: 1x1 expansion, normalised but not yet activated.
    out = layers.Conv2D(4 * filters, 1, name=name + '_3_conv')(out)
    out = batch_norm('_3_bn', out)

    merged = layers.Add(name=name + '_add')([residual, out])
    return relu('_out', merged)


def stack1(x, filters, blocks, stride1=2, name=None):
    """Stacks `blocks` residual blocks produced by `block1`.

    Args:
      x: input tensor.
      filters: integer, filters of the bottleneck layer in a block.
      blocks: integer, how many blocks to chain.
      stride1: default 2, stride handed to the first block.
      name: string, stack label; each block is named `<name>_block<i>`.
    Returns:
      Output tensor for the stacked blocks.
    """
    # First block: carries the stride and uses block1's default shortcut.
    tensor = block1(x, filters, stride=stride1, name=name + '_block1')

    # Blocks 2..blocks: identity shortcut, default stride.
    for suffix in ('_block%d' % number for number in range(2, blocks + 1)):
        tensor = block1(tensor, filters, conv_shortcut=False, name=name + suffix)
    return tensor


def ResNet50(include_top=True,
             weights='imagenet',
             input_tensor=None,
             input_shape=None,
             pooling=None,
             classes=1000,
             **kwargs):
    """Creates a ResNet50 by passing the four-stage stack layout to `ResNet`.

    Every argument is forwarded as-is; `ResNet` is invoked with
    `preact=False`, `use_bias=True` and `model_name='resnet50'`.
    """
    # (filters, number of blocks, stack label, extra stack1 keyword arguments)
    stage_layout = (
        (64, 3, 'conv2', {'stride1': 1}),
        (128, 4, 'conv3', {}),
        (256, 6, 'conv4', {}),
        (512, 3, 'conv5', {}),
    )

    def stack_fn(x):
        for width, depth, label, extra in stage_layout:
            x = stack1(x, width, depth, name=label, **extra)
        return x

    builder_args = (stack_fn, False, True, 'resnet50', include_top, weights,
                    input_tensor, input_shape, pooling, classes)
    return ResNet(*builder_args, **kwargs)



<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x28eb75f8e20>

In [97]:
def resnet_conv_block(input_shape, filters, conv_shortcut, name, model_name, kernel_size=3, stride=1, DW=False):
    """Wrap one bottleneck residual block in its own `tf.keras.Model`.

    Args:
      input_shape: shape of the block input, without the batch dimension.
      filters: integer, filters of the bottleneck layers; the block outputs
        `4 * filters` channels.
      conv_shortcut: if true, the shortcut branch is a 1x1 convolution plus
        batch norm; otherwise the block input is added unchanged.
      name: string, prefix for every layer name inside the block.
      model_name: string, name given to the returned model.
      kernel_size: default 3, kernel size of the middle convolution.
      stride: default 1, stride of the first convolution and of the
        convolutional shortcut.
      DW: default False. When true, the middle convolution is a depthwise
        separable convolution (`SeparableConv2D`) instead of a regular one.
        Previously this branch was a no-op, so the middle stage was
        silently dropped.
    Returns:
      A `tf.keras.Model` mapping the block input to the block output.
    """
    # Use the injected module-level namespace when a builder has set it;
    # otherwise fall back to tf.keras.layers so the function also works when
    # called on its own (the global may still be None).
    keras_layers = layers if layers is not None else tf.keras.layers

    inputs = tf.keras.layers.Input(input_shape)
    bn_axis = 3

    if conv_shortcut:
        shortcut = keras_layers.Conv2D(
            4 * filters, 1, strides=stride, name=name + '_0_conv')(inputs)
        shortcut = keras_layers.BatchNormalization(
            axis=bn_axis, epsilon=1.001e-5, name=name + '_0_bn')(shortcut)
    else:
        shortcut = inputs

    x = keras_layers.Conv2D(
        filters, 1, strides=stride, name=name + '_1_conv')(inputs)
    x = keras_layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')(x)
    x = keras_layers.Activation('relu', name=name + '_1_relu')(x)

    # Middle kxk stage.
    if not DW:
        # Regular convolution.
        x = keras_layers.Conv2D(
            filters, kernel_size, padding='SAME', name=name + '_2_conv')(x)
    else:
        # Depthwise separable convolution. It gets a distinct layer name so its
        # differently shaped weights are never matched by name against a
        # regular '_2_conv' layer.
        x = keras_layers.SeparableConv2D(
            filters, kernel_size, padding='SAME', name=name + '_2_sepconv')(x)
    x = keras_layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=name + '_2_bn')(x)
    x = keras_layers.Activation('relu', name=name + '_2_relu')(x)

    x = keras_layers.Conv2D(4 * filters, 1, name=name + '_3_conv')(x)
    x = keras_layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=name + '_3_bn')(x)

    x = keras_layers.Add(name=name + '_add')([shortcut, x])
    x = keras_layers.Activation('relu', name=name + '_out')(x)

    return tf.keras.Model(inputs=inputs, outputs=x, name=model_name)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x28eb75f8e20>

In [55]:
def define_modular_resnet50():
    """Build a CIFAR-10 ResNet50 classifier whose residual blocks are sub-models.

    The 32x32x3 input is upsampled 7x to 224x224, passed through the ResNet
    stem, then through 3 + 4 + 6 + 3 bottleneck blocks, each created by
    `resnet_conv_block` as a separate `tf.keras.Model`, and finally through
    global average pooling and a 1024-512-10 dense head.

    Stack layout follows `ResNet50`/`stack1` in this notebook: the first block
    of conv3, conv4 and conv5 uses stride 2 (conv2 uses stride 1), and block
    layers are named `<stack>_block<i>_...`.

    Returns:
      A tuple `(model, blocks)` where `model` is the end-to-end
      `tf.keras.Model` and `blocks` maps `'model_<stack>_<i>'`
      (e.g. `'model_conv2_1'`) to the corresponding block sub-model.
    """
    global layers
    # resnet_conv_block reads the module-level layers namespace.
    layers = VersionAwareLayers()

    inputs = tf.keras.layers.Input(shape=(32, 32, 3))
    resized = tf.keras.layers.UpSampling2D(size=(7, 7))(inputs)

    use_bias = True
    bn_axis = 3

    # Stem: 7x7/2 convolution, batch norm, ReLU, 3x3/2 max pooling.
    x = layers.ZeroPadding2D(
        padding=((3, 3), (3, 3)), name='conv1_pad')(resized)
    x = layers.Conv2D(64, 7, strides=2, use_bias=use_bias, name='conv1_conv')(x)

    x = layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name='conv1_bn')(x)
    x = layers.Activation('relu', name='conv1_relu')(x)

    x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x)
    x = layers.MaxPooling2D(3, strides=2, name='pool1_pool')(x)

    # (stack label, bottleneck filters, number of blocks, stride of first block)
    stack_specs = (
        ('conv2', 64, 3, 1),
        ('conv3', 128, 4, 2),
        ('conv4', 256, 6, 2),
        ('conv5', 512, 3, 2),
    )

    block_models = {}
    for stack_label, filters, num_blocks, first_stride in stack_specs:
        for index in range(1, num_blocks + 1):
            is_first = index == 1
            block = resnet_conv_block(
                input_shape=x.shape[1:],
                filters=filters,
                # Only the first block of a stack changes the channel count
                # (and possibly the resolution), so only it needs a projection.
                conv_shortcut=is_first,
                name='{}_block{}'.format(stack_label, index),
                model_name='block_{}_{}'.format(stack_label, index),
                stride=first_stride if is_first else 1,
                DW=False)
            x = block(x)
            block_models['model_{}_{}'.format(stack_label, index)] = block

    # Classification head for the 10 CIFAR-10 classes.
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(1024, activation="relu")(x)
    x = tf.keras.layers.Dense(512, activation="relu")(x)
    x = tf.keras.layers.Dense(10, activation="softmax", name="classification")(x)
    model = tf.keras.Model(inputs=inputs, outputs=x)

    return model, block_models


# Build the modular network: `model` is the end-to-end classifier and
# `models_dict` holds the per-block sub-models keyed 'model_conv<stack>_<block>'.
model,models_dict = define_modular_resnet50()
# Print the layer table of the end-to-end model.
model.summary()

Model: "model_22"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_185 (InputLayer)       [(None, 32, 32, 3)]       0         
_________________________________________________________________
up_sampling2d_30 (UpSampling (None, 224, 224, 3)       0         
_________________________________________________________________
conv1_pad (ZeroPadding2D)    (None, 230, 230, 3)       0         
_________________________________________________________________
conv1_conv (Conv2D)          (None, 112, 112, 64)      9472      
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 64)      256       
_________________________________________________________________
conv1_relu (Activation)      (None, 112, 112, 64)      0         
_________________________________________________________________
pool1_pad (ZeroPadding2D)    (None, 114, 114, 64)      0  

In [56]:
# Write the weights of the last residual block sub-model to 'model_conv5_3.h5'.
models_dict['model_conv5_3'].save_weights('model_conv5_3.h5')

In [30]:
def define_resnet50():
    """Assemble a CIFAR-10 classifier on top of the notebook's `ResNet50`.

    The 32x32x3 input is upsampled by 7 to 224x224, fed to `ResNet50`
    (called with `include_top=False`, `weights='imagenet'`), then pooled and
    passed through a 1024-512-10 dense head ending in softmax.

    Returns:
      The assembled `tf.keras.Model`.
    """
    image_in = tf.keras.layers.Input(shape=(32, 32, 3))
    upsampled = tf.keras.layers.UpSampling2D(size=(7, 7))(image_in)

    backbone = ResNet50(input_shape=(224, 224, 3),
                        include_top=False,
                        weights='imagenet')
    head = backbone(upsampled)

    head = tf.keras.layers.GlobalAveragePooling2D()(head)
    head = tf.keras.layers.Flatten()(head)
    for units in (1024, 512):
        head = tf.keras.layers.Dense(units, activation="relu")(head)
    probabilities = tf.keras.layers.Dense(
        10, activation="softmax", name="classification")(head)

    return tf.keras.Model(inputs=image_in, outputs=probabilities)


# Rebinds `model` to the single-backbone classifier; any model previously bound
# to this name (e.g. the modular one built above) is no longer reachable through it.
model = define_resnet50()
# Print the layer table of the new model.
model.summary()

Model: "model_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_70 (InputLayer)        [(None, 32, 32, 3)]       0         
_________________________________________________________________
up_sampling2d_19 (UpSampling (None, 224, 224, 3)       0         
_________________________________________________________________
conv1_pad (ZeroPadding2D)    (None, 230, 230, 3)       0         
_________________________________________________________________
conv1_conv (Conv2D)          (None, 112, 112, 64)      9472      
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 64)      256       
_________________________________________________________________
conv1_relu (Activation)      (None, 112, 112, 64)      0         
_________________________________________________________________
pool1_pad (ZeroPadding2D)    (None, 114, 114, 64)      0  

In [57]:
# Checkpoint callback: writes weights only, under 'checkpoints/', and only when
# the monitored 'val_accuracy' reaches a new maximum.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='checkpoints/',
    monitor='val_accuracy', mode='max',
    save_best_only=True, save_weights_only=True)

In [58]:
# Optimizer in use: Adam with AMSGrad. The two alternatives below are kept
# commented out for reference.
# optim = optimizers.RMSprop(centered=True, learning_rate=0.0001)
# optim=optimizers.SGD(learning_rate=0.0001,nesterov=True)
optim = optimizers.Adam(amsgrad=True, learning_rate=0.001)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optim,
    metrics=['accuracy'],
)

In [None]:
# Train for 100 epochs in batches of 100, holding out 10% of the training data
# for validation; the checkpoint callback is invoked during training.
model.fit(
    x_train,
    y_train,
    epochs=100,
    batch_size=100,
    shuffle=True,
    validation_split=0.1,
    callbacks=[model_checkpoint_callback],
)

Epoch 1/100

In [7]:
# Print the evaluation results of the current `model` on the train and test splits.
eval_model(model, x_train, y_train, x_test, y_test)