# A custom layer in Keras (that's just more, smaller layers)

In [13]:
from tensorflow import keras
from tensorflow.keras import layers, Sequential


Load the data

In [17]:
from tensorflow.keras.datasets import mnist
# mnist.load_data() returns two (images, labels) pairs: the training split
# and the test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Conv2D expects a trailing channel axis, so turn each 28x28 image into
# 28x28x1. Using -1 lets NumPy infer the number of samples instead of
# hard-coding 60000, and the test images get the same treatment so they can
# be fed to the same models as the training images.
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

### A vanilla ConvNet:

In [None]:
model = Sequential(
    layers=[
        # convolutional layer:
        # convolutional layer:
        # take the average of the "feature maps":
        # include a fully connected layer mapping the pooled features to 10 values:
        # take the softmax (to get probabilities)
    ]
)

# compile the model: configure the optimizer and the loss used for training.
# y_train holds integer class labels (0-9), not one-hot vectors, so the
# sparse variant of categorical cross-entropy is the matching loss.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy()
)
# run one training epoch!
model.fit(x_train, y_train, batch_size=32, epochs=1)

### The residual layer:

In [22]:
class ResidualLayer(keras.layers.Layer):
    """Exercise scaffold for a custom Keras layer built from smaller layers.

    The body is intentionally left unimplemented: the constructor only
    initialises the Keras base class and ``call`` returns ``None``.
    """

    def __init__(self, bottleneck_filters, output_filters, stride=2, **kwargs):
        """Create all the components (sub-layers) of the layer.

        ``bottleneck_filters``, ``output_filters`` and ``stride`` are accepted
        but not used yet; any remaining keyword arguments are forwarded to
        the Keras ``Layer`` base class.
        """
        # TODO(exercise): create the sub-layers here.
        super().__init__(**kwargs)

    def call(self, x):
        """Run the input ``x`` through the layer (placeholder: yields ``None``)."""
        # TODO(exercise): apply the sub-layers to ``x`` and return the result.
        return None


### Using it in our network is now super easy:

In [23]:
model = Sequential(
    layers=[
        # convolutional layer:
        layers.Conv2D(filters=64, kernel_size=3, strides=2, padding='same'),
        # residual layer (its 64 output filters match the 64 channels coming in,
        # which the skip connection needs):
        ResidualLayer(bottleneck_filters=32, output_filters=64, stride=2),
        # take the average of the "feature maps" (one value per filter -> 64 values):
        layers.GlobalAveragePooling2D(),
        # include a fully connected layer to go from 64 to 10 values:
        layers.Dense(units=10),
        # take the softmax (to get probabilities):
        layers.Softmax(),
    ]
)
# y_train holds integer class labels (0-9), not one-hot vectors, so the
# sparse variant of categorical cross-entropy is the matching loss. The model
# already ends in a softmax, so the loss keeps its default from_logits=False.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy()
)
# run one training epoch
model.fit(x_train, y_train, batch_size=32, epochs=1)



KeyboardInterrupt: 

---

# _Solutions_

# A custom layer in Keras (that's just more, smaller layers)

Imports

In [1]:
from tensorflow import keras
from tensorflow.keras import layers, Sequential
from tensorflow.keras.layers import Layer, Conv2D, BatchNormalization, ReLU, AveragePooling2D

Load the data

In [4]:
from tensorflow.keras.datasets import mnist
# mnist.load_data() returns two (images, labels) pairs: the training split
# and the test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Conv2D expects a trailing channel axis, so turn each 28x28 image into
# 28x28x1. Using -1 lets NumPy infer the number of samples instead of
# hard-coding 60000, and the test images get the same treatment so they can
# be fed to the same models as the training images.
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

### A vanilla ConvNet:

In [7]:
model = Sequential(
    layers=[
        # convolutional layer:
        layers.Conv2D(filters=64, kernel_size=3, strides=2, padding='same'),
        # convolutional layer:
        layers.Conv2D(filters=64, kernel_size=3, strides=2, padding='same'),
        # take the average of the "feature maps" (one value per filter -> 64 values):
        layers.GlobalAveragePooling2D(),
        # include a fully connected layer to go from 64 to 10 values:
        layers.Dense(units=10),
        # take the softmax (to get probabilities):
        layers.Softmax(),
    ]
)
# y_train holds integer class labels (0-9), not one-hot vectors, so the
# sparse variant of categorical cross-entropy is the matching loss. The model
# already ends in a softmax, so the loss keeps its default from_logits=False.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy()
)
# run one training epoch
model.fit(x_train, y_train, batch_size=32, epochs=1)



<tensorflow.python.keras.callbacks.History at 0x13be705c0>

### The residual layer:

In [11]:
class ResidualLayer(Layer):
    """Bottleneck residual block assembled from standard Keras layers.

    Main path: 1x1 conv -> BN -> ReLU -> 3x3 conv (strided) -> BN -> ReLU
    -> 1x1 conv -> BN -> ReLU.
    Skip path: average pooling of the input by the same stride.
    The two paths are summed, so the input's channel count has to be
    compatible with ``output_filters`` (there is no projection on the skip
    path).

    Args:
        bottleneck_filters: number of filters in the first two convolutions.
        output_filters: number of filters in the final 1x1 convolution.
        stride: stride of the 3x3 convolution and pool size of the skip path.
        **kwargs: forwarded to the Keras ``Layer`` base class (e.g. ``name``).
    """

    def __init__(self, bottleneck_filters, output_filters, stride=2, **kwargs):
        # Initialise the base class first so that Keras' attribute tracking
        # is set up before any sub-layer is assigned to ``self``.
        super().__init__(**kwargs)

        # Kept only so get_config() can reproduce the constructor arguments.
        self.bottleneck_filters = bottleneck_filters
        self.output_filters = output_filters
        self.stride = stride

        self.conv1 = layers.Conv2D(
            bottleneck_filters,
            kernel_size=1,
            strides=1,
        )
        self.conv2 = layers.Conv2D(
            bottleneck_filters,
            kernel_size=3,
            padding='same',
            strides=stride,
        )
        self.conv3 = layers.Conv2D(
            output_filters,
            kernel_size=1,
            strides=1,
        )

        # One BatchNormalization per convolution: each holds its own scale,
        # offset and moving statistics, so they cannot be shared. They are
        # created once here (not inside call) so the layer owns and trains
        # their weights.
        self.batchnorm1 = BatchNormalization()
        self.batchnorm2 = BatchNormalization()
        self.batchnorm3 = BatchNormalization()

        # ReLU has no weights, so a single instance can be reused.
        self.relu = ReLU()

        # 'same' padding makes the skip path shrink exactly like the strided
        # 'same' convolution above, including for odd spatial sizes; for sizes
        # divisible by the stride the result is unchanged.
        self.downsample = layers.AveragePooling2D(pool_size=stride, padding='same')

    def call(self, x, training=None):
        """Apply the residual block.

        Args:
            x: input feature map (presumably channels-last, the Conv2D
                default layout).
            training: optional flag passed on to the batch-normalisation
                layers to select batch statistics vs. moving averages.

        Returns:
            The sum of the downsampled input and the main-path output.
        """
        y = self.conv1(x)
        y = self.batchnorm1(y, training=training)
        y = self.relu(y)

        y = self.conv2(y)
        y = self.batchnorm2(y, training=training)
        y = self.relu(y)

        y = self.conv3(y)
        y = self.batchnorm3(y, training=training)
        y = self.relu(y)

        return self.downsample(x) + y

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            'bottleneck_filters': self.bottleneck_filters,
            'output_filters': self.output_filters,
            'stride': self.stride,
        })
        return config


### Using it in our network is now super easy:

In [13]:
model = Sequential(
    layers=[
        # convolutional layer:
        layers.Conv2D(filters=64, kernel_size=3, strides=2, padding='same'),
        # residual layer (its 64 output filters match the 64 channels coming in,
        # which the skip connection needs):
        ResidualLayer(bottleneck_filters=32, output_filters=64, stride=2),
        # take the average of the "feature maps" (one value per filter -> 64 values):
        layers.GlobalAveragePooling2D(),
        # include a fully connected layer to go from 64 to 10 values:
        layers.Dense(units=10),
        # take the softmax (to get probabilities):
        layers.Softmax(),
    ]
)
# y_train holds integer class labels (0-9), not one-hot vectors, so the
# sparse variant of categorical cross-entropy is the matching loss. The model
# already ends in a softmax, so the loss keeps its default from_logits=False.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy()
)
# run one training epoch
model.fit(x_train, y_train, batch_size=32, epochs=1)



<tensorflow.python.keras.callbacks.History at 0x13ee37be0>