In [8]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Dense, BatchNormalization, Activation, MaxPool2D, GlobalAveragePooling2D, Add, Dropout
from tensorflow.keras import Model
## resnet paper - https://arxiv.org/pdf/1512.03385.pdf
## issue may be images are too small to run convolutions,
## but residual blocks should provide some accuracy bumps

class ResidualBlock(Model):
    def __init__(self, channel_in = 64, channel_out = 256):
        super().__init__()
        
        channel = channel_out // 4
        self.dropout = Dropout(.2)
        self.conv1 = Conv2D(channel, kernel_size = (1, 1), padding = "same")
        self.bn1 = BatchNormalization()
        self.av1 = Activation(tf.nn.relu)
        self.conv2 = Conv2D(channel, kernel_size = (3, 3), padding = "same")
        self.bn2 = BatchNormalization()
        self.av2 = Activation(tf.nn.relu)
        self.conv3 = Conv2D(channel_out, kernel_size = (1, 1), padding = "same")
        self.bn3 = BatchNormalization()
        self.shortcut = self._shortcut(channel_in, channel_out)
        self.add = Add()
        self.av3 = Activation(tf.nn.relu)
        
    def call(self, x):
        h = self.conv1(x)
        h = self.bn1(h)
        h = self.av1(h)
        h = self.conv2(h)
        h = self.bn2(h)
        h = self.av2(h)
        h = self.conv3(h)
        h = self.bn3(h)
        shortcut = self.shortcut(x)
        h = self.add([h, shortcut])
        y = self.av3(h)
        return y
    
    def _shortcut(self, channel_in, channel_out):
        if channel_in == channel_out:
            return lambda x : x
        else:
            return self._projection(channel_out)
        
    def _projection(self, channel_out):
        return Conv2D(channel_out, kernel_size = (1, 1), padding = "same")
           
class LightNet18(Model):
    def __init__(self, input_shape, output_dim):
        super().__init__()                
        self._layers = [
            # conv1
            Conv2D(64, input_shape = input_shape, kernel_size = (7, 7), strides=(2, 2), padding = "same"),
            BatchNormalization(),
            Activation(tf.nn.relu),
            # conv2_x
            MaxPool2D(pool_size = (3, 3), strides = (2, 2), padding = "same"),
            ResidualBlock(64, 64),
            # conv3_x
            Conv2D(128, kernel_size = (1, 1), strides=(2, 2)),
            ResidualBlock(128, 128),
            # conv4_x
            Conv2D(256, kernel_size = (1, 1), strides=(2, 2)),
            ResidualBlock(256, 256),
            # conv5_x
            Conv2D(512, kernel_size = (1, 1), strides=(2, 2)),
            ResidualBlock(512, 512),
            # last part
            GlobalAveragePooling2D(),
            Dense(1000, activation = tf.nn.relu),
            Dense(output_dim, activation = tf.nn.softmax)
        ]
        
    def call(self, x):
        for layer in self._layers:
            if isinstance(layer, list):
                for l in layer:
                    x = l(x)    
            else:
                x = layer(x)
        return x
       
    
model = LightNet18((28, 28, 1), 10)
model.build(input_shape = (None, 28, 28, 1))

In [9]:
model.summary()

Model: "res_net18_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_48 (Conv2D)           multiple                  3200      
_________________________________________________________________
batch_normalization_39 (Batc multiple                  256       
_________________________________________________________________
activation_39 (Activation)   multiple                  0         
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 multiple                  0         
_________________________________________________________________
residual_block_12 (ResidualB multiple                  4832      
_________________________________________________________________
conv2d_52 (Conv2D)           multiple                  8320      
_________________________________________________________________
residual_block_13 (ResidualB multiple                  

In [10]:
import os
checkpoint_path = "lightnet18/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=4, min_lr=0.001)

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = tf.expand_dims(x_train, axis=3)
x_test = tf.expand_dims(x_test, axis=3)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, validation_data=(x_test, y_test),
          callbacks=[cp_callback, reduce_lr], epochs=20)

Epoch 1/20


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

  64/1875 [>.............................] - ETA: 3:07 - loss: 1.1743 - accuracy: 0.5820


KeyboardInterrupt



In [7]:
print(model.evaluate(x_test, y_test))



[0.324590802192688, 0.8848999738693237]