# Exercises for Lecture 15 (Deep CNN architectures)

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from functools import partial

# To make this notebook's output stable across runs
def reset_state(seed=42):
    """Clear the Keras session and re-seed the NumPy and TensorFlow RNGs.

    Args:
        seed: Integer passed to both ``np.random.seed`` and
            ``tf.random.set_seed``.
    """
    # Drop the Keras global state first, then seed both generators.
    tf.keras.backend.clear_session()
    np.random.seed(seed)
    tf.random.set_seed(seed)

2024-01-10 00:30:31.233273: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-10 00:30:31.284336: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-10 00:30:31.285454: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.




## Exercise 1: Build a ResNet CNN architecture for Fashion MNIST.

Load and set up data.

In [2]:
# Fetch Fashion MNIST and hold out the last 30000 training samples for validation.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_train, y_train = X_train_full[:-30000], y_train_full[:-30000]
X_valid, y_valid = X_train_full[-30000:], y_train_full[-30000:]

# Per-pixel statistics are computed on the training split only, so no
# information from the validation/test images leaks into the scaling.
# The small epsilon guards against division by zero for constant pixels.
X_mean = np.mean(X_train, axis=0, keepdims=True)
X_std = np.std(X_train, axis=0, keepdims=True) + 1e-7

# Standardize every split with the training statistics, then append a
# trailing channel axis (one channel) as expected by the Conv2D layers.
X_train, X_valid, X_test = (
    ((images - X_mean) / X_std)[..., np.newaxis]
    for images in (X_train, X_valid, X_test)
)

Use the subclassing API to define a `ResidualUnit` layer.

In [3]:
# 3x3, stride-1, "SAME"-padded convolution with no bias term. Every use in
# this notebook is immediately followed by BatchNormalization.
DefaultConv2D = partial(keras.layers.Conv2D, kernel_size=3, strides=1,
                        padding="SAME", use_bias=False)

class ResidualUnit(keras.layers.Layer):
    """Residual unit: two 3x3 conv + batch-norm stages plus a skip connection.

    The main path is conv -> BN -> activation -> conv -> BN. Its output is
    added to the skip path and the activation is applied to the sum.

    The skip path is the identity when ``strides == 1``; in that case the
    number of input channels must already equal ``filters`` or the addition
    in ``call`` will fail. When ``strides > 1`` the skip path is a 1x1
    convolution (with the same stride and ``filters`` outputs) followed by
    batch normalization, so that its output matches the main path.

    Args:
        filters: Number of output channels of every convolution in the unit.
        strides: Stride of the first main-path convolution (and of the 1x1
            skip convolution, when present).
        activation: Activation identifier or callable, resolved with
            ``keras.activations.get``.
        **kwargs: Forwarded to ``keras.layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, filters, strides=1, activation="relu", **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.filters = filters
        self.strides = strides
        self.activation = keras.activations.get(activation)
        self.main_layers = [
            DefaultConv2D(filters, strides=strides),
            keras.layers.BatchNormalization(),
            self.activation,
            DefaultConv2D(filters),
            keras.layers.BatchNormalization()]
        self.skip_layers = []
        if strides > 1:
            # Downsampling unit: project the input so it can be added to
            # the main-path output.
            self.skip_layers = [
                DefaultConv2D(filters, kernel_size=1, strides=strides),
                keras.layers.BatchNormalization()]

    def call(self, inputs):
        """Run the main and skip paths and return activation(main + skip)."""
        Z = inputs
        for layer in self.main_layers:
            Z = layer(Z)
        skip_Z = inputs
        for layer in self.skip_layers:
            skip_Z = layer(skip_Z)
        return self.activation(Z + skip_Z)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created.

        Without this override the layer's hyperparameters are not part of
        its config, which prevents serializing a model that contains it and
        rebuilding the layer via ``from_config``.
        """
        config = super().get_config()
        config.update({
            "filters": self.filters,
            "strides": self.strides,
            "activation": keras.activations.serialize(self.activation),
        })
        return config

Build a ResNet model using the layer you defined above.

In [4]:
# Clear the Keras session and seed the NumPy/TensorFlow RNGs before any
# weights are created, so that re-running this cell starts model
# construction (and the training that follows) from the same seeded state.
reset_state()

model = keras.models.Sequential()
# Stem: 7x7 stride-2 convolution, batch norm, ReLU, then 2x2 max pooling.
model.add(DefaultConv2D(64, kernel_size=7, strides=2,
                        input_shape=[28, 28, 1]))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Activation("relu"))
model.add(keras.layers.MaxPool2D(pool_size=2, strides=2, padding="SAME"))
# Residual stages: two units with 64 filters, then two with 128. The first
# unit of a stage whose filter count differs from the previous one uses
# stride 2, which also switches on the projection skip path in ResidualUnit.
prev_filters = 64
for filters in [64] * 2 + [128] * 2:
    strides = 1 if filters == prev_filters else 2
    model.add(ResidualUnit(filters, strides=strides))
    prev_filters = filters
model.add(keras.layers.GlobalAvgPool2D())
# GlobalAvgPool2D already yields a 2-D (batch, channels) tensor, so this
# Flatten does not change the shape; it is kept to preserve the layer stack.
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(10, activation="softmax"))

In [5]:
# Print the per-layer output shapes and parameter counts of the model.
model.summary()

Model: "sequential"


_________________________________________________________________


 Layer (type)                Output Shape              Param #   




 conv2d (Conv2D)             (None, 14, 14, 64)        3136      


                                                                 


 batch_normalization (Batch  (None, 14, 14, 64)        256       


 Normalization)                                                  


                                                                 


 activation (Activation)     (None, 14, 14, 64)        0         


                                                                 


 max_pooling2d (MaxPooling2  (None, 7, 7, 64)          0         


 D)                                                              


                                                                 


 residual_unit (ResidualUni  (None, 7, 7, 64)          74240     


 t)                                                              


                                                                 


 residual_unit_1 (ResidualU  (None, 7, 7, 64)          74240     


 nit)                                                            


                                                                 


 residual_unit_2 (ResidualU  (None, 4, 4, 128)         230912    


 nit)                                                            


                                                                 


 residual_unit_3 (ResidualU  (None, 4, 4, 128)         295936    


 nit)                                                            


                                                                 


 global_average_pooling2d (  (None, 128)               0         


 GlobalAveragePooling2D)                                         


                                                                 


 flatten (Flatten)           (None, 128)               0         


                                                                 


 dense (Dense)               (None, 10)                1290      


                                                                 




Total params: 680010 (2.59 MB)


Trainable params: 678090 (2.59 MB)


Non-trainable params: 1920 (7.50 KB)


_________________________________________________________________


Compile your model and train it.

In [6]:
# The targets are passed as class indices (not one-hot vectors), hence the
# "sparse" variant of categorical cross-entropy.
model.compile(
    optimizer="nadam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train for three epochs, passing the held-out validation split to fit().
history = model.fit(
    X_train,
    y_train,
    epochs=3,
    validation_data=(X_valid, y_valid),
)

Evaluate the model performance on the test set.

In [None]:
# Score the trained model on the test split.
model.evaluate(X_test, y_test)