In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
    IS_COLAB = True
except Exception:
    IS_COLAB = False

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "2.0"

# Warn when TensorFlow cannot see a GPU.
# `tf.test.is_gpu_available()` is deprecated; TensorFlow's documentation recommends
# querying the physical device list instead.
try:
    gpu_devices = tf.config.list_physical_devices("GPU")
except AttributeError:
    # Older 2.x releases only expose this function under `tf.config.experimental`.
    gpu_devices = tf.config.experimental.list_physical_devices("GPU")

if not gpu_devices:
    print("No GPU was detected. CNNs can be very slow without a GPU.")
    if IS_COLAB:
        print("Go to Runtime > Change runtime and select a GPU hardware accelerator.")

# Common imports
import numpy as np
import os

from functools import partial

# to make this notebook's output stable across runs
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "cnn"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure under IMAGES_PATH.

    Parameters
    ----------
    fig_id : str
        Base name of the output file (without extension).
    tight_layout : bool
        When true, call ``plt.tight_layout()`` before saving.
    fig_extension : str
        File extension, also passed to ``plt.savefig`` as the format.
    resolution : int
        Passed to ``plt.savefig`` as ``dpi``.
    """
    file_name = fig_id + "." + fig_extension
    target = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)

In [2]:
def plot_image(image):
    """Draw `image` with the "gray" colormap and "nearest" interpolation, then hide the axes."""
    imshow_options = {"cmap": "gray", "interpolation": "nearest"}
    plt.imshow(image, **imshow_options)
    plt.axis("off")

def plot_color_image(image):
    """Draw `image` with "nearest" interpolation (no colormap override), then hide the axes."""
    imshow_options = {"interpolation": "nearest"}
    plt.imshow(image, **imshow_options)
    plt.axis("off")

# Fashion MNIST with a CNN

## load data

In [3]:
# Load Fashion-MNIST through Keras; the call returns a (train, test) pair of
# (images, labels) tuples, unpacked here into four arrays.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

In [4]:
x_train.shape, y_train.shape

((60000, 28, 28), (60000,))

In [5]:
x_train.flags.writeable, y_train.flags.writeable, \
x_test.flags.writeable, y_test.flags.writeable

(False, False, False, False)

In [6]:
# Append a trailing channel axis, (N, 28, 28) -> (N, 28, 28, 1), and convert to float64.
# `np.float` was only an alias for the builtin `float` (i.e. float64); it was deprecated
# in NumPy 1.20 and removed in 1.24, so the dtype is spelled explicitly here.
# `astype` always returns a fresh, writeable array, so no explicit `.copy()` of the
# read-only arrays returned by the loader is needed.
x_train = np.expand_dims(x_train, axis=-1).astype(np.float64)
x_test = np.expand_dims(x_test, axis=-1).astype(np.float64)

In [7]:
x_train.shape

(60000, 28, 28, 1)

In [8]:
x_train.flags.writeable, y_train.flags.writeable, \
x_test.flags.writeable, y_test.flags.writeable

(True, False, True, False)

In [9]:
x_train[0, 4, 15:20, :]

array([[136.],
       [127.],
       [ 62.],
       [ 54.],
       [  0.]])

In [10]:
y_train[:5]

array([9, 0, 0, 3, 0], dtype=uint8)

In [11]:
# Divide every pixel value by 255.0, modifying both arrays in place.
# NOTE: this cell is not idempotent — running it a second time without re-running
# the cells above divides the data by 255 again.
x_train /= 255.0
x_test /= 255.0

In [12]:
x_train[0, 4, 15:20, :]

array([[0.53333333],
       [0.49803922],
       [0.24313725],
       [0.21176471],
       [0.        ]])

## CNN model

In [13]:
# Layer factories with the defaults used throughout this notebook; any keyword
# can still be overridden at call time (e.g. DefaultConv2D(filters=64, kernel_size=7)).

# 3x3 convolution, ReLU activation, 'same' padding.
DefaultConv2D = partial(
    tf.keras.layers.Conv2D,
    kernel_size=(3, 3),
    padding='same',
    activation=tf.keras.activations.relu,
)

# Max pooling with the layer's own default arguments.
DefaultMaxPool2D = tf.keras.layers.MaxPool2D

# Fully connected hidden layer with ReLU activation.
DefaultDense = partial(tf.keras.layers.Dense, activation='relu')

# Classification head: 10 units with softmax activation.
OutputDense = partial(tf.keras.layers.Dense, units=10, activation='softmax')

# Dropout with a rate of 0.5.
DefaultDropout = partial(tf.keras.layers.Dropout, rate=.5)

### build models

In [14]:
# Build the CNN with the Keras functional API.
# The seed is reset right before layer creation so the initial weights are repeatable.
tf.random.set_seed(42)
inputs = tf.keras.Input(shape=(28, 28, 1))

# Stem: a single 7x7 convolution followed by max pooling.
x = DefaultConv2D(filters=64, kernel_size=7)(inputs)
x = DefaultMaxPool2D()(x)

# Two stages of (conv, conv, max pool), with 128 and then 256 filters.
for n_filters in (128, 256):
    x = DefaultConv2D(filters=n_filters)(x)
    x = DefaultConv2D(filters=n_filters)(x)
    x = DefaultMaxPool2D()(x)

x = tf.keras.layers.Flatten()(x)

# Dense head: each hidden layer is followed by dropout.
for n_units in (128, 64):
    x = DefaultDense(units=n_units)(x)
    x = DefaultDropout()(x)

outputs = OutputDense()(x)

model_fun = keras.Model(inputs=inputs, outputs=outputs)

In [15]:
model_fun.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 28, 28, 64)        3200      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 128)       73856     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 128)       147584    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 128)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7, 256)         295168

In [16]:
# Build the same architecture with the Sequential API, using the shared layer
# factories so both models are assembled from identical layer definitions.
# The seed is reset first so the initial weights are repeatable.
tf.random.set_seed(42)
model_seq = keras.models.Sequential([
    # Stem: 7x7 convolution + max pooling (default pool size is 2x2).
    DefaultConv2D(filters=64, kernel_size=7, input_shape=[28, 28, 1]),
    DefaultMaxPool2D(),
    # Stage with 128 filters.
    DefaultConv2D(filters=128),
    DefaultConv2D(filters=128),
    DefaultMaxPool2D(),
    # Stage with 256 filters.
    DefaultConv2D(filters=256),
    DefaultConv2D(filters=256),
    DefaultMaxPool2D(),
    keras.layers.Flatten(),
    # Dense head with dropout after each hidden layer.
    DefaultDense(units=128),
    DefaultDropout(),
    DefaultDense(units=64),
    DefaultDropout(),
    OutputDense(),
])

In [17]:
model_seq.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 28, 28, 64)        3200      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 14, 14, 128)       73856     
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 14, 14, 128)       147584    
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 7, 7, 128)         0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 7, 7, 256)         295168    
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 7, 7, 256)         590080    

### compare weights

Let's compare the weights of corresponding layers in these two models. It looks like if we set the same random seed before building each model, then the initial weights are in fact the same.

In [18]:
model_fun.layers[:5]

[<tensorflow.python.keras.engine.input_layer.InputLayer at 0x7f7d38368ac8>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x7f7d38368b00>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x7f7e1a8820f0>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x7f7d38362b38>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x7f7e1703b2b0>]

In [19]:
model_fun.layers[3]

<tensorflow.python.keras.layers.convolutional.Conv2D at 0x7f7d38362b38>

In [20]:
model_fun.get_layer('conv2d_1')

<tensorflow.python.keras.layers.convolutional.Conv2D at 0x7f7d38362b38>

In [21]:
w_fun = model_fun.get_layer('conv2d_1').weights[0].numpy()

In [22]:
w_fun.shape

(3, 3, 64, 128)

In [23]:
w_fun[:, :, 0, 0]

array([[ 0.0221432 , -0.03333401,  0.04752306],
       [-0.00827782,  0.04764385, -0.05091431],
       [-0.0569893 , -0.027     , -0.00349722]], dtype=float32)

In [24]:
w_seq = model_seq.get_layer('conv2d_6').weights[0].numpy()

In [25]:
w_seq.shape

(3, 3, 64, 128)

In [26]:
w_seq[:, :, 0, 0]

array([[ 0.0221432 , -0.03333401,  0.04752306],
       [-0.00827782,  0.04764385, -0.05091431],
       [-0.0569893 , -0.027     , -0.00349722]], dtype=float32)

### training

In [27]:
# Configure training: Adam optimizer, sparse categorical cross-entropy
# (the labels are integer class ids), and accuracy as the reported metric.
model_fun.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [28]:
# Train for 10 epochs, holding out 20% of the training data for validation.
history = model_fun.fit(
    x_train,
    y_train,
    validation_split=.2,
    epochs=10,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Evaluate on the test set; with the single 'accuracy' metric configured at
# compile time, the result is [loss, accuracy].
score = model_fun.evaluate(x_test, y_test)

In [32]:
score

[0.2825852048158646, 0.9078]

In [33]:
x_new = x_test[:10]

In [34]:
x_new.shape

(10, 28, 28, 1)

In [37]:
# Predict class probabilities for the selected test images: one row per image,
# one softmax output per class (the output layer has 10 units).
y_pred = model_fun.predict(x_new)

In [40]:
y_pred[:3]

array([[5.9006334e-20, 2.9799884e-21, 1.1957012e-24, 6.9247981e-28,
        2.1203728e-27, 6.7338171e-08, 1.8349376e-21, 2.0450607e-07,
        1.3419852e-17, 9.9999976e-01],
       [1.1990573e-05, 5.1960045e-20, 9.9936372e-01, 7.1711479e-13,
        8.1462667e-07, 1.9847506e-24, 6.2348042e-04, 6.7772247e-29,
        2.0972890e-14, 3.6598309e-23],
       [1.3340663e-21, 1.0000000e+00, 2.4192680e-22, 3.5877232e-14,
        2.8833815e-15, 5.2291462e-36, 7.8210807e-17, 1.2183427e-36,
        4.8885439e-31, 1.3097176e-34]], dtype=float32)