In [1]:
 # Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass

# TensorFlow ≥2.4 is required in this notebook
# Earlier 2.x versions will mostly work the same, but with a few bugs
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "2.4"

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "deep"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

In [82]:
# Mistake here is not that alpha and beta are not trainable
class MyNormalization(keras.layers.Layer):
    def __init__(self, eps=0.001, activation = None, **kwargs):
        super().__init__(**kwargs)
        self.activation = keras.activations.get(activation)
        self.eps = eps
    def build(self, batch_input_shape, dtype=tf.dtypes.float32):
        self.alpha = tf.ones(batch_input_shape[-1:], dtype=dtype) 
        self.beta = tf.zeros(batch_input_shape[-1:], dtype=dtype) 
        super().build(batch_input_shape)
    def call(self, inputs):
        mu, var = tf.nn.moments(inputs, axes = -1, keepdims = True) 
        sigma = tf.math.sqrt(var)        
        #return self.activation(self.alpha*(inputs-mu)/(sigma + self.eps) + self.beta)
        return self.activation(tf.math.multiply(self.alpha, inputs-mu)/(sigma + self.eps) + self.beta)
    def compute_output_shape(self, batch_input_shape):
        return batch_input_shape
    

In [86]:
#test_input = tf.random.uniform((100,))
data = tf.constant(np.arange(10).reshape(5, 2) * 10, dtype=tf.float32)
testNormLayer = MyNormalization(0.0001)


In [87]:
batch_input_shape = np.shape(data)
testNormLayer.build(batch_input_shape)
testNormLayer.call(data)

<tf.Tensor: shape=(5, 2), dtype=float32, numpy=
array([[-0.99998,  0.99998],
       [-0.99998,  0.99998],
       [-0.99998,  0.99998],
       [-0.99998,  0.99998],
       [-0.99998,  0.99998]], dtype=float32)>

In [88]:
layerKeras = keras.layers.LayerNormalization(epsilon=0.0001)
layerKeras(data)

<tf.Tensor: shape=(5, 2), dtype=float32, numpy=
array([[-0.999998  ,  0.99999803],
       [-0.999998  ,  0.99999803],
       [-0.99999785,  0.99999815],
       [-0.9999982 ,  0.9999978 ],
       [-0.9999976 ,  0.9999984 ]], dtype=float32)>

In [74]:
testNormLayer.call(test_input)-layerKeras(test_input)

<tf.Tensor: shape=(100,), dtype=float32, numpy=
array([ 0.00731987,  0.03694963, -0.01261872, -0.00281164,  0.02999103,
        0.01168078, -0.03181362, -0.00369719,  0.01507574, -0.01809865,
        0.00434911, -0.00237684,  0.02895081,  0.00775576,  0.00602019,
       -0.00670305, -0.02608705,  0.03336132, -0.02079934, -0.00197782,
        0.00836134, -0.01384455,  0.0184747 , -0.03087485, -0.02593875,
        0.01138765, -0.00276733, -0.01903772, -0.0262872 ,  0.01723152,
       -0.01494324, -0.01551801, -0.01152378,  0.00839815,  0.02599633,
        0.01217395,  0.02558565,  0.00710955,  0.01632565, -0.01810664,
       -0.02253854, -0.03183103, -0.03295672,  0.03625274,  0.0340451 ,
       -0.02242804,  0.03652465, -0.03243899,  0.00805688,  0.0285058 ,
        0.03538239, -0.01150179, -0.03182018, -0.00602692, -0.00651011,
        0.02031487,  0.01463425, -0.01114911, -0.01095721,  0.02687931,
       -0.01382685, -0.0153355 ,  0.03126419, -0.00653452, -0.02324951,
        0.011992

In [90]:
#solution with alpha and beta as trainable weights
class MyNormalization_ver2(keras.layers.Layer):
    def __init__(self, eps=0.001, activation = None, **kwargs):
        super().__init__(**kwargs)
        self.activation = keras.activations.get(activation)
        self.eps = eps
    def build(self, batch_input_shape, dtype=tf.dtypes.float32):
        self.alpha = self.add_weight(name='alpha', shape=batch_input_shape[-1:], initializer='ones',
                                     dtype=dtype)         
        self.beta = self.add_weight(name='alpha', shape=batch_input_shape[-1:], initializer='zeros',
                                     dtype=dtype) 
        super().build(batch_input_shape)
    def call(self, inputs):
        mu, var = tf.nn.moments(inputs, axes = -1, keepdims = True)         
        #return self.activation(self.alpha*(inputs-mu)/(sigma + self.eps) + self.beta)
        return self.activation(tf.math.multiply(self.alpha, inputs-mu)/(tf.math.sqrt(var + self.eps)) + self.beta)
    def compute_output_shape(self, batch_input_shape):
        return batch_input_shape
    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "eps": self.eps}

In [91]:
testNormLayer_2 = MyNormalization_ver2(0.0001)

In [96]:
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()

In [97]:
X_valid, X_train = X_train_full[:5000] / 255., X_train_full[5000:] / 255.
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]
X_test = X_test / 255.

In [100]:
len(X_train)


55000

In [101]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation="relu"),
    keras.layers.Dense(300, activation="relu"),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.Dense(50, activation="relu"),
    keras.layers.Dense(25, activation="relu"),
    keras.layers.Dense(10, activation="softmax")
])

In [104]:
def random_batch(X_train, y_train, batch_size =32):
    idx = np.random.randint(len(X_train), size = batch_size)
    return X_train[idx], y_train[idx]

In [111]:
def print_status_bar(iteration, total, loss, metrics=None):
    metrics = " - ".join(["{}: {:.4f}".format(m.name, m.result()) for m in [loss] + (metrics or [])])
    end = "" if iteration < total else "\n"
    print("\r{}/{} - ".format(iteration, total) + metrics, end=end)

In [115]:
n_epochs = 5
batch_size = 32
n_steps = len(X_train) // batch_size
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
loss_fn = keras.losses.sparse_categorical_crossentropy
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.Accuracy()]

In [116]:
for epoch in range(1, n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(X_train, y_train)
        with tf.GradientTape() as tape:
            y_pred = model(X_batch, training=True)
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            loss = tf.add_n([main_loss] + model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)
    print_stats_bar(len(y_train), len(y_train), mean_loss, metrics)
    for metric in [mean_loss] + metrics:
        metric.reset_states()

Epoch 1/5


ValueError: Shapes (32,) and (32, 10) are incompatible

In [119]:
y_batch.shape


(32,)