In [1]:
from functools import partial

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Load Fashion-MNIST; the result unpacks into (images, labels) pairs for the
# full training set and for the test set.
fashion_mnist = tf.keras.datasets.fashion_mnist.load_data()
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist

# Hold out the last 5,000 training examples for validation; train on the rest.
X_train, X_valid = X_train_full[:-5000], X_train_full[-5000:]
y_train, y_valid = y_train_full[:-5000], y_train_full[-5000:]

In [3]:
# Divide every split by 255 (presumably mapping 8-bit pixel values to [0, 1]).
# NOTE: the names are rebound to the scaled arrays, so re-running this cell
# without re-running the loading cell divides by 255 a second time.
X_train = X_train / 255
X_valid = X_valid / 255
X_test = X_test / 255

# L1 and L2 regularization

In [4]:
# Dense layer whose kernel is penalized with an L2 regularizer (factor 0.01).
layer_l2 = tf.keras.layers.Dense(
    100,
    activation="relu",
    kernel_initializer="he_normal",
    kernel_regularizer=tf.keras.regularizers.l2(0.01),
)

# Alternatively, the same layer with an L1 regularizer (factor 0.1) ...
layer_l1 = tf.keras.layers.Dense(
    100,
    activation="relu",
    kernel_initializer="he_normal",
    kernel_regularizer=tf.keras.regularizers.l1(0.1),
)

# ... or with both penalties at once: L1 factor 0.1 and L2 factor 0.01.
layer_l1_l2 = tf.keras.layers.Dense(
    100,
    activation="relu",
    kernel_initializer="he_normal",
    kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.1, l2=0.01),
)

In [5]:
tf.random.set_seed(2024)

# `partial` (imported in the top import cell) pre-binds the keyword arguments
# shared by every regularized layer, so each call site only supplies the unit
# count plus any override such as a different activation.
RegularizedDense = partial(
    tf.keras.layers.Dense,
    activation="relu",
    kernel_initializer="he_normal",
    kernel_regularizer=tf.keras.regularizers.l2(0.01),
)



In [6]:
# Two L2-regularized hidden layers followed by a 10-unit softmax output layer.
# The 28x28 input shape is declared with an explicit Input layer instead of an
# `input_shape=` argument on Flatten, which recent Keras releases warn about
# when used inside a Sequential model.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=[28, 28]),
    tf.keras.layers.Flatten(),
    RegularizedDense(100),
    RegularizedDense(100),
    # The keyword overrides the "relu" activation pre-bound in RegularizedDense.
    RegularizedDense(10, activation="softmax"),
])

In [7]:
# Train the regularized model for two epochs with plain SGD, reporting
# accuracy on the validation split after each epoch.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.02)
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
history = model.fit(
    X_train,
    y_train,
    epochs=2,
    validation_data=(X_valid, y_valid),
)

Epoch 1/2
Epoch 2/2


# Dropout

In [8]:
tf.random.set_seed(2024)

# Same 100-100-10 architecture, with a Dropout layer (rate 0.2) placed in
# front of every Dense layer. The 28x28 input shape is declared with an
# explicit Input layer instead of an `input_shape=` argument on Flatten, which
# recent Keras releases warn about when used inside a Sequential model.
model = tf.keras.Sequential(
    [
        tf.keras.layers.Input(shape=[28, 28]),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(100, activation="relu", kernel_initializer="he_normal"),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(100, activation="relu", kernel_initializer="he_normal"),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(10, activation="softmax"),
    ]
)

In [9]:
# Train the dropout model for ten epochs using SGD with momentum, reporting
# accuracy on the validation split after each epoch.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_valid, y_valid),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


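Here the training accuracy appears lower than the validation accuracy. This is because dropout is only active during training: the training metrics are computed with units being dropped, while the validation metrics are computed with the full network.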

In [11]:
# Actual training-set loss and accuracy, measured without dropout.
model.evaluate(x=X_train, y=y_train)



[0.31970342993736267, 0.8793091177940369]

In [12]:
# Loss and accuracy on the test set.
model.evaluate(x=X_test, y=y_test)



[0.37347832322120667, 0.8605999946594238]

## MC dropout

In [13]:
tf.random.set_seed(2024)

# Monte Carlo dropout: run 100 forward passes over the test set with
# training=True, stack the results along a new leading axis, then average
# over that axis to get one probability vector per test image.
y_probas = np.stack(
    [model(X_test, training=True) for _ in range(100)]
)
y_proba = y_probas.mean(axis=0)

In [14]:
# Regular (non-MC) predicted probabilities for the first test image.
model.predict(X_test[:1]).round(decimals=3)



array([[0.   , 0.   , 0.   , 0.   , 0.   , 0.054, 0.   , 0.078, 0.   ,
        0.868]], dtype=float32)

In [15]:
# MC-dropout averaged probabilities for the first test image.
y_proba[0].round(decimals=3)

array([0.   , 0.   , 0.   , 0.   , 0.   , 0.188, 0.   , 0.097, 0.   ,
       0.715], dtype=float32)

In [16]:
# Spread of each class probability across the stacked MC-dropout passes;
# display the values for the first test image.
y_std = y_probas.std(axis=0)
y_std[0].round(decimals=3)

array([0.   , 0.   , 0.   , 0.   , 0.   , 0.231, 0.   , 0.141, 0.001,
       0.249], dtype=float32)

In [17]:
# Test accuracy of the MC-averaged predictions: the fraction of test images
# whose highest-probability class matches the label.
y_pred = y_proba.argmax(axis=1)
accuracy = (y_pred == y_test).mean()
accuracy

0.8608

# Max norm

In [18]:
# Dense layer whose kernel is subject to a max-norm constraint of 1.0.
dense = tf.keras.layers.Dense(
    100,
    activation="relu",
    kernel_initializer="he_normal",
    kernel_constraint=tf.keras.constraints.max_norm(1.0),
)

In [19]:
# Pre-bind the arguments shared by the max-norm-constrained hidden layers
# (`partial` comes from the top import cell).
MaxNormDense = partial(
    tf.keras.layers.Dense,
    activation="relu",
    kernel_initializer="he_normal",
    kernel_constraint=tf.keras.constraints.max_norm(1.0),
)

tf.random.set_seed(2024)

# Two max-norm-constrained hidden layers and an unconstrained softmax output.
# The 28x28 input shape is declared with an explicit Input layer instead of an
# `input_shape=` argument on Flatten, which recent Keras releases warn about
# when used inside a Sequential model.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=[28, 28]),
    tf.keras.layers.Flatten(),
    MaxNormDense(100),
    MaxNormDense(100),
    tf.keras.layers.Dense(10, activation="softmax"),
])

# Train for ten epochs using SGD with momentum, validating after each epoch.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_valid, y_valid),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
