# 5.1 Generalization

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Load MNIST; the test split is kept exactly as returned by the loader.
(train_images, train_labels), (test_images, test_labels) = (
    tf.keras.datasets.mnist.load_data(path="mnist.npz")
)

# Flatten every training image into a single row, cast to float32 and divide by 255.
train_images = train_images.reshape(len(train_images), -1).astype("float32") / 255

# Shape shared by the three synthetic blocks below: one row per image, 784 columns.
extra_shape = (len(train_images), 784)

# The two random draws come from NumPy's global generator, so their order is
# kept: first the appended noise channels, then the additive noise.
train_images_with_noise_channels = np.hstack(
    [train_images, np.random.random(extra_shape)]
)

train_images_with_noise = train_images + np.random.random(extra_shape)

# Same width as the noise-channel variant, but the extra columns carry no signal.
train_images_with_zeros_channels = np.hstack(
    [train_images, np.zeros(extra_shape)]
)

print(
    f"train_images shape:\t{train_images.shape}\n"
    f"test_images shape:\t{test_images.shape}"
)


def plot_digit(image_array):
    """Draw one flattened digit with pyplot and hide the axis.

    Args:
        image_array: Array that can be reshaped to 28x28 (i.e. 784 values).
    """
    plt.imshow(image_array.reshape(28, 28), cmap="viridis")
    plt.axis("off")


# Show the first 25 samples of each dataset as a gap-free 5x5 grid,
# one figure per dataset.
for samples, caption in (
    (train_images, "Original Data"),
    (train_images_with_noise, "Data With Noise"),
):
    plt.figure(figsize=(3, 3))
    for position, pixels in enumerate(samples[:25], start=1):
        plt.subplot(5, 5, position)
        plot_digit(pixels)
    plt.subplots_adjust(wspace=0, hspace=0)
    plt.suptitle(caption)
    plt.show()

: 

## 5.1.1 Underfitting and Overfitting
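Models trained on noisy features are prone to overfitting. The cell below compares training on MNIST with 784 extra random-noise channels against MNIST with 784 extra all-zero channels.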

In [None]:
import keras
from keras import layers

import matplotlib as mpl
import matplotlib.gridspec as gridspec

# Dark-gray figure background with white plotting areas for every later figure.
mpl.rcParams.update(
    {
        "figure.facecolor": "darkgray",
        "axes.facecolor": "white",
    }
)

def get_model():
    """Build and compile a fresh two-layer dense classifier.

    Returns:
        A compiled ``keras.Sequential`` model: a 512-unit ReLU layer followed
        by a 10-unit softmax layer, using the "rmsprop" optimizer, sparse
        categorical cross-entropy loss and the accuracy metric.
    """
    model = keras.Sequential(
        [
            layers.Dense(512, activation="relu"),
            layers.Dense(10, activation="softmax"),
        ]
    )
    model.compile(
        optimizer="rmsprop",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

def fit_and_plot_results(model, x_train, y_train, n_epochs=20, batch_size=512, val_ratio=0.4, verbose=0, title="", loss_only=False):
    """Train ``model`` and show its training/validation curves in a new figure.

    Args:
        model: Compiled model exposing ``fit``; the object returned by ``fit``
            must carry a ``history`` dict with "loss" and "val_loss" (plus
            "accuracy" and "val_accuracy" unless ``loss_only`` is set).
        x_train: Training inputs, forwarded to ``model.fit``.
        y_train: Training targets, forwarded to ``model.fit``.
        n_epochs: Forwarded to ``model.fit`` as ``epochs``.
        batch_size: Forwarded to ``model.fit`` as ``batch_size``.
        val_ratio: Forwarded to ``model.fit`` as ``validation_split``.
        verbose: Forwarded to ``model.fit`` as ``verbose``.
        title: Figure-level title.
        loss_only: When true, draw only the loss panel.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    history = model.fit(
        x_train,
        y_train,
        epochs=n_epochs,
        batch_size=batch_size,
        validation_split=val_ratio,
        verbose=verbose,
    )
    history_dct = history.history

    loss_values = history_dct["loss"]
    val_loss_values = history_dct["val_loss"]
    epochs = range(1, len(loss_values) + 1)

    # Allocate only the rows that will be drawn: a loss-only figure used to
    # reserve a second, empty row and leave its lower half blank.
    n_rows = 1 if loss_only else 2
    fig = plt.figure(figsize=(6, 4 * n_rows))
    gs = gridspec.GridSpec(n_rows, 1)

    # First subplot (Training and validation loss)
    ax0 = fig.add_subplot(gs[0])
    ax0.plot(epochs, loss_values, "bo", label="Training Loss")
    ax0.plot(epochs, val_loss_values, "r", label="Validation Loss")
    ax0.set_title("Training and Validation Loss")
    ax0.set_xlabel("Epochs")
    ax0.set_ylabel("Loss")
    ax0.legend()

    if not loss_only:
        # Accuracy is looked up only when it is plotted, so a loss-only run
        # also works for models compiled without the accuracy metric.
        acc = history_dct["accuracy"]
        val_acc = history_dct["val_accuracy"]

        # Second subplot (Training and validation accuracy)
        ax1 = fig.add_subplot(gs[1])
        ax1.plot(epochs, acc, "bo", label="Training Accuracy")
        ax1.plot(epochs, val_acc, "r", label="Validation Accuracy")
        ax1.set_title("Training and Validation Accuracy")
        ax1.set_xlabel("Epochs")
        ax1.set_ylabel("Accuracy")
        ax1.legend()

    fig.suptitle(title)
    fig.tight_layout()

    plt.show()

# Train one fresh model per padded dataset and show each loss curve.
for features, run_title in (
    (train_images_with_noise_channels, "noise channels"),
    (train_images_with_zeros_channels, "zeros channels"),
):
    fit_and_plot_results(
        model=get_model(),
        x_train=features,
        y_train=train_labels,
        n_epochs=10,
        batch_size=128,
        val_ratio=.2,
        verbose=0,
        title=run_title,
        loss_only=True,
    )

: 

In [None]:
import keras
from keras import layers
import matplotlib.pyplot as plt
import matplotlib as mpl

# Dark-gray figure background, white axes background.
mpl.rc("figure", facecolor="darkgray")
mpl.rc("axes", facecolor="white")

def get_model():
    """Return a newly compiled dense network for 10-class classification.

    The network stacks a 512-unit ReLU layer and a 10-unit softmax layer and
    is compiled with the "rmsprop" optimizer, sparse categorical
    cross-entropy loss and the accuracy metric.
    """
    network = keras.Sequential()
    for units, activation in ((512, "relu"), (10, "softmax")):
        network.add(layers.Dense(units, activation=activation))

    compile_options = {
        "optimizer": "rmsprop",
        "loss": "sparse_categorical_crossentropy",
        "metrics": ["accuracy"],
    }
    network.compile(**compile_options)
    return network

def fit_and_plot_results(model, x_train, y_train, n_epochs=20, batch_size=512, val_ratio=0.4, verbose=0, title="", loss_only=False, axes=None):
    """Train ``model`` and plot its training/validation curves.

    Args:
        model: Compiled model exposing ``fit``; the object returned by ``fit``
            must carry a ``history`` dict with "loss" and "val_loss" (plus
            "accuracy" and "val_accuracy" unless ``loss_only`` is set).
        x_train: Training inputs, forwarded to ``model.fit``.
        y_train: Training targets, forwarded to ``model.fit``.
        n_epochs: Forwarded to ``model.fit`` as ``epochs``.
        batch_size: Forwarded to ``model.fit`` as ``batch_size``.
        val_ratio: Forwarded to ``model.fit`` as ``validation_split``.
        verbose: Forwarded to ``model.fit`` as ``verbose``.
        title: Prefix for each panel title; also the figure title when this
            function creates the figure.
        loss_only: When true, draw only the loss panel.
        axes: Optional indexable collection of Matplotlib axes to draw on.
            ``axes[0]`` receives the loss curves and ``axes[1]`` the accuracy
            curves (only needed when ``loss_only`` is false). When omitted, a
            new figure is created, laid out and shown by this function;
            otherwise the caller keeps control of layout and display.

    Returns:
        None.
    """
    history = model.fit(
        x_train,
        y_train,
        epochs=n_epochs,
        batch_size=batch_size,
        validation_split=val_ratio,
        verbose=verbose,
    )
    history_dct = history.history

    loss_values = history_dct["loss"]
    val_loss_values = history_dct["val_loss"]
    epochs = range(1, len(loss_values) + 1)

    owns_figure = axes is None
    if owns_figure:
        n_rows = 1 if loss_only else 2
        # squeeze=False always yields a 2-D array of axes. With the default
        # squeezing, a 1x1 grid returns a bare Axes object and the axes[0]
        # lookups below raise TypeError.
        fig, axes_grid = plt.subplots(n_rows, 1, figsize=(6, 4 * n_rows), squeeze=False)
        axes = axes_grid[:, 0]
        fig.suptitle(title)

    # First subplot (Training and validation loss)
    axes[0].plot(epochs, loss_values, "bo", label="Training Loss")
    axes[0].plot(epochs, val_loss_values, "r", label="Validation Loss")
    axes[0].set_title(f"{title} - Training and Validation Loss")
    axes[0].set_xlabel("Epochs")
    axes[0].set_ylabel("Loss")
    axes[0].legend()

    if not loss_only:
        # Accuracy is looked up only when it is plotted, so a loss-only run
        # also works for models compiled without the accuracy metric.
        acc = history_dct["accuracy"]
        val_acc = history_dct["val_accuracy"]

        # Second subplot (Training and validation accuracy)
        axes[1].plot(epochs, acc, "bo", label="Training Accuracy")
        axes[1].plot(epochs, val_acc, "r", label="Validation Accuracy")
        axes[1].set_title(f"{title} - Training and Validation Accuracy")
        axes[1].set_xlabel("Epochs")
        axes[1].set_ylabel("Accuracy")
        axes[1].legend()

    if owns_figure:
        # Lay out after drawing so titles, labels and legends are accounted for.
        fig.tight_layout()
        plt.show()

# One shared figure with a loss panel per dataset, side by side.
fig1, axes1 = plt.subplots(nrows=1, ncols=2, figsize=(8, 5))
fig1.suptitle("Model Comparisons")

comparison_runs = (
    ("Noise Channels", train_images_with_noise_channels),  # Model 1
    ("Zeros Channels", train_images_with_zeros_channels),  # Model 2
)
for panel, (panel_title, features) in zip(axes1, comparison_runs):
    fit_and_plot_results(
        model=get_model(),
        x_train=features,
        y_train=train_labels,
        n_epochs=10,
        batch_size=128,
        val_ratio=.2,
        verbose=0,
        title=panel_title,
        loss_only=True,
        axes=[panel],
    )

plt.show()


: 

# 5.3 Improving Model Fit
## 5.3.1 Tuning Key Gradient Descent Parameters

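If training doesn't get started, or stalls too early, the cause is the configuration of the gradient descent process, and this is a problem that can always be overcome by changing the gradient descent parameters.
When this happens, try lowering or raising the learning rate, or increasing the batch size.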

In [None]:
def get_model_rms_prop_change(learning_rate: float):
    """Build the two-layer dense classifier with a custom RMSprop learning rate.

    Args:
        learning_rate: Value passed as the first argument of
            ``keras.optimizers.RMSprop``.

    Returns:
        A compiled ``keras.Sequential`` model (512-unit ReLU layer, 10-unit
        softmax layer) using sparse categorical cross-entropy loss and the
        accuracy metric.
    """
    network = keras.Sequential()
    network.add(layers.Dense(512, activation="relu"))
    network.add(layers.Dense(10, activation="softmax"))

    rmsprop = keras.optimizers.RMSprop(learning_rate)
    network.compile(
        optimizer=rmsprop,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return network

# (learning rate, epochs, figure title) for each training run, in display order.
learning_rate_runs = (
    (1., 10, "Train model with learning rate 1"),
    (1e-2, 10, "Train model with learning rate 1e-2"),
    (1e-2, 20, "Train model with learning rate 1e-2 - overfitting"),
)
for rate, epoch_count, run_title in learning_rate_runs:
    fit_and_plot_results(
        model=get_model_rms_prop_change(learning_rate=rate),
        x_train=train_images,
        y_train=train_labels,
        n_epochs=epoch_count,
        batch_size=128,
        val_ratio=.2,
        verbose=0,
        title=run_title,
        loss_only=True,
    )


: 

: 