# Chapter 17: Defeating Overfitting

## Regularizing the Model

### Reviewing the Deep Network

In [None]:
# Load Echidna dataset

import os
import numpy as np


def load(filename):
    """Load a two-feature, one-label dataset from a text file.

    The first line of the file is skipped as a header. Rows are shuffled
    in place after seeding NumPy's global generator with a fixed value, so
    repeated calls on the same file return the same ordering.

    Args:
        filename: Path of the file to read. Columns 0 and 1 are the input
            variables; column 2 is the label.

    Returns:
        A tuple ``(x, y)``. ``x`` holds the two input columns, each rescaled
        to the range [-0.5, 0.5]. ``y`` holds the labels cast to int, shaped
        as a single column ``(rows, 1)``.
    """
    # ndmin=2 keeps a single-row file two-dimensional, so the column
    # slicing below works for any number of rows.
    data = np.loadtxt(filename, skiprows=1, ndmin=2)
    # Fixed seed => reproducible shuffle. Note that this also resets the
    # global NumPy random state for whatever runs after this call.
    np.random.seed(12345)
    np.random.shuffle(data)
    x_raw = data[:, 0:2]
    x_min = x_raw.min(axis=0)
    x_span = x_raw.max(axis=0) - x_min
    # A constant column has zero span, and dividing by it would produce NaN.
    # Dividing by 1 instead maps every value of that column to -0.5.
    x_span[x_span == 0] = 1
    # Rescale data between -0.5 and 0.5
    x = (x_raw - x_min) / x_span - 0.5
    y = data[:, 2].astype(int).reshape(-1, 1)
    return (x, y)


# Read the whole dataset, then cut inputs and labels into three equal,
# consecutive parts: training, validation and test.
X, Y = load('./echidna.txt')
(X_train, X_validation, X_test), (Y_train, Y_validation, Y_test) = (
    np.split(samples, 3) for samples in (X, Y))

In [None]:
# Plot the decision boundary of a Keras model over a bi-dimensional dataset.

from matplotlib.colors import ListedColormap


def plot_boundary(model, points):
    """Shade the plane according to the class the model predicts at each spot.

    Args:
        model: Object whose ``predict`` returns one row of class scores
            per input row.
        points: Not used by this function.
    """
    # Lay a fine grid over the square [-0.55, 0.55) x [-0.55, 0.55)
    half_side = 0.55
    step = 0.001
    ticks_x = np.arange(-half_side, half_side, step)
    ticks_y = np.arange(-half_side, half_side, step)
    xx, yy = np.meshgrid(ticks_x, ticks_y)
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    # Keep the highest-scoring class for every grid point
    scores = model.predict(grid_points)
    predicted = scores.argmax(axis=1).reshape(xx.shape)
    # Fill the predicted regions using a two-color (pale blue / pale green) map
    palette = ListedColormap(['#BBBBFF', '#BBFFBB'])
    plt.contourf(xx, yy, predicted, cmap=palette)


def plot_data_by_label(input_variables, labels, label_selector, symbol):
    """Plot only the rows whose label equals ``label_selector``.

    Args:
        input_variables: Array of points; columns 0 and 1 are used as the
            horizontal and vertical coordinates.
        labels: Array of labels, compared element-wise to ``label_selector``.
        label_selector: The label value to keep.
        symbol: Matplotlib format string for the markers.
    """
    keep = (labels == label_selector).flatten()
    selected = input_variables[keep]
    xs, ys = selected[:, 0], selected[:, 1]
    plt.plot(xs, ys, symbol, markersize=4)


def show_boundary(model, x, y, title="Decision boundary"):
    """Display the model's decision boundary with the data points on top.

    Args:
        model: Model passed on to ``plot_boundary``.
        x: Input points to plot.
        y: Labels of ``x``; label 0 is drawn with format 'bs' and label 1
            with format 'g^'.
        title: Title of the figure.
    """
    plot_boundary(model, x)
    for label, marker in ((0, 'bs'), (1, 'g^')):
        plot_data_by_label(x, y, label, marker)
    # Hide the tick labels on both axes
    for axis in (plt.gca().axes.xaxis, plt.gca().axes.yaxis):
        axis.set_ticklabels([])
    plt.title(title)
    plt.ion()
    plt.show()

In [None]:
# Plot the training loss and validation loss from a Keras history object.

from matplotlib.colors import ListedColormap

def show_losses(history):
    """Plot the training loss and validation loss against the epoch number.

    Args:
        history: Object whose ``history`` attribute maps 'loss' and
            'val_loss' to sequences with one value per epoch.
    """
    recorded = history.history
    curves = (
        ('loss', 'Training set', 'blue', '-'),
        ('val_loss', 'Validation set', 'green', '--'),
    )
    for key, caption, color, line in curves:
        plt.plot(recorded[key], label=caption, color=color, linestyle=line)
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.xlim(0, len(recorded['loss']))
    plt.legend()
    plt.show()

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import RMSprop
from keras.utils import to_categorical

# One-hot encode the labels so they line up with the two-unit softmax output.
Y_train_one_hot_encoded = to_categorical(Y_train)
Y_validation_one_hot_encoded = to_categorical(Y_validation)

# Unregularized network: two sigmoid hidden layers (100 and 30 units)
# followed by a two-class softmax output.
model = Sequential()
model.add(Dense(100, activation='sigmoid'))
model.add(Dense(30, activation='sigmoid'))
model.add(Dense(2, activation='softmax'))

# The learning rate is passed positionally: its keyword was `lr` in older
# Keras releases and is `learning_rate` in newer ones, but it is the first
# parameter of RMSprop under both names.
model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(0.001),
              metrics=['accuracy'])

# Train on the training split; the validation split is evaluated after each
# epoch so that `history` records both 'loss' and 'val_loss'.
history = model.fit(X_train, Y_train_one_hot_encoded,
                    validation_data=(X_validation, Y_validation_one_hot_encoded),
                    epochs=3, batch_size=25)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Plot the training and validation loss recorded by the fit above.
show_losses(history)

In [None]:
# Decision boundary of the unregularized model over the training points.
show_boundary(model, X_train, Y_train, title="Training set")

In [None]:
# Same boundary, drawn over the validation points.
show_boundary(model, X_validation, Y_validation, title="Validation set")

### L1 and L2 Regularization

#### L1 in Action

In [None]:
from keras.regularizers import l1

# Same architecture as before, with an L1 penalty (factor 0.0004) applied
# through `activity_regularizer` on both hidden layers.
model = Sequential()
model.add(Dense(100, activation='sigmoid', activity_regularizer=l1(0.0004)))
model.add(Dense(30, activation='sigmoid', activity_regularizer=l1(0.0004)))
model.add(Dense(2, activation='softmax'))

# The learning rate is passed positionally: its keyword was `lr` in older
# Keras releases and is `learning_rate` in newer ones, but it is the first
# parameter of RMSprop under both names.
model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(0.001),
              metrics=['accuracy'])

# Train on the training split, tracking the validation loss at every epoch.
history = model.fit(X_train, Y_train_one_hot_encoded,
                    validation_data=(X_validation, Y_validation_one_hot_encoded),
                    epochs=3, batch_size=25)

In [None]:
# Plot the training and validation loss of the L1-regularized model.
show_losses(history)

In [None]:
# Decision boundary of the L1-regularized model over the training points.
show_boundary(model, X_train, Y_train, title="Training set")

In [None]:
# Same boundary, drawn over the validation points.
show_boundary(model, X_validation, Y_validation, title="Validation set")

#### Too Much of a Good Thing

In [None]:
# Same architecture again, but the first hidden layer now uses a larger L1
# factor (0.002) while the second keeps 0.0004.
model = Sequential()
model.add(Dense(100, activation='sigmoid', activity_regularizer=l1(0.002)))
model.add(Dense(30, activation='sigmoid', activity_regularizer=l1(0.0004)))
model.add(Dense(2, activation='softmax'))

# The learning rate is passed positionally: its keyword was `lr` in older
# Keras releases and is `learning_rate` in newer ones, but it is the first
# parameter of RMSprop under both names.
model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(0.001),
              metrics=['accuracy'])

# Train on the training split, tracking the validation loss at every epoch.
history = model.fit(X_train, Y_train_one_hot_encoded,
                    validation_data=(X_validation, Y_validation_one_hot_encoded),
                    epochs=3, batch_size=25)

In [None]:
# Plot the training and validation loss of the more heavily regularized model.
show_losses(history)

In [None]:
# Decision boundary of the more heavily regularized model over the training points.
show_boundary(model, X_train, Y_train, title="Training set")

In [None]:
# Same boundary, drawn over the validation points.
show_boundary(model, X_validation, Y_validation, title="Validation set")