# Early stopping to prevent Overfitting

It is important to segment the original dataset into several datasets.

- **Training Set**
- **Validation Set**
- **Holdout Set (Test Set)**

# Early stopping with classification

In [1]:
import pandas as pd
import io
import os
import requests
import numpy as np
from sklearn import metrics
from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.callbacks import EarlyStopping # NOTE THIS STEP

# Load the iris dataset; 'NA' and '?' entries are parsed as missing values.
df = pd.read_csv(
    'https://data.heatonresearch.com/data/t81-558/iris.csv',
    na_values=['NA', '?'],
)

# Targets: one-hot encode the species column (classification).
dummies = pd.get_dummies(df['species'])
species = dummies.columns
y = dummies.values

# Features: the four measurement columns as a NumPy array.
x = df[['sepal_l', 'sepal_w', 'petal_l', 'petal_w']].values

# Hold out 25% of the rows; random_state pins the shuffle so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42
)



Using TensorFlow backend.


In [5]:
# Build the classifier: two ReLU hidden layers and a softmax output with
# one unit per one-hot target column.
model = Sequential([
    Dense(50, input_dim=x.shape[1], activation='relu'),  # hidden 1
    Dense(25, activation='relu'),                        # hidden 2
    Dense(y.shape[1], activation='softmax'),             # output
])
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Early-stopping callback watching the validation loss.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-3,
    patience=5,
    verbose=1,
    mode='auto',
    restore_best_weights=True,
)

# epochs=1000 is only an upper bound; the callback is expected to end
# training earlier.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    callbacks=[monitor],
    verbose=2,
    epochs=1000,
)

Train on 112 samples, validate on 38 samples
Epoch 1/1000
 - 0s - loss: 1.5549 - val_loss: 1.4721
Epoch 2/1000
 - 0s - loss: 1.3400 - val_loss: 1.3344
Epoch 3/1000
 - 0s - loss: 1.2347 - val_loss: 1.2262
Epoch 4/1000
 - 0s - loss: 1.1549 - val_loss: 1.1185
Epoch 5/1000
 - 0s - loss: 1.0775 - val_loss: 1.0378
Epoch 6/1000
 - 0s - loss: 1.0149 - val_loss: 0.9760
Epoch 7/1000
 - 0s - loss: 0.9583 - val_loss: 0.9214
Epoch 8/1000
 - 0s - loss: 0.9061 - val_loss: 0.8615
Epoch 9/1000
 - 0s - loss: 0.8486 - val_loss: 0.8128
Epoch 10/1000
 - 0s - loss: 0.8129 - val_loss: 0.7717
Epoch 11/1000
 - 0s - loss: 0.7827 - val_loss: 0.7415
Epoch 12/1000
 - 0s - loss: 0.7555 - val_loss: 0.7144
Epoch 13/1000
 - 0s - loss: 0.7290 - val_loss: 0.6886
Epoch 14/1000
 - 0s - loss: 0.7036 - val_loss: 0.6645
Epoch 15/1000
 - 0s - loss: 0.6801 - val_loss: 0.6389
Epoch 16/1000
 - 0s - loss: 0.6561 - val_loss: 0.6138
Epoch 17/1000
 - 0s - loss: 0.6338 - val_loss: 0.5884
Epoch 18/1000
 - 0s - loss: 0.6149 - val_loss:

<keras.callbacks.callbacks.History at 0x7ff6f8562650>

A number of parameters are passed to the EarlyStopping object.

- **min_delta** This value should be kept small. It simply means the minimum change in error to be registered as an improvement. Setting it even smaller will not likely have a great deal of impact.
- **patience** How long should the training wait for the validation error to improve?
- **verbose** How much progress information do you want?
- **mode** In general, always set this to auto. This parameter specifies whether the monitored value should be minimized or maximized. Consider accuracy, where higher numbers are desired, vs. log-loss/RMSE, where lower numbers are desired.
- **restore_best_weights** This should always be set to true. This restores the weights to the values they had at the epoch where the monitored validation value was at its best (here, the lowest validation loss). Unless you are manually tracking the weights yourself, you should have Keras perform this step for you.

As you can see, not all of the epochs were used. The neural network training stopped once the validation loss no longer improved.

In [7]:
from sklearn.metrics import accuracy_score

# Predicted class probabilities for the held-out rows.
pred = model.predict(X_test)

# Reduce each row (probabilities / one-hot target) to the index of its
# largest entry so the two can be compared as class labels.
predicted_classes = pred.argmax(axis=1)
expected_classes = y_test.argmax(axis=1)

correct = accuracy_score(expected_classes, predicted_classes)
print(f'Accuracy: {correct}')

Accuracy: 0.9736842105263158


# Early Stopping with Regression

In [11]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Activation
import pandas as pd
import io
import os
import requests
import numpy as np
from sklearn import metrics

# Load the auto-mpg dataset; 'NA' and '?' entries are parsed as missing values.
df = pd.read_csv(
    "https://data.heatonresearch.com/data/t81-558/auto-mpg.csv",
    na_values=['NA', '?'],
)

cars = df['name']

# Fill missing horsepower entries with the column median.
df['horsepower'] = df['horsepower'].fillna(df['horsepower'].median())

# Regression target and feature matrix as NumPy arrays.
y = df['mpg'].values
x = df[
    ['cylinders', 'displacement', 'horsepower', 'weight',
     'acceleration', 'year', 'origin']
].values

# Hold out 25% of the rows; random_state pins the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42
)

# Regressor: two ReLU hidden layers and a single linear output unit.
model = Sequential([
    Dense(25, input_dim=x.shape[1], activation='relu'),  # Hidden 1
    Dense(10, activation='relu'),                        # Hidden 2
    Dense(1),                                            # Output
])
model.compile(loss='mean_squared_error', optimizer='adam')

# Early-stopping callback watching the validation loss.
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-3,
    patience=5,
    verbose=1,
    mode='auto',
    restore_best_weights=True,
)
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    callbacks=[monitor],
    verbose=2,
    epochs=1000,
)

Train on 298 samples, validate on 100 samples
Epoch 1/1000
 - 0s - loss: 2047.1332 - val_loss: 582.1701
Epoch 2/1000
 - 0s - loss: 441.9207 - val_loss: 282.8384
Epoch 3/1000
 - 0s - loss: 267.9644 - val_loss: 239.6907
Epoch 4/1000
 - 0s - loss: 229.7094 - val_loss: 197.4253
Epoch 5/1000
 - 0s - loss: 205.6010 - val_loss: 193.3625
Epoch 6/1000
 - 0s - loss: 183.9098 - val_loss: 193.4359
Epoch 7/1000
 - 0s - loss: 172.9436 - val_loss: 168.0534
Epoch 8/1000
 - 0s - loss: 155.8892 - val_loss: 158.7358
Epoch 9/1000
 - 0s - loss: 148.4894 - val_loss: 148.2871
Epoch 10/1000
 - 0s - loss: 140.6668 - val_loss: 142.7987
Epoch 11/1000
 - 0s - loss: 136.1123 - val_loss: 134.6822
Epoch 12/1000
 - 0s - loss: 122.5186 - val_loss: 121.6033
Epoch 13/1000
 - 0s - loss: 115.7824 - val_loss: 115.1229
Epoch 14/1000
 - 0s - loss: 114.7889 - val_loss: 127.0257
Epoch 15/1000
 - 0s - loss: 105.1802 - val_loss: 98.5739
Epoch 16/1000
 - 0s - loss: 95.8236 - val_loss: 91.6298
Epoch 17/1000
 - 0s - loss: 105.7377 

<keras.callbacks.callbacks.History at 0x7ff687d37850>

In [12]:
# Measure RMSE error. RMSE is common for regression.
# Predict on the lowercase `x_test` produced by the auto-mpg split in this
# section. The capitalised `X_test` is the iris hold-out set from the
# classification section and has a different number of feature columns than
# this model was built for.
pred = model.predict(x_test)
# Arguments follow scikit-learn's (y_true, y_pred) order; the MSE value is
# the same either way.
score = np.sqrt(metrics.mean_squared_error(y_test, pred))
print(f'Final Score (RMSE): {score}')

Final Score (RMSE): 4.156305596931492
