# Early stopping: Optimizing the optimization

- You can use early stopping to stop optimization when it isn't helping any more. Since the optimization stops automatically when it isn't helping, you can also set a high value for `epochs` in your call to `.fit()`.

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

# Import titanic data
titanic = pd.read_csv('datasets/titanic_all_numeric.csv')
titanic.head()

2023-05-24 17:33:54.058556: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-24 17:33:54.058580: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Unnamed: 0,survived,pclass,age,sibsp,parch,fare,male,age_was_missing,embarked_from_cherbourg,embarked_from_queenstown,embarked_from_southampton
0,0,3,22.0,1,0,7.25,1,False,0,0,1
1,1,1,38.0,1,0,71.2833,0,False,1,0,0
2,1,3,26.0,0,0,7.925,0,False,0,0,1
3,1,1,35.0,1,0,53.1,0,False,0,0,1
4,0,3,35.0,0,0,8.05,1,False,0,0,1


In [2]:
# Initialize features and labels
features = np.array(titanic.drop('survived', axis=1), dtype = np.float32)
labels = np.array(pd.get_dummies(titanic['survived']), dtype= np.float32)

In [5]:
# Train test split
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(features, labels,
                                                   test_size=0.3,
                                                   random_state=42)

In [6]:
# Total features
n_cols = features.shape[1]
n_cols

10

In [10]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Specifying model architecture
model = Sequential()

# Add hidden layers
model.add(Dense(16, activation='relu', input_shape=(n_cols,)))
model.add(Dense(8, activation='relu'))
model.add(Dense(4, activation='relu'))

# Output layer
model.add(Dense(2, activation='softmax'))

# Model summary
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 16)                176       
                                                                 
 dense_5 (Dense)             (None, 8)                 136       
                                                                 
 dense_6 (Dense)             (None, 4)                 36        
                                                                 
 dense_7 (Dense)             (None, 2)                 10        
                                                                 
Total params: 358
Trainable params: 358
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [12]:
# Early Stopping while fitting 
from tensorflow.keras.callbacks import EarlyStopping

# Define early_stopping_monitor -- Stop optimization when the validation loss hasn't improved for 2 epochs by specifying the patience parameter of EarlyStopping() to be 2
early_stopping_monitor = EarlyStopping(patience=2)

# Fit the model
model.fit(X_train, y_train, epochs=30, validation_split=0.3, callbacks=[early_stopping_monitor])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fc084120520>

Though specified epochs of 30, the training stopped at epoch 14 since the accuracy was not improving every since. This allowed to specify higher epochs without worrying for computational cost and also with this you don't have to guess the epoch as the training stops automatically at optimal point.

In [14]:
# Predict on test data
pred = model.predict(X_test)

# Convert into binary prediction
bi_pred = np.where(pred>0.5, 1, 0)



In [17]:
# Compute accuracy
from sklearn.metrics import accuracy_score

accuracy_score(y_test, bi_pred)

0.6902985074626866

# Building digit recognition model

In [53]:
mnist = pd.read_csv('datasets/mnist.csv', header=None)
mnist.shape

(2001, 785)

In [54]:
# Features and labels
features_mnist = np.array(mnist.drop(0,axis=1),dtype = np.float32)
labels_mnist = np.array(pd.get_dummies(mnist[0], dtype = np.float32)) 

In [55]:
# Train-test split
X_train_mn, X_test_mn, y_train_mn, y_test_mn = train_test_split(features_mnist, labels_mnist,
                                                               test_size=0.3, random_state=42)

In [66]:
# Specify model architecture
model_mnist = Sequential()

# Add hidden layers
model_mnist.add(Dense(64, activation='relu', input_shape=(784,)))
model_mnist.add(Dense(64, activation='relu'))

# Output layer
model_mnist.add(Dense(10, activation='softmax'))

# Model summary
model_mnist.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_42 (Dense)            (None, 64)                50240     
                                                                 
 dense_43 (Dense)            (None, 64)                4160      
                                                                 
 dense_44 (Dense)            (None, 10)                650       
                                                                 
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________


In [67]:
# Compile model
model_mnist.compile(optimizer=tf.keras.optimizers.Adam(0.01), #<-- lr = 0.01
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])

In [69]:
# Early stopping
early_stopping_monitor_mnist = EarlyStopping(patience=3)

In [70]:
# Fit the model on training set
model_mnist.fit(X_train_mn, y_train_mn, 
                epochs=30, 
                validation_split=0.2,
                callbacks=[early_stopping_monitor_mnist])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


<keras.callbacks.History at 0x7fbff4213d90>

In [77]:
# Predict the test set
pred_mnist = model_mnist.predict(X_test_mn)



In [84]:
# Find the indices of the maximum values along each row
max_indices = np.argmax(pred_mnist, axis=1)

# Create a new array with all values set to 0
result = np.zeros_like(pred_mnist)

# Set the maximum value in each row to 1
result[np.arange(len(pred_mnist)), max_indices] = 1

# Print the result -- binary representation of predicted output
print(result)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 1. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]]


In [85]:
result[4]

array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [92]:
# Compute accuracy
accuracy_score(y_test_mn,result)

0.5557404326123128