<a href="https://colab.research.google.com/github/amitsiwach/Deep_Learning_notebooks/blob/main/All_optimizers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

In [2]:
mnist = tf.keras.datasets.mnist

(X_train_full, y_train_full), (X_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
X_train_full.shape, y_train_full.shape

((60000, 28, 28), (60000,))

In [4]:
X_test.shape, y_test.shape

((10000, 28, 28), (10000,))

In [5]:
X_valid, X_train = X_train_full[:5000]/255.0, X_train_full[5000:]/255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

In [6]:
early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

LAYERS = [tf.keras.layers.Flatten(input_shape = [28,28], name = "inputLayer"),
         tf.keras.layers.Dense(300,activation="relu", name = "hiddenlayer1"),
         tf.keras.layers.Dense(100,activation="relu", name = "hiddenlayer2"),
         tf.keras.layers.Dense(10,activation="softmax", name = "outputLayer")]

EPOCHS = 50
VALIDATION_SET = (X_valid, y_valid)

### **1. Stochastic Gradient Descent(SGD)**

In [7]:
model_clf = tf.keras.models.Sequential(LAYERS)

model_clf.compile(loss = tf.losses.sparse_categorical_crossentropy,
                  optimizer = tf.keras.optimizers.SGD(0.02),
                  metrics = ["accuracy"])

history = model_clf.fit(X_train, y_train, epochs= EPOCHS, validation_data= VALIDATION_SET, verbose=2, batch_size=32, callbacks=[early_stopping_cb])

Epoch 1/50
1719/1719 - 9s - loss: 0.4494 - accuracy: 0.8790 - val_loss: 0.2397 - val_accuracy: 0.9304 - 9s/epoch - 5ms/step
Epoch 2/50
1719/1719 - 5s - loss: 0.2261 - accuracy: 0.9351 - val_loss: 0.1785 - val_accuracy: 0.9502 - 5s/epoch - 3ms/step
Epoch 3/50
1719/1719 - 4s - loss: 0.1726 - accuracy: 0.9500 - val_loss: 0.1411 - val_accuracy: 0.9604 - 4s/epoch - 2ms/step
Epoch 4/50
1719/1719 - 4s - loss: 0.1382 - accuracy: 0.9601 - val_loss: 0.1244 - val_accuracy: 0.9634 - 4s/epoch - 2ms/step
Epoch 5/50
1719/1719 - 4s - loss: 0.1148 - accuracy: 0.9672 - val_loss: 0.1071 - val_accuracy: 0.9706 - 4s/epoch - 2ms/step
Epoch 6/50
1719/1719 - 4s - loss: 0.0979 - accuracy: 0.9723 - val_loss: 0.0974 - val_accuracy: 0.9716 - 4s/epoch - 2ms/step
Epoch 7/50
1719/1719 - 4s - loss: 0.0845 - accuracy: 0.9760 - val_loss: 0.0953 - val_accuracy: 0.9732 - 4s/epoch - 2ms/step
Epoch 8/50
1719/1719 - 4s - loss: 0.0735 - accuracy: 0.9792 - val_loss: 0.0900 - val_accuracy: 0.9744 - 4s/epoch - 2ms/step
Epoch 9/

### **2. Momentum Optimization**

In [8]:
del model_clf

model_clf = tf.keras.models.Sequential(LAYERS)

model_clf.compile(loss = tf.losses.sparse_categorical_crossentropy,
                  optimizer = tf.keras.optimizers.SGD(learning_rate=0.02, momentum=0.9),
                  metrics = ["accuracy"])

history = model_clf.fit(X_train, y_train, epochs= EPOCHS, validation_data= VALIDATION_SET, verbose=2, batch_size=32, callbacks=[early_stopping_cb])

Epoch 1/50
1719/1719 - 5s - loss: 0.1122 - accuracy: 0.9645 - val_loss: 0.0864 - val_accuracy: 0.9746 - 5s/epoch - 3ms/step
Epoch 2/50
1719/1719 - 4s - loss: 0.0654 - accuracy: 0.9786 - val_loss: 0.0886 - val_accuracy: 0.9698 - 4s/epoch - 2ms/step
Epoch 3/50
1719/1719 - 4s - loss: 0.0450 - accuracy: 0.9853 - val_loss: 0.0834 - val_accuracy: 0.9762 - 4s/epoch - 2ms/step
Epoch 4/50
1719/1719 - 4s - loss: 0.0317 - accuracy: 0.9898 - val_loss: 0.0775 - val_accuracy: 0.9798 - 4s/epoch - 2ms/step
Epoch 5/50
1719/1719 - 4s - loss: 0.0228 - accuracy: 0.9927 - val_loss: 0.0783 - val_accuracy: 0.9790 - 4s/epoch - 2ms/step
Epoch 6/50
1719/1719 - 4s - loss: 0.0193 - accuracy: 0.9935 - val_loss: 0.0802 - val_accuracy: 0.9802 - 4s/epoch - 2ms/step
Epoch 7/50
1719/1719 - 4s - loss: 0.0152 - accuracy: 0.9949 - val_loss: 0.0713 - val_accuracy: 0.9812 - 4s/epoch - 2ms/step
Epoch 8/50
1719/1719 - 4s - loss: 0.0096 - accuracy: 0.9971 - val_loss: 0.0817 - val_accuracy: 0.9822 - 4s/epoch - 2ms/step
Epoch 9/

### **3.Nestrov Accelerated Gradient(NAG)**

In [9]:
del model_clf

model_clf = tf.keras.models.Sequential(LAYERS)

model_clf.compile(loss = tf.losses.sparse_categorical_crossentropy,
                  optimizer = tf.keras.optimizers.SGD(learning_rate=0.02, momentum=0.9, nesterov=True),
                  metrics = ["accuracy"])

history = model_clf.fit(X_train, y_train, epochs= EPOCHS, validation_data= VALIDATION_SET, verbose=2, batch_size=32, callbacks=[early_stopping_cb])

Epoch 1/50
1719/1719 - 5s - loss: 0.0094 - accuracy: 0.9969 - val_loss: 0.0970 - val_accuracy: 0.9782 - 5s/epoch - 3ms/step
Epoch 2/50
1719/1719 - 4s - loss: 0.0107 - accuracy: 0.9964 - val_loss: 0.0792 - val_accuracy: 0.9818 - 4s/epoch - 2ms/step
Epoch 3/50
1719/1719 - 4s - loss: 0.0055 - accuracy: 0.9984 - val_loss: 0.0727 - val_accuracy: 0.9846 - 4s/epoch - 2ms/step
Epoch 4/50
1719/1719 - 5s - loss: 0.0024 - accuracy: 0.9993 - val_loss: 0.0710 - val_accuracy: 0.9850 - 5s/epoch - 3ms/step
Epoch 5/50
1719/1719 - 4s - loss: 8.7837e-04 - accuracy: 0.9998 - val_loss: 0.0734 - val_accuracy: 0.9844 - 4s/epoch - 2ms/step
Epoch 6/50
1719/1719 - 4s - loss: 6.7114e-04 - accuracy: 0.9999 - val_loss: 0.0739 - val_accuracy: 0.9854 - 4s/epoch - 2ms/step
Epoch 7/50
1719/1719 - 4s - loss: 2.3121e-04 - accuracy: 1.0000 - val_loss: 0.0741 - val_accuracy: 0.9862 - 4s/epoch - 2ms/step
Epoch 8/50
1719/1719 - 4s - loss: 1.4092e-04 - accuracy: 1.0000 - val_loss: 0.0748 - val_accuracy: 0.9864 - 4s/epoch - 2

### **4.AdaGrad (Adaptive Gradient Algorithm)**

Adagrad stops early before reaching Global Minima, so it is not recommended to use

### **5.RMS (Root Mean Squared) Propagation**

In [10]:
del model_clf

model_clf = tf.keras.models.Sequential(LAYERS)

model_clf.compile(loss = tf.losses.sparse_categorical_crossentropy,
                  optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.01, rho=0.9), #rho is beta here
                  metrics = ["accuracy"])

history = model_clf.fit(X_train, y_train, epochs= EPOCHS, validation_data= VALIDATION_SET, verbose=2, batch_size=32, callbacks=[early_stopping_cb])

Epoch 1/50
1719/1719 - 5s - loss: 0.4172 - accuracy: 0.9238 - val_loss: 0.3873 - val_accuracy: 0.9370 - 5s/epoch - 3ms/step
Epoch 2/50
1719/1719 - 5s - loss: 0.3536 - accuracy: 0.9389 - val_loss: 0.4298 - val_accuracy: 0.9428 - 5s/epoch - 3ms/step
Epoch 3/50
1719/1719 - 5s - loss: 0.3947 - accuracy: 0.9431 - val_loss: 0.5168 - val_accuracy: 0.9202 - 5s/epoch - 3ms/step
Epoch 4/50
1719/1719 - 5s - loss: 0.4145 - accuracy: 0.9391 - val_loss: 0.4601 - val_accuracy: 0.9216 - 5s/epoch - 3ms/step
Epoch 5/50
1719/1719 - 5s - loss: 0.4519 - accuracy: 0.9373 - val_loss: 0.4311 - val_accuracy: 0.9476 - 5s/epoch - 3ms/step
Epoch 6/50
1719/1719 - 5s - loss: 0.5009 - accuracy: 0.9284 - val_loss: 0.6449 - val_accuracy: 0.9176 - 5s/epoch - 3ms/step


### **6.Adam (Adaptive Moment Estimation) Optimization**

In [11]:
del model_clf

model_clf = tf.keras.models.Sequential(LAYERS)

model_clf.compile(loss = tf.losses.sparse_categorical_crossentropy,
                  optimizer = tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999),
                  metrics = ["accuracy"])

history = model_clf.fit(X_train, y_train, epochs= EPOCHS, validation_data= VALIDATION_SET, verbose=2, batch_size=32, callbacks=[early_stopping_cb])

Epoch 1/50
1719/1719 - 6s - loss: 0.2382 - accuracy: 0.9419 - val_loss: 0.1862 - val_accuracy: 0.9538 - 6s/epoch - 3ms/step
Epoch 2/50
1719/1719 - 4s - loss: 0.1676 - accuracy: 0.9564 - val_loss: 0.1819 - val_accuracy: 0.9582 - 4s/epoch - 2ms/step
Epoch 3/50
1719/1719 - 4s - loss: 0.1572 - accuracy: 0.9609 - val_loss: 0.2048 - val_accuracy: 0.9626 - 4s/epoch - 3ms/step
Epoch 4/50
1719/1719 - 4s - loss: 0.1383 - accuracy: 0.9652 - val_loss: 0.2583 - val_accuracy: 0.9558 - 4s/epoch - 3ms/step
Epoch 5/50
1719/1719 - 5s - loss: 0.1332 - accuracy: 0.9664 - val_loss: 0.2389 - val_accuracy: 0.9578 - 5s/epoch - 3ms/step
Epoch 6/50
1719/1719 - 4s - loss: 0.1251 - accuracy: 0.9695 - val_loss: 0.2349 - val_accuracy: 0.9584 - 4s/epoch - 3ms/step
Epoch 7/50
1719/1719 - 4s - loss: 0.1275 - accuracy: 0.9692 - val_loss: 0.2166 - val_accuracy: 0.9650 - 4s/epoch - 3ms/step
