<a href="https://colab.research.google.com/github/amitsiwach/Deep_Learning_notebooks/blob/main/All_optimizers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

In [2]:
mnist = tf.keras.datasets.mnist

(X_train_full, y_train_full), (X_test, y_test) = mnist.load_data()

In [3]:
X_train_full.shape, y_train_full.shape

((60000, 28, 28), (60000,))

In [4]:
X_test.shape, y_test.shape

((10000, 28, 28), (10000,))

In [5]:
X_valid, X_train = X_train_full[:5000]/255.0, X_train_full[5000:]/255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

In [6]:
early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

LAYERS = [tf.keras.layers.Flatten(input_shape = [28,28], name = "inputLayer"),
         tf.keras.layers.Dense(300,activation="relu", name = "hiddenlayer1"),
         tf.keras.layers.Dense(100,activation="relu", name = "hiddenlayer2"),
         tf.keras.layers.Dense(10,activation="softmax", name = "outputLayer")]

EPOCHS = 50
VALIDATION_SET = (X_valid, y_valid)

### **1. SGD**

In [7]:
model_clf = tf.keras.models.Sequential(LAYERS)

model_clf.compile(loss = tf.losses.sparse_categorical_crossentropy,
                  optimizer = tf.keras.optimizers.SGD(0.02),
                  metrics = ["accuracy"])

history = model_clf.fit(X_train, y_train, epochs= EPOCHS, validation_data= VALIDATION_SET, verbose=2, batch_size=32, callbacks=[early_stopping_cb])

Epoch 1/50
1719/1719 - 12s - loss: 0.4430 - accuracy: 0.8819 - val_loss: 0.2374 - val_accuracy: 0.9350 - 12s/epoch - 7ms/step
Epoch 2/50
1719/1719 - 7s - loss: 0.2217 - accuracy: 0.9361 - val_loss: 0.1725 - val_accuracy: 0.9562 - 7s/epoch - 4ms/step
Epoch 3/50
1719/1719 - 8s - loss: 0.1697 - accuracy: 0.9517 - val_loss: 0.1397 - val_accuracy: 0.9630 - 8s/epoch - 4ms/step
Epoch 4/50
1719/1719 - 7s - loss: 0.1371 - accuracy: 0.9603 - val_loss: 0.1246 - val_accuracy: 0.9662 - 7s/epoch - 4ms/step
Epoch 5/50
1719/1719 - 8s - loss: 0.1138 - accuracy: 0.9678 - val_loss: 0.1105 - val_accuracy: 0.9686 - 8s/epoch - 4ms/step
Epoch 6/50
1719/1719 - 8s - loss: 0.0970 - accuracy: 0.9724 - val_loss: 0.0990 - val_accuracy: 0.9706 - 8s/epoch - 5ms/step
Epoch 7/50
1719/1719 - 8s - loss: 0.0840 - accuracy: 0.9763 - val_loss: 0.0941 - val_accuracy: 0.9710 - 8s/epoch - 5ms/step
Epoch 8/50
1719/1719 - 8s - loss: 0.0735 - accuracy: 0.9793 - val_loss: 0.0844 - val_accuracy: 0.9764 - 8s/epoch - 4ms/step
Epoch 

### **2. Momentum Optimization**

In [8]:
del model_clf

model_clf = tf.keras.models.Sequential(LAYERS)

model_clf.compile(loss = tf.losses.sparse_categorical_crossentropy,
                  optimizer = tf.keras.optimizers.SGD(learning_rate=0.02, momentum=0.9),
                  metrics = ["accuracy"])

history = model_clf.fit(X_train, y_train, epochs= EPOCHS, validation_data= VALIDATION_SET, verbose=2, batch_size=32, callbacks=[early_stopping_cb])

Epoch 1/50
1719/1719 - 9s - loss: 0.1138 - accuracy: 0.9636 - val_loss: 0.0875 - val_accuracy: 0.9768 - 9s/epoch - 5ms/step
Epoch 2/50
1719/1719 - 8s - loss: 0.0670 - accuracy: 0.9785 - val_loss: 0.0808 - val_accuracy: 0.9770 - 8s/epoch - 5ms/step
Epoch 3/50
1719/1719 - 7s - loss: 0.0461 - accuracy: 0.9850 - val_loss: 0.0829 - val_accuracy: 0.9762 - 7s/epoch - 4ms/step
Epoch 4/50
1719/1719 - 8s - loss: 0.0311 - accuracy: 0.9897 - val_loss: 0.0812 - val_accuracy: 0.9810 - 8s/epoch - 5ms/step
Epoch 5/50
1719/1719 - 9s - loss: 0.0255 - accuracy: 0.9911 - val_loss: 0.0922 - val_accuracy: 0.9770 - 9s/epoch - 5ms/step
Epoch 6/50
1719/1719 - 8s - loss: 0.0195 - accuracy: 0.9933 - val_loss: 0.0868 - val_accuracy: 0.9796 - 8s/epoch - 5ms/step
Epoch 7/50
1719/1719 - 8s - loss: 0.0151 - accuracy: 0.9948 - val_loss: 0.0871 - val_accuracy: 0.9814 - 8s/epoch - 5ms/step


### **3.Nestrov Accelerated Gradient(NAG)**

In [9]:
del model_clf

model_clf = tf.keras.models.Sequential(LAYERS)

model_clf.compile(loss = tf.losses.sparse_categorical_crossentropy,
                  optimizer = tf.keras.optimizers.SGD(learning_rate=0.02, momentum=0.9, nesterov=True),
                  metrics = ["accuracy"])

history = model_clf.fit(X_train, y_train, epochs= EPOCHS, validation_data= VALIDATION_SET, verbose=2, batch_size=32, callbacks=[early_stopping_cb])

Epoch 1/50
1719/1719 - 8s - loss: 0.0442 - accuracy: 0.9863 - val_loss: 0.0753 - val_accuracy: 0.9798 - 8s/epoch - 5ms/step
Epoch 2/50
1719/1719 - 7s - loss: 0.0324 - accuracy: 0.9889 - val_loss: 0.0803 - val_accuracy: 0.9790 - 7s/epoch - 4ms/step
Epoch 3/50
1719/1719 - 9s - loss: 0.0253 - accuracy: 0.9911 - val_loss: 0.0879 - val_accuracy: 0.9774 - 9s/epoch - 5ms/step
Epoch 4/50
1719/1719 - 8s - loss: 0.0201 - accuracy: 0.9936 - val_loss: 0.0785 - val_accuracy: 0.9826 - 8s/epoch - 4ms/step
Epoch 5/50
1719/1719 - 8s - loss: 0.0131 - accuracy: 0.9957 - val_loss: 0.0727 - val_accuracy: 0.9798 - 8s/epoch - 5ms/step
Epoch 6/50
1719/1719 - 8s - loss: 0.0122 - accuracy: 0.9962 - val_loss: 0.0745 - val_accuracy: 0.9830 - 8s/epoch - 5ms/step
Epoch 7/50
1719/1719 - 8s - loss: 0.0076 - accuracy: 0.9976 - val_loss: 0.0899 - val_accuracy: 0.9816 - 8s/epoch - 5ms/step
Epoch 8/50
1719/1719 - 8s - loss: 0.0044 - accuracy: 0.9987 - val_loss: 0.0790 - val_accuracy: 0.9836 - 8s/epoch - 4ms/step
Epoch 9/