<a href="https://colab.research.google.com/github/emrllh/My_works/blob/main/Untitled6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Avoiding Overfitting Through Regularization

1. L1 and L2 Regularization  
2. Dropout
3. MC Dropout
4. Max Norm






### L1 and L2 Regularization

In [None]:
import tensorflow as tf
import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Fetch Fashion-MNIST through Keras; load_data() hands back a (train, test)
# pair, each of which is an (images, labels) tuple.
(X_train_full, y_train_full), (X_test, y_test) = (
    keras.datasets.fashion_mnist.load_data()
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [None]:
# Keep the first 50,000 training samples for training and hold out the rest
# for validation; images are divided by 255.0 to rescale the pixel values.
X_train = X_train_full[:50000] / 255.0
X_valid = X_train_full[50000:] / 255.0
y_train = y_train_full[:50000]
y_valid = y_train_full[50000:]
# NOTE: X_test is rebound to its scaled version, so re-running this cell
# without reloading the data would divide by 255 a second time.
X_test = X_test / 255.0

In [None]:
# Peek at the first validation label to sanity-check the split (labels are integer class ids).
y_valid[0]

9

In [None]:
from functools import partial

# Factory for Dense layers with the shared settings pre-filled: ReLU
# activation, He-normal initialization and an L2 kernel penalty (factor 0.01).
# Any keyword passed when calling RegularizedDense(...) overrides these defaults.
RegularizedDense = partial(
    tf.keras.layers.Dense,
    kernel_regularizer=tf.keras.regularizers.l2(0.01),
    kernel_initializer="he_normal",
    activation="relu",
)


Or use `tf.keras.regularizers.l1(0.1)` for ℓ1 regularization with a factor of 0.1, or `tf.keras.regularizers.l1_l2(0.1, 0.01)` for both ℓ1 and ℓ2 regularization, with factors 0.1 and 0.01 respectively.

In [None]:
# Seed TensorFlow's global random generator before any layer is created so the
# run is repeatable.
tf.random.set_seed(42)

# `RegularizedDense` (a functools.partial around Dense with ReLU, He-normal
# init and an L2 penalty of 0.01) is defined once in the previous cell; it is
# reused here instead of being imported and redefined a second time.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    RegularizedDense(100),
    RegularizedDense(100),
    # The call-time keyword overrides the partial's default "relu" activation.
    RegularizedDense(10, activation="softmax"),
])


In [None]:
# Plain SGD with a fixed learning rate of 0.02.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.02)

# Labels are integer class ids, hence the *sparse* categorical cross-entropy.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

# Train for 5 epochs, reporting validation metrics after each one.
history = model.fit(
    X_train,
    y_train,
    epochs=5,
    validation_data=(X_valid, y_valid),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Dropout

In [None]:
tf.random.set_seed(42)

# Same 100-100-10 architecture as before, but with a Dropout layer (rate 0.2)
# placed in front of every Dense layer instead of a weight penalty.
model = tf.keras.Sequential(
    [
        tf.keras.layers.Flatten(input_shape=[28, 28]),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(
            100, activation="relu", kernel_initializer="he_normal"
        ),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(
            100, activation="relu", kernel_initializer="he_normal"
        ),
        tf.keras.layers.Dropout(rate=0.2),
        tf.keras.layers.Dense(10, activation="softmax"),
    ]
)

In [None]:
# SGD with momentum for the dropout model.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

In [None]:
# Train the dropout model for 15 epochs, validating after every epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=15,
    validation_data=(X_valid, y_valid),
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


The training accuracy looks like it's lower than the validation accuracy, but that's just because dropout is only active during training. If we evaluate the model on the training set after training (i.e., with dropout turned off), we get the "real" training accuracy, which is very slightly higher than the validation accuracy and the test accuracy:

In [None]:
# Re-score the training set after training; returns [loss, accuracy] as configured in compile().
model.evaluate(X_train,y_train)



[0.2912813723087311, 0.8916199803352356]

In [None]:
# Score the held-out test set; returns [loss, accuracy] as configured in compile().
model.evaluate(X_test,y_test)



[0.35826289653778076, 0.8676999807357788]

Note: make sure to use `AlphaDropout` instead of `Dropout` if you want to build a self-normalizing neural net using `SELU`.

### MC Dropout

In [None]:
# Monte Carlo dropout: run 100 forward passes over the test set with dropout
# left switched on (training=True) and stack the results along a new first axis.
y_probas = np.stack(
    [model(X_test, training=True) for _ in range(100)]
)

In [None]:
# Same 100 stochastic passes as above, this time after seeding TensorFlow's
# global generator so the dropout masks are repeatable.
tf.random.set_seed(42)
y_probas = np.stack(
    [model(X_test, training=True) for _ in range(100)],
    axis=0,
)
# Show the stacked predictions' shape next to the input shape.
y_probas.shape, X_test.shape


((100, 10000, 10), (10000, 28, 28))

In [None]:
# Collapse the passes axis (axis 0): the MC Dropout estimate is the average
# of the per-pass class probabilities.
y_proba = np.mean(y_probas, axis=0)
y_proba

array([[4.9172051e-04, 2.0725818e-04, 1.4942739e-04, ..., 1.4094278e-01,
        9.3070423e-04, 8.0305582e-01],
       [7.6556054e-04, 9.3677481e-06, 9.3398565e-01, ..., 9.5996563e-07,
        2.8183791e-05, 2.9590863e-06],
       [2.1512278e-05, 9.9975145e-01, 4.0346945e-06, ..., 2.0053508e-09,
        1.3373514e-07, 6.5583192e-09],
       ...,
       [2.1891221e-03, 1.5594956e-05, 4.4174623e-04, ..., 8.8276072e-05,
        9.9378240e-01, 1.8065244e-05],
       [2.8802606e-05, 9.9048287e-01, 3.7987131e-05, ..., 1.3543557e-06,
        4.2047029e-07, 3.3670003e-07],
       [2.9330789e-03, 4.7085609e-04, 5.7213325e-03, ..., 9.8057121e-02,
        1.4267991e-01, 3.7976140e-03]], dtype=float32)

In [None]:
# Regular prediction for the first test image, for comparison with the MC Dropout average.
model.predict(X_test[:1]).round(3)



array([[0.   , 0.   , 0.   , 0.   , 0.   , 0.021, 0.   , 0.105, 0.   ,
        0.874]], dtype=float32)

In [None]:
# MC Dropout averaged class probabilities for the same first test image.
y_proba[0].round(3)

array([0.   , 0.   , 0.   , 0.001, 0.001, 0.051, 0.001, 0.141, 0.001,
       0.803], dtype=float32)

In [None]:
# Spread of the class probabilities across the passes (axis 0); a larger
# standard deviation means the stochastic passes disagree more.
y_std = np.std(y_probas, axis=0)
y_std[0].round(3)

array([0.002, 0.001, 0.   , 0.005, 0.007, 0.077, 0.007, 0.125, 0.003,
       0.151], dtype=float32)

In [None]:
# Predicted class = index of the highest averaged probability in each row.
y_pred = np.argmax(y_proba, axis=1)
# Fraction of test samples whose predicted class matches the label.
accuracy = (y_pred == y_test).mean()
accuracy

0.8669

In [None]:
# Inspect the predicted class ids (one per test sample).
y_pred

array([9, 2, 1, ..., 8, 1, 5])

In [None]:
# Inspect the averaged probability matrix again (one row per test sample).
y_proba

array([[4.9172051e-04, 2.0725818e-04, 1.4942739e-04, ..., 1.4094278e-01,
        9.3070423e-04, 8.0305582e-01],
       [7.6556054e-04, 9.3677481e-06, 9.3398565e-01, ..., 9.5996563e-07,
        2.8183791e-05, 2.9590863e-06],
       [2.1512278e-05, 9.9975145e-01, 4.0346945e-06, ..., 2.0053508e-09,
        1.3373514e-07, 6.5583192e-09],
       ...,
       [2.1891221e-03, 1.5594956e-05, 4.4174623e-04, ..., 8.8276072e-05,
        9.9378240e-01, 1.8065244e-05],
       [2.8802606e-05, 9.9048287e-01, 3.7987131e-05, ..., 1.3543557e-06,
        4.2047029e-07, 3.3670003e-07],
       [2.9330789e-03, 4.7085609e-04, 5.7213325e-03, ..., 9.8057121e-02,
        1.4267991e-01, 3.7976140e-03]], dtype=float32)

In [None]:
# Without `axis`, argmax returns a single index into the flattened array;
# with axis=1 it returns the best column (class) for every row.
y_proba.argmax(),y_proba.argmax(axis=1)

(12895, array([9, 2, 1, ..., 8, 1, 5]))

In [None]:
class MCDropout(tf.keras.layers.Dropout):
  """Dropout layer that stays active at inference time (Monte Carlo Dropout).

  Behaves like `tf.keras.layers.Dropout`, except that the parent's `call` is
  always invoked with `training=True`, so units keep being dropped even when
  the model is used for prediction.
  """

  def call(self, inputs, training=None):
    """Apply dropout to `inputs`, ignoring the caller-supplied `training` flag.

    Args:
      inputs: Tensor passed in by the previous layer.
      training: Accepted for signature compatibility only; it is not used.

    Returns:
      The result of `Dropout.call(inputs, training=True)`.
    """
    # The parameter was previously spelled `input` while the body referenced
    # `inputs`, which raised a NameError as soon as the layer was called.
    return super().call(inputs, training=True)


In [None]:
# How to convert Dropout layers to MCDropout in an existing Sequential model.

Dropout = tf.keras.layers.Dropout

# Rebuild the layer stack: every Dropout layer is replaced by an MCDropout
# with the same rate; all other layers are reused as they are.
mc_model = tf.keras.Sequential([
    MCDropout(layer.rate) if isinstance(layer, Dropout) else layer
    for layer in model.layers
])

# get_weights must be *called*: passing the bound method itself hands
# set_weights a function object instead of the list of weight arrays.
mc_model.set_weights(model.get_weights())

NameError: Exception encountered when calling layer "mc_dropout" (type MCDropout).

in user code:

    File "<ipython-input-47-e01b42038de2>", line 3, in call  *
        return super().call(inputs, training=True)

    NameError: name 'inputs' is not defined


Call arguments received by layer "mc_dropout" (type MCDropout):
  • input=tf.Tensor(shape=(None, 784), dtype=float32)
  • training=None

In [None]:
# Small 3x4 example array (values 0..11, filled row by row) used to
# illustrate how argmax behaves with and without an axis.
x = np.arange(12).reshape(3, 4)
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [None]:
# axis=0 gives the row index of the maximum in each column; no axis gives the
# position of the overall maximum in the flattened array.
x.argmax(axis=0),x.argmax()

(array([2, 2, 2, 2]), 11)

### Max Norm

Weight constraints provide an approach to reduce the overfitting of a deep learning neural network model on the training data and improve the performance of the model on new data, such as the holdout test set.

There are multiple types of weight constraints, such as maximum and unit vector norms, and some require a hyperparameter that must be configured.

https://machinelearningmastery.com/how-to-reduce-overfitting-in-deep-neural-networks-with-weight-constraints-in-keras/

In [None]:
# A single Dense layer whose kernel is constrained with max_norm(1.).
dense = tf.keras.layers.Dense(
    100,
    kernel_constraint=tf.keras.constraints.max_norm(1.),
    kernel_initializer="he_normal",
    activation="relu",
)

In [None]:
# How to apply max norm to every hidden layer in a model: pre-fill the shared
# Dense arguments once and reuse the resulting factory.
MaxNormDense = partial(
    tf.keras.layers.Dense,
    kernel_constraint=tf.keras.constraints.max_norm(1.),
    kernel_initializer="he_normal",
    activation="relu",
)

tf.random.set_seed(42)

# Two max-norm-constrained hidden layers; the softmax output layer is a plain
# Dense layer without the constraint.
model = tf.keras.Sequential(
    [
        tf.keras.layers.Flatten(input_shape=[28, 28]),
        MaxNormDense(100),
        MaxNormDense(100),
        tf.keras.layers.Dense(10, activation="softmax"),
    ]
)

In [None]:
# SGD with momentum, same settings as for the dropout model.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

# Train the max-norm model for 10 epochs, validating after each one.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_valid, y_valid),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
