# Chapter 11 - Deep Network on CIFAR10 Dataset

In [57]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd 
import numpy as np 
import ssl
import matplotlib.pyplot as plt 
from functools import partial 
import os

In [58]:
ssl._create_default_https_context = ssl._create_unverified_context

In [59]:
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()

In [60]:
X_train, y_train, X_valid, y_valid = X_train_full[:40000], y_train_full[:40000], X_train_full[40000:], y_train_full[40000:]

Default DNN Architecture 
- LeCun Initialiation
- ELU Activation

In [61]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

DenseLayer = partial(
    keras.layers.Dense, 
    activation = 'elu',
    kernel_initializer = 'he_normal'
)

model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model.add(DenseLayer(100))

In [62]:
model.add(keras.layers.Dense(10, activation='softmax'))

In [63]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 3072)              0         
                                                                 
 dense (Dense)               (None, 100)               307300    
                                                                 
 dense_1 (Dense)             (None, 100)               10100     
                                                                 
 dense_2 (Dense)             (None, 100)               10100     
                                                                 
 dense_3 (Dense)             (None, 100)               10100     
                                                                 
 dense_4 (Dense)             (None, 100)               10100     
                                                                 
 dense_5 (Dense)             (None, 100)               1

In [64]:
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint('models/ch11_model.h5', save_best_only=True)
run_index = 1
run_logdir = os.path.join(os.curdir, 'cifar10_logs', 'run_{:03d}'.format(run_index))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]

In [65]:
# default learning rate works alright, but not as good as 5e-5
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=keras.optimizers.Nadam(learning_rate=5e-5),
              metrics=["accuracy"])

In [66]:
model.fit(
    X_train, y_train, epochs=100, validation_data=(X_valid, y_valid), callbacks=[callbacks]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100


<keras.callbacks.History at 0x2473926f130>

In [67]:
model.evaluate(X_test, y_test)



[1.5956181287765503, 0.4578000009059906]

Using Batch Normalization 

In [68]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

In [69]:
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32,32,3]))
model.add(keras.layers.BatchNormalization())
for _ in range(20):
    model.add(keras.layers.Dense(100, kernel_initializer='he_normal'))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation('elu'))
model.add(keras.layers.Dense(10, activation='softmax'))

In [70]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 3072)              0         
                                                                 
 batch_normalization (BatchN  (None, 3072)             12288     
 ormalization)                                                   
                                                                 
 dense (Dense)               (None, 100)               307300    
                                                                 
 batch_normalization_1 (Batc  (None, 100)              400       
 hNormalization)                                                 
                                                                 
 activation (Activation)     (None, 100)               0         
                                                                 
 dense_1 (Dense)             (None, 100)               1

In [71]:
# callback method to decrease lr exponentially after 10 epochs
# NOT FOR FINDING OPTIMAL RATE, METHOD OF TRAINING THAT DECREASES LR AS CONVERGENCE BEGINS
def exp_scheduler(epoch, lr):
    if epoch < 10:
        return lr
    return lr * tf.math.exp(-0.1)

In [72]:
lr_callback = tf.keras.callbacks.LearningRateScheduler(exp_scheduler)

In [73]:
model_checkpoint_cb = keras.callbacks.ModelCheckpoint('models/ch11_model_batchnorm.h5', save_best_only=True)
run_logdir = os.path.join(os.curdir, 'cifar10_logs_batchnorm', 'run_{:03d}'.format(run_index))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)

In [74]:
optimizer = keras.optimizers.Nadam(learning_rate=5e-4)
model.compile(
    loss='sparse_categorical_crossentropy', optimizer = optimizer, metrics=['accuracy'] 
)

In [75]:
history = model.fit(
    X_train, y_train, epochs=100, validation_data=(X_valid, y_valid), 
    callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100


In [76]:
model.evaluate(X_test, y_test)



[1.4547209739685059, 0.5253999829292297]

Batch Norm and Convolutions 

In [84]:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

In [85]:
model = keras.models.Sequential()
model.add(keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(32,32,3)))
model.add(keras.layers.MaxPool2D((2,2)))
model.add(keras.layers.Conv2D(32, (3,3), activation='relu'))
model.add(keras.layers.MaxPool2D(2,2))
model.add(keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(keras.layers.MaxPool2D(2,2))
model.add(keras.layers.Flatten(input_shape=[32,32,3]))
model.add(keras.layers.BatchNormalization())
for _ in range(20):
    model.add(keras.layers.Dense(100, kernel_initializer='he_normal'))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation('elu'))
model.add(keras.layers.Dense(10, activation='softmax'))

In [86]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 30, 30, 16)        448       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 15, 15, 16)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 13, 13, 32)        4640      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 6, 6, 32)         0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 4, 4, 64)          18496     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 2, 2, 64)         0

In [87]:
# same learning rate with conv layers?
optimizer = keras.optimizers.Nadam(learning_rate=5e-4)
model.compile(
    loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']
)

In [88]:
model_checkpoint_cb = keras.callbacks.ModelCheckpoint('models/ch11_model_batchnorm_conv.h5', save_best_only=True)
run_logdir = os.path.join(os.curdir, 'cifar10_logs_batchnorm_conv', 'run_{:03d}'.format(run_index))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)

In [89]:
conv_hisotry = model.fit(
    X_train, y_train, epochs=100, validation_data=(X_valid, y_valid),
    callbacks=[early_stopping_cb, model_checkpoint_cb, tensorboard_cb]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100

In [None]:
model.evaluate(X_test, y_test)

Add dropout to model