# Caelan Osman
# Homework 19.3
# March 31, 2022

## Exercise 19.7

As we can see, the model we had with the largest parameters (from the last homework) was the model with 1.57 million parameters given by layers with widths [1000, 500, 300, 200, 100].
We rebuild that model here and experiment with it.


In [2]:
import keras
import time
import numpy as np
import tensorflow as tf
from keras.datasets import fashion_mnist
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
%matplotlib inline

sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))

Device mapping:
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7



In [3]:
from keras.backend import batch_normalization
########################
# HELPER CODE FOR TRAINING
########################
def prepare_data():
    """
    This function prepares the data for training
    """
    (X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()
    # Rescale the inputs to be in the interval [-0.5,0.5], 
    X_train_full = X_train_full/255 - 0.5  # Rescale the training set input
    X_test = X_test/255  - 0.5   # Apply the same transform to the test set

    # make a validation split beyond the basic test/train
    X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full,
                                                      test_size = 0.2, 
                                                      random_state = 42)
    
    return X_train, X_val, y_train, y_val, X_test, y_test

   
def build_model(widths=[], l2=0, dropout=0, batch = False):
    """
    This function builds a model with the given widths for the dense layers
    """

    if dropout != 0 and batch:
        raise ValueError('Cannot apply batch and dropout at the same time')


    # add initial layers
    model = keras.models.Sequential()
    model.add(keras.layers.Flatten(input_shape=[28, 28]))
    

    # add as many dense layers as we want
    for i, w in enumerate(widths):
        
        # add regularization
        if l2 != 0:
            model.add(keras.layers.Dense(w, activation="relu",
                                         activity_regularizer = tf.keras.regularizers.l2(l2)))
        else:
            model.add(keras.layers.Dense(w, activation="relu"))

        # add dropout model
        if dropout != 0 and i == 2:
            model.add(tf.keras.layers.Dropout(dropout, input_shape = (w, )))



    if batch:
        model.add(tf.keras.layers.BatchNormalization(
            axis=-1,
            momentum=0.99,
            epsilon=0.001,
            center=True,
            scale=True,
            beta_initializer="zeros",
            gamma_initializer="ones",
            moving_mean_initializer="zeros",
            moving_variance_initializer="ones",
            beta_regularizer=None,
            gamma_regularizer=None,
            beta_constraint=None,
            gamma_constraint=None
            ))

    # add softmax layer
    model.add(keras.layers.Dense(10, activation="softmax"))
    

    return model
    
def set_optimizer(learning_rate=0.2, decay=1e-4, momentum=0.8, nesterov=False, Adam=False):
    """
    This function returns the appropriate optimizer
    """
    if Adam:
        return keras.optimizers.Adam(lr=learning_rate)
    else:
        ## Learning rate with decay
        ## decay works as   lr *= (1. / (1. + decay * iterations))
        opt = tf.keras.optimizers.SGD(learning_rate = learning_rate, 
                                      decay=decay, 
                                      momentum=momentum, 
                                      nesterov=nesterov)
        
        return opt
    
def train_models(model, opt, X_train, y_train, X_val, y_val,  batch_size=32, epochs=12, verbose=0):
    """
    This function trains the model
    """
    model.compile(loss="sparse_categorical_crossentropy",
                      optimizer=opt,
                      metrics=["accuracy"])
    
    history = model.fit(X_train, y_train, 
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=verbose,
                    validation_data=(X_val, y_val)
                   )
    return model, history


def plot_history(history, title=""):
    """
    This function plots the history
    """
    # Plot the history
    plt.figure(figsize=(8,4),dpi=100)

    ax = plt.subplot(1,2,1)
    ax.plot(history.history['accuracy'], label='training accuracy')
    ax.plot(history.history['val_accuracy'], label='validation accuracy')
    ax.grid(True, alpha=0.2)
    ax.legend(frameon=False)
    ax.set_xlabel('Epoch')
    ax.grid(True, alpha=0.3)

    ax = plt.subplot(1,2,2)
    ax.plot(history.history['loss'], label='training loss')
    ax.plot(history.history['val_loss'], label='validation loss')
    ax.legend(frameon=False)
    ax.set_xlabel('Epoch')
    ax.grid(True, alpha=0.3)
    #plt.ylim(0,1);
    
    plt.suptitle(title)
    plt.show()
    
    return

In [4]:
# get best model
def get_best_model():

    # get training, validat, and testing data 
    X_train, X_val, y_train, y_val, X_test, y_test = prepare_data()

    # initialization best validation score
    best_val_score = -np.inf

    # define layers
    layers = [1000, 500, 300, 200, 100]
    # l2, dropout, batch parameters
    l2_params = [1e-4, 1e-3, 1e-2, 0, 0, 0, 0]
    dropout_params = [0.05, 0.01, 0, 0.1, 0.25, 0.25, 0]
    batch_params = [False, False, True, False, False, False, True]

    # P, LR, DR paramters
    P = [0.85, 0.8, 0.65]
    LR = [0.1, 0.2, 0.3]
    DR = [1e-4, 1e-3, 1e-2] 

    for l2, drop, batch in zip(l2_params, dropout_params, batch_params):
        
        # get model
        model = build_model(widths=layers, l2=l2, dropout=drop, batch=batch  )

        for p, lr, dr in zip(P, LR, DR):
            # get optimizer
            opt = set_optimizer(learning_rate=lr, decay=dr, momentum=p)

            # train model
            model, _ = train_models(model, opt, X_train, y_train, X_val, y_val)

            # get validation accuracy
            _, accuracy = model.evaluate(X_val, y_val)

            # now save the best model 
            if accuracy > best_val_score:
                best_params = {"layers": layers, "l2" : l2, "drop" :drop, 
                               "batch" : batch, "p" :p, 
                               "lr" :lr, "dr": dr} 
                best_val_score = accuracy
                best_model = model

    return best_model, best_params, best_val_score

best_model, best_params, best_val_score = get_best_model()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [7]:
X_train, X_val, y_train, y_val, X_test, y_test = prepare_data()

loss, accuracy = best_model.evaluate(X_test, y_test) 

print('Best Hyper Parameters:')
for key in list(best_params.keys()):
    print(key, best_params[key], sep=": ")

print('Best model Test Score: ', accuracy)
print('Best model Loss: ', loss)
best_model.summary()

Best Hyper Parameters:
layers: [1000, 500, 300, 200, 100]
l2: 0
drop: 0.1
batch: False
p: 0.8
lr: 0.2
dr: 0.001
Best model Test Score:  0.8956000208854675
Best model Loss:  0.4219379425048828
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_3 (Flatten)         (None, 784)               0         
                                                                 
 dense_18 (Dense)            (None, 1000)              785000    
                                                                 
 dense_19 (Dense)            (None, 500)               500500    
                                                                 
 dense_20 (Dense)            (None, 300)               150300    
                                                                 
 dropout_2 (Dropout)         (None, 300)               0         
                                                            