## Deeper Dense Models (Adam optimizer with Dropout)

In [1]:
import pandas as pd
import numpy as np

In [2]:
%run load_mnist_dataset_flat.py

In [3]:
## Script create_dense_model.py
import tensorflow as tf
import numpy as np
from tensorflow import keras
DIGIT_CLASSES = 10
RESHAPED = 28 * 28 ## 784 pixels per image
import time
from datetime import datetime


def create_dense_model(hidden_layers=1, hidden_units_per_layer=128):
    """
    create a keras sequential model with the specified
    number of hidden_layers and number of units per layer
    """
    DIGIT_CLASSES = 10
    RESHAPED = 28 * 28 ## 784 pixels per image
    DROPOUT = 0.3
    model = tf.keras.models.Sequential()
    count = 0
    
    
    count += 1
    model.add(keras.layers.Dense(units=hidden_units_per_layer,
        input_shape=(RESHAPED,),
        name=f'dense_layer_{count}', activation='relu'))
    
    for i in range(1, hidden_layers):
        count += 1
        model.add(keras.layers.Dense(units=hidden_units_per_layer,
           name=f'dense_layer_{count}', activation='relu'))
        ## Droput added after each hidden layer to help with regularization
        model.add(keras.layers.Dropout(DROPOUT))
        
    count += 1        
    model.add(keras.layers.Dense(DIGIT_CLASSES,
       name=f'dense_layer_{count}', activation='softmax'))

    # summary of the model
    model.summary()
    return model


def run_dense_model(hidden_layers=1, hidden_units_per_layer=128, optimizer='SGD', verbose=1):
    """
    Create and run a dense Keras sequential model over the MNIST training data,
    using the specified numbers of hidden_layers, hidden_units_per_layer, and optimizer.
    Maybe add optional dropout TBD,
    Return values:
    history, training_time, test_accuracy, model
    """
    from keras.callbacks import EarlyStopping
    early_stopping = EarlyStopping(monitor='val_loss',verbose=1,patience=3,min_delta=0.0001)
    
    BATCH_SIZE = 128
    EPOCHS = 400
    VALIDATION_SPLIT = 0.2
    
    ## Load the training and test data
    X_train, X_test, Y_train, Y_test = load_mnist_dataset_flat()
    
    model = create_dense_model(hidden_layers, hidden_units_per_layer)
    
    start_time = time.time()

    # compiling the model
    model.compile(optimizer=optimizer, 
              loss='categorical_crossentropy',
              metrics=['accuracy'])

    #training the model
    history = model.fit(X_train, Y_train,
                        batch_size=BATCH_SIZE, epochs=EPOCHS,
                        verbose=verbose, validation_split=VALIDATION_SPLIT,
                        callbacks=[early_stopping]
                       )
    
    elapsed_time = time.time() - start_time
    
    test_loss, test_accuracy = model.evaluate(X_test, Y_test)
    print(f'{hidden_layers=} {hidden_units_per_layer=} {optimizer=}')
    print(f'training elapsed_time = {elapsed_time:10.1f}')
    print('Test accuracy:', test_accuracy)
    
    return history, elapsed_time, test_accuracy, model

In [4]:

model = create_dense_model(hidden_layers=1, hidden_units_per_layer=64)
model.summary

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_layer_1 (Dense)       (None, 64)                50240     
                                                                 
 dense_layer_2 (Dense)       (None, 10)                650       
                                                                 
Total params: 50,890
Trainable params: 50,890
Non-trainable params: 0
_________________________________________________________________


<bound method Model.summary of <keras.engine.sequential.Sequential object at 0x0000018A0AC16D00>>

In [5]:
dense1 = pd.read_csv('data/dense_network_comparisons.csv')
display(dense1)

Unnamed: 0,hidden_layers,hidden_units_per_layer,elapsed_time,test_accuracy
0,1.0,64.0,256.643188,0.9716
1,1.0,128.0,269.81577,0.9742
2,1.0,192.0,265.792969,0.9763
3,2.0,64.0,134.144796,0.969
4,2.0,128.0,148.220814,0.9734
5,2.0,192.0,147.220577,0.9746
6,3.0,64.0,94.2108,0.9673
7,3.0,128.0,106.395729,0.9729
8,3.0,192.0,87.828249,0.9749
9,4.0,64.0,69.781558,0.9695


In [6]:
dense2 = pd.read_csv('data/dense_network_comparisons2.csv')
display(dense2)

Unnamed: 0,hidden_layers,hidden_units_per_layer,elapsed_time,test_accuracy
0,1.0,64.0,21.059321,0.9735
1,1.0,128.0,19.19761,0.9765
2,1.0,192.0,17.795359,0.9792
3,2.0,64.0,15.609005,0.9716
4,2.0,128.0,13.113215,0.9759
5,2.0,192.0,13.247705,0.9776
6,3.0,64.0,15.438182,0.9726
7,3.0,128.0,13.106203,0.9698
8,3.0,192.0,14.660831,0.9793
9,4.0,64.0,20.492837,0.9721


In [7]:
dropout1 = pd.read_csv('data/dense_network_comparisons.csv')
display(dropout1)

Unnamed: 0,hidden_layers,hidden_units_per_layer,elapsed_time,test_accuracy
0,1.0,64.0,256.643188,0.9716
1,1.0,128.0,269.81577,0.9742
2,1.0,192.0,265.792969,0.9763
3,2.0,64.0,134.144796,0.969
4,2.0,128.0,148.220814,0.9734
5,2.0,192.0,147.220577,0.9746
6,3.0,64.0,94.2108,0.9673
7,3.0,128.0,106.395729,0.9729
8,3.0,192.0,87.828249,0.9749
9,4.0,64.0,69.781558,0.9695


In [8]:
dropout2 = pd.read_csv('data/dense_network_comparisons2.csv')
display(dropout2)

Unnamed: 0,hidden_layers,hidden_units_per_layer,elapsed_time,test_accuracy
0,1.0,64.0,21.059321,0.9735
1,1.0,128.0,19.19761,0.9765
2,1.0,192.0,17.795359,0.9792
3,2.0,64.0,15.609005,0.9716
4,2.0,128.0,13.113215,0.9759
5,2.0,192.0,13.247705,0.9776
6,3.0,64.0,15.438182,0.9726
7,3.0,128.0,13.106203,0.9698
8,3.0,192.0,14.660831,0.9793
9,4.0,64.0,20.492837,0.9721


In [9]:
LABEL = 'Adam_Dropout'
OPTIMIZER = 'Adam'
units = [64,128,192]
hidden_layers = [1,2,3,4]

test_data2 = []

for layers in hidden_layers:
    for hidden_units in units:
        history, elapsed_time, test_accuracy, model = \
        run_dense_model(hidden_layers=layers, hidden_units_per_layer=hidden_units, verbose=0, optimizer=OPTIMIZER)
        
        data2 = (layers, hidden_units, elapsed_time, test_accuracy)
        
        test_data2.append(data2)
        
        
df = pd.DataFrame(test_data2)
df.columns = ['hidden_layers', 'hidden_units_per_layer', 'elapsed_time', 'test_accuracy']
display(df)
path = f'data/DENSE_{LABEL}'
now = datetime.now()
result_name = now.strftime(f"{path}_%Y-%m-%d_%H_%M.csv")
print(f'Saving results to file {result_name}')
df.to_csv(result_name, index=False)

60000 train samples
10000 test samples
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_layer_1 (Dense)       (None, 64)                50240     
                                                                 
 dense_layer_2 (Dense)       (None, 10)                650       
                                                                 
Total params: 50,890
Trainable params: 50,890
Non-trainable params: 0
_________________________________________________________________
Epoch 23: early stopping
hidden_layers=1 hidden_units_per_layer=64 optimizer='Adam'
training elapsed_time =       30.9
Test accuracy: 0.9728000164031982
60000 train samples
10000 test samples
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_layer_1 (Dense)       (None, 128)               100480

 Layer (type)                Output Shape              Param #   
 dense_layer_1 (Dense)       (None, 128)               100480    
                                                                 
 dense_layer_2 (Dense)       (None, 128)               16512     
                                                                 
 dropout_5 (Dropout)         (None, 128)               0         
                                                                 
 dense_layer_3 (Dense)       (None, 128)               16512     
                                                                 
 dropout_6 (Dropout)         (None, 128)               0         
                                                                 
 dense_layer_4 (Dense)       (None, 10)                1290      
                                                                 
Total params: 134,794
Trainable params: 134,794
Non-trainable params: 0
_________________________________________________________________
Epoc

Unnamed: 0,hidden_layers,hidden_units_per_layer,elapsed_time,test_accuracy
0,1,64,30.902522,0.9728
1,1,128,22.212532,0.978
2,1,192,16.219516,0.979
3,2,64,26.160845,0.9766
4,2,128,21.860066,0.979
5,2,192,16.069946,0.9792
6,3,64,27.400348,0.9734
7,3,128,19.842423,0.9774
8,3,192,15.032671,0.9765
9,4,64,35.237027,0.9723


Saving results to file data/DENSE_Adam_Dropout_2022-09-30_22_34.csv
