# Importing packages and libraries

In [1]:
# utility packages
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 

# tensorflow 
import tensorflow as tf

# random package
import random 

# warnings 
import warnings 
warnings.filterwarnings('ignore')

### Checking for GPU usage

In [2]:
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':  
  print(f'No GPU was found.')
else:
  print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


### Loading the MNIST dataset

In [3]:
from keras.datasets import mnist

In [4]:
### unzipping the mnist dataset 

(xTrain,yTrainLabel),(xTest,yTestLabel) = mnist.load_data()

### Manipulation the MNIST dataset

In [5]:
### One-hot encoding the training and test labels

classes = 10 # 0-9 categories for the num_classes parameter

# training
yTrainCat = tf.keras.utils.to_categorical(y = yTrainLabel, num_classes = classes, dtype = 'float32')

# testing
yTestCat = tf.keras.utils.to_categorical(y = yTestLabel, num_classes = classes, dtype = 'float32')

In [6]:
### Shape and datatype of MNIST dataset

print(f'Training dataset shape: {xTrain.shape} | Training dataset datatype: {xTrain.dtype} \nTesting dataset shape: {xTest.shape} | Testing dataset datatype: {xTrain.dtype}')
print(f'Training dataset pixel range: {(np.min(xTrain),np.max(xTrain))} | Testing dataset pixel range: {(np.min(xTest),np.max(xTest))}' )


Training dataset shape: (60000, 28, 28) | Training dataset datatype: uint8 
Testing dataset shape: (10000, 28, 28) | Testing dataset datatype: uint8
Training dataset pixel range: (0, 255) | Testing dataset pixel range: (0, 255)


**Need to reshape, change the datatype, and min_max scale the tensor values**

In [7]:
### Manipulate the training and testing input

xTrain = (xTrain/255).astype('float32') # 0-1 scaled, datatype is now a float
xTest = (xTest/255).astype('float32') # 0-1 scaled, datatype is now a float

### Build and establish the model
The upper restriciton on the number of params: 200,000. I will initialize a function api model using keras.


In [8]:
### Model ### --- layer adjustments

#creating lists for column names 
model_names = []
first_layer_nodes = []
second_layer_nodes = []
first_layer_activation = []
second_layer_activation = []
output_layer_activation = []
training_loss = []
training_accuracy = []
validation_loss = []
validation_accuracy = []
batch_size = [] 
num_of_epochs = []
test_loss = []
test_accuracy = []
optimizer_function = []
loss_function = []

def model_builder1(training_dataset = xTrain, 
                  training_labels = yTrainCat, 
                  testing_dataset = xTest, 
                  testing_labels = yTestCat):
  
  ### ----- Local Variables ----- ###
  counter = 0
  firstActive = 'relu'
  secondActive = 'relu'
  outputActive = 'sigmoid'
  firstLayerNodes = [150,200] # number of nodes in first layer list
  secondLayerNodes = [30,50]  # number of nodes in second layer list

  ### ----- Creating the full-dense network ----- ###

  for i in firstLayerNodes: # will iterate through first layer nodes
    for j in secondLayerNodes: # iterate through second layer nodes
      counter += 1
      inputLayer = tf.keras.Input(shape = (28,28), name = 'input_layer')
      flattenLayer = tf.keras.layers.Flatten(name = 'flatten_layer')(inputLayer)
      denseLayer1 = tf.keras.layers.Dense(units = i, activation = firstActive, name = 'dense_layer_1')(flattenLayer)
      denseLayer2 = tf.keras.layers.Dense(units = j, activation = secondActive, name = 'dense_layer_2')(denseLayer1)
      outputLayer = tf.keras.layers.Dense(units = 10, activation = outputActive, name = 'output_layer')(denseLayer2)
      
      # appending all hyperparameters into lists
      first_layer_nodes.append(i) # append number of first layer nodes 
      second_layer_nodes.append(j) # append number of second layer nodes
      first_layer_activation.append(firstActive) # append first layer activation function
      second_layer_activation.append(secondActive) # append second layer activation function
      output_layer_activation.append(outputActive) # append output layer activation function

      modelName = f'NN_Model{str(counter)}' # generating model names to put into list
      model_names.append(modelName) # appending name to model_names list

      model = tf.keras.Model(inputs = inputLayer, outputs = outputLayer, name = modelName)

      ### ----- Model Parameters ----- ###

      optFunction = tf.keras.optimizers.Adam(learning_rate =  0.025) # optimizer function
      lossFunction = tf.keras.losses.BinaryCrossentropy() # loss function

      optimizer_function.append(str(optFunction)) # append optimizer function
      loss_function.append(str(lossFunction)) # append loss function

      ### ----- Compiler ----- ###
      model.compile(
        optimizer = optFunction,
        loss = lossFunction,
        metrics = tf.keras.metrics.Accuracy()
      )

      ##### ----- Fitting the model ----- ###
      print(f'first layer: {i}, second layer {j}') 

      tf.random.set_seed(42)

      bSize = 200
      epoch = 250
      vSplit = 0.1
      trainModel = model.fit(
          x = xTrain,
          y = yTrainCat,
          batch_size = bSize,
          epochs = epoch,
          validation_split = vSplit
      )

      batch_size.append(bSize) # append batch size
      num_of_epochs.append(epoch) # append number of epochs 
      
      ### ----- Results ----- ###
      # appending the validation accuracy into the list
      training_loss.append(min(trainModel.history['loss'])) # append training loss
      training_accuracy.append(max(trainModel.history['accuracy'])) # append training accuracy
      validation_loss.append(min(trainModel.history['val_loss'])) # append validation loss
      validation_accuracy.append(max(trainModel.history['val_accuracy'])) # append validation accuracy 
                                  
      # appending the test accuracy into the list
      finalResults = model.evaluate(xTest,yTestCat)

      test_loss.append(finalResults[0]) # append test loss
      test_accuracy.append(finalResults[1]) # append test accuracy

In [9]:
def model_builder2(training_dataset = xTrain, 
                  training_labels = yTrainCat, 
                  testing_dataset = xTest, 
                  testing_labels = yTestCat):
  
  ### ----- Local Variables ----- ###
  counter = 4 #starting where model_builder1 left off
  firstActive = 'gelu'
  secondActive = 'gelu'
  outputActive = 'softmax'
  firstLayerNodes = [150,200] # number of nodes in first layer list
  secondLayerNodes = [30,50]  # number of nodes in second layer list

  ### ----- Creating the full-dense network ----- ###

  for i in firstLayerNodes: # will iterate through first layer nodes
    for j in secondLayerNodes: # iterate through second layer nodes
      counter += 1
      inputLayer = tf.keras.Input(shape = (28,28), name = 'input_layer')
      flattenLayer = tf.keras.layers.Flatten(name = 'flatten_layer')(inputLayer)
      denseLayer1 = tf.keras.layers.Dense(units = i, activation = firstActive, name = 'dense_layer_1')(flattenLayer)
      denseLayer2 = tf.keras.layers.Dense(units = j, activation = secondActive, name = 'dense_layer_2')(denseLayer1)
      outputLayer = tf.keras.layers.Dense(units = 10, activation = outputActive, name = 'output_layer')(denseLayer2)

      # appending all hyperparameters into lists
      first_layer_nodes.append(i) # append number of first layer nodes 
      second_layer_nodes.append(j) # append number of second layer nodes
      first_layer_activation.append(firstActive) # append first layer activation function
      second_layer_activation.append(secondActive) # append second layer activation function
      output_layer_activation.append(outputActive) # append output layer activation function

      modelName = f'NN_Model{str(counter)}' # generating model names to put into list
      model_names.append(modelName) # appending name to model_names list

      model = tf.keras.Model(inputs = inputLayer, outputs = outputLayer, name = modelName)

      ### ----- Model Parameters ----- ###
      if counter%2 == 0: 
        optFunction = tf.keras.optimizers.Adam(learning_rate =  0.025) # optimizer function
        lossFunction = tf.keras.losses.BinaryCrossentropy() # loss function
      else:
        optFunction = tf.keras.optimizers.SGD(learning_rate =  0.025) # optimizer function
        lossFunction = tf.keras.losses.Hinge() # loss function  

      optimizer_function.append(str(optFunction)) # append optimizer function
      loss_function.append(str(lossFunction)) # append loss function

      ### ----- Compiler ----- ###
      model.compile(
        optimizer = optFunction,
        loss = lossFunction,
        metrics = tf.keras.metrics.Accuracy()
      )

      ##### ----- Fitting the model ----- ###
      print(f'first layer: {i}, second layer {j}') 

      tf.random.set_seed(42)

      bSize = 200
      epoch = 250
      vSplit = 0.1
      trainModel = model.fit(
          x = xTrain,
          y = yTrainCat,
          batch_size = bSize,
          epochs = epoch,
          validation_split = vSplit
      )

      batch_size.append(bSize) # append batch size
      num_of_epochs.append(epoch) # append number of epochs 
      
      ### ----- Results ----- ###
      # appending the validation accuracy into the list
      training_loss.append(min(trainModel.history['loss'])) # append training loss
      training_accuracy.append(max(trainModel.history['accuracy'])) # append training accuracy
      validation_loss.append(min(trainModel.history['val_loss'])) # append validation loss
      validation_accuracy.append(max(trainModel.history['val_accuracy'])) # append validation accuracy 
                                 
      # appending the test accuracy into the list
      finalResults = model.evaluate(xTest,yTestCat)

      test_loss.append(finalResults[0]) # append test loss
      test_accuracy.append(finalResults[1]) # append test accuracy

In [10]:
def model_builder3(training_dataset = xTrain, 
                  training_labels = yTrainCat, 
                  testing_dataset = xTest, 
                  testing_labels = yTestCat):
  
  ### ----- Local Variables ----- ###
  counter = 8 #starting where model_builder2 left off
  firstActive = 'elu'
  secondActive = 'elu'
  outputActive = 'swish'
  firstLayerNodes = [150,200] # number of nodes in first layer list
  secondLayerNodes = [30,50]  # number of nodes in second layer list

  ### ----- Creating the full-dense network ----- ###

  for i in firstLayerNodes: # will iterate through first layer nodes
    for j in secondLayerNodes: # iterate through second layer nodes
      counter += 1
      inputLayer = tf.keras.Input(shape = (28,28), name = 'input_layer')
      flattenLayer = tf.keras.layers.Flatten(name = 'flatten_layer')(inputLayer)
      denseLayer1 = tf.keras.layers.Dense(units = i, activation = firstActive, name = 'dense_layer_1')(flattenLayer)
      denseLayer2 = tf.keras.layers.Dense(units = j, activation = secondActive, name = 'dense_layer_2')(denseLayer1)
      outputLayer = tf.keras.layers.Dense(units = 10, activation = outputActive, name = 'output_layer')(denseLayer2)

      # appending all hyperparameters into lists
      first_layer_nodes.append(i) # append number of first layer nodes 
      second_layer_nodes.append(j) # append number of second layer nodes
      first_layer_activation.append(firstActive) # append first layer activation function
      second_layer_activation.append(secondActive) # append second layer activation function
      output_layer_activation.append(outputActive) # append output layer activation function

      modelName = f'NN_Model{str(counter)}' # generating model names to put into list
      model_names.append(modelName) # appending name to model_names list

      model = tf.keras.Model(inputs = inputLayer, outputs = outputLayer, name = modelName)

      ### ----- Model Parameters ----- ###
      if counter%2 == 0: 
        optFunction = tf.keras.optimizers.RMSprop(learning_rate =  0.025) # optimizer function
        lossFunction = tf.keras.losses.Hinge() # loss function
      else:
        optFunction = tf.keras.optimizers.Adam(learning_rate =  0.025) # optimizer function
        lossFunction = tf.keras.losses.SquaredHinge() # loss function  

      optimizer_function.append(str(optFunction)) # append optimizer function
      loss_function.append(str(lossFunction)) # append loss function

      ### ----- Compiler ----- ###
      model.compile(
        optimizer = optFunction,
        loss = lossFunction,
        metrics = tf.keras.metrics.Accuracy()
      )

      ##### ----- Fitting the model ----- ###
      print(f'first layer: {i}, second layer {j}') 

      tf.random.set_seed(42)

      bSize = 200
      epoch = 250
      vSplit = 0.1
      trainModel = model.fit(
          x = xTrain,
          y = yTrainCat,
          batch_size = bSize,
          epochs = epoch,
          validation_split = vSplit
      )

      batch_size.append(bSize) # append batch size
      num_of_epochs.append(epoch) # append number of epochs 
      
      ### ----- Results ----- ###
      # appending the validation accuracy into the list
      training_loss.append(min(trainModel.history['loss'])) # append training loss
      training_accuracy.append(max(trainModel.history['accuracy'])) # append training accuracy
      validation_loss.append(min(trainModel.history['val_loss'])) # append validation loss
      validation_accuracy.append(max(trainModel.history['val_accuracy'])) # append validation accuracy 
                                 
      # appending the test accuracy into the list
      finalResults = model.evaluate(xTest,yTestCat)

      test_loss.append(finalResults[0]) # append test loss
      test_accuracy.append(finalResults[1]) # append test accuracy

### Running the hyper-parameter tunned Model ### 
This first round of models controls the number of nodes/perceptrons within the model.

In [11]:
if device_name != '/device:GPU:0':  
  model_builder1()
else:
    with tf.device('GPU:0'):
        model_builder1()

first layer: 150, second layer 30
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200


In [12]:
if device_name != '/device:GPU:0':  
  model_builder2()
else:
    with tf.device('GPU:0'):
        model_builder2()

first layer: 150, second layer 30
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200


In [13]:
if device_name != '/device:GPU:0':  
  model_builder3()
else:
    with tf.device('GPU:0'):
        model_builder3()

first layer: 150, second layer 30
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200


### Creating the dataframe that denotes all hyperparameters and their respective loss and accuracy ###

In [14]:
model_data = {
    'model_names' : model_names,
    'first_layer_nodes' : first_layer_nodes,
    'first_layer_activation' : first_layer_activation,
    'second_layer_nodes' : second_layer_nodes,
    'second_layer_activation' : second_layer_activation,
    'output_layer_activation' : output_layer_activation,
    'optimizer_function' : optimizer_function,
    'loss_function' : loss_function,
    'batch_size' : batch_size,
    'num_of_epochs' : num_of_epochs,
    'training_loss' : training_loss,
    'training_accuracy' : training_accuracy,
    'validation_loss' : validation_loss,
    'validation_accuracy' : validation_accuracy,
    'test_loss' : test_loss,
    'test_accuracy' : test_accuracy
}

**Dataframe**

In [15]:
df = pd.DataFrame(data = model_data)
df = df.set_index('model_names')

In [16]:
df = pd.DataFrame(data = model_data)
df = df.set_index('model_names')

### Highlighter

In [17]:
# max_test_acc = max(df.test_accuracy)

# def highlighter(cell_value):
    
#     highlight = 'background-color: green'
#     default = ''

#     if cell_value == max_test_acc:
#         return highlight
#     else:
#         return default
    
# df.style.applymap(highlighter)

In [18]:
df

Unnamed: 0_level_0,first_layer_nodes,first_layer_activation,second_layer_nodes,second_layer_activation,output_layer_activation,optimizer_function,loss_function,batch_size,num_of_epochs,training_loss,training_accuracy,validation_loss,validation_accuracy,test_loss,test_accuracy
model_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
NN_Model1,150,relu,30,relu,sigmoid,<keras.optimizers.optimizer_v2.adam.Adam objec...,<keras.losses.BinaryCrossentropy object at 0x0...,250,200,0.003301,0.844024,0.020041,0.843317,0.161303,0.79203
NN_Model2,150,relu,50,relu,sigmoid,<keras.optimizers.optimizer_v2.adam.Adam objec...,<keras.losses.BinaryCrossentropy object at 0x0...,250,200,0.005589,0.8984,0.019817,0.907917,0.294409,0.88246
NN_Model3,200,relu,30,relu,sigmoid,<keras.optimizers.optimizer_v2.adam.Adam objec...,<keras.losses.BinaryCrossentropy object at 0x0...,250,200,0.005373,0.895976,0.019144,0.89455,0.233624,0.84154
NN_Model4,200,relu,50,relu,sigmoid,<keras.optimizers.optimizer_v2.adam.Adam objec...,<keras.losses.BinaryCrossentropy object at 0x0...,250,200,0.005038,0.874976,0.019717,0.887217,0.199144,0.81215
NN_Model5,150,gelu,30,gelu,softmax,<keras.optimizers.optimizer_v2.gradient_descen...,<keras.losses.Hinge object at 0x000001B5CFE3B040>,250,200,0.930914,0.007298,0.928722,0.007217,0.930256,0.00765
NN_Model6,150,gelu,50,gelu,softmax,<keras.optimizers.optimizer_v2.adam.Adam objec...,<keras.losses.BinaryCrossentropy object at 0x0...,250,200,0.005358,0.957972,0.018716,0.952767,0.188839,0.89785
NN_Model7,200,gelu,30,gelu,softmax,<keras.optimizers.optimizer_v2.gradient_descen...,<keras.losses.Hinge object at 0x000001B58E8164C0>,250,200,0.934836,0.00487,0.931974,0.00445,0.933712,0.00242
NN_Model8,200,gelu,50,gelu,softmax,<keras.optimizers.optimizer_v2.adam.Adam objec...,<keras.losses.BinaryCrossentropy object at 0x0...,250,200,0.005741,0.966204,0.019082,0.954933,0.240633,0.94574
NN_Model9,150,elu,30,elu,swish,<keras.optimizers.optimizer_v2.adam.Adam objec...,<keras.losses.SquaredHinge object at 0x000001B...,250,200,1.0,0.861074,0.999998,0.861717,1.0,0.86318
NN_Model10,150,elu,50,elu,swish,<keras.optimizers.optimizer_v2.rmsprop.RMSprop...,<keras.losses.Hinge object at 0x000001B5B0FFECD0>,250,200,1.0,0.873322,1.0,0.871933,1.0,0.87356
