# Importing packages and libraries

In [1]:
# utility packages
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 

# tensorflow 
import tensorflow as tf

# random package
import random 

# warnings 
import warnings 
warnings.filterwarnings('ignore')

### Checking for GPU usage

In [2]:
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':  
  print(f'No GPU was found.')
else:
  print('Found GPU at: {}'.format(device_name))

No GPU was found.


### Loading the MNIST dataset

In [3]:
from keras.datasets import mnist

In [4]:
### unzipping the mnist dataset 

(xTrain,yTrainLabel),(xTest,yTestLabel) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


### Manipulation the MNIST dataset

In [5]:
### One-hot encoding the training and test labels

classes = 10 # 0-9 categories for the num_classes parameter

# training
yTrainCat = tf.keras.utils.to_categorical(y = yTrainLabel, num_classes = classes, dtype = 'float32')

# testing
yTestCat = tf.keras.utils.to_categorical(y = yTestLabel, num_classes = classes, dtype = 'float32')

In [6]:
### Shape and datatype of MNIST dataset

print(f'Training dataset shape: {xTrain.shape} | Training dataset datatype: {xTrain.dtype} \nTesting dataset shape: {xTest.shape} | Testing dataset datatype: {xTrain.dtype}')
print(f'Training dataset pixel range: {(np.min(xTrain),np.max(xTrain))} | Testing dataset pixel range: {(np.min(xTest),np.max(xTest))}' )


Training dataset shape: (60000, 28, 28) | Training dataset datatype: uint8 
Testing dataset shape: (10000, 28, 28) | Testing dataset datatype: uint8
Training dataset pixel range: (0, 255) | Testing dataset pixel range: (0, 255)


**Need to reshape, change the datatype, and min_max scale the tensor values**

In [7]:
### Manipulate the training and testing input

xTrain = (xTrain/255).astype('float32') # 0-1 scaled, datatype is now a float
xTest = (xTest/255).astype('float32') # 0-1 scaled, datatype is now a float

### Build and establish the model
The upper restriciton on the number of params: 200,000. I will initialize a function api model using keras.


In [8]:
### Model ### --- layer adjustments

#creating lists for column names 
model_names = []
first_layer_nodes = []
second_layer_nodes = []
first_layer_activation = []
second_layer_activation = []
output_layer_activation = []
training_loss = []
training_accuracy = []
validation_loss = []
validation_accuracy = []
batch_size = [] 
num_of_epochs = []
test_loss = []
test_accuracy = []
optimizer_function = []
loss_function = []

def model_builder(training_dataset = xTrain, 
                  training_labels = yTrainCat, 
                  testing_dataset = xTest, 
                  testing_labels = yTestCat):
  
  ### ----- Local Variables ----- ###
  counter = 0
  firstActive = 'relu'
  secondActive = 'relu'
  outputActive = 'sigmoid'
  firstLayerNodes = [100,150,200] # number of nodes in first layer list
  secondLayerNodes = [50,87,125]  # number of nodes in second layer list

  ### ----- Creating the full-dense network ----- ###

  for i in firstLayerNodes: # will iterate through first layer nodes
    for j in secondLayerNodes: # iterate through second layer nodes
      counter += 1
      inputLayer = tf.keras.Input(shape = (28,28), name = 'input_layer')
      flattenLayer = tf.keras.layers.Flatten(name = 'flatten_layer')(inputLayer)
      denseLayer1 = tf.keras.layers.Dense(units = i, activation = firstActive, name = 'dense_layer_1')(flattenLayer)
      denseLayer2 = tf.keras.layers.Dense(units = j, activation = secondActive, name = 'dense_layer_2')(denseLayer1)
      outputLayer = tf.keras.layers.Dense(units = 10, activation = outputActive, name = 'output_layer')(denseLayer2)

      # appending all hyperparameters into lists
      first_layer_nodes.append(i) # append number of first layer nodes 
      second_layer_nodes.append(j) # append number of second layer nodes
      first_layer_activation.append(firstActive) # append first layer activation function
      second_layer_activation.append(secondActive) # append second layer activation function
      output_layer_activation.append(outputActive) # append output layer activation function

      modelName = f'NN_Model{str(counter)}' # generating model names to put into list
      model_names.append(modelName) # appending name to model_names list

      model = tf.keras.Model(inputs = inputLayer, outputs = outputLayer, name = modelName)

      ### ----- Model Parameters ----- ###

      optFunction = tf.keras.optimizers.Adam(learning_rate =  0.025) # optimizer function
      lossFunction = tf.keras.losses.BinaryCrossentropy() # loss function

      optimizer_function.append(str(optFunction)) # append optimizer function
      loss_function.append(str(lossFunction)) # append loss function

      ### ----- Compiler ----- ###
      model.compile(
        optimizer = optFunction,
        loss = lossFunction,
        metrics = tf.keras.metrics.Accuracy()
      )

      ##### ----- Fitting the model ----- ###
      print(f'first layer: {i}, second layer {j}') 

      tf.random.set_seed(42)

      bSize = 200
      epoch = 250 
      vSplit = 0.1
      trainModel = model.fit(
          x = xTrain,
          y = yTrainCat,
          batch_size = bSize,
          epochs = epoch,
          validation_split = vSplit
      )

      batch_size.append(bSize) # append batch size
      num_of_epochs.append(epoch) # append number of epochs 
      
      ### ----- Results ----- ###
      # appending the validation accuracy into the list
      training_loss.append(min(trainModel.history['loss'])) # append training loss
      training_accuracy.append(max(trainModel.history['accuracy'])) # append training accuracy
      validation_loss.append(min(trainModel.history['val_loss'])) # append validation loss
      validation_accuracy.append(max(trainModel.history['val_accuracy'])) # append validation accuracy 
                                 
      # appending the test accuracy into the list
      finalResults = model.evaluate(xTest,yTestCat)

      test_loss.append(finalResults[0]) # append test loss
      test_accuracy.append(finalResults[1]) # append test accuracy

### Running the hyper-parameter tunned Model ### 
This first round of models controls the number of nodes/perceptrons within the model.

In [9]:
model_builder()

first layer: 100, second layer 50
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 100, second layer 87
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 100, second layer 125
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 150, second layer 50
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 150, second layer 87
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 150, second layer 125
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 200, second layer 50
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 

### Creating the dataframe that denotes all hyperparameters and their respective loss and accuracy ###

In [11]:
model_data = {
    'model_names' : model_names,
    'first_layer_nodes' : first_layer_nodes,
    'first_layer_activation' : first_layer_activation,
    'second_layer_nodes' : second_layer_nodes,
    'second_layer_activation' : second_layer_activation,
    'output_layer_activation' : output_layer_activation,
    'optimizer_function' : optimizer_function,
    'loss_function' : loss_function,
    'batch_size' : batch_size,
    'num_of_epochs' : num_of_epochs,
    'training_loss' : training_loss,
    'training_accuracy' : training_accuracy,
    'validation_loss' : validation_loss,
    'validation_accuracy' : validation_accuracy,
    'test_loss' : test_loss,
    'test_accuracy' : test_accuracy
}

**Dataframe**

In [12]:
df = pd.DataFrame(data = model_data)
df = df.set_index('model_names')

In [13]:
df = pd.DataFrame(data = model_data)
df = df.set_index('model_names')

In [14]:
max_test_acc = max(df.test_accuracy)

def highlighter(cell_value):
    
    highlight = 'background-color: green'
    default = ''

    if cell_value == max_test_acc:
        return highlight
    else:
        return default
    
df.style.applymap(highlighter)

Unnamed: 0_level_0,first_layer_nodes,first_layer_activation,second_layer_nodes,second_layer_activation,output_layer_activation,optimizer_function,loss_function,batch_size,num_of_epochs,training_loss,training_accuracy,validation_loss,validation_accuracy,test_loss,test_accuracy
model_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
NN_Model1,100,relu,50,relu,sigmoid,,,200,10,0.016792,0.016317,0.023322,0.018667,0.025328,0.01739
NN_Model2,100,relu,87,relu,sigmoid,,,200,10,0.01678,0.030215,0.022101,0.038467,0.02744,0.0324
NN_Model3,100,relu,125,relu,sigmoid,,,200,10,0.019608,0.026096,0.023407,0.034733,0.032271,0.03245
NN_Model4,150,relu,50,relu,sigmoid,,,200,10,0.014361,0.026535,0.017129,0.03265,0.026774,0.03024
NN_Model5,150,relu,87,relu,sigmoid,,,200,10,0.01518,0.036254,0.019605,0.0454,0.029222,0.04257
NN_Model6,150,relu,125,relu,sigmoid,,,200,10,0.016644,0.039961,0.020471,0.048133,0.027021,0.04335
NN_Model7,200,relu,50,relu,sigmoid,,,200,10,0.013724,0.021141,0.019574,0.020717,0.024502,0.01706
NN_Model8,200,relu,87,relu,sigmoid,,,200,10,0.01426,0.040257,0.019621,0.052817,0.029096,0.05065
NN_Model9,200,relu,125,relu,sigmoid,,,200,10,0.015758,0.036474,0.021127,0.03885,0.025429,0.03697
