# Importing packages and libraries

In [1]:
# utility packages
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 

# tensorflow 
import tensorflow as tf

# random package
import random 

# warnings 
import warnings 
warnings.filterwarnings('ignore')

### Checking for GPU usage

In [2]:
# Query TensorFlow for a GPU device name; the later training cells reuse
# `device_name` to decide whether to pin the work to the GPU.
device_name = tf.test.gpu_device_name()
gpu_found = device_name == '/device:GPU:0'
if gpu_found:
  print('Found GPU at: {}'.format(device_name))
else:
  print('No GPU was found.')

Found GPU at: /device:GPU:0


### Loading the MNIST dataset

In [3]:
from keras.datasets import mnist

In [4]:
### Unpacking the MNIST dataset into training/testing images and labels
# mnist.load_data() yields two (images, labels) pairs; the tuple pattern below
# binds them to the variable names used throughout the rest of the notebook.
(xTrain,yTrainLabel),(xTest,yTestLabel) = mnist.load_data()

### Manipulating the MNIST dataset

In [5]:
### One-hot encoding the training and test labels

classes = 10 # digits 0-9 -> value handed to the num_classes parameter

# Encode both label vectors with identical settings: training first, then testing.
yTrainCat, yTestCat = (
    tf.keras.utils.to_categorical(labelVector, num_classes = classes, dtype = 'float32')
    for labelVector in (yTrainLabel, yTestLabel)
)

In [6]:
### Shape and datatype of MNIST dataset

# Report shape and dtype for each split. The testing dtype is read from xTest
# (it was previously read from xTrain, so a mismatch would have gone unnoticed).
print(f'Training dataset shape: {xTrain.shape} | Training dataset datatype: {xTrain.dtype} \nTesting dataset shape: {xTest.shape} | Testing dataset datatype: {xTest.dtype}')
# Report the (min, max) pixel value of each split to decide on the scaling step.
print(f'Training dataset pixel range: {(np.min(xTrain),np.max(xTrain))} | Testing dataset pixel range: {(np.min(xTest),np.max(xTest))}' )


Training dataset shape: (60000, 28, 28) | Training dataset datatype: uint8 
Testing dataset shape: (10000, 28, 28) | Testing dataset datatype: uint8
Training dataset pixel range: (0, 255) | Testing dataset pixel range: (0, 255)


**Need to change the datatype and min-max scale the pixel values; flattening the 28x28 images is handled by the `Flatten` layer inside each model**

In [7]:
### Manipulate the training and testing input

# Divide every pixel by 255 (the maximum reported above) so values land in 0-1,
# then store the result as float32. Both splits get exactly the same treatment.
# NOTE: this overwrites xTrain/xTest, so re-running the cell rescales them again.
xTrain, xTest = (
    (pixelArray/255).astype('float32')
    for pixelArray in (xTrain, xTest)
)

### Build and establish the model
The upper restriction on the number of params is 200,000. I will build the models with the Keras functional API.


In [8]:
### Model ### --- layer adjustments

# Module-level accumulators: each becomes one column of the results table and
# receives one entry per trained model from the model_builder functions.

# identification
model_names = []

# architecture
first_layer_nodes, second_layer_nodes, third_layer_nodes = [], [], []
first_layer_activation, second_layer_activation, third_layer_activation = [], [], []
output_layer_activation = []

# training configuration
optimizer_function, loss_function = [], []
batch_size, num_of_epochs = [], []

# metrics
training_loss, training_accuracy = [], []
validation_loss, validation_accuracy = [], []
test_loss, test_accuracy = [], []

def model_builder1(training_dataset = xTrain, 
                  training_labels = yTrainCat, 
                  testing_dataset = xTest, 
                  testing_labels = yTestCat):
  """Train and evaluate a 3x3 grid of ReLU dense networks (NN_Model1..NN_Model9).

  Every combination of second/third hidden-layer widths in [150, 200, 250] is
  built as Input -> Flatten -> Dense(128) -> Dense -> Dropout(0.2) -> Dense ->
  Dense(10, sigmoid), compiled with adam / binary_crossentropy, fitted for 10
  epochs (batch size 128, 10% validation split) and evaluated on the test data.

  Args:
    training_dataset: inputs passed to model.fit; its shape without the first
      axis is used as the model input shape.
    training_labels: targets passed to model.fit.
    testing_dataset: inputs passed to model.evaluate.
    testing_labels: targets passed to model.evaluate.

  Returns:
    None. One entry per model is appended to each module-level result list
    (model_names, first_layer_nodes, ..., test_loss, test_accuracy).
  """

  ### ----- Local Variables ----- ###
  counter = 0
  firstActive = 'relu'
  secondActive = 'relu'
  thirdActive = 'relu'
  outputActive = 'sigmoid'
  firstLayerNodes = 128 # number of nodes in the first layer
  secondLayerNodes = [150,200,250]  # candidate widths for the second layer
  thirdLayerNodes = [150,200,250] # candidate widths for the third layer

  ### ----- Model Parameters (identical for every model in this grid) ----- ###
  # NOTE(review): sigmoid + binary_crossentropy scores the 10 one-hot columns as
  # independent binary targets; kept as-is so the recorded experiment is unchanged.
  optFunction = 'adam' # optimizer function
  lossFunction = 'binary_crossentropy' # loss function
  bSize = 128
  epoch = 10
  vSplit = 0.1

  ### ----- Creating the full-dense network ----- ###

  for secondNodes in secondLayerNodes: # iterate through second layer widths
    for thirdNodes in thirdLayerNodes: # iterate through third layer widths
      counter += 1
      # The input shape comes from the dataset argument rather than the global xTrain.
      inputLayer = tf.keras.Input(shape = training_dataset.shape[1:], name = 'input_layer')
      flattenLayer = tf.keras.layers.Flatten(name = 'flatten_layer')(inputLayer)
      denseLayer1 = tf.keras.layers.Dense(units = firstLayerNodes, activation = firstActive, name = 'dense_layer_1')(flattenLayer)
      denseLayer2 = tf.keras.layers.Dense(units = secondNodes, activation = secondActive, name = 'dense_layer_2')(denseLayer1)
      dropoutLayer = tf.keras.layers.Dropout(0.2, name = 'dropout_layer')(denseLayer2)
      denseLayer3 = tf.keras.layers.Dense(units = thirdNodes, activation = thirdActive, name = 'dense_layer_3')(dropoutLayer)
      outputLayer = tf.keras.layers.Dense(units = 10, activation = outputActive, name = 'output_layer')(denseLayer3)
      
      # appending all hyperparameters into lists
      first_layer_nodes.append(firstLayerNodes) # append number of first layer nodes 
      second_layer_nodes.append(secondNodes) # append number of second layer nodes
      third_layer_nodes.append(thirdNodes) # append number of third layer nodes
      first_layer_activation.append(firstActive) # append first layer activation function
      second_layer_activation.append(secondActive) # append second layer activation function
      third_layer_activation.append(thirdActive) # append third layer activation function
      output_layer_activation.append(outputActive) # append output layer activation function

      modelName = f'NN_Model{counter}' # generating model names to put into list
      model_names.append(modelName) # appending name to model_names list

      model = tf.keras.Model(inputs = inputLayer, outputs = outputLayer, name = modelName)

      optimizer_function.append(str(optFunction)) # append optimizer function
      loss_function.append(str(lossFunction)) # append loss function

      ### ----- Compiler ----- ###
      model.compile(
        optimizer = optFunction,
        loss = lossFunction,
        metrics = ['accuracy'] # list form, as documented for Model.compile
      )

      ##### ----- Fitting the model ----- ###
      print(f'first layer: {firstLayerNodes} | second layer: {secondNodes} | third layer: {thirdNodes}') 

      # NOTE(review): no seed is set in this builder (the original call was
      # commented out); call tf.random.set_seed(...) before building the model
      # if reproducible runs are needed.
      trainModel = model.fit(
          x = training_dataset,
          y = training_labels,
          batch_size = bSize,
          epochs = epoch,
          validation_split = vSplit
      )

      batch_size.append(bSize) # append batch size
      num_of_epochs.append(epoch) # append number of epochs 
      
      ### ----- Results ----- ###
      # The best value over all epochs is recorded (lowest loss / highest
      # accuracy), not the value of the final epoch.
      training_loss.append(min(trainModel.history['loss'])) # append training loss
      training_accuracy.append(max(trainModel.history['accuracy'])) # append training accuracy
      validation_loss.append(min(trainModel.history['val_loss'])) # append validation loss
      validation_accuracy.append(max(trainModel.history['val_accuracy'])) # append validation accuracy 
                                  
      # evaluate on the held-out data supplied by the caller
      finalResults = model.evaluate(testing_dataset,testing_labels)

      test_loss.append(finalResults[0]) # append test loss
      test_accuracy.append(finalResults[1]) # append test accuracy

In [9]:
def model_builder2(training_dataset = xTrain, 
                  training_labels = yTrainCat, 
                  testing_dataset = xTest, 
                  testing_labels = yTestCat):
  """Train and evaluate a 3x3 grid of GELU dense networks (NN_Model10..NN_Model18).

  Every combination of second/third hidden-layer widths in [175, 200, 225] is
  built as Input -> Flatten -> Dense(128) -> Dense -> Dropout(0.2) -> Dense ->
  Dense(10, softmax). Even-numbered models are compiled with adam /
  binary_crossentropy, odd-numbered ones with SGD / hinge. Each model is fitted
  for 10 epochs (batch size 128, 10% validation split) and then evaluated.

  Args:
    training_dataset: inputs passed to model.fit; its shape without the first
      axis is used as the model input shape.
    training_labels: targets passed to model.fit.
    testing_dataset: inputs passed to model.evaluate.
    testing_labels: targets passed to model.evaluate.

  Returns:
    None. One entry per model is appended to each module-level result list
    (model_names, first_layer_nodes, ..., test_loss, test_accuracy).
  """

  ### ----- Local Variables ----- ###
  counter = 9 #starting where model_builder1 left off
  firstActive = 'gelu'
  secondActive = 'gelu'
  thirdActive = 'gelu'
  outputActive = 'softmax'
  firstLayerNodes = 128 # number of nodes in the first layer
  secondLayerNodes = [175,200,225]  # candidate widths for the second layer
  thirdLayerNodes = [175,200,225] # candidate widths for the third layer

  # fit settings shared by every model in this grid
  bSize = 128
  epoch = 10
  vSplit = 0.1
  
  ### ----- Creating the full-dense network ----- ###

  for secondNodes in secondLayerNodes: # iterate through second layer widths
    for thirdNodes in thirdLayerNodes: # iterate through third layer widths
      counter += 1
      # The input shape comes from the dataset argument rather than the global xTrain.
      inputLayer = tf.keras.Input(shape = training_dataset.shape[1:], name = 'input_layer')
      flattenLayer = tf.keras.layers.Flatten(name = 'flatten_layer')(inputLayer)
      denseLayer1 = tf.keras.layers.Dense(units = firstLayerNodes, activation = firstActive, name = 'dense_layer_1')(flattenLayer)
      denseLayer2 = tf.keras.layers.Dense(units = secondNodes, activation = secondActive, name = 'dense_layer_2')(denseLayer1)
      dropoutLayer = tf.keras.layers.Dropout(rate = 0.2, name = 'dropout_layer')(denseLayer2)
      denseLayer3 = tf.keras.layers.Dense(units = thirdNodes, activation = thirdActive, name = 'dense_layer_3')(dropoutLayer)
      outputLayer = tf.keras.layers.Dense(units = 10, activation = outputActive, name = 'output_layer')(denseLayer3)

      # appending all hyperparameters into lists
      first_layer_nodes.append(firstLayerNodes) # append number of first layer nodes 
      second_layer_nodes.append(secondNodes) # append number of second layer nodes
      third_layer_nodes.append(thirdNodes) # append number of third layer nodes
      first_layer_activation.append(firstActive) # append first layer activation function
      second_layer_activation.append(secondActive) # append second layer activation function
      third_layer_activation.append(thirdActive) # append third layer activation function
      output_layer_activation.append(outputActive) # append output layer activation function

      modelName = f'NN_Model{counter}' # generating model names to put into list
      model_names.append(modelName) # appending name to model_names list

      model = tf.keras.Model(inputs = inputLayer, outputs = outputLayer, name = modelName)

      ### ----- Model Parameters ----- ###
      # Alternate the optimizer/loss pair by model number parity.
      # NOTE(review): binary_crossentropy and hinge are both binary-style losses
      # applied to 10-way one-hot targets; kept as-is so the experiment is unchanged.
      if counter%2 == 0: 
        optFunction = 'adam' # optimizer function
        lossFunction = 'binary_crossentropy' # loss function
      else:
        optFunction = 'SGD' # optimizer function
        lossFunction = 'hinge' # loss function  

      optimizer_function.append(str(optFunction)) # append optimizer function
      loss_function.append(str(lossFunction)) # append loss function

      ### ----- Compiler ----- ###
      model.compile(
        optimizer = optFunction,
        loss = lossFunction,
        metrics = ['accuracy'] # list form, as documented for Model.compile
      )

      ##### ----- Fitting the model ----- ###
      print(f'first layer: {firstLayerNodes} | second layer : {secondNodes}, third layer {thirdNodes}') 

      # NOTE(review): the seed is set after the layers above were created, so it
      # presumably does not cover this model's initial weights — TODO confirm.
      tf.random.set_seed(42)

      trainModel = model.fit(
          x = training_dataset,
          y = training_labels,
          batch_size = bSize,
          epochs = epoch,
          validation_split = vSplit
      )

      batch_size.append(bSize) # append batch size
      num_of_epochs.append(epoch) # append number of epochs 
      
      ### ----- Results ----- ###
      # The best value over all epochs is recorded (lowest loss / highest
      # accuracy), not the value of the final epoch.
      training_loss.append(min(trainModel.history['loss'])) # append training loss
      training_accuracy.append(max(trainModel.history['accuracy'])) # append training accuracy
      validation_loss.append(min(trainModel.history['val_loss'])) # append validation loss
      validation_accuracy.append(max(trainModel.history['val_accuracy'])) # append validation accuracy 
                                 
      # evaluate on the held-out data supplied by the caller
      finalResults = model.evaluate(testing_dataset,testing_labels)

      test_loss.append(finalResults[0]) # append test loss
      test_accuracy.append(finalResults[1]) # append test accuracy

In [10]:
def model_builder3(training_dataset = xTrain, 
                  training_labels = yTrainCat, 
                  testing_dataset = xTest, 
                  testing_labels = yTestCat):
  """Train and evaluate a 3x3 grid of ELU dense networks (NN_Model19..NN_Model27).

  Every combination of second/third hidden-layer widths in [190, 200, 210] is
  built as Input -> Flatten -> Dense(128) -> Dense -> Dropout(0.2) -> Dense ->
  Dense(10, swish). Even-numbered models are compiled with RMSprop / hinge,
  odd-numbered ones with adam / squared_hinge. Each model is fitted for 10
  epochs (batch size 128, 10% validation split) and then evaluated.

  Args:
    training_dataset: inputs passed to model.fit; its shape without the first
      axis is used as the model input shape.
    training_labels: targets passed to model.fit.
    testing_dataset: inputs passed to model.evaluate.
    testing_labels: targets passed to model.evaluate.

  Returns:
    None. One entry per model is appended to each module-level result list
    (model_names, first_layer_nodes, ..., test_loss, test_accuracy).
  """

  ### ----- Local Variables ----- ###
  counter = 18 #starting where model_builder2 left off
  firstActive = 'elu'
  secondActive = 'elu'
  thirdActive = 'elu'
  # NOTE(review): swish is unbounded above, an unusual choice for an output
  # layer; kept as-is so the experiment is unchanged.
  outputActive = 'swish'
  firstLayerNodes = 128 # number of nodes in the first layer
  secondLayerNodes = [190,200,210]  # candidate widths for the second layer
  thirdLayerNodes = [190,200,210] # candidate widths for the third layer

  # fit settings shared by every model in this grid
  bSize = 128
  epoch = 10
  vSplit = 0.1

  ### ----- Creating the full-dense network ----- ###

  for secondNodes in secondLayerNodes: # iterate through second layer widths
    for thirdNodes in thirdLayerNodes: # iterate through third layer widths
      counter += 1
      # The input shape comes from the dataset argument rather than the global xTrain.
      inputLayer = tf.keras.Input(shape = training_dataset.shape[1:], name = 'input_layer')
      flattenLayer = tf.keras.layers.Flatten(name = 'flatten_layer')(inputLayer)
      denseLayer1 = tf.keras.layers.Dense(units = firstLayerNodes, activation = firstActive, name = 'dense_layer_1')(flattenLayer)
      denseLayer2 = tf.keras.layers.Dense(units = secondNodes, activation = secondActive, name = 'dense_layer_2')(denseLayer1)
      dropoutLayer = tf.keras.layers.Dropout(rate = 0.2, name = 'dropout_layer')(denseLayer2)
      denseLayer3 = tf.keras.layers.Dense(units = thirdNodes, activation = thirdActive, name = 'dense_layer_3')(dropoutLayer)
      outputLayer = tf.keras.layers.Dense(units = 10, activation = outputActive, name = 'output_layer')(denseLayer3)

      # appending all hyperparameters into lists
      first_layer_nodes.append(firstLayerNodes) # append number of first layer nodes 
      second_layer_nodes.append(secondNodes) # append number of second layer nodes
      third_layer_nodes.append(thirdNodes) # append number of third layer nodes
      first_layer_activation.append(firstActive) # append first layer activation function
      second_layer_activation.append(secondActive) # append second layer activation function
      third_layer_activation.append(thirdActive) # append third layer activation function
      output_layer_activation.append(outputActive) # append output layer activation function

      modelName = f'NN_Model{counter}' # generating model names to put into list
      model_names.append(modelName) # appending name to model_names list

      model = tf.keras.Model(inputs = inputLayer, outputs = outputLayer, name = modelName)

      ### ----- Model Parameters ----- ###
      # Alternate the optimizer/loss pair by model number parity.
      if counter%2 == 0: 
        optFunction = 'RMSprop' # optimizer function
        lossFunction = 'hinge' # loss function
      else:
        optFunction = 'adam' # optimizer function
        lossFunction = 'squared_hinge' # loss function  

      optimizer_function.append(str(optFunction)) # append optimizer function
      loss_function.append(str(lossFunction)) # append loss function

      ### ----- Compiler ----- ###
      model.compile(
        optimizer = optFunction,
        loss = lossFunction,
        metrics = ['accuracy'] # list form, as documented for Model.compile
      )

      ##### ----- Fitting the model ----- ###
      print(f'first layer: {firstLayerNodes} | second layer {secondNodes} | third layer {thirdNodes}') 

      # NOTE(review): the seed is set after the layers above were created, so it
      # presumably does not cover this model's initial weights — TODO confirm.
      tf.random.set_seed(42)

      trainModel = model.fit(
          x = training_dataset,
          y = training_labels,
          batch_size = bSize,
          epochs = epoch,
          validation_split = vSplit
      )

      batch_size.append(bSize) # append batch size
      num_of_epochs.append(epoch) # append number of epochs 
      
      ### ----- Results ----- ###
      # The best value over all epochs is recorded (lowest loss / highest
      # accuracy), not the value of the final epoch.
      training_loss.append(min(trainModel.history['loss'])) # append training loss
      training_accuracy.append(max(trainModel.history['accuracy'])) # append training accuracy
      validation_loss.append(min(trainModel.history['val_loss'])) # append validation loss
      validation_accuracy.append(max(trainModel.history['val_accuracy'])) # append validation accuracy 
                                 
      # evaluate on the held-out data supplied by the caller
      finalResults = model.evaluate(testing_dataset,testing_labels)

      test_loss.append(finalResults[0]) # append test loss
      test_accuracy.append(finalResults[1]) # append test accuracy

### Running the hyper-parameter tuned models ###
This first round of models controls the number of nodes/perceptrons within the model.

In [11]:
# Pin the first grid to the GPU when the earlier check found one;
# otherwise run it on the default device.
if device_name == '/device:GPU:0':
    with tf.device('GPU:0'):
        model_builder1()
else:
    model_builder1()

first layer: 128 | second layer: 150 | third layer: 150
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 128 | second layer: 150 | third layer: 200
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 128 | second layer: 150 | third layer: 250
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 128 | second layer: 200 | third layer: 150
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 128 | second layer: 200 | third layer: 200
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 128 | second layer: 200 | third layer: 250
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/1

In [12]:
# Pin the second grid to the GPU when the earlier check found one;
# otherwise run it on the default device.
if device_name == '/device:GPU:0':
    with tf.device('GPU:0'):
        model_builder2()
else:
    model_builder2()

first layer: 128 | second layer : 175, third layer 175
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 128 | second layer : 175, third layer 200
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 128 | second layer : 175, third layer 225
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 128 | second layer : 200, third layer 175
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 128 | second layer : 200, third layer 200
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 128 | second layer : 200, third layer 225
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
firs

In [13]:
# Pin the third grid to the GPU when the earlier check found one;
# otherwise run it on the default device.
if device_name == '/device:GPU:0':
    with tf.device('GPU:0'):
        model_builder3()
else:
    model_builder3()

first layer: 128 | second layer 190 | third layer 190
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 128 | second layer 190 | third layer 200
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 128 | second layer 190 | third layer 210
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 128 | second layer 200 | third layer 190
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 128 | second layer 200 | third layer 200
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first layer: 128 | second layer 200 | third layer 210
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
first laye

### Creating the dataframe that denotes all hyperparameters and their respective loss and accuracy ###

In [14]:
# Collect the per-model accumulators into one mapping (column name -> values),
# keeping the column order used by the results table.
model_data = dict(
    model_names = model_names,
    first_layer_nodes = first_layer_nodes,
    first_layer_activation = first_layer_activation,
    second_layer_nodes = second_layer_nodes,
    second_layer_activation = second_layer_activation,
    third_layer_nodes = third_layer_nodes,
    third_layer_activation = third_layer_activation,
    output_layer_activation = output_layer_activation,
    optimizer_function = optimizer_function,
    loss_function = loss_function,
    batch_size = batch_size,
    num_of_epochs = num_of_epochs,
    training_loss = training_loss,
    training_accuracy = training_accuracy,
    validation_loss = validation_loss,
    validation_accuracy = validation_accuracy,
    test_loss = test_loss,
    test_accuracy = test_accuracy,
)

**Dataframe**

In [15]:
# Build the results table in one chain and key each row by its model name.
df = pd.DataFrame(data = model_data).set_index('model_names')

### Full dataset

In [16]:
# Display the full results table (one row per trained model).
df

Unnamed: 0_level_0,first_layer_nodes,first_layer_activation,second_layer_nodes,second_layer_activation,third_layer_nodes,third_layer_activation,output_layer_activation,optimizer_function,loss_function,batch_size,num_of_epochs,training_loss,training_accuracy,validation_loss,validation_accuracy,test_loss,test_accuracy
model_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
NN_Model1,128,relu,150,relu,150,relu,sigmoid,adam,binary_crossentropy,128,10,0.004854,0.992315,0.011905,0.98,0.016297,0.9764
NN_Model2,128,relu,150,relu,200,relu,sigmoid,adam,binary_crossentropy,128,10,0.004738,0.992463,0.012179,0.981667,0.015989,0.9775
NN_Model3,128,relu,150,relu,250,relu,sigmoid,adam,binary_crossentropy,128,10,0.004985,0.992019,0.012793,0.981167,0.014604,0.9787
NN_Model4,128,relu,200,relu,150,relu,sigmoid,adam,binary_crossentropy,128,10,0.004483,0.992889,0.012221,0.981333,0.015238,0.9804
NN_Model5,128,relu,200,relu,200,relu,sigmoid,adam,binary_crossentropy,128,10,0.004479,0.993185,0.012013,0.981667,0.014029,0.9797
NN_Model6,128,relu,200,relu,250,relu,sigmoid,adam,binary_crossentropy,128,10,0.004363,0.993185,0.012088,0.981667,0.015033,0.9804
NN_Model7,128,relu,250,relu,150,relu,sigmoid,adam,binary_crossentropy,128,10,0.004343,0.992741,0.012231,0.981,0.016622,0.9771
NN_Model8,128,relu,250,relu,200,relu,sigmoid,adam,binary_crossentropy,128,10,0.003791,0.994241,0.012957,0.98,0.015725,0.9788
NN_Model9,128,relu,250,relu,250,relu,sigmoid,adam,binary_crossentropy,128,10,0.004615,0.992463,0.013242,0.983167,0.015259,0.9778
NN_Model10,128,gelu,175,gelu,175,gelu,softmax,adam,binary_crossentropy,128,10,0.004314,0.993315,0.01295,0.980167,0.016143,0.9765


### Best model without using convolutional layers or other computer-vision techniques

In [17]:
def return_best_model(dataframe : pd.DataFrame):    
    """Return the row(s) of `dataframe` with the highest `test_accuracy`.

    Args:
        dataframe: table with a numeric `test_accuracy` column; any index
            type is accepted.

    Returns:
        A DataFrame holding every row whose `test_accuracy` equals the column
        maximum (several rows on a tie, no rows if `dataframe` is empty).
    """
    # Take the maximum directly. The previous `sort_values(...)[0]` looked up
    # the *label* 0, which only worked on a string index through a deprecated
    # positional fallback and picked the wrong row on an integer index.
    max_value = dataframe.test_accuracy.max()
    return dataframe[dataframe.test_accuracy == max_value]

# Show the row(s) of the results table with the highest test accuracy.
return_best_model(df)


Unnamed: 0_level_0,first_layer_nodes,first_layer_activation,second_layer_nodes,second_layer_activation,third_layer_nodes,third_layer_activation,output_layer_activation,optimizer_function,loss_function,batch_size,num_of_epochs,training_loss,training_accuracy,validation_loss,validation_accuracy,test_loss,test_accuracy
model_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
NN_Model12,128,gelu,175,gelu,225,gelu,softmax,adam,binary_crossentropy,128,10,0.004667,0.992778,0.012754,0.981667,0.013101,0.9822
