In [8]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [9]:
# Fetch the MNIST train/test split as returned by keras.
(trainX, trainY), (testX, testY) = mnist.load_data()

# Apply the same preprocessing to both image arrays: flatten every image to a
# 28 * 28 = 784-long vector, cast to float32 and divide by 255.0.
trainX, testX = (
    pixels.reshape(-1, 28 * 28).astype("float32") / 255.0
    for pixels in (trainX, testX)
)

In [10]:
# ---- Experiment configuration shared by every model cell below ----
N = 100000  # total parameter budget the networks are sized against
L = 10  # number of Dense layers: 9 hidden layers + 1 output layer
D = 0.3  # dropout probability
F = 50  # number of neurons in the first hidden layer
LR = 0.0001  # learning rate passed to Adam
SEED = 42  # fixed seed so the comparison between architectures is repeatable

# Seed Python, NumPy and TensorFlow in one call; without this every
# "Restart & Run All" yields different weights, dropout masks and accuracies.
tf.keras.utils.set_random_seed(SEED)

# Xavier (Glorot uniform) initialization, passed by identifier instead of as a
# single shared instance: Keras then builds a separate initializer object for
# each layer. Reusing one unseeded instance across layers can make same-shaped
# kernels start from identical values on newer Keras versions.
initializer = "glorot_uniform"

# Results file written by each model cell and closed in the last cell.
# buffering=1 selects line buffering, so every accuracy line reaches disk as
# soon as it is written even if a later cell fails before f.close() runs.
f = open("output_1.txt", "w", buffering=1)

In [11]:
## UniformNet
# Hidden layers 2-8 all share the same width J.
# J comes from (approximately) solving the parameter-budget equation
#   3136 + 785 * F + (F + 1) * J + (L - 3) * J * (J + 1) + 10 * (J + 1) = N
# where 3136 = 4 * 784 is the BatchNormalization layer on the input.
# JL is the width of the last hidden layer; presumably it was picked so that
# the real total stays just under N (the layer formulas give 99,991
# parameters for J = 85, JL = 94, counting both BatchNormalization layers).
J = 85
JL = 94

uniform_widths = [F, J, J, J, J, J, J, J, JL]  # units of LAYER_1 .. LAYER_9
uniform_dropout_after = (2, 6)  # Dropout follows LAYER_2 and LAYER_6

UniformNet = tf.keras.Sequential()
# Explicit shape tuple instead of a bare positional int, so the input spec does
# not depend on the positional signature of InputLayer.
UniformNet.add(keras.Input(shape=(784,)))
UniformNet.add(layers.BatchNormalization())  # BATCH NORMALIZATION
for layer_idx, units in enumerate(uniform_widths, start=1):
    UniformNet.add(layers.Dense(
        units,
        activation='relu',
        name=f'LAYER_{layer_idx}',
        # A fresh Xavier initializer per layer: a single shared unseeded
        # instance may return identical values for same-shaped kernels.
        kernel_initializer=tf.keras.initializers.GlorotUniform(),
    ))
    if layer_idx in uniform_dropout_after:
        UniformNet.add(layers.Dropout(D))  # DROPOUT
UniformNet.add(layers.BatchNormalization())  # BATCH NORMALIZATION
UniformNet.add(layers.Dense(10, activation='softmax', name='OUTPUT_LAYER'))

UniformNet.summary()

UniformNet.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=LR),
    metrics=["accuracy"],
)

UniformNet.fit(trainX, trainY, batch_size=16, epochs=5, verbose=2)
# evaluate() returns [loss, accuracy] because "accuracy" is the only metric.
acc = UniformNet.evaluate(testX, testY, batch_size=16, verbose=2)[1]
uniform_result = "Accuracy of UniformNet: "+str(acc)+'\n'
print(uniform_result)
f.write(uniform_result)
f.flush()  # persist the result now; f is only closed in the final cell

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 batch_normalization_6 (Batc  (None, 784)              3136      
 hNormalization)                                                 
                                                                 
 LAYER_1 (Dense)             (None, 50)                39250     
                                                                 
 LAYER_2 (Dense)             (None, 85)                4335      
                                                                 
 dropout_4 (Dropout)         (None, 85)                0         
                                                                 
 LAYER_3 (Dense)             (None, 85)                7310      
                                                                 
 LAYER_4 (Dense)             (None, 85)                7310      
                                                      

43

In [12]:
# PyramidNet
# J1..J8 are the widths of LAYER_2..LAYER_9. Each one is chosen so that the
# parameter count of a Dense layer, (inputs + 1) * units, is ROUGHLY half that
# of the Dense layer before it. BatchNormalization parameters and the initial
# layers are not taken into account.
# NOTE: the recurrence halves *parameters*, not widths. With F = 50 the widths
# come out as 579, 25, 278, 12, 128, 6, 55, 3, i.e. they alternate between wide
# and narrow layers.

J1 = 579
J2 = (F + 1)  * J1  // ((J1 + 1) * 2)
J3 = (J1 + 1) * J2 // ((J2 + 1) * 2)
J4 = (J2 + 1) * J3 // ((J3 + 1) * 2)
J5 = (J3 + 1) * J4 // ((J4 + 1) * 2)
J6 = (J4 + 1) * J5 // ((J5 + 1) * 2)
J7 = (J5 + 1) * J6 // ((J6 + 1) * 2)
J8 = (J6 + 1) * J7 // ((J7 + 1) * 2)

pyramid_widths = [F, J1, J2, J3, J4, J5, J6, J7, J8]  # LAYER_1 .. LAYER_9
pyramid_dropout_after = (2,)  # Dropout follows LAYER_2 only

PyramidNet = tf.keras.Sequential()
# Explicit shape tuple instead of a bare positional int, so the input spec does
# not depend on the positional signature of InputLayer.
PyramidNet.add(keras.Input(shape=(784,)))
PyramidNet.add(layers.BatchNormalization())  # BATCH NORMALIZATION
for layer_idx, units in enumerate(pyramid_widths, start=1):
    PyramidNet.add(layers.Dense(
        units,
        activation='relu',
        name=f'LAYER_{layer_idx}',
        # A fresh Xavier initializer per layer: a single shared unseeded
        # instance may return identical values for same-shaped kernels.
        kernel_initializer=tf.keras.initializers.GlorotUniform(),
    ))
    if layer_idx in pyramid_dropout_after:
        PyramidNet.add(layers.Dropout(D))  # DROPOUT
PyramidNet.add(layers.BatchNormalization())  # BATCH NORMALIZATION
PyramidNet.add(layers.Dense(10, activation='softmax', name='OUTPUT_LAYER'))

PyramidNet.summary()

PyramidNet.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=LR),
    metrics=["accuracy"],
)

PyramidNet.fit(trainX, trainY, batch_size=16, epochs=5, verbose=2)
# evaluate() returns [loss, accuracy] because "accuracy" is the only metric.
acc = PyramidNet.evaluate(testX, testY, batch_size=16, verbose=2)[1]
pyramid_result = "Accuracy of PyramidNet: "+str(acc)+'\n'
print(pyramid_result)
f.write(pyramid_result)
f.flush()  # persist the result now; f is only closed in the final cell

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 batch_normalization_8 (Batc  (None, 784)              3136      
 hNormalization)                                                 
                                                                 
 LAYER_1 (Dense)             (None, 50)                39250     
                                                                 
 LAYER_2 (Dense)             (None, 579)               29529     
                                                                 
 dropout_6 (Dropout)         (None, 579)               0         
                                                                 
 LAYER_3 (Dense)             (None, 25)                14500     
                                                                 
 LAYER_4 (Dense)             (None, 278)               7228      
                                                      

43

In [13]:
# InvPyramidNet
# J1..J7 are the widths of LAYER_2..LAYER_8. Each one is chosen so that the
# parameter count of a Dense layer, (inputs + 1) * units, is ROUGHLY double
# that of the Dense layer before it. BatchNormalization parameters and the
# initial layers are not taken into account.
# NOTE: the recurrence doubles *parameters*, not widths. With F = 50 the widths
# come out as 5, 85, 11, 157, 23, 302, 47, i.e. they alternate between narrow
# and wide layers.
J1 = 5
J2 = 2 * (F + 1)  * J1  // (J1 + 1)
J3 = 2 * (J1 + 1) * J2 // (J2 + 1)
J4 = 2 * (J2 + 1) * J3 // (J3 + 1)
J5 = 2 * (J3 + 1) * J4 // (J4 + 1)
J6 = 2 * (J4 + 1) * J5 // (J5 + 1)
J7 = 2 * (J5 + 1) * J6 // (J6 + 1)
# Width of the last hidden layer (LAYER_9) is a fixed constant, not derived
# from the recurrence -- presumably picked to meet the parameter budget;
# TODO confirm.
JL = 466

inv_pyramid_widths = [F, J1, J2, J3, J4, J5, J6, J7, JL]  # LAYER_1 .. LAYER_9
inv_pyramid_dropout_after = (6,)  # Dropout follows LAYER_6 only

InvPyramidNet = tf.keras.Sequential()
# Explicit shape tuple instead of a bare positional int, so the input spec does
# not depend on the positional signature of InputLayer.
InvPyramidNet.add(keras.Input(shape=(784,)))
InvPyramidNet.add(layers.BatchNormalization())  # BATCH NORMALIZATION
for layer_idx, units in enumerate(inv_pyramid_widths, start=1):
    InvPyramidNet.add(layers.Dense(
        units,
        activation='relu',
        name=f'LAYER_{layer_idx}',
        # A fresh Xavier initializer per layer: a single shared unseeded
        # instance may return identical values for same-shaped kernels.
        kernel_initializer=tf.keras.initializers.GlorotUniform(),
    ))
    if layer_idx in inv_pyramid_dropout_after:
        InvPyramidNet.add(layers.Dropout(D))  # DROPOUT
InvPyramidNet.add(layers.BatchNormalization())  # BATCH NORMALIZATION
InvPyramidNet.add(layers.Dense(10, activation='softmax', name='OUTPUT_LAYER'))

InvPyramidNet.summary()

InvPyramidNet.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=LR),
    metrics=["accuracy"],
)

InvPyramidNet.fit(trainX, trainY, batch_size=16, epochs=5, verbose=2)
# evaluate() returns [loss, accuracy] because "accuracy" is the only metric.
acc = InvPyramidNet.evaluate(testX, testY, batch_size=16, verbose=2)[1]
inv_pyramid_result = "Accuracy of InvPyramidNet: "+str(acc)+'\n'
print(inv_pyramid_result)
f.write(inv_pyramid_result)
f.flush()  # persist the result now; f is only closed in the final cell

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 batch_normalization_10 (Bat  (None, 784)              3136      
 chNormalization)                                                
                                                                 
 LAYER_1 (Dense)             (None, 50)                39250     
                                                                 
 LAYER_2 (Dense)             (None, 5)                 255       
                                                                 
 LAYER_3 (Dense)             (None, 85)                510       
                                                                 
 LAYER_4 (Dense)             (None, 11)                946       
                                                                 
 LAYER_5 (Dense)             (None, 157)               1884      
                                                      

46

In [14]:
# Close the results file (output_1.txt) opened in the configuration cell so
# that the accuracy lines written by the model cells are flushed to disk.
f.close()