# MNIST Large Untrained Net

Derived from https://github.com/tensorflow/docs/blob/master/site/en/tutorials/quickstart/beginner.ipynb

In [None]:
import tensorflow as tf
# Report the installed TensorFlow release so the recorded outputs can be tied to a version.
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.8.2


## Load data

In [None]:
# Load the MNIST train/test splits through the Keras dataset helper.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide the image arrays by 255.0 (presumably rescaling 8-bit pixel
# intensities into [0, 1], as in the upstream tutorial); labels are untouched.
x_train = x_train / 255.0
x_test = x_test / 255.0

## Define model

In [None]:
# Experiment switches.
# generateLargeNetworkUntrained: build the wide variant whose hidden layers are
# cut off from gradients, so that only the final Dense layer is trained.
generateLargeNetworkUntrained = True
# useSparsity: initialise hidden kernels with a random sparse mask.
useSparsity = True

if useSparsity:
    # Probability that an individual weight is kept (non-zero) by the mask.
    sparsityProbabilityOfConnection = 0.1 #1-sparsity

# Multiplier applied to the base hidden width of 128 units.
layerRatio = 100 if generateLargeNetworkUntrained else 1

def kernelInitializerWithSparsity(shape, dtype=None):
    """Kernel initialiser producing randomly sparse, normally distributed weights.

    Every weight is drawn from tf.random.normal (default mean/stddev) and is
    then kept with probability ``sparsityProbabilityOfConnection`` or zeroed
    otherwise.

    Args:
        shape: shape of the kernel tensor to create.
        dtype: floating-point dtype of the returned tensor. When None, the
            Keras default float type (``tf.keras.backend.floatx()``) is used.

    Returns:
        A tensor of the given shape and dtype with the sparsity mask applied.
    """
    # Resolve a concrete dtype up front: the random op needs one, and the mask
    # below must match it or tf.multiply raises a dtype-mismatch error.
    if dtype is None:
        dtype = tf.keras.backend.floatx()
    dtype = tf.dtypes.as_dtype(dtype)

    initialisedWeights = tf.random.normal(shape, dtype=dtype) #change to glorot_uniform?

    # Uniform samples in [0, 1): an entry is "connected" when its sample falls
    # below the connection probability.
    connectionSamples = tf.random.uniform(shape, minval=0.0, maxval=1.0, dtype=tf.dtypes.float32)
    isConnected = tf.math.less(connectionSamples, sparsityProbabilityOfConnection)

    # Cast the boolean mask to the weights' dtype (previously always float32,
    # which broke any non-float32 kernel).
    sparsityMatrixMask = tf.cast(isConnected, dtype=dtype)
    return tf.multiply(initialisedWeights, sparsityMatrixMask)
# Hidden-layer kernel initialiser: the sparse custom initialiser when sparsity
# is enabled, otherwise the 'glorot_uniform' initialiser selected by name.
kernelInitializer = kernelInitializerWithSparsity if useSparsity else 'glorot_uniform'

In [None]:

# Both variants share the same topology (Flatten -> 2 hidden ReLU layers -> 10
# logits), so the stack is built once instead of being duplicated per branch.
#
# In the "large untrained" variant only the last layer is trained: the hidden
# layers are marked non-trainable so that model.summary() and the optimizer
# agree with that intent, and a stop_gradient layer still cuts backpropagation
# at the hidden/output boundary.
hiddenLayersTrainable = not generateLargeNetworkUntrained

modelLayers = [
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128*layerRatio, kernel_initializer=kernelInitializer, activation='relu', trainable=hiddenLayersTrainable),
    tf.keras.layers.Dense(128*layerRatio, kernel_initializer=kernelInitializer, activation='relu', trainable=hiddenLayersTrainable),
]
if generateLargeNetworkUntrained:
    #only train the last layer
    modelLayers.append(tf.keras.layers.Lambda(lambda x: tf.keras.backend.stop_gradient(x)))
# Final layer emits raw logits (no activation).
modelLayers.append(tf.keras.layers.Dense(10))

model = tf.keras.models.Sequential(modelLayers)
# Author-recorded evaluation accuracy:
#   large untrained variant: 0.9758 (with 1 or 2 hidden layers)
#   fully trained variant:   0.9764


In [None]:
# Cross-entropy over integer class labels; from_logits=True because the model's
# final Dense(10) layer has no activation and therefore emits raw logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [None]:
# Configure training: Adam optimiser, the logits-based cross-entropy loss
# defined earlier, and accuracy as the reported metric.
model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() emitted a stray "None" after the table.
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_3 (Flatten)         (None, 784)               0         
                                                                 
 dense_8 (Dense)             (None, 12800)             10048000  
                                                                 
 dense_9 (Dense)             (None, 12800)             163852800 
                                                                 
 lambda (Lambda)             (None, 12800)             0         
                                                                 
 dense_10 (Dense)            (None, 10)                128010    
                                                                 
Total params: 174,028,810
Trainable params: 174,028,810
Non-trainable params: 0
_________________________________________________________________
None


## Train model

In [None]:
# Train on the training split for 5 epochs; the returned History object is the
# cell's displayed value.
model.fit(x_train, y_train, epochs=5)

Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fc5d0e17290>

## Evaluate model

In [None]:
# Score the trained model on the held-out test split; returns [loss, accuracy]
# (the compiled loss followed by the compiled 'accuracy' metric).
# NOTE(review): the recorded loss is very large relative to the accuracy --
# presumably caused by large-magnitude logits from the unit-variance sparse
# initialiser on very wide layers; confirm before comparing losses across runs.
model.evaluate(x_test,  y_test, verbose=2)

313/313 - 104s - loss: 15.4941 - accuracy: 0.9622 - 104s/epoch - 334ms/step


[15.494086265563965, 0.9621999859809875]

In [None]:
# Wrap the trained logits model with a softmax stage so that calling the
# wrapper yields per-class probabilities instead of raw logits.
probability_model = tf.keras.Sequential()
probability_model.add(model)
probability_model.add(tf.keras.layers.Softmax())

In [None]:
# Run the softmax-wrapped model on the first five test images; the result has
# one row per image and one column per class.
probability_model(x_test[:5])

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]], dtype=float32)>