<a href="https://colab.research.google.com/github/hickskl/MLP-ASL-Handshape-Classifier/blob/main/MLP_MNIST_Digit_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from keras.utils import to_categorical
from matplotlib import pyplot

# MNIST handwritten-digit dataset bundled with Keras.
mnist = tf.keras.datasets.mnist

# INPUT:  one 28x28 grayscale image (784 pixels in total).
# OUTPUT: a probability vector of size 10, one entry per digit 0-9.
#
# load_data() returns two (images, labels) pairs:
#     x_train: training images, numpy array of shape (60000, 28, 28)
#     y_train: training labels, numpy array of shape (60000,)
#     x_test:  test images,     numpy array of shape (10000, 28, 28)
#     y_test:  test labels,     numpy array of shape (10000,)
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale the integer pixel intensities (0-255) down to floats in [0, 1].
# Dividing by a float also converts the arrays to a floating-point dtype.
x_train = x_train / 255.0
x_test = x_test / 255.0

# Build a stacked (Sequential) network one layer at a time.
# Each layer takes exactly one input tensor and produces one output tensor.
# The stack has six layers in total: a Flatten input layer, three Dense
# hidden layers, a Dropout layer, and a Dense output layer.
model = tf.keras.models.Sequential()

# INPUT LAYER
# Flattens each 28x28 image into a 1-dimensional vector of length 784.
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))

# HIDDEN LAYERS
# Each Dense layer computes output = activation(dot(input, kernel) + bias),
# where kernel (the weights matrix) and bias (a vector) are created by the
# layer itself. The widths shrink 512 -> 256 -> 128.
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.Dense(128, activation='relu'))

# DROPOUT
# The argument is the drop rate: with 0.2, each output of the final hidden
# layer has a 20% chance of being zeroed at every training step.
# Helps prevent overfitting.
model.add(tf.keras.layers.Dropout(0.2))

# OUTPUT LAYER
# One unit per digit class; softmax turns the 10 outputs into probabilities,
# which is what the loss used at compile time expects.
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# List model shape and parameters
model.summary()

# Sanity check on the untrained model, using the first training image.
# The output layer already applies softmax, so calling the model returns a
# vector of class PROBABILITIES (one per digit), not raw logits. Applying
# tf.nn.softmax again would distort them, so no extra conversion is done.
predictions = model(x_train[:1]).numpy()

# Loss function:
#    Estimates the loss of the model, so weights can be updated to reduce the
#    loss on the next evaluation. Sparse categorical cross-entropy is the
#    usual choice when working with 2+ classes and integer labels.
#    Requires that the output layer has one node per class (n nodes).
#
# from_logits=False because `predictions` are already softmax probabilities;
# this also matches the 'sparse_categorical_crossentropy' string loss
# (whose default is from_logits=False) passed to model.compile().
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

# Loss between the true label of x_train[:1] and its predicted probabilities.
# For an untrained model this should be close to -ln(1/10) ~= 2.3.
loss_fn(y_train[:1], predictions).numpy()

# Configure the model for training.
#   optimizer: how the weights are updated (direction and extent of each
#              change); 'adam' = Adaptive Moment Estimation, a variant of
#              gradient descent.
#   loss:      the quantity minimised during training.
#   metrics:   reported to judge model performance (others exist for
#              probabilistic, regression and true/false tasks); they are
#              not used to update the weights.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 5 passes over the training set, updating weights and biases.
model.fit(x=x_train, y=y_train, epochs=5)

# Measure loss and accuracy on the held-out test data.
model.evaluate(x=x_test, y=y_test, verbose=2)

# Class probabilities for the first 5 test images (one row per image).
# Kept as the final expression so the notebook displays the tensor.
model(x_test[:5])

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 512)               401920    
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_2 (Dense)              (None, 128)               32896     
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290      
Total params: 567,434
Trainabl

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[4.93457890e-16, 3.51332852e-10, 6.06562300e-11, 4.58143390e-09,
        6.14565066e-11, 1.61365367e-14, 1.28872891e-19, 1.00000000e+00,
        7.81519988e-11, 1.33350531e-08],
       [4.33590582e-13, 3.62838995e-11, 9.99999046e-01, 9.11742347e-07,
        1.59440464e-11, 2.57122969e-12, 1.03897828e-11, 4.84622724e-11,
        1.44298147e-08, 4.04241629e-14],
       [1.70263161e-11, 9.99999762e-01, 1.19841896e-08, 2.19610198e-11,
        2.48003644e-08, 2.45190535e-08, 1.13124115e-08, 7.01859859e-09,
        1.87569768e-07, 1.10250219e-10],
       [1.00000000e+00, 1.19343086e-19, 5.49330199e-11, 5.07314839e-13,
        2.16962096e-13, 7.32293891e-13, 1.44188785e-08, 4.13638534e-16,
        7.11832123e-14, 4.01041145e-10],
       [1.47796975e-09, 6.61504740e-10, 8.08910006e-09, 3.49270543e-11,
        9.98916507e-01, 1.71223071e-08, 1.95110886e-10, 5.68065616e-07,
        2.12504747e-09, 1.08287192e-03]], dtype=float32)>