<a href="https://colab.research.google.com/github/hickskl/MLP-ASL-Handshape-Classifier/blob/main/MLP_MNIST_Digit_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from keras.utils import to_categorical
from matplotlib import pyplot

# Handle to the MNIST handwritten-digit dataset bundled with Keras.
mnist = tf.keras.datasets.mnist

# INPUT:  each image is 28x28 pixels, i.e. 784 values once flattened.
# OUTPUT: a probability array of size 10, one entry per numeral 0-9.
#
# load_data() returns two (images, labels) pairs:
#     x_train: train images, numpy array of shape (60000, 28, 28)
#     y_train: train labels, numpy array of shape (60000,)
#     x_test:  test images,  numpy array of shape (10000, 28, 28)
#     y_test:  test labels,  numpy array of shape (10000,)
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Rescale the integer pixel intensities (0-255) to floats in [0, 1].
x_train = x_train / 255.0
x_test = x_test / 255.0

# Build a stacked-layer neural network with a Sequential model, adding one
# layer at a time. Each layer has one input tensor and one output tensor.
# Layout: 1 input (flatten) layer, 3 hidden Dense layers, 1 dropout layer,
# and 1 output layer.
model = tf.keras.models.Sequential()

# INPUT LAYER:
# Flattens each 28x28 image into a 1-dimensional vector of length 784.
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))

# HIDDEN LAYERS 1-3:
# Each Dense layer computes output = activation(dot(input, kernel) + bias),
# where the kernel (weights matrix) and the bias vector are created by the
# layer itself. Widths shrink from 512 to 256 to 128 units.
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.Dense(128, activation='relu'))

# DROPOUT:
# With rate 0.2, each output of the last hidden layer has a 20% chance of
# being zeroed at every training step. Helps prevent overfitting.
model.add(tf.keras.layers.Dropout(0.2))

# OUTPUT LAYER:
# One unit per digit class; softmax turns the 10 outputs into probabilities,
# which is what the sparse categorical cross-entropy loss expects by default.
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# List model shape and parameters.
model.summary()

# Sanity check: run the (still untrained) model on the first training image.
# The model's output layer uses a softmax activation, so this returns a vector
# of class probabilities (one per digit), NOT raw logits.
predictions = model(x_train[:1]).numpy()
print(x_train[:1])
print(predictions)

# No tf.nn.softmax() call is needed here: the predictions are already
# probabilities, and applying softmax a second time would distort them.

# Loss function:
#    Estimates the loss of the model, so weights can be updated to reduce the
#    loss on the next evaluation. Sparse categorical cross-entropy is the usual
#    choice for 2+ classes with integer labels.
#    Requires that the output layer has one node per class (n nodes).
#
# from_logits=False because the model already outputs softmax probabilities;
# this matches the 'sparse_categorical_crossentropy' string passed to
# model.compile(), which also treats predictions as probabilities.
#
# These lines build the loss function and evaluate it on the true label and
# the predicted probabilities for x_train[:1]. For an untrained model whose
# outputs are roughly uniform, expect a value near -ln(1/10) ~= 2.3.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
loss_fn(y_train[:1], predictions).numpy()

# Configure training.
#   optimizer: 'adam' (Adaptive Moment Estimation), a gradient-descent variant
#              that determines the direction and extent of each weight change.
#   loss:      estimates the error of the model; this is what training
#              minimises.
#   metrics:   judge the performance of the model (here: accuracy). They are
#              reported only and are not used to update the weights.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model, updating weights and biases.
#     x_train: 3-D tensor of shape (60000, 28, 28)
#     y_train: 1-D tensor (vector) of length 60000
#     epochs:  number of full passes over the training data
print("\n------------Fit-------------\n")

# Only training data is passed here; no validation split is configured.
model.fit(x=x_train, y=y_train, epochs=5)

print("\n----------Evaluate----------\n")
# NOTE: ideally a separate validation set would be evaluated as if it were the
# test set, so different parameters can be tried and re-validated before the
# real test set is run. Here the test set is evaluated directly.
model.evaluate(x=x_test, y=y_test, verbose=2)

print("\n------Probability Model------\n")

# No separate Softmax wrapper model is needed: the model's output layer
# already applies softmax, so its outputs are probabilities within [0, 1].

# Predict class probabilities for the first 5 test images.
sample_images = x_test[:5]
model(sample_images)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 512)               401920    
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_2 (Dense)              (None, 128)               32896     
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290      
Total params: 567,434
Trainabl

<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[2.1738993e-11, 1.4132235e-08, 1.5044568e-09, 1.4219698e-07,
        5.2448743e-09, 3.6150047e-09, 3.8919368e-14, 9.9999297e-01,
        8.2766274e-09, 6.9229673e-06],
       [1.8427061e-18, 7.1837737e-11, 1.0000000e+00, 7.1731898e-11,
        5.7291208e-18, 3.5121585e-20, 1.7122975e-19, 7.5706704e-13,
        1.6826309e-16, 3.9309537e-21],
       [4.0162626e-12, 9.9999976e-01, 6.8517475e-10, 1.4951741e-11,
        1.7460180e-09, 1.4476564e-10, 6.9528805e-10, 6.9995747e-09,
        2.8295625e-07, 5.7343325e-11],
       [9.9999738e-01, 2.6053517e-09, 9.7793077e-08, 1.5420749e-09,
        4.0915352e-10, 8.4426599e-09, 2.4601702e-06, 2.0281243e-10,
        6.9693215e-08, 2.3816989e-08],
       [2.5472511e-11, 8.1893360e-11, 2.4353270e-08, 9.4881887e-11,
        9.9876654e-01, 1.3485076e-10, 2.9230474e-10, 2.2958155e-09,
        1.8987301e-08, 1.2334737e-03]], dtype=float32)>