In [2]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [6]:
# Fetch MNIST through tensorflow_datasets.
#   as_supervised=True -> each element is an (image, label) pair
#   with_info=True     -> a second return value carrying dataset metadata
#                         (used later for the per-split example counts)
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    as_supervised=True,
    with_info=True,
)



In [8]:
# Pull the two splits out of the loaded dataset
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

# Hold out 10% of the training split for validation.
# The product is a float, so cast it to an integer tensor (take/skip need a count).
num_validation_samples = tf.cast(
    0.1 * mnist_info.splits['train'].num_examples,
    tf.int64,
)

# The size of the test split comes from the dataset metadata as well
num_test_samples = tf.cast(
    mnist_info.splits['test'].num_examples,
    tf.int64,
)

#need to scale down, we know all contained 0-255 values in grey-scale matrix, this would create values 0 to 1

def scale(image, label):
    """Cast `image` to float32 and divide it by 255; `label` is passed through unchanged.

    Returns the `(scaled_image, label)` pair so the function can be used
    directly with `Dataset.map` on (image, label) elements.
    """
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label

# Apply the scaling to the combined train + validation data
scaled_train_and_validation_data = mnist_train.map(scale)

# Apply the same scaling to the test data
test_data = mnist_test.map(scale)

BUFFER_SIZE = 10000
SHUFFLE_SEED = 42

# Shuffle ONCE, in a fixed order, before splitting.
# Dataset.shuffle reshuffles on every iteration by default. If the split below were
# taken from a reshuffling dataset, `skip(num_validation_samples)` would drop a
# different set of examples on every epoch, so examples that were materialized into
# the validation tensors would later be trained on (validation leakage).
# A fixed seed plus reshuffle_each_iteration=False keeps the split stable.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE,
    seed=SHUFFLE_SEED,
    reshuffle_each_iteration=False,
)

# The first `num_validation_samples` examples become the validation set ...
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)

# ... and everything after them is the training set
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

# Mini-batch gradient descent:
#   batch size = 1                  -> stochastic gradient descent
#   batch size = number of samples  -> (single-batch) gradient descent
#   1 < batch size < samples        -> mini-batch gradient descent
BATCH_SIZE = 100

# Reshuffle the training split only (this happens after the split, so it cannot pull
# in validation examples) so that each epoch sees the batches in a different order.
train_data = train_data.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

# Validation and test data are not used for backpropagation, so they do not need
# mini-batches: put each of them into a single batch holding the whole split.
validation_data = validation_data.batch(num_validation_samples)
test_data = test_data.batch(num_test_samples)

# Materialize the single validation batch as (inputs, targets) tensors
validation_inputs, validation_targets = next(iter(validation_data))

In [15]:
# Model

# Hyperparameters
input_size = 784          # 28 * 28 * 1 flattened pixels; not referenced within this cell
output_size = 10
hidden_layer_size = 200

# Build the network by stacking layers one after another on a Sequential model
model = tf.keras.Sequential()

# Flatten each 28x28x1 image into a vector
model.add(tf.keras.layers.Flatten(input_shape=(28, 28, 1)))

# Three fully connected hidden layers of equal width
# (the first two use ReLU, the third uses SELU)
model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model.add(tf.keras.layers.Dense(hidden_layer_size, activation='selu'))

# Output layer: softmax, because we want class probabilities
model.add(tf.keras.layers.Dense(output_size, activation='softmax'))

In [16]:
# Choose the optimizer and the loss function to get the model ready for training.
#
# Common loss functions for classification:
#   binary_crossentropy             - binary targets
#   categorical_crossentropy        - one-hot encoded targets
#   sparse_categorical_crossentropy - integer class labels (no manual one-hot encoding needed)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [17]:
# Training

NUM_EPOCHS = 5

# The validation set is passed as an (inputs, targets) tuple
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

# HOW IT WORKS
# 1.) At the beginning of each epoch, the training loss will be set to 0
# 2.) The algorithm will iterate over a preset number of batches, all from train_data
# 3.) The weights and biases will be updated as many times as there are batches
# 4.) We will get a value for the loss function, indicating how the training is going
# 5.) We will also see a training accuracy
# 6.) At the end of the epoch, the algorithm will forward propagate the whole validation set

Epoch 1/5
540/540 - 5s - loss: 0.2377 - accuracy: 0.9294 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 5s - loss: 0.0936 - accuracy: 0.9712 - val_loss: 0.0805 - val_accuracy: 0.9772
Epoch 3/5
540/540 - 5s - loss: 0.0624 - accuracy: 0.9799 - val_loss: 0.0921 - val_accuracy: 0.9738
Epoch 4/5
540/540 - 5s - loss: 0.0506 - accuracy: 0.9847 - val_loss: 0.0533 - val_accuracy: 0.9852
Epoch 5/5
540/540 - 5s - loss: 0.0418 - accuracy: 0.9862 - val_loss: 0.0433 - val_accuracy: 0.9858


<tensorflow.python.keras.callbacks.History at 0x26f85f2fe88>

# 98.58 %

In [18]:
# Test the model

# Forward propagate the test data through the network;
# evaluate() reports the loss followed by the compiled metric (accuracy)
test_metrics = model.evaluate(test_data)
test_loss, test_accuracy = test_metrics

      1/Unknown - 1s 837ms/step - loss: 0.0882 - accuracy: 0.976 - 1s 843ms/step - loss: 0.0882 - accuracy: 0.9762

In [21]:
# Show the test loss and the test accuracy, separated by three spaces
print(test_loss, test_accuracy, sep="   ")


# No further optimization is allowed past this point

0.08818195760250092   0.9762


In [23]:
# After evaluating on the test data we can no longer optimize the model: it has now seen the test data

# 97.62 %