In [4]:
import tensorflow as tf
import numpy as np
import tensorflow_datasets as tfds

In [6]:
# Load MNIST through TensorFlow Datasets.
#   as_supervised=True -> each element is an (image, label) pair
#   with_info=True     -> also return the dataset metadata object
mnist_data, mnist_info = tfds.load(
    name='mnist',
    with_info=True,
    as_supervised=True,
)

In [11]:
# Pick the 'train' and 'test' entries out of the loaded dataset mapping.
mnist_train = mnist_data['train']
mnist_test = mnist_data['test']

In [16]:
# Reserve 10% of the training split for validation.
# The product is a Python float here; it is cast to an integer in the next cell.
num_validation_samples = mnist_info.splits['train'].num_examples * 0.1

In [19]:
# Turn the validation count into an int64 tensor so it can be passed to
# take(), skip() and batch() further down.
num_validation_samples = tf.cast(x=num_validation_samples, dtype=tf.int64)

In [21]:
# Number of examples in the 'test' split, read from the dataset metadata.
# Used below as the batch size for the test set.
num_test_samples = mnist_info.splits['test'].num_examples

In [22]:
# Same treatment as the validation count: store the test count as an int64 tensor.
num_test_samples = tf.cast(x=num_test_samples, dtype=tf.int64)

In [26]:
def scale(image, label):
    """Convert an image to float32 and divide its values by 255.

    Args:
        image: image tensor to rescale.
        label: target belonging to ``image``; passed through unchanged.

    Returns:
        A ``(scaled_image, label)`` tuple.
    """
    return tf.cast(image, tf.float32) / 255., label
# Apply the same rescaling to every split so the model sees consistent inputs.
# The training split still contains the future validation samples at this point.
test_data = mnist_test.map(map_func=scale)
scale_trained_validation_data = mnist_train.map(map_func=scale)

In [27]:
# Number of elements the shuffle buffer holds while sampling.
BUFFER_SIZE = 10000

# Shuffle ONCE before splitting. By default shuffle() re-shuffles every time the
# dataset is iterated (reshuffle_each_iteration=True), which would make take()/skip()
# produce a different validation/training partition on every epoch: the validation
# batch is drawn once, but training would then skip a different set of samples each
# epoch and end up training on validation data. Freezing the order keeps the two
# subsets disjoint for the whole run.
shuffle_train_validation_data = scale_trained_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

# The first num_validation_samples elements of the frozen order form the validation set ...
validation_data = shuffle_train_validation_data.take(num_validation_samples)

# ... and everything after them is the training set. Re-shuffle only this part so
# each epoch still sees the training samples in a different order.
train_data = shuffle_train_validation_data.skip(num_validation_samples).shuffle(BUFFER_SIZE)

In [30]:
# Mini-batch size used for gradient updates during training.
BATCH_SIZE = 100

# Only the training set is split into mini-batches.
train_data = train_data.batch(batch_size=BATCH_SIZE)

# Validation and test data are only forward-propagated, so each is packed into
# one batch containing the whole subset.
validation_data = validation_data.batch(batch_size=num_validation_samples)
test_data = test_data.batch(batch_size=num_test_samples)

In [31]:
# validation_data was batched with a batch size equal to the validation count,
# so pulling the first batch yields the inputs and targets of the whole subset.
(validation_inputs, validation_targets) = next(iter(validation_data))

# Model

In [47]:
# Network dimensions.
input_size = 784          # 28 * 28 * 1 pixels per image once flattened
output_size = 10          # one output unit per class
hidden_layer_size = 100   # units in each hidden layer

# Plain feed-forward classifier, assembled layer by layer:
#   Flatten -> Dense(relu) -> Dense(relu) -> Dense(softmax)
model = tf.keras.Sequential()

# Input layer. Every observation is a rank-3 tensor of shape (28, 28, 1).
# Dense layers expect a vector per observation, so Flatten unrolls each image
# into a (784,) vector (the batch dimension is left untouched).
model.add(tf.keras.layers.Flatten(input_shape=(28, 28, 1)))

# Two hidden layers. Dense computes: output = activation(dot(input, weight) + bias).
model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))

# Output layer: same Dense computation, activated with softmax.
model.add(tf.keras.layers.Dense(output_size, activation='softmax'))

In [48]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss,
# and accuracy reported as the metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [49]:
# Train for 5 epochs, checking against the pre-extracted validation tensors
# after every epoch. verbose=2 prints one summary line per epoch.
model.fit(
    x=train_data,
    epochs=5,
    verbose=2,
    validation_data=(validation_inputs, validation_targets),
)

Epoch 1/5
540/540 - 2s - loss: 0.3363 - accuracy: 0.9036 - val_loss: 0.1613 - val_accuracy: 0.9518
Epoch 2/5
540/540 - 2s - loss: 0.1374 - accuracy: 0.9586 - val_loss: 0.1101 - val_accuracy: 0.9678
Epoch 3/5
540/540 - 2s - loss: 0.0974 - accuracy: 0.9709 - val_loss: 0.0827 - val_accuracy: 0.9742
Epoch 4/5
540/540 - 2s - loss: 0.0736 - accuracy: 0.9779 - val_loss: 0.0659 - val_accuracy: 0.9802
Epoch 5/5
540/540 - 2s - loss: 0.0609 - accuracy: 0.9819 - val_loss: 0.0622 - val_accuracy: 0.9802


<tensorflow.python.keras.callbacks.History at 0x235303dc790>

In [50]:
# Final check on the held-out test set. With a single compiled metric,
# evaluate() returns the loss followed by the accuracy.
test_loss, test_accuracy = model.evaluate(x=test_data)



In [51]:
# Show the loss value returned by model.evaluate() for the test set.
test_loss

0.0878312736749649

In [52]:
# Show the accuracy value returned by model.evaluate() for the test set.
test_accuracy

0.9735000133514404