In [20]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

In [21]:
inputs = keras.Input(shape=(784,), name="digits")
x1 = layers.Dense(64, activation="relu")(inputs)
x2 = layers.Dense(64, activation="relu")(x1)
outputs = layers.Dense(10, name="predictions")(x2)
model = keras.Model(inputs=inputs, outputs=outputs)

In [22]:
optimizer = keras.optimizers.SGD(learning_rate=1e-3)

In [23]:
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [24]:
batch_size = 64
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

In [25]:
x_train.shape

(60000, 28, 28)

In [26]:
x_train = np.reshape (x_train, (-1, 784))

In [27]:
x_train.shape

(60000, 784)

In [28]:
x_test = np.reshape(x_test, (-1, 784))

In [29]:
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))

In [30]:
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

In [31]:
epochs = 2
for epoch in range(epochs):
    print("start of epoch %d" % epoch)
    
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # with statement does not create a scope
        with tf.GradientTape() as tape:
            # forward pass
            logits = model(x_batch_train, training=True)
            # loss
            loss_value = loss_fn(y_batch_train, logits)
            
        grads = tape.gradient(loss_value, model.trainable_weights)
        
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        
        if step % 100 == 0:
            print("train loss: %.4f at step: %d" % (float(loss_value), step))
        

start of epoch 0
train loss: 98.8836 at step: 0
train loss: 1.5775 at step: 100
train loss: 1.6193 at step: 200
train loss: 1.3226 at step: 300
train loss: 1.4717 at step: 400
train loss: 1.2003 at step: 500
train loss: 0.7726 at step: 600
train loss: 0.6196 at step: 700
train loss: 0.6884 at step: 800
train loss: 0.2521 at step: 900
start of epoch 1
train loss: 0.8705 at step: 0
train loss: 0.4164 at step: 100
train loss: 0.7547 at step: 200
train loss: 0.4202 at step: 300
train loss: 0.4970 at step: 400
train loss: 0.5334 at step: 500
train loss: 0.4267 at step: 600
train loss: 0.5700 at step: 700
train loss: 0.7061 at step: 800
train loss: 0.2783 at step: 900
