In [1]:
import tensorflow as tf
import keras
from tensorflow.keras.losses import Loss

In [30]:
# Loss function
class Custom_Loss(Loss):
    """Softmax cross-entropy evaluated directly on logits.

    For a logit vector z and a target distribution y over the classes, the
    per-sample loss is

        logsumexp(z) - <y, z>

    which equals -sum(y * log_softmax(z)) whenever y sums to 1 (e.g. one-hot).

    The targets may be given either as a distribution with the same shape as
    the logits, or as class indices (shape ``logits.shape[:-1]``, optionally
    with a trailing axis of size 1); class indices are one-hot encoded here.
    """

    def __init__(self, **kwargs):
        # Forward optional Keras Loss arguments (e.g. ``name``, ``reduction``).
        super().__init__(**kwargs)

    def call(self, y_true, y_pred):
        """Return the per-sample cross-entropy.

        Args:
            y_true: Target distribution shaped like ``y_pred``, or class
                indices with one fewer axis (or a trailing axis of size 1).
            y_pred: Unnormalised logits with the class axis last.

        Returns:
            Tensor of shape ``y_pred.shape[:-1]`` holding one loss value per
            sample; the base class ``__call__`` applies the reduction.
        """
        logits = tf.convert_to_tensor(y_pred)
        targets = tf.convert_to_tensor(y_true)

        logit_rank = logits.shape.rank
        target_rank = targets.shape.rank

        # Class-index labels are recognised by their static shape: either one
        # axis fewer than the logits, or a trailing singleton axis where the
        # logits have a real class axis.
        has_index_labels = False
        if logit_rank is not None and target_rank is not None:
            if target_rank == logit_rank - 1:
                has_index_labels = True
            elif (
                target_rank == logit_rank
                and targets.shape[-1] == 1
                and logits.shape[-1] != 1
            ):
                targets = tf.squeeze(targets, axis=-1)
                has_index_labels = True

        if has_index_labels:
            targets = tf.one_hot(
                tf.cast(targets, tf.int32),
                depth=tf.shape(logits)[-1],
                dtype=logits.dtype,
            )
        else:
            targets = tf.cast(targets, logits.dtype)

        # Neither the labels nor the logits are passed through softmax: the
        # log-sum-exp term already provides the normalisation. Both terms are
        # reduced over the class axis only, so each sample keeps its own loss.
        log_partition = tf.math.reduce_logsumexp(logits, axis=-1)
        target_score = tf.reduce_sum(targets * logits, axis=-1)
        return log_partition - target_score

In [22]:
# Short alias for the Keras MNIST dataset module; its load_data() is called in the next cell.
mnist = tf.keras.datasets.mnist

In [23]:
# Load the train and test splits as (images, labels) pairs.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide the pixel values by 255.0 and store the images as float32 tensors.
x_train = tf.cast(x_train / 255.0, tf.float32)
x_test = tf.cast(x_test / 255.0, tf.float32)

# The labels are converted to float32 tensors as well.
y_train = tf.cast(y_train, tf.float32)
y_test = tf.cast(y_test, tf.float32)

In [38]:
# Fully connected classifier: flatten -> 512 ELU -> 512 ELU -> dropout -> 10 outputs.
model = tf.keras.models.Sequential()

# Input layer: reshapes each 28x28 image into a flat vector.
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))

# Two hidden linear layers of 512 units, each followed by the ELU activation.
model.add(tf.keras.layers.Dense(512, activation='elu'))
model.add(tf.keras.layers.Dense(512, activation='elu'))

# Dropout with rate 0.2: during training a fraction 0.2 of the incoming
# activations is randomly set to zero, which reduces overfitting.
model.add(tf.keras.layers.Dropout(0.2))

# Output layer: 10 units with no activation, i.e. the model emits raw logits.
model.add(tf.keras.layers.Dense(10))

In [37]:
# Cross-entropy over class-index labels. from_logits=True because the model's
# final Dense(10) layer has no activation, so its outputs are unnormalised scores.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [41]:
# AdaGrad optimizer with learning rate 0.01 and epsilon 1e-4.
# NOTE(review): `tf.keras.optimizers.experimental` is a version-specific namespace —
# confirm it exists in the installed TensorFlow/Keras release before re-running.
adagrad = tf.keras.optimizers.experimental.Adagrad(learning_rate=0.01, epsilon=1e-04)

In [42]:
# Attach the AdaGrad optimizer and the loss object to the model and track accuracy.
model.compile(
    optimizer=adagrad,
    loss=loss,
    metrics=['accuracy'],
)

# Train for 100 epochs on the training split; verbose=2 prints one line per epoch.
# NOTE(review): the captured output starts at accuracy 0.9941 in epoch 1, which suggests
# `model` already held trained weights when this cell was last run — confirm on a fresh kernel.
model.fit(
    x_train,
    y_train,
    epochs=100,
    verbose=2,
)

Epoch 1/100
1875/1875 - 5s - loss: 0.0181 - accuracy: 0.9941 - 5s/epoch - 3ms/step
Epoch 2/100
1875/1875 - 5s - loss: 0.0166 - accuracy: 0.9947 - 5s/epoch - 3ms/step
Epoch 3/100
1875/1875 - 5s - loss: 0.0151 - accuracy: 0.9952 - 5s/epoch - 3ms/step
Epoch 4/100
1875/1875 - 5s - loss: 0.0139 - accuracy: 0.9956 - 5s/epoch - 3ms/step
Epoch 5/100
1875/1875 - 5s - loss: 0.0122 - accuracy: 0.9963 - 5s/epoch - 3ms/step
Epoch 6/100
1875/1875 - 5s - loss: 0.0120 - accuracy: 0.9966 - 5s/epoch - 3ms/step
Epoch 7/100
1875/1875 - 5s - loss: 0.0100 - accuracy: 0.9971 - 5s/epoch - 3ms/step
Epoch 8/100
1875/1875 - 5s - loss: 0.0097 - accuracy: 0.9972 - 5s/epoch - 3ms/step
Epoch 9/100
1875/1875 - 5s - loss: 0.0088 - accuracy: 0.9976 - 5s/epoch - 3ms/step
Epoch 10/100
1875/1875 - 5s - loss: 0.0083 - accuracy: 0.9978 - 5s/epoch - 3ms/step
Epoch 11/100
1875/1875 - 5s - loss: 0.0077 - accuracy: 0.9982 - 5s/epoch - 3ms/step
Epoch 12/100
1875/1875 - 5s - loss: 0.0075 - accuracy: 0.9981 - 5s/epoch - 3ms/step
E

<keras.callbacks.History at 0x7ff152ddbf10>

In [40]:
# SGD test accuracy
# NOTE(review): no SGD optimizer or SGD training run appears in the visible cells, and this
# cell's execution count (40) precedes the AdaGrad fit (42). Presumably the recorded figure
# came from an earlier SGD-trained state of `model`; on a top-to-bottom run this cell
# evaluates whatever `model` holds at that point — confirm before relying on the label.
model.evaluate(x_test,  y_test, verbose=2)

313/313 - 1s - loss: 0.0741 - accuracy: 0.9802 - 501ms/epoch - 2ms/step


[0.07407862693071365, 0.9801999926567078]

In [43]:
# AdaGrad test accuracy
# Evaluates the current `model` on the test split; the displayed result is [loss, accuracy].
model.evaluate(x_test,  y_test, verbose=2)

313/313 - 1s - loss: 0.1014 - accuracy: 0.9807 - 758ms/epoch - 2ms/step


[0.1013999953866005, 0.9807000160217285]