In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
from tensorflow.python.ops import array_ops
from tensorflow.keras import datasets, layers, models, utils

In [2]:
# Load the MNIST train/test splits as (images, labels) array pairs.
(train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()

# Add the trailing channel axis that Conv2D expects. Using -1 lets the sample
# count be inferred instead of hard-coding 60000 / 10000.
train_images = train_images.reshape((-1, 28, 28, 1))
test_images = test_images.reshape((-1, 28, 28, 1))

# Normalize pixel values to be between 0 and 1. Casting to float32 first avoids
# the float64 result that dividing an integer array by 255.0 would produce.
train_images = train_images.astype("float32") / 255.0
test_images = test_images.astype("float32") / 255.0

In [3]:
# Small CNN: three 3x3 convolution stages (the first two followed by 2x2 max
# pooling), then a dense head that emits 10 raw scores per image.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    # No activation on the last layer: the outputs are unnormalized logits.
    layers.Dense(10),
])

In [4]:
def softmax_loss(t=1.0):
    """Build a softmax cross-entropy loss whose normalizer is re-weighted by `t`.

    For one sample with logits z and true class y, every class k gets a weight

        h_k = exp((t - 1) * (z_k + 1) * I_k),   I_k = 1 if z_k >= z_y else 0

    and the probability used in the cross-entropy is

        p_k = exp(z_k) / sum_j(exp(z_j) * h_j)

    With the default t = 1 every h_k equals 1, so p is the ordinary softmax.

    Keyword Arguments:
        t {float} -- weighting parameter in h_k above (default: {1.0})

    Returns:
        [callable] -- loss function taking (y_true, logits), usable as the
            `loss` argument of `model.compile`.
    """
    t = float(t)

    def softmax_loss_fixed(y_true, logits):
        """Cross-entropy between one-hot labels and the re-weighted softmax.

        Notice: `logits` must be raw, unnormalized scores (no softmax applied).

        Arguments:
            y_true {tensor} -- one-hot ground truth labels, shape of [batch_size, num_cls]
            logits {tensor} -- model's raw output, shape of [batch_size, num_cls]

        Returns:
            [tensor] -- scalar loss: mean of -y_true * log(p) over all
                batch_size * num_cls entries.
        """
        y_true = tf.cast(y_true, logits.dtype)

        # Logit of the true class, one value per sample ([batch_size, 1]).
        # The reduction must run over the class axis only; summing over the
        # whole batch would mix samples together.
        true_logit = tf.reduce_sum(y_true * logits, axis=-1, keepdims=True)

        # I_k: 1.0 where a class scores at least as high as the true class
        # (this always includes the true class itself), else 0.0.
        is_competitor = tf.cast(logits >= true_logit, logits.dtype)

        # log(h_k) = (t - 1) * (z_k + 1) * I_k
        log_h = (t - 1.0) * (logits + 1.0) * is_competitor

        # log(p_k) = z_k - log(sum_j exp(z_j + log(h_j))), computed with
        # log-sum-exp so large logits do not overflow exp() and no epsilon
        # is needed to keep log() finite.
        log_norm = tf.reduce_logsumexp(logits + log_h, axis=-1, keepdims=True)
        log_prob = logits - log_norm

        ce = -y_true * log_prob
        # Averaging over every entry (not just over samples) keeps the loss on
        # the same scale as before: per-sample cross-entropy / num_cls.
        return tf.reduce_mean(ce)

    return softmax_loss_fixed

In [5]:
# Train with Adam on the custom loss (default t=1.0) and track accuracy.
model.compile(
    optimizer='adam',
    loss=softmax_loss(),
    metrics=['accuracy'],
)

# Labels are one-hot encoded to 10 classes because the loss multiplies y_true
# element-wise with the per-class outputs. The test split doubles as
# validation data.
model.fit(
    x=train_images,
    y=utils.to_categorical(train_labels, 10),
    epochs=10,
    validation_data=(test_images, utils.to_categorical(test_labels, 10)),
);

Train on 60000 samples, validate on 10000 samples
Epoch 1/10


UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[{{node conv2d/Conv2D}} = Conv2D[T=DT_FLOAT, _class=["loc:@training/Adam/gradients/conv2d/Conv2D_grad/Conv2DBackpropFilter"], data_format="NCHW", dilations=[1, 1, 1, 1], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](training/Adam/gradients/conv2d/Conv2D_grad/Conv2DBackpropFilter-0-TransposeNHWCToNCHW-LayoutOptimizer, conv2d/Conv2D/ReadVariableOp)]]
	 [[{{node loss/mul/_101}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_869_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

In [23]:
# Score the trained model on the test split; with metrics=['accuracy'] the
# result unpacks as (loss, accuracy).
test_loss, test_acc = model.evaluate(
    x=test_images,
    y=utils.to_categorical(test_labels, 10),
)
print(test_acc)

0.9938


In [25]:
# Append a softmax so the model emits class probabilities instead of logits.
# NOTE(review): this mutates `model` in place, so re-running the cell stacks
# another softmax layer on top.
model.add(tf.keras.layers.Activation('softmax'))

# The model must be recompiled after its layers change.
# NOTE(review): softmax_loss documents its second argument as raw logits, so
# the loss value reported below is computed on probabilities and is not
# comparable with the earlier one. Accuracy should be unaffected, presumably
# because softmax preserves the per-sample ranking of classes.
model.compile(
    optimizer='adam',
    loss=softmax_loss(),
    metrics=['accuracy'],
)
test_loss, test_acc = model.evaluate(
    x=test_images,
    y=utils.to_categorical(test_labels, 10),
)
print(test_acc)

0.9938
