In [1]:
import os
import numpy as np

import tensorflow as tf
from tensorflow.keras.datasets import mnist
# from tensorflow.contrib.eager.python import tfe

In [2]:
# Eager execution is already active in this notebook (tensors are converted with
# .numpy() below without any explicit enable call), so the TF 1.x
# tf.enable_eager_execution() call is not needed.
# Seed the global TensorFlow and NumPy random generators so that repeated
# runs start from the same random state (TF 2.x replacement for
# tf.set_random_seed).
tf.random.set_seed(0)
np.random.seed(0)

In [3]:
# training hyperparameters
batch_size = 128
epochs = 10
num_classes = 10

# thread configuration: use the same thread count for the intra-op and
# inter-op parallelism settings
num_threads = 1
tf.config.threading.set_intra_op_parallelism_threads(num_threads)
tf.config.threading.set_inter_op_parallelism_threads(num_threads)

In [4]:
# fetch the MNIST train/test splits as (images, integer labels) pairs
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# cast to float32 and divide by 255, then flatten each 28x28 image into a
# single row of 784 values
x_train = (x_train.astype(np.float32) / 255.).reshape((-1, 28 * 28))
x_test = (x_test.astype(np.float32) / 255.).reshape((-1, 28 * 28))

# one-hot encode both label vectors; the result is converted back to numpy
# because keras cannot take a mix of numpy arrays and tensors as input
y_train_ohe, y_test_ohe = (
    tf.one_hot(labels, depth=num_classes).numpy()
    for labels in (y_train, y_test)
)

# report the final array shapes
print('x train', x_train.shape)
print('y train', y_train_ohe.shape)
print('x test', x_test.shape)
print('y test', y_test_ohe.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
x train (60000, 784)
y train (60000, 10)
x test (10000, 784)
y test (10000, 10)


In [5]:
# model definition (canonical way)
class LogisticRegression(tf.keras.Model):
    """Multinomial logistic regression: a single dense layer followed by softmax.

    Args:
        num_classes: number of units in the dense layer, i.e. the length of
            the probability vector produced for each input row.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.dense = tf.keras.layers.Dense(num_classes)

    def call(self, inputs, training=None, mask=None):
        """Return the softmax of the dense layer's output for ``inputs``.

        ``training`` and ``mask`` are accepted to match the Keras ``call``
        signature but are not used by this model.
        """
        logits = self.dense(inputs)

        # No explicit device pinning here: the softmax runs on whatever device
        # the caller selected for the rest of the model, which avoids a forced
        # copy to the CPU and back when the model is placed on an accelerator.
        return tf.nn.softmax(logits)

In [6]:
def startTraining():
    """Train ``LogisticRegression`` on the training set and print test scores.

    Reads the notebook-level names ``num_classes``, ``batch_size``, ``epochs``,
    ``x_train``, ``y_train_ohe``, ``x_test`` and ``y_test_ohe``. All work is
    placed on the CPU. Prints the list returned by ``model.evaluate`` and
    returns nothing.
    """
    device = '/cpu:0'

    with tf.device(device):
        # build model and optimizer
        model = LogisticRegression(num_classes)
        model.compile(optimizer=tf.optimizers.SGD(learning_rate=0.01, name='SGD'),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        # Create the model's weights up front by running one forward pass on a
        # single all-zero sample whose width matches the training data. This
        # uses only the public call interface instead of the private
        # model._set_inputs() hook.
        dummy_x = tf.zeros((1, x_train.shape[-1]))
        model(dummy_x)

        # train
        model.fit(x_train, y_train_ohe, batch_size=batch_size, epochs=epochs,
                  validation_data=(x_test, y_test_ohe), verbose=2)

        # evaluate on test set
        scores = model.evaluate(x_test, y_test_ohe, batch_size=batch_size, verbose=2)
        print("Final test loss and accuracy :", scores)


In [7]:
%%time
# Run the complete train-and-evaluate routine; the %%time cell magic reports
# how long the whole cell took to execute.
startTraining()

Epoch 1/10
469/469 - 4s - loss: 1.2562 - accuracy: 0.7123 - val_loss: 0.7978 - val_accuracy: 0.8396 - 4s/epoch - 8ms/step
Epoch 2/10
469/469 - 2s - loss: 0.7070 - accuracy: 0.8466 - val_loss: 0.5997 - val_accuracy: 0.8649 - 2s/epoch - 4ms/step
Epoch 3/10
469/469 - 2s - loss: 0.5820 - accuracy: 0.8618 - val_loss: 0.5210 - val_accuracy: 0.8759 - 2s/epoch - 5ms/step
Epoch 4/10
469/469 - 2s - loss: 0.5220 - accuracy: 0.8701 - val_loss: 0.4761 - val_accuracy: 0.8825 - 2s/epoch - 4ms/step
Epoch 5/10
469/469 - 2s - loss: 0.4854 - accuracy: 0.8755 - val_loss: 0.4474 - val_accuracy: 0.8872 - 2s/epoch - 4ms/step
Epoch 6/10
469/469 - 2s - loss: 0.4601 - accuracy: 0.8803 - val_loss: 0.4263 - val_accuracy: 0.8920 - 2s/epoch - 4ms/step
Epoch 7/10
469/469 - 1s - loss: 0.4413 - accuracy: 0.8836 - val_loss: 0.4108 - val_accuracy: 0.8937 - 1s/epoch - 3ms/step
Epoch 8/10
469/469 - 1s - loss: 0.4266 - accuracy: 0.8865 - val_loss: 0.3981 - val_accuracy: 0.8956 - 1s/epoch - 2ms/step
Epoch 9/10
469/469 - 1s 