In [1]:
import os
import numpy as np

import tensorflow as tf
from tensorflow.keras.datasets import mnist
# from tensorflow.contrib.eager.python import tfe

In [2]:
# TF 2.x executes eagerly by default, so no explicit enable call is required.
# Seed NumPy and TensorFlow up front so repeated runs are as repeatable as the backend allows.
np.random.seed(0)
tf.random.set_seed(0)

In [3]:
# constants
# Mini-batch size handed to model.fit() and model.evaluate().
batch_size = 128
# Number of passes over the training data handed to model.fit().
epochs = 10
# Number of label classes: used as the one-hot depth and as the Dense layer width.
num_classes = 10

In [4]:
# Fetch MNIST as (images, integer labels) pairs for the train and test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Cast to float32, rescale by 1/255, then flatten every image into a 784-long vector.
x_train = (x_train.astype('float32') / 255.).reshape((-1, 28 * 28))
x_test = (x_test.astype('float32') / 255.).reshape((-1, 28 * 28))

# One-hot encode the labels, then convert the resulting tensors back to NumPy arrays
# because Keras cannot be fed a mix of NumPy inputs and tensor targets.
y_train_ohe = tf.one_hot(y_train, depth=num_classes).numpy()
y_test_ohe = tf.one_hot(y_test, depth=num_classes).numpy()

# Print the final array shapes as a quick sanity check.
for _name, _array in (
    ('x train', x_train),
    ('y train', y_train_ohe),
    ('x test', x_test),
    ('y test', y_test_ohe),
):
    print(_name, _array.shape)

x train (60000, 784)
y train (60000, 10)
x test (10000, 784)
y test (10000, 10)


In [5]:
# model definition (canonical way)
class LogisticRegression(tf.keras.Model):
    """Multinomial logistic regression: a single dense layer followed by softmax.

    Args:
        num_classes: Number of output classes, i.e. the width of the dense layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        # No activation here: the layer emits one raw score per class.
        self.dense = tf.keras.layers.Dense(num_classes)

    def call(self, inputs, training=None, mask=None):
        """Return per-class probabilities for a batch of inputs.

        Args:
            inputs: Batch of feature vectors fed to the dense layer.
            training: Unused; accepted for Keras call-signature compatibility.
            mask: Unused; accepted for Keras call-signature compatibility.

        Returns:
            Softmax of the dense layer output, taken over the last axis.
        """
        logits = self.dense(inputs)

        # Softmax is left on whatever device the caller selected; pinning it to the
        # CPU is unnecessary and would force a host round-trip when the rest of the
        # model runs on an accelerator.
        return tf.nn.softmax(logits)

In [6]:
def startTraining(device='/cpu:0', learning_rate=0.01):
    """Train the logistic-regression model on MNIST and print the test metrics.

    Reads the notebook-level globals ``num_classes``, ``batch_size``, ``epochs``,
    ``x_train``, ``y_train_ohe``, ``x_test`` and ``y_test_ohe``.

    Args:
        device: TensorFlow device string under which the model is built, trained
            and evaluated. Defaults to ``'/cpu:0'``, the previously hard-coded value.
        learning_rate: Step size for the SGD optimizer. Defaults to ``0.01``, the
            previously hard-coded value.

    Returns:
        None. Per-epoch progress and the final ``[loss, accuracy]`` are printed.
    """
    with tf.device(device):
        # Build the model and attach optimizer, loss and metric.
        model = LogisticRegression(num_classes)
        model.compile(
            optimizer=tf.optimizers.SGD(learning_rate=learning_rate, name='SGD'),
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )

        # Create the weights from a single dummy sample with an ordinary forward
        # pass, so the input shape is fixed before fit() sees the dataset. This uses
        # the public call path instead of the private model._set_inputs() hook.
        dummy_x = tf.zeros((1, 28 * 28))
        model(dummy_x)

        # Train, validating on the test split after every epoch.
        model.fit(x_train, y_train_ohe, batch_size=batch_size, epochs=epochs,
                  validation_data=(x_test, y_test_ohe), verbose=2)

        # Evaluate on the test set.
        scores = model.evaluate(x_test, y_test_ohe, batch_size=batch_size, verbose=2)
        print("Final test loss and accuracy :", scores)


In [11]:
%%time
# Run the full train + evaluate pipeline; the %%time cell magic reports how long the cell took.
startTraining()

Epoch 1/10
469/469 - 2s - loss: 1.2721 - accuracy: 0.7028 - val_loss: 0.8112 - val_accuracy: 0.8305 - 2s/epoch - 5ms/step
Epoch 2/10
469/469 - 1s - loss: 0.7157 - accuracy: 0.8398 - val_loss: 0.6101 - val_accuracy: 0.8612 - 763ms/epoch - 2ms/step
Epoch 3/10
469/469 - 1s - loss: 0.5881 - accuracy: 0.8593 - val_loss: 0.5291 - val_accuracy: 0.8715 - 736ms/epoch - 2ms/step
Epoch 4/10
469/469 - 1s - loss: 0.5268 - accuracy: 0.8683 - val_loss: 0.4832 - val_accuracy: 0.8781 - 762ms/epoch - 2ms/step
Epoch 5/10
469/469 - 1s - loss: 0.4893 - accuracy: 0.8748 - val_loss: 0.4531 - val_accuracy: 0.8841 - 791ms/epoch - 2ms/step
Epoch 6/10
469/469 - 1s - loss: 0.4635 - accuracy: 0.8797 - val_loss: 0.4320 - val_accuracy: 0.8879 - 711ms/epoch - 2ms/step
Epoch 7/10
469/469 - 1s - loss: 0.4443 - accuracy: 0.8831 - val_loss: 0.4156 - val_accuracy: 0.8911 - 721ms/epoch - 2ms/step
Epoch 8/10
469/469 - 1s - loss: 0.4294 - accuracy: 0.8862 - val_loss: 0.4025 - val_accuracy: 0.8941 - 772ms/epoch - 2ms/step
Epo