In [None]:
import tensorflow as tf

In [None]:
# Show which TensorFlow version this notebook is running against.
print(tf.__version__)

In [None]:
# Both Keras and TensorFlow Datasets (TFDS) can provide the data; TFDS is used here so that the available datasets can be listed.
# TFDS is a high-level wrapper around tf.data.
import tensorflow_datasets as tfds

In [None]:
# List the names of all dataset builders available through TFDS.
tfds.list_builders()

In [None]:
# necessary libraries for data exploration and further data operations
import numpy as np
import matplotlib.pyplot as plt
import math

In [None]:
# Fetch MNIST through the TFDS API, already divided into train and test splits.
# batch_size=-1 requests each split in full (no batching yet), and
# as_supervised=True yields each split as an (images, labels) pair.
mnist_training, mnist_test = tfds.load(
    'mnist',
    split=['train', 'test'],
    batch_size=-1,
    as_supervised=True,
)

In [None]:
# Inspect the raw training split returned by tfds.load.
print(mnist_training)

In [None]:
# Inspect the raw test split returned by tfds.load.
print(mnist_test)

In [None]:
# Each split is an (images, labels) pair; unpack both into named tensors.
mnist_training_images, mnist_training_labels = mnist_training
mnist_test_images, mnist_test_labels = mnist_test

In [None]:
# Report the image and label shapes of the training split, then of the test split.
print(mnist_training_images.shape, mnist_training_labels.shape, sep='\n')
print(mnist_test_images.shape, mnist_test_labels.shape, sep='\n')

In [None]:
# Draw the first training image in grayscale and print the label that goes with it.
plt.imshow(mnist_training_images[0], cmap='gray')
print(mnist_training_labels[0])

In [None]:
# Same check for the test split: draw its first image and print its label.
plt.imshow(mnist_test_images[0], cmap='gray')
print(mnist_test_labels[0])

In [None]:
# data preprocessing
# Flatten every image into a single vector so it can be fed to dense layers.
# NOTE: this cell rebinds the image tensors to their flattened (2-D) form, so
# running it a second time fails on the shape[2] lookup below.
num_training_images = mnist_training_images.shape[0]
num_test_images = mnist_test_images.shape[0]

img_width = mnist_training_images.shape[1]
img_height = mnist_training_images.shape[2]

# A dense layer consumes vectors, so each 28x28 image becomes 784 values.
mnist_training_images = tf.reshape(
    mnist_training_images,
    shape=(num_training_images, img_width * img_height),
)
mnist_test_images = tf.reshape(
    mnist_test_images,
    shape=(num_test_images, img_width * img_height),
)

# Confirm the new shapes.
print(mnist_training_images.shape, mnist_test_images.shape, sep='\n')

# Normalization comes next; first look at the current (max, min) value range of
# the first image in each split ...
print(np.amax(mnist_training_images[0]), np.amin(mnist_training_images[0]))
print(np.amax(mnist_test_images[0]), np.amin(mnist_test_images[0]))

# ... and of the labels in each split.
print(np.amax(mnist_training_labels), np.amin(mnist_training_labels))
print(np.amax(mnist_test_labels), np.amin(mnist_test_labels))

In [None]:
# Data-type conversion and normalization.
# Why normalize/standardize: features on different scales contribute unequally,
# and rescaling reduces that "bias" as far as possible.
# Caveat: min-max scaling is driven entirely by the extreme values, so outliers
# affect it strongly.
def preprocess(x, y):
  """Scale pixel values by 1/255 as float32 and cast the labels to int64.

  Args:
    x: image values, divided by 255.0 after the cast to float32.
    y: labels, cast to int64.

  Returns:
    The (scaled_images, int64_labels) pair.
  """
  scaled_images = tf.cast(x, tf.float32) / 255.0
  int_labels = tf.cast(y, tf.int64)
  return scaled_images, int_labels

# One-hot encode the labels and build a batched data pipeline.
def create_dataset(xs, ys, n_classes=10, batch_size=128, shuffle=True):
  """Build a batched tf.data pipeline from in-memory images and integer labels.

  Args:
    xs: images, sliced along their first axis into individual examples.
    ys: integer class labels, one per image.
    n_classes: depth of the one-hot label encoding.
    batch_size: number of examples per batch (previously fixed at 128).
    shuffle: when True (the default, matching the earlier behavior) the
      examples are shuffled with a buffer spanning the whole dataset. Pass
      False for evaluation data, where shuffling only adds cost.

  Returns:
    A tf.data.Dataset whose elements are (images, one_hot_labels) batches that
    have been passed through `preprocess`.
  """
  one_hot_labels = tf.one_hot(ys, depth=n_classes)
  dataset = tf.data.Dataset.from_tensor_slices((xs, one_hot_labels)).map(preprocess)
  if shuffle:
    # A buffer as large as the dataset gives a full, uniform shuffle.
    dataset = dataset.shuffle(len(one_hot_labels))
  return dataset.batch(batch_size)

In [None]:
# Wrap both splits in batched input pipelines.
train_dataset = create_dataset(xs=mnist_training_images, ys=mnist_training_labels)
test_dataset = create_dataset(xs=mnist_test_images, ys=mnist_test_labels)

In [None]:
# Print the dataset object, then display the structure of its elements.
print(train_dataset)
train_dataset.element_spec

In [None]:
# Sanity-check the pipeline by pulling a single batch from it.
batch_images, batch_labels = next(iter(train_dataset))

# Batch shapes first, then the first example and its one-hot label.
print(batch_images.shape, batch_labels.shape, sep='\n')
print(batch_images[0], batch_labels[0], sep='\n')

# (max, min) of the first image in the batch, to check the scaling.
print(np.amax(batch_images[0]), np.amin(batch_images[0]))

In [None]:
# Visualize the first image of the batch to confirm that it corresponds to the label printed above.
plt.imshow(tf.reshape(batch_images[0], shape=(img_width, img_height, 1)), cmap='gray')

In [None]:
# Hyperparameters
input_shape = 784
label_shape = 10

lr = 0.003

# [inputs, outputs] of each dense layer, in order.
layer_neurons = [
    [input_shape, 200],
    [200, 80],
    [80, label_shape],
]

# One bias vector per layer, sized to that layer's number of outputs.
bias_shapes = [n_outputs for _, n_outputs in layer_neurons]

# Xavier (Glorot) uniform initializer
initializer = tf.initializers.glorot_uniform()

In [None]:
# A hand-written dense layer; the TF2 or Keras layer APIs could be used instead.
def dense_layer(inputs, weights, bias):
    """Apply a fully connected layer followed by a sigmoid activation.

    Args:
        inputs: layer input, matrix-multiplied by `weights`.
        weights: weight matrix of the layer.
        bias: bias added to the matrix product.

    Returns:
        sigmoid(inputs @ weights + bias).
    """
    pre_activation = tf.matmul(inputs, weights) + bias
    return tf.nn.sigmoid(pre_activation)

In [None]:
# Helpers that create the trainable weight and bias variables.
def get_weight(shape, name):
    """Create a trainable float32 weight variable of the given shape.

    The initial values come from the notebook-level `initializer`.
    """
    initial_value = initializer(shape)
    return tf.Variable(initial_value, name=name, trainable=True, dtype=tf.float32)

def get_bias(shape, name):
    """Create a trainable float32 bias vector with `shape` entries.

    The initial values come from the notebook-level `initializer`, applied to
    the one-dimensional shape [shape].
    """
    vector_shape = [shape]
    return tf.Variable(initializer(vector_shape), name=name, trainable=True, dtype=tf.float32)

In [None]:
# Create the per-layer variables used by the model: all weight matrices first,
# then all bias vectors, each named after its layer index.
weights = [
    get_weight(layer_shape, 'weight{}'.format(index))
    for index, layer_shape in enumerate(layer_neurons)
]

bias = [
    get_bias(bias_size, 'bias{}'.format(index))
    for index, bias_size in enumerate(bias_shapes)
]


In [None]:
# define the model with initialized weights and biases
def model(input):
    """Run the three-layer network and return raw class logits.

    The two hidden layers use `dense_layer` (sigmoid activation). The output
    layer is kept linear: `loss` feeds these values to
    tf.nn.softmax_cross_entropy_with_logits, which applies softmax internally
    and therefore expects unscaled logits. Squashing the outputs through a
    sigmoid first confines them to (0, 1) and distorts the loss.

    Args:
        input: batch of flattened images.

    Returns:
        Unnormalized scores (logits), one per class for each example. Apply
        tf.nn.softmax to obtain probabilities.
    """
    hidden1 = dense_layer(input, weights[0], bias[0])
    hidden2 = dense_layer(hidden1, weights[1], bias[1])
    # Linear output layer: no activation, so the result is a true logit.
    return tf.matmul(hidden2, weights[2]) + bias[2]

In [None]:
# Optimizer: Adam with the learning rate chosen in the hyperparameters.
optimizer = tf.optimizers.Adam(learning_rate=lr)

# The "_with_logits" variant is used because the op applies softmax itself, so
# `pred` must hold unnormalized scores rather than probabilities.
def loss(pred, target):
    """Softmax cross-entropy between the predicted logits and one-hot targets.

    Args:
        pred: model outputs, interpreted as logits.
        target: one-hot encoded labels.

    Returns:
        The cross-entropy loss of each example in the batch.
    """
    return tf.nn.softmax_cross_entropy_with_logits(labels=target, logits=pred)

In [None]:
# One optimization step on a single batch.
# tf.GradientTape records the forward pass so it can be differentiated
# afterwards (the backward pass).
def train_step(model, inputs, outputs, epoch):
    """Run one forward/backward pass and update every weight and bias.

    Args:
        model: callable mapping a batch of inputs to predictions.
        inputs: batch of input examples.
        outputs: target labels for the batch.
        epoch: current epoch index; unused, kept for interface compatibility.

    Returns:
        The mean loss over the batch.
    """
    # Differentiate with respect to the biases as well as the weights;
    # otherwise the biases keep their initial values for the whole run.
    trainable_variables = weights + bias

    # Only the forward pass needs to be recorded on the tape.
    with tf.GradientTape() as tape:
        current_loss = loss(model(inputs), outputs)

    # `current_loss` holds one value per example; the tape sums their gradients.
    grads = tape.gradient(current_loss, trainable_variables)
    optimizer.apply_gradients(zip(grads, trainable_variables))

    return tf.reduce_mean(current_loss)

In [None]:
num_epochs = 10
# Every epoch walks through the whole training set one batch at a time.
# The per-batch mean losses are summed and divided by the number of batches,
# giving the average batch loss of the epoch.
for epoch in range(num_epochs):
    epoch_loss = 0
    num_batches = 0
    for batch_images, batch_labels in train_dataset:
        epoch_loss += train_step(model, batch_images, batch_labels, epoch)
        num_batches += 1
    print("--- On epoch {} ---".format(epoch))
    tf.print("| Loss: ", epoch_loss / num_batches)

In [None]:
# Evaluate the trained model on the test set: count correct predictions and
# divide by the number of samples actually evaluated (not a hard-coded size).
num_correct = 0
num_samples = 0
for batch_images, batch_labels in test_dataset:
    predictions = model(batch_images)
    # argmax is unaffected by softmax (it is monotonic), so the raw model
    # outputs can be compared directly.
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.argmax(batch_labels, axis=1)
    num_correct += int(np.sum(predicted_classes == true_classes))
    num_samples += len(true_classes)
acc = num_correct / num_samples
print(acc)