---

**Load essential libraries**

---

In [None]:
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('dark_background')
%matplotlib inline

import tensorflow as tf

---

**Check TensorFlow version**

---

In [None]:
# Show the installed TensorFlow version (a bare last expression is rendered as the cell output)
tf.__version__

---

Load MNIST Data

---

In [None]:
## Load MNIST data
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten every image into a single feature vector (one row per sample)
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

num_labels = len(np.unique(y_train))
num_features = X_train.shape[1]
num_samples = X_train.shape[0]

# One-hot encode class labels.
# num_classes is pinned to the training label count so that Y_train and Y_test
# always have num_labels columns (matching the model output), instead of each
# width being inferred separately from the largest label present in that split.
Y_train = tf.keras.utils.to_categorical(y_train, num_classes = num_labels)
Y_test = tf.keras.utils.to_categorical(y_test, num_classes = num_labels)

# Normalize the samples (images) using the training-set min/max only.
# Cast to float before subtracting: load_data is documented to return unsigned
# 8-bit pixel arrays, and integer subtraction would silently wrap around if a
# test pixel were ever smaller than the training minimum.
X_train = X_train.astype('float64')
X_test = X_test.astype('float64')
xmax = np.amax(X_train)
xmin = np.amin(X_train)
X_train = (X_train - xmin) / (xmax - xmin) # all train features turn into a number between 0 and 1
X_test = (X_test - xmin) / (xmax - xmin)   # test set reuses the training statistics

print('MNIST set')
print('---------------------')
print('Number of training samples = %d'%(num_samples))
print('Number of features = %d'%(num_features))
print('Number of output labels = %d'%(num_labels))

---

We will now look at 3 different ways to build custom models using TensorFlow 2:

1. model subclassing ([Making new layers and models via subclassing](https://www.tensorflow.org/guide/keras/making_new_layers_and_models_via_subclassing))
2. sequential API
3. functional API

---

---

**Approach-1**: here we build the model by subclassing the Keras $\texttt{Model}$ class, defining the layers in $\texttt{__init__}$ and implementing the model's forward pass in $\texttt{call()}$.

---

In [None]:
## Define 1-layer (softmax) neural network architecture
class Softmax_Model(tf.keras.models.Model):
    """Single dense layer with a softmax activation.

    The layer has ``num_labels`` output units (read from the notebook's global
    namespace at construction time) and computes in float64.
    """

    def __init__(self):
        super().__init__()
        # Only the bias gets the custom uniform initializer; the kernel keeps
        # whatever default the Dense layer uses.
        bias_init = tf.keras.initializers.RandomUniform(minval=-0.5, maxval=0.5)
        self.dense1 = tf.keras.layers.Dense(
            num_labels,
            activation = tf.keras.activations.softmax,
            bias_initializer = bias_init,
            dtype = 'float64',
        )

    def call(self, inputs):
        """Forward pass: return the output of the softmax layer for ``inputs``."""
        return self.dense1(inputs)

---

Build model

---

In [None]:
## Build model
batch_size = 100 # batch size
model = Softmax_Model()
# Create the layer weights up front by declaring the (batch, features) input shape
model.build(input_shape = (batch_size, num_features))

---

Compile and train the model on the training batches and test on the test set in one shot

---

In [None]:
## Compile model
opt = tf.keras.optimizers.Adam(learning_rate = 1e-03) # optimizer
loss_fn = tf.keras.losses.CategoricalCrossentropy()  # loss function
model.compile(optimizer = opt, loss = loss_fn, metrics = ['acc'])

# Train model and simultaneously evaluate it on the test set after every epoch.
# The batch size reuses the batch_size variable set when the model was built,
# so the value is defined in one place instead of being repeated as a literal.
model.fit(X_train, Y_train,
          batch_size = batch_size,
          epochs = 10,
          validation_data = (X_test, Y_test))

---

Instead of doing the above, we can explicitly write down the optimization step using $\texttt{GradientTape()}$ and train the model

---

In [None]:
## Wrap the training arrays in a tf.data source dataset (this is helpful for pipelining later)
batch_size = 100 # batch size
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, Y_train))
    .shuffle(buffer_size = 1024)  # shuffle samples through a 1024-element buffer
    .batch(batch_size)            # then group them into training batches
)

In [None]:
# Create softmax model
model = Softmax_Model()

opt = tf.keras.optimizers.Adam(learning_rate = 1e-03) # optimizer
loss_fn = tf.keras.losses.CategoricalCrossentropy()  # loss function

# Iterate over epochs
nepochs = 10
for epoch in range(nepochs):
  # Variable to store training loss per epoch.
  # A fresh accumulator is created for every epoch; reusing a single one
  # without resetting it would report the running mean over all epochs so far
  # rather than the mean loss of the current epoch.
  loss_train_epoch = tf.keras.metrics.Mean()

  # Iterate over the batches of the dataset; each element is a (samples, labels) pair
  for x_batch, y_batch in train_dataset:
    with tf.GradientTape() as g:
      # Forward pass and loss are recorded on the tape so they can be differentiated
      yhat = model(x_batch)
      loss = loss_fn(y_batch, yhat)

    # Calculate gradients of the batch loss w.r.t. the model weights
    grad = g.gradient(loss, model.trainable_weights)

    # Update model
    opt.apply_gradients(zip(grad, model.trainable_weights))

    # Accumulate this batch's training loss
    loss_train_epoch.update_state(loss)
  print('Epoch %d: train loss = %f'%(epoch+1, loss_train_epoch.result()))

In [None]:
# The model was trained with a hand-written loop, so compile it here
# to make evaluate() usable on the test set
model.compile(loss = loss_fn, optimizer = opt, metrics = ['acc'])
test_metrics = model.evaluate(X_test, Y_test, verbose = 0)  # loss first, then the 'acc' metric
print('\nAccuracy:', test_metrics[1])

---

**Approach-2**: here we build the model using the sequential API of TensorFlow Keras. Try this.

---

---

**Approach-3**: here we build the model using the functional API of TensorFlow Keras. Try this.

---