In this turorial we we train a simple CNN model on MNIST dataset. However, in this training we will not use predefined methods such as model.compile() and model.fit() 

In [14]:
import tensorflow as tf 
from tensorflow.keras.layers import Dense,Conv2D,MaxPooling2D,Flatten

import numpy as np
import time

from sklearn.model_selection import train_test_split

Preprocessing Dataset

In [15]:
(x_train, y_train), (x_test, y_test)=tf.keras.datasets.mnist.load_data()

In [16]:
def preprocess(x,y):
    print('x.shape ',x.shape)
    x=tf.cast(x,dtype=tf.float32)/255.0
    x=tf.reshape(x,shape=(28,28,1))
    y=tf.one_hot(y,depth=10) ## 
    return x,y

In [17]:
train_ds=tf.data.Dataset.from_tensor_slices((x_train,y_train)).map(preprocess).cache().batch(256).prefetch(tf.data.AUTOTUNE)
test_ds=tf.data.Dataset.from_tensor_slices((x_test,y_test)).map(preprocess).cache().batch(256).prefetch(tf.data.AUTOTUNE)

x.shape  (28, 28)
x.shape  (28, 28)


In [18]:
for img,label in train_ds.take(1):
    print(img.shape)
    print(label.shape)

(256, 28, 28, 1)
(256, 10)


# Creating a simple model

In [19]:
inputs=tf.keras.Input(shape=(28,28,1))
x=Conv2D(64,3,activation='relu')(inputs)
x=Conv2D(128,3,activation='relu')(x)
x=Flatten()(x)
outputs=Dense(10,activation='softmax')(x)
model=tf.keras.Model(inputs,outputs,name='CNN_MODEL')

## Defining Loss function, Optimizer and accuracy functions.

In [20]:
loss_fnc=tf.keras.losses.CategoricalCrossentropy(from_logits=False)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
acc_fnc=tf.keras.metrics.categorical_accuracy

In [21]:
def custom_acc(true_label,pred_label):
    true=tf.argmax(true_label,axis=1)
    pred=tf.argmax(pred_label,axis=1)
    acc=tf.reduce_sum(tf.cast(tf.equal(true,pred),tf.float32))/tf.cast(true.shape[0],tf.float32)
    """
    Above three lines is same as using: tf.reduce_mean(tf.keras.metrics.categorical_accuracy(true_label,pred_label)).
    In this way we can define our other loss.
    """
    return acc

In [22]:
epochs=5



In [23]:

for epoch_num in range(epochs):

    for batch_num,(img,true_label) in enumerate(train_ds):

        start_time=time.time()

        with tf.GradientTape() as tape:

            pred_label=model(img)
            
            loss=loss_fnc(true_label,pred_label)


        grad=tape.gradient(loss,model.trainable_weights)
        optimizer.apply_gradients(zip(grad,model.trainable_weights)) # learn about in which condition we use update state. 
        
    
        

    
        acc=custom_acc(true_label,pred_label)
        

        if batch_num %100==0:
            print('Epoch :',epoch_num)
            print('Batch Num : {}\tLoss Value: {:.3f}\t Accuracy: {:.3f}'.format(batch_num,loss,acc))
    
total_training_time=time.time()-start_time

print('Total training time: {:.3f}'.format(total_training_time))

Epoch : 0
Batch Num : 0	Loss Value: 2.307	 Accuracy: 0.109
Epoch : 0
Batch Num : 100	Loss Value: 0.128	 Accuracy: 0.973
Epoch : 0
Batch Num : 200	Loss Value: 0.084	 Accuracy: 0.980
Epoch : 1
Batch Num : 0	Loss Value: 0.097	 Accuracy: 0.973
Epoch : 1
Batch Num : 100	Loss Value: 0.069	 Accuracy: 0.988
Epoch : 1
Batch Num : 200	Loss Value: 0.063	 Accuracy: 0.984
Epoch : 2
Batch Num : 0	Loss Value: 0.078	 Accuracy: 0.977
Epoch : 2
Batch Num : 100	Loss Value: 0.048	 Accuracy: 0.980
Epoch : 2
Batch Num : 200	Loss Value: 0.059	 Accuracy: 0.980
Epoch : 3
Batch Num : 0	Loss Value: 0.058	 Accuracy: 0.984
Epoch : 3
Batch Num : 100	Loss Value: 0.022	 Accuracy: 0.988
Epoch : 3
Batch Num : 200	Loss Value: 0.043	 Accuracy: 0.988
Epoch : 4
Batch Num : 0	Loss Value: 0.037	 Accuracy: 0.984
Epoch : 4
Batch Num : 100	Loss Value: 0.020	 Accuracy: 0.996
Epoch : 4
Batch Num : 200	Loss Value: 0.044	 Accuracy: 0.996
Total training time: 0.029


In [24]:
## Evaluating the loss 
accs=[]

for img,label in test_ds:
    pred=model.predict(img)
    acc=custom_acc(label,pred)
    accs.append(acc)

print('Average Accuracy: {:.3f}'.format(tf.reduce_mean(accs)))

Average Accuracy: 0.983


The above training loop is training eagerly, which is slower. We can use tf.function to speed up the training process

In [25]:
@tf.function
def train_step(inputs,label):
    with tf.GradientTape() as tape:
        pred_label=model(inputs)
        
        loss=loss_fnc(label,pred_label)

    grad=tape.gradient(loss,model.trainable_weights)
    optimizer.apply_gradients(zip(grad,model.trainable_weights))

    acc=custom_acc(pred_label=pred_label,true_label=label)

    return acc,loss


In [26]:

for epoch_num in range(epochs):

    for batch_num,(img,true_label) in enumerate(train_ds):

        start_time=time.time()

        acc,loss=train_step(img,true_label)
        

        if batch_num %100==0:
            print('Epoch :',epoch_num)
            print('Batch Num : {}\tLoss Value: {:.3f}\t Accuracy: {:.3f}'.format(batch_num,loss,acc))
    




Epoch : 0
Batch Num : 0	Loss Value: 0.020	 Accuracy: 0.992
Epoch : 0
Batch Num : 100	Loss Value: 0.008	 Accuracy: 1.000
Epoch : 0
Batch Num : 200	Loss Value: 0.035	 Accuracy: 0.996
Epoch : 1
Batch Num : 0	Loss Value: 0.021	 Accuracy: 0.996
Epoch : 1
Batch Num : 100	Loss Value: 0.008	 Accuracy: 0.996
Epoch : 1
Batch Num : 200	Loss Value: 0.024	 Accuracy: 0.988
Epoch : 2
Batch Num : 0	Loss Value: 0.010	 Accuracy: 0.996
Epoch : 2
Batch Num : 100	Loss Value: 0.026	 Accuracy: 0.992
Epoch : 2
Batch Num : 200	Loss Value: 0.012	 Accuracy: 0.996
Epoch : 3
Batch Num : 0	Loss Value: 0.011	 Accuracy: 0.992
Epoch : 3
Batch Num : 100	Loss Value: 0.006	 Accuracy: 1.000
Epoch : 3
Batch Num : 200	Loss Value: 0.013	 Accuracy: 0.992
Epoch : 4
Batch Num : 0	Loss Value: 0.007	 Accuracy: 0.996
Epoch : 4
Batch Num : 100	Loss Value: 0.014	 Accuracy: 0.996
Epoch : 4
Batch Num : 200	Loss Value: 0.011	 Accuracy: 0.996
Total training time: 0.029


It can be obseved that using tf.function() training time reduce by 10 sec.