### Descriptions:
This notebook is for learning neural network in classification handwritten number in mnist dataset. Tensorflow should be used and the model is built on low-level api of tensorflow. Custom training loop is also taken with gradient tape

In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import numpy as np
from tqdm import tqdm

### Get mnist dataset

In [2]:
(x_train,y_train),(x_test,y_test) = mnist.load_data()
x_train = np.array(x_train,dtype=np.float32)
x_test = np.array(x_test,dtype=np.float32)


## Normalize ,flatten input and one-hot coding label

In [3]:
x_train = x_train/255.
x_test = x_test/255.
x_train = x_train.reshape(x_train.shape[0],-1)
x_test = x_test.reshape(x_test.shape[0],-1)
y_train = tf.keras.utils.to_categorical(y_train,num_classes = 10)
y_test = tf.keras.utils.to_categorical(y_test,num_classes = 10)

## Get dataset with batchsize

In [4]:
batch_size = 64
train_dataset = tf.data.Dataset.from_tensor_slices((x_train,y_train))
train_dataset = train_dataset.batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test,y_test))
test_dataset = test_dataset.batch(batch_size)

## Build weights,biases -> model
the model contains three dense layers , the first layer with shape 784x1024 , the second layer with 1024x10 nodes followed by the softmax activation

In [5]:
initializer = tf.initializers.HeNormal()
weight_shapes = {
    'w1': [784,1024],
    'w2': [1024,10]
}
biases_shapes = {
    'b1': [1024],
    'b2': [10]
}
def get_trainable_params(shape,name):
    return tf.Variable(initializer(shape),name=name,trainable=True,dtype=tf.float32)

trainable_params = []
trainable_params.append(get_trainable_params(weight_shapes['w1'],'w1'))
trainable_params.append(get_trainable_params(biases_shapes['b1'],'b1'))
trainable_params.append(get_trainable_params(weight_shapes['w2'],'w2'))
trainable_params.append(get_trainable_params(biases_shapes['b2'],'b2'))

def model(x):
    # Reshape input from (batchsize,28,28) -> (batchsize,784)
    x = tf.matmul(x,trainable_params[0])
    x = tf.add(x,trainable_params[1])
    x = tf.nn.relu(x)
    x = tf.nn.dropout(x,rate = 0.3)
    
    x = tf.matmul(x,trainable_params[2])
    x = tf.add(x,trainable_params[3])
    x = tf.nn.softmax(x)
    return x

### Define loss and optimizer

In [6]:
learning_rate = 0.001
def loss(pred,target):
    return tf.reduce_mean(tf.losses.categorical_crossentropy(target,pred))

optimizer = tf.optimizers.Adam(learning_rate)

### Define train_step and test step function

In [7]:
def train_step(data,target):
    with tf.GradientTape() as tape:
        pred = model(data)
        train_loss = loss(pred,target)
    grads = tape.gradient(train_loss,trainable_params)
    optimizer.apply_gradients(zip(grads,trainable_params))
    pred_label = tf.argmax(pred,axis=1)
    target_label = tf.argmax(target,axis=1)
    check_equal = (tf.cast(pred_label,tf.int64)) == (tf.cast(target_label,tf.int64))
    correct = tf.reduce_sum(tf.cast(check_equal,tf.float32))
    return train_loss,correct

def test_step(data,target):
    pred = model(data)
    test_loss = loss(pred,target)
    pred_label = tf.argmax(pred,axis=1)
    target_label = tf.argmax(target,axis=1)
    check_equal = (tf.cast(pred_label,tf.int64)) == (tf.cast(target_label,tf.int64))
    correct = tf.reduce_sum(tf.cast(check_equal,tf.float32))
    return test_loss,correct

## Train and test

In [8]:
epochs = 10
for epoch in range(epochs):
    nums_train = 0 
    nums_test = 0
    train_corrects = 0 
    test_corrects =0
    train_losses = []
    test_losses = []
    for data,target in tqdm(train_dataset):
        train_loss,correct = train_step(data,target)
        nums_train += len(data)
        train_corrects += correct
        train_losses.append(train_loss)
#         break
    
    for data,target in tqdm(test_dataset):
        test_loss,correct = test_step(data,target)
        nums_test += len(data)
        test_corrects += correct
        test_losses.append(test_loss)
#         break
    print(test_corrects,train_corrects)
    print("EPOCH : {}/{}, train_loss : {},train_acc : {}, test_loss :{}, test_acc : {}".format(epoch+1,epochs,sum(train_losses)/len(train_losses),train_corrects/nums_train*100.,sum(test_losses)/len(test_losses),test_corrects/nums_test*100.))
    

100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:20<00:00, 46.75it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 157/157 [00:01<00:00, 126.70it/s]
  0%|                                                                                          | 0/938 [00:00<?, ?it/s]

tf.Tensor(9563.0, shape=(), dtype=float32) tf.Tensor(55686.0, shape=(), dtype=float32)
EPOCH : 1/10, train_loss : 0.2391355037689209,train_acc : 92.80999755859375, test_loss :0.13886849582195282, test_acc : 95.6300048828125


100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:19<00:00, 47.88it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 157/157 [00:01<00:00, 128.32it/s]
  0%|                                                                                          | 0/938 [00:00<?, ?it/s]

tf.Tensor(9655.0, shape=(), dtype=float32) tf.Tensor(58065.0, shape=(), dtype=float32)
EPOCH : 2/10, train_loss : 0.10584694147109985,train_acc : 96.7750015258789, test_loss :0.10322239995002747, test_acc : 96.55000305175781


100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:19<00:00, 48.31it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 157/157 [00:01<00:00, 128.36it/s]
  0%|                                                                                          | 0/938 [00:00<?, ?it/s]

tf.Tensor(9617.0, shape=(), dtype=float32) tf.Tensor(58637.0, shape=(), dtype=float32)
EPOCH : 3/10, train_loss : 0.07251103222370148,train_acc : 97.72833251953125, test_loss :0.12267841398715973, test_acc : 96.17000579833984


100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:19<00:00, 48.70it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 157/157 [00:01<00:00, 128.31it/s]
  0%|                                                                                          | 0/938 [00:00<?, ?it/s]

tf.Tensor(9697.0, shape=(), dtype=float32) tf.Tensor(58946.0, shape=(), dtype=float32)
EPOCH : 4/10, train_loss : 0.05555057153105736,train_acc : 98.24333190917969, test_loss :0.10273538529872894, test_acc : 96.97000122070312


100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:19<00:00, 48.54it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 157/157 [00:01<00:00, 128.32it/s]
  0%|                                                                                          | 0/938 [00:00<?, ?it/s]

tf.Tensor(9725.0, shape=(), dtype=float32) tf.Tensor(59174.0, shape=(), dtype=float32)
EPOCH : 5/10, train_loss : 0.04254589229822159,train_acc : 98.62333679199219, test_loss :0.08969199657440186, test_acc : 97.25


100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:19<00:00, 48.38it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 157/157 [00:01<00:00, 128.69it/s]
  0%|                                                                                          | 0/938 [00:00<?, ?it/s]

tf.Tensor(9754.0, shape=(), dtype=float32) tf.Tensor(59273.0, shape=(), dtype=float32)
EPOCH : 6/10, train_loss : 0.03627990186214447,train_acc : 98.788330078125, test_loss :0.08902158588171005, test_acc : 97.53999328613281


100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:19<00:00, 48.70it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 157/157 [00:01<00:00, 128.95it/s]
  0%|                                                                                          | 0/938 [00:00<?, ?it/s]

tf.Tensor(9774.0, shape=(), dtype=float32) tf.Tensor(59358.0, shape=(), dtype=float32)
EPOCH : 7/10, train_loss : 0.03129735589027405,train_acc : 98.93000030517578, test_loss :0.08183076977729797, test_acc : 97.73999786376953


100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:19<00:00, 47.39it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 157/157 [00:01<00:00, 129.55it/s]
  0%|                                                                                          | 0/938 [00:00<?, ?it/s]

tf.Tensor(9766.0, shape=(), dtype=float32) tf.Tensor(59484.0, shape=(), dtype=float32)
EPOCH : 8/10, train_loss : 0.026197129860520363,train_acc : 99.13999938964844, test_loss :0.08678855746984482, test_acc : 97.65999603271484


100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:19<00:00, 48.35it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 157/157 [00:01<00:00, 128.37it/s]
  0%|                                                                                          | 0/938 [00:00<?, ?it/s]

tf.Tensor(9793.0, shape=(), dtype=float32) tf.Tensor(59493.0, shape=(), dtype=float32)
EPOCH : 9/10, train_loss : 0.024779407307505608,train_acc : 99.1550064086914, test_loss :0.07878308743238449, test_acc : 97.93000030517578


100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:19<00:00, 47.77it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 157/157 [00:01<00:00, 128.33it/s]


tf.Tensor(9766.0, shape=(), dtype=float32) tf.Tensor(59530.0, shape=(), dtype=float32)
EPOCH : 10/10, train_loss : 0.02162688598036766,train_acc : 99.21666717529297, test_loss :0.08770375698804855, test_acc : 97.65999603271484
