<a href="https://colab.research.google.com/github/aneeshcheriank/approaching-any-machine-learning-problem/blob/main/GradientTape.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten
import numpy as np

# data
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
# give the color dim to the images
x_train = np.expand_dims(x_train, 3)
x_test = np.expand_dims(x_test, 3)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [5]:
# model
model = Sequential([
    Conv2D(16, (2, 2), activation='relu', input_shape=(28, 28, 1)),
    MaxPool2D((2, 2)),
    Conv2D(64, (2, 2), activation='relu'),
    MaxPool2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(32, activation='relu'),
    Dense(10, activation='softmax')
])

loss = tf.losses.SparseCategoricalCrossentropy()
optimizer = tf.optimizers.Adam()

In [12]:
EPOCHS = 100
for epoch in range(EPOCHS):
  # gradient tape
  with tf.GradientTape() as t:
    pred = model(x_train)
    l = loss(
      y_train, pred # pytorch the order is (pred, actual)
    )

  # calculate the gradients
  grads = t.gradient(
    l, model.trainable_variables
  )
  # apply the gradients on the model variables
  optimizer.apply_gradients(
    zip(
        grads, 
        model.trainable_variables
       )
  )
  if (epoch+1)%2 == 0:
    print(f'epoch: {epoch+1}/{EPOCHS} loss = {l.numpy():.4f}')

epoch: 2/100 loss = 11.3049
epoch: 4/100 loss = 8.0784
epoch: 6/100 loss = 5.0259
epoch: 8/100 loss = 3.7428
epoch: 10/100 loss = 2.4021
epoch: 12/100 loss = 1.9598
epoch: 14/100 loss = 1.9526
epoch: 16/100 loss = 1.5451
epoch: 18/100 loss = 1.1819
epoch: 20/100 loss = 0.9817
epoch: 22/100 loss = 0.8455
epoch: 24/100 loss = 0.7566
epoch: 26/100 loss = 0.7357
epoch: 28/100 loss = 0.7027
epoch: 30/100 loss = 0.6352
epoch: 32/100 loss = 0.5784
epoch: 34/100 loss = 0.5439
epoch: 36/100 loss = 0.5102
epoch: 38/100 loss = 0.4722
epoch: 40/100 loss = 0.4408
epoch: 42/100 loss = 0.4188
epoch: 44/100 loss = 0.3979
epoch: 46/100 loss = 0.3739
epoch: 48/100 loss = 0.3509
epoch: 50/100 loss = 0.3303
epoch: 52/100 loss = 0.3122
epoch: 54/100 loss = 0.2956
epoch: 56/100 loss = 0.2792
epoch: 58/100 loss = 0.2643
epoch: 60/100 loss = 0.2503
epoch: 62/100 loss = 0.2369
epoch: 64/100 loss = 0.2251
epoch: 66/100 loss = 0.2133
epoch: 68/100 loss = 0.2015
epoch: 70/100 loss = 0.1911
epoch: 72/100 loss = 0.

In [34]:
# evaluate the model
def accuracy(model, x_test, y_test):
  '''
  predict the accuracy of the model
  input:
  model, x_test, y_test
  output: accuracy in percentage
  '''
  pred = model(x_test)
  pred = tf.math.argmax(pred, axis=1)
  correct = (pred == y_test).numpy().sum()
  n_samples = x_test.shape[0]
  acc = correct/n_samples * 100

  return acc

accuracy(model, x_test, y_test)


96.5

In [32]:
pred = model(x_test)
pred = tf.math.argmax(pred, axis=1)
correct = (pred == y_test).numpy().sum()
n_samples = x_test.shape[0]

acc = correct/n_samples * 100
acc

96.5

## Dataloader in Tensorflow

In [2]:
BUFFER = 1024
BATCH_SIZE = 128

train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))\
.shuffle(buffer_size=BUFFER)\
.batch(BATCH_SIZE)

In [3]:
# to test the data loader
iterator = iter(train_data)
data, label = iterator.next()

print(f'data shape: {data.shape}')
print(f'label shape: {label.shape}')

data shape: (128, 28, 28, 1)
label shape: (128,)


## Batch Training

In [None]:
EPOCHS = 100
for epoch in range(EPOCHS):
  # batch training
  total_loss = 0
  for x_batch, y_batch in train_data:
    # gradient tape
    with tf.GradientTape() as t:
      pred = model(x_batch)
      l = loss(
        y_batch, pred # pytorch the order is (pred, actual)
      )

    # calculate the gradients
    grads = t.gradient(
      l, model.trainable_variables
    )
    # apply the gradients on the model variables
    optimizer.apply_gradients(
      zip(
          grads, 
          model.trainable_variables
         )
    )
    total_loss += l.numpy()
  if (epoch+1)%2 == 0:
    print(f'epoch: {epoch+1}/{EPOCHS} loss = {total_loss:.4f}')

epoch: 2/100 loss = 34.3231
epoch: 4/100 loss = 17.2616
epoch: 6/100 loss = 11.5943
epoch: 8/100 loss = 8.4373
epoch: 10/100 loss = 7.3869
epoch: 12/100 loss = 6.5979
epoch: 14/100 loss = 6.0831
epoch: 16/100 loss = 3.6101
epoch: 18/100 loss = 5.2228
epoch: 20/100 loss = 2.9344
epoch: 22/100 loss = 3.3705
epoch: 24/100 loss = 4.1665
epoch: 26/100 loss = 4.1016
