# Fashion MNIST using a custom training loop
we will build a custom training loop and a validation loop to traing a model on the Fashion mnist ds

https://github.com/sasidhar-programmer/Tensorflow_Advance_Techniques/blob/main/2-custom_and_distributed_training/week-2/C2_W2_Lab_2_training-categorical.ipynb

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools
from tqdm import tqdm
import tensorflow_datasets as tfds
import matplotlib.ticker as mticker

In [7]:
train_data, info = tfds.load("fashion_mnist", split = "train", with_info = True, download=True)

test_data = tfds.load("fashion_mnist", split = "test", download=True)

class_names = ["T-shirt/top", "Trouser/pants", "Pullover shirt", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]


mples...:  47%|████▋     | 28433/60000 [00:17<00:37, 847.78 examples/s][A
Generating train examples...:  48%|████▊     | 28560/60000 [00:17<00:33, 941.67 examples/s][A
Generating train examples...:  48%|████▊     | 28660/60000 [00:17<00:32, 949.74 examples/s][A
Generating train examples...:  48%|████▊     | 28785/60000 [00:17<00:30, 1022.20 examples/s][A
Generating train examples...:  48%|████▊     | 28898/60000 [00:17<00:29, 1039.73 examples/s][A
Generating train examples...:  48%|████▊     | 29006/60000 [00:18<00:36, 848.86 examples/s][A
Generating train examples...:  48%|████▊     | 29099/60000 [00:18<00:35, 866.00 examples/s][A
Generating train examples...:  49%|████▊     | 29229/60000 [00:18<00:32, 940.65 examples/s][A
Generating train examples...:  49%|████▉     | 29355/60000 [00:18<00:30, 1016.11 examples/s][A
Generating train examples...:  49%|████▉     | 29471/60000 [00:18<00:28, 1054.21 examples/s][A
Generating train examples...:  49%|████▉     | 29581/60000 [00:18<

In [12]:
train_data

<PrefetchDataset shapes: {image: (28, 28, 1), label: ()}, types: {image: tf.uint8, label: tf.int64}>

In [13]:
def format_image(data):        
    image = data["image"]
    image = tf.reshape(image, [-1])
    image = tf.cast(image, 'float32')
    image = image / 255.0
    return image, data["label"]

train_data = train_data.map(format_image)
test_data = test_data.map(format_image)

In [25]:
def base_model():
    flattened_image_dim = 28*28
    inputs = tf.keras.layers.Input(shape=(flattened_image_dim, ), name="flat_input")
    x = tf.keras.layers.Dense(64, activation='relu', name='dense_1')(inputs)
    x = tf.keras.layers.Dense(64, activation='relu', name='dense_2')(x)
    output = tf.keras.layers.Dense(10, activation='softmax', name='prediction')(x)
    return tf.keras.models.Model(inputs=inputs, outputs=output)



In [15]:
optimizer = tf.keras.optimizers.Adam()
loss_obj = tf.keras.losses.SparseCategoricalCrossentropy()


# these metrics show us stats for every epoch, we have to gather data during training and validation in the epoch, and at the end show the metrics. Before the next epoch we need to clear the metrics for the next epoch training
train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()


### Building Training Loop
In this section you build your training loop consisting of training and validation sequences.

The core of training is using the model to calculate the current_prediction on specific set of inputs and compute loss (in this case sparse categorical crossentropy) by comparing the predicted outputs to the true outputs. You then update the trainable weights using the optimizer algorithm chosen. Optimizer algorithm requires your computed loss and partial derivatives of loss with respect to each of the trainable weights to make updates to the same.

You use gradient tape to calculate the gradients and then update the model trainable weights using the optimizer.

In [17]:
def apply_gradient(optimizer, model, x, y):
    with tf.GradientTape() as t:
        current_prediction = model(x)
        loss_val =loss_obj(y_true=y, y_pred= current_prediction)
        # every layer has x trainable parameters and we find the derivative of the loss with respect to each of those trainable weights
    gradients = tape.gradient(loss_val, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights)) # every neuron has a weight and a bias so 2 parameters, but when we do the gradient we get two seprate arrays, so the zip() puts them together so the first item in the w gradient and the first item in the b gradient will be zipped together and be applied to be the first model's new trainable param

    return current_prediction, loss_val



In [30]:
# one epoch means that we ran through ALL of the batches and saw all the training data and adjusted the weights based on that, and we use `update_state` to update the metrics 


def train_data_for_one_epoch():
    lossess = []
    total_num = len(list(enumerate(train_data)))
    progress_bar=tqdm(total=total_num,position=0,leave=True,bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} ')
    for step, (x_batch_train, y_batch_train) in enumerate(train_data):
        prediction, loss_val = apply_gradient(optimizer, model, x_batch_train, y_batch_train)
        losses.append(loss_val)
        train_acc_metric(y_batch_train, prediction)

        progress_bar.set_description("Training loss for step %s: %.4f " % (int(step), float(loss_val)) )
        progress_bar.update()

    return losses
    


def perform_validation():
    lossess = []
    for x_val, y_val in test_data:
        pred = model(x_val)
        loss = loss_obj(y_true = y_val, y_pred=pred)
        lossess.append(loss)
        val_acc_metric(y_val, pred)
    return lossess

1. Perform training over all batches of training data.
2. Get values of metrics.
3. Perform validation to calculate loss and update validation metrics on test data.
4. Reset the metrics at the end of epoch.
5. Display statistics at the end of each epoch.

In [31]:
model = base_model()

epochs = 10
epochs_val_losses, epochs_train_losses = [], []

for epoch in range(epochs):
    print("Starting epoch %d"%epoch)

    losses_train = train_data_for_one_epoch()
    train_acc = train_acc_metric.result()

    losses_val = perform_validation()
    val_acc = val_acc_metric.result()
    
    losses_train_mean = np.mean(losses_train)
    losses_val_mean = np.mean(losses_val)
    epochs_train_losses = np.mean(losses_train_mean)
    epochs_val_losses.append(losses_val_mean)

    print('\n Epoch %s: Train loss: %.4f  Validation Loss: %.4f, Train Accuracy: %.4f, Validation Accuracy %.4f' % (epoch, float(losses_train_mean), float(losses_val_mean), float(train_acc), float(val_acc)))
  
    train_acc_metric.reset_states()
    val_acc_metric.reset_states()

Starting epoch 0
  0%|          | 0/60000

InvalidArgumentError: Input tensor must be at least 2D: [64] [Op:BiasAdd]

In [None]:
# evaluate the model

