# **Multiclass Classification using Keras and TensorFlow on Food-101 Dataset**
![alt text](https://www.vision.ee.ethz.ch/datasets_extra/food-101/static/img/food-101.jpg)

### **Installing Prerequisites**

In [None]:
!pip install torch-lr-finder

In [None]:
!pip install keras-adabound

In [None]:
#!pip install tensorboard

In [None]:
!pip install torch > 1.4

In [None]:
!pip install torchsummary

### **Importing Packages**

In [None]:
import tensorflow as tf
from tqdm import tqdm
from pathlib import Path
from fastai.vision import *
from fastai.metrics import error_rate
import matplotlib.image as img
from PIL import Image
%matplotlib inline
import numpy as np
from collections import defaultdict
import collections
from shutil import copy
from shutil import copytree, rmtree
import tensorflow.keras.backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import random
import time
import datetime
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.regularizers import l2
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras.applications.inception_v3 import InceptionV3
# from tensorflow.keras.applications.nasnet import NasNetLarge
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D, AveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.regularizers import l2
from tensorflow import keras
from tensorflow.keras import models
import cv2
from keras_adabound import AdaBound
import tensorboard as tb
from torchvision.models.resnet import resnet50
import torchvision
import torch
import torchvision.models as modelst
#from torch.utils.tensorboard import SummaryWriter
# import torchvision.models as models
#from tensorboardX import SummaryWriter
from torchsummary import summary

In [None]:
#!pip uninstall tensorboard --yes

In [None]:
# Check if GPU is enabled: print the TensorFlow version and the GPU device
# name that TensorFlow reports for this session.
print(tf.__version__)
print(tf.test.gpu_device_name())

In [None]:
%cd /kaggle/input/food-101/

### Use the below two blocks of code if running this file on Google Colab

In [None]:
# Should be used while working on Colab

# # Helper function to download data and extract
# def get_data_extract():
#   if "food-101" in os.listdir():
#     print("Dataset already exists")
#   else:
#     print("Downloading the data...")
#     !wget http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz
#     print("Dataset downloaded!")
#     print("Extracting data..")
#     !tar xzvf food-101.tar.gz
#     print("Extraction done!")

In [None]:
# Download data and extract it to folder
# Uncomment this below line if you are on Colab

#get_data_extract()

In [None]:
# Check the extracted dataset folder
!ls food-101/

**meta** folder contains the text files - train.txt and test.txt  
**train.txt** contains the list of images that belong to training set  
**test.txt** contains the list of images that belong to test set  
**classes.txt** contains the list of all classes of food

### **Visualize random image from each of the 101 classes**

In [None]:
# # Visualize the data, showing one image per class from 101 classes
# rows = 17
# cols = 6
# fig, ax = plt.subplots(rows, cols, figsize=(25,25))
# fig.suptitle("Showing one random image from each class", y=1.05, fontsize=24) # Adding  y=1.05, fontsize=24 helped me fix the suptitle overlapping with axes issue
# data_dir = "food-101/images/"
# foods_sorted = sorted(os.listdir(data_dir))
# food_id = 0
# for i in range(rows):
#   for j in range(cols):
#     try:
#       food_selected = foods_sorted[food_id] 
#       food_id += 1
#     except:
#       break
#     if food_selected == '.DS_Store':
#         continue
#     food_selected_images = os.listdir(os.path.join(data_dir,food_selected)) # returns the list of all files present in each food category
#     food_selected_random = np.random.choice(food_selected_images) # picks one food item from the list as choice, takes a list and returns one random item
#     img = plt.imread(os.path.join(data_dir,food_selected, food_selected_random))
#     ax[i][j].imshow(img)
#     ax[i][j].set_title(food_selected, pad = 10)
    
# plt.setp(ax, xticks=[],yticks=[])
# plt.tight_layout()
# # https://matplotlib.org/users/tight_layout_guide.html


### **Split the image data into train and test using train.txt and test.txt**
* We first read the textfile with the list of images that belong to either train or test
* We then create a train or test directory
* Finally, we copy the images from the ***Images*** folder to the ***train/test*** folder

In [None]:
# Helper method to split dataset into train and test folders
def prepare_data(filepath, src,dest):
  """Copy the images listed in a split file into per-class folders.

  Args:
    filepath: Text file with one ``<class>/<image_id>`` entry per line
      (without file extension), e.g. Food-101's ``meta/train.txt``.
    src: Directory that holds one sub-directory of ``.jpg`` images per class.
    dest: Output directory; ``dest/<class>/`` is created when missing.
  """
  classes_images = defaultdict(list)
  with open(filepath, 'r') as txt:
    # Iterate the file lazily instead of materialising it with readlines().
    for line in txt:
      entry = line.strip()
      if not entry:
        # Blank (e.g. trailing) lines carry no class/image pair.
        continue
      parts = entry.split('/')
      classes_images[parts[0]].append(parts[1] + '.jpg')

  for food, images in classes_images.items():
    print("\nCopying images into ",food)
    class_dir = os.path.join(dest, food)
    # exist_ok avoids the check-then-create race and makes re-runs harmless.
    os.makedirs(class_dir, exist_ok=True)
    for i in images:
      copy(os.path.join(src, food, i), os.path.join(class_dir, i))
  print("Copying Done!")

In [None]:
# Prepare the train dataset: copy the images listed in train.txt from
# food-101/images into per-class folders under the relative directory 'train'.
# The working directory is switched to / first, so that destination is /train.
%cd /
print("Creating train data...")
prepare_data('/kaggle/input/food-101/food-101/meta/train.txt', '/kaggle/input/food-101/food-101/images', 'train')

In [None]:
# Prepare the test dataset: copy the images listed in test.txt from
# food-101/images into per-class folders under the relative directory 'test'
# (resolved against the current working directory).
print("Creating test data...")
prepare_data('/kaggle/input/food-101/food-101/meta/test.txt', '/kaggle/input/food-101/food-101/images', 'test')

#### A simple lambda function appends the .jpg extension to every image name listed in the .txt file; the resulting list is then used to count the samples and to look the images up in the ***images*** folder

In [None]:
# Read train.txt without a header row and append '.jpg' to every entry so the
# values match the image file names.
train_df = pd.read_csv('/kaggle/input/food-101/food-101/meta/train.txt', header=None).apply(lambda x : x + '.jpg')
print("Total number of samples in train folder: ", len(train_df))
# NOTE(review): ImageItemList is expected to come from the `fastai.vision`
# star-import; later fastai 1.x releases name this class ImageList — confirm
# against the installed version.
train_image_list = ImageItemList.from_df(train_df, '/kaggle/input/food-101/food-101/images')

In [None]:
print(train_df.iloc[1,])
train_image_list[1]

In [None]:
# Read test.txt without a header row and append '.jpg' to every entry so the
# values match the image file names.
test_df = pd.read_csv('/kaggle/input/food-101/food-101/meta/test.txt', header=None).apply(lambda x : x + '.jpg')
print("Total number of samples in test folder:", len(test_df))
# NOTE(review): ImageItemList is expected to come from the `fastai.vision`
# star-import; later fastai 1.x releases name this class ImageList — confirm
# against the installed version.
test_image_list = ImageItemList.from_df(test_df, '/kaggle/input/food-101/food-101/images')

In [None]:
print(test_df.iloc[80,])
test_image_list[80]

# Pre-process Images
## Objectives
1. Implement preprocessing codes for each model. 
2. Augment the dataset. 
3. Preview the preprocessed dataset. 

### **Pre-processing Steps for TensorFlow (Keras) Model**
#### The preprocessing steps are done to make sure that the model doesn't overfit the data. In the block below, we have augmented the images by performing random transformations on them. This is extremely useful if the dataset is small in size, but we do it anyways to reduce the chances of overfitting.

In [None]:
# Training images: multiply pixel values by 1/255 and apply the random
# augmentations configured below.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    # geometric augmentation
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.3,
    zoom_range=0.25,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation images: rescaling only, no augmentation is configured.
valid_datagen = ImageDataGenerator(rescale=1. / 255)

In [None]:
batch_size = 128

# Training batches: read from /train/, resized to 331x331 (the input size the
# DenseNet169 base is built with) and shuffled.
train_data = train_datagen.flow_from_directory(
    directory='/train/',
    target_size=(331, 331),
    batch_size=batch_size,
    shuffle=True,
)

# Validation batches: read from /test/ at the same size, in a fixed order.
valid_data = valid_datagen.flow_from_directory(
    directory='/test/',
    target_size=(331, 331),
    batch_size=batch_size,
    shuffle=False,
)

# Training different models
## Objectives
1. Obtain 90% accuracy in all the models trained. 
2. You're free to use any techniques for training such as transfer learning, knowledge transfer, etc. 
3. The models should not overfit the training dataset. 
4. Measure the performance in terms of accuracy and speed of each model. 
5. Visualize the training and testing performance using TensorBoard. 

#### Optional:
1. Apply weight quantization to increase the speed of the models. 

#### Before starting with the training, I decided that it'd be better to use the models that have already been pretrained on Imagenet. So I decided to go with the Transfer Learning approach here. Below are the points in the order I followed them while finalising the model.
1. I first headed over to the Keras applications page to take a look at the different models and their sizes. The accuracies of all the models are also listed there.
2. VGG16 and VGG19 were the heaviest models, followed by NasNetLarge, but NasNetLarge showed a much better accuracy compared to them.
3. MobileNetv2, DenseNet169, and NasNetMobile were the smallest models. However, the accuracy of DenseNet169 was the highest among these three.
4. I first did a simple comparison between DenseNet169 and MobileNetv2 to find out which one performed better. DenseNet169 performed better. MobileNetv2 ran in approximately 3h 8m whereas DenseNet169 ran for 3h 17m. DenseNet169's accuracy was around 4% higher than MobileNetv2's, so I performed my subsequent experiments using DenseNet169.
5. After this, I ran two instances on this model, one with SGD (with momentum) and another one with the ADAM optimizer. Their learning rates were kept the same. I found out that ADAM provided better accuracy and hence chose to proceed forward with this optimizer.
6. I have also added ***Dropout*** as another form of regularization, again, to prevent overfitting of the model. I am also planning to add L2 regularization at the end, I have read that these two perform well together in certain cases.
7. The reason why I have commented out the ***Tensorboard*** operations is because I was facing some issues while using it on Kaggle and didn't have the time to sort these issues out. However, they worked just fine on Google Colab.

In [None]:
os.getcwd()

In [None]:
# # Load the extension and start TensorBoard

# %load_ext tensorboard.notebook
# %tensorboard --logdir logs

In [None]:
# Transfer-learning model: an ImageNet-pretrained DenseNet169 backbone without
# its classification top, followed by a small head for the 101 food classes.
base1 = tf.keras.applications.DenseNet169(input_shape=(331,331,3), include_top=False, weights='imagenet')
base1.trainable = False  # first stage: only the new head is trained
model1 = Sequential([
    base1,
    # Flatten() was the alternative considered here.
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(101, activation='softmax'),
])
# Other optimizers that were tried:
# opt = AdaBound(lr=1e-3)
# opt = SGD(lr=0.001, momentum = 0.9)
# `learning_rate` replaces the deprecated `lr` keyword of tf.keras optimizers.
opt = Adam(learning_rate=0.001)
# Name the accuracy metric 'acc' explicitly so it is logged as 'acc' / 'val_acc',
# the key the callbacks in this notebook monitor. With the plain 'accuracy'
# string, TF 2.x logs 'accuracy' / 'val_accuracy' and those monitors never match.
model1.compile(optimizer=opt,
               loss='categorical_crossentropy',
               metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc')])

In [None]:
model1.layers

In [None]:
# model1.summary()

In [None]:
# Callbacks for the first training stage. The accuracy-based callbacks watch
# 'val_acc'; that string has to match the key under which Keras logs the
# validation accuracy, otherwise they never trigger.
# Lower the learning rate when 'val_acc' has stopped improving (patience 1).
reduce_lr = ReduceLROnPlateau(monitor = 'val_acc',patience = 1,verbose = 1)
# Stop training when 'val_acc' has stopped improving (patience 5).
early_stop = EarlyStopping(monitor = 'val_acc',patience = 5,verbose = 1)
# TensorBoard logging (disabled). NOTE: if re-enabled, `datetime.now()` has to
# become `datetime.datetime.now()` because the notebook does `import datetime`,
# and TensorBoard has to be imported from tensorflow.keras.callbacks.
# log = "/logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
# tensorboard = TensorBoard(log_dir=log,write_graph = True, update_freq = 'batch')
# Save only the best model (judged by 'val_acc') to /kaggle/working/NNL.h5.
chkp1 = ModelCheckpoint('/kaggle/working/NNL.h5',monitor='val_acc',verbose=1,save_best_only=True)
# Log the training results to /kaggle/working/NNL.log.
csv_logger1 = CSVLogger('/kaggle/working/NNL.log')

In [None]:
%%time

# Stage 1: train the classification head on top of the frozen DenseNet169 base.
history1 = model1.fit(
    train_data,
    validation_data=valid_data,
    epochs=15,
    callbacks=[early_stop, reduce_lr, csv_logger1, chkp1],
)

In [None]:
model1.save('NNL.h5')

## Quantized Model
#### The quantization used here is quantization-aware training, chosen because it usually preserves model accuracy better than post-training quantization.

In [None]:
# import tensorflow_model_optimization as tfmot

# quantize_model = tfmot.quantization.keras.quantize_model

# # q_aware stands for for quantization aware.
# q_aware_model1 = quantize_model(model1)

# # `quantize_model` requires a recompile.
# q_aware_model1.compile(optimizer='adam',
#               loss=tf.keras.losses.CategoricalCrossentropy(),
#               metrics=['accuracy'])

# q_aware_model1.summary()

In [None]:
# reduce_lr = ReduceLROnPlateau(monitor = 'val_acc',patience = 1,verbose = 1)
# early_stop = EarlyStopping(monitor = 'val_acc',patience = 5,verbose = 1)
# # log = "/logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
# # tensorboard = TensorBoard(log_dir=log,write_graph = True, update_freq = 'batch')
# chkp2 = ModelCheckpoint('/kaggle/working/quant_NasNetLarge.h5',monitor='val_acc',verbose=1,save_best_only=True)
# csv_logger2 = CSVLogger('/kaggle/working/quant_NasNetLarge.log')

In [None]:
# %%time

# q_aware_model.fit(train_data, validation_data = valid_data, epochs=15, 
#                   callbacks=[early_stop, reduce_lr, csv_logger2, chkp2])

In [None]:
# q_aware_model.save('NasNetLarge_quantized.h5')

# Fine Tuning Weights
### After training the model, we can take the best weights from it and try unfreezing some layers of the transfer-learning base model to improve accuracy.

In [None]:
# Restore the weights from /kaggle/working/NNL.h5, the file the stage-1
# ModelCheckpoint (chkp1) writes its best model to.
model1.load_weights('/kaggle/working/NNL.h5')

In [None]:
# Number of top-level layers in model1. model1 is a Sequential wrapper, so the
# DenseNet169 base counts as a single entry here; the base's own layers are
# listed in base1.layers.
len(model1.layers)

In [None]:
# model1 is a Sequential wrapper whose first entry is the DenseNet169 base, so
# the layers to (un)freeze live inside that base model, not in model1.layers
# (which only holds the base plus the head layers).
backbone = model1.layers[0]
# Re-enable training on the base, then freeze its first 300 layers again so
# that only the later layers (and the head) are fine-tuned.
backbone.trainable = True
for layer in backbone.layers[:300]:
    layer.trainable = False
for layer in backbone.layers[300:]:
    layer.trainable = True

In [None]:
# Recompile so that changes to the layers' `trainable` flags take effect.
# The accuracy metric is named 'acc' explicitly so it is logged as
# 'acc' / 'val_acc', the key the callbacks in this notebook monitor; with the
# plain 'accuracy' string, TF 2.x logs 'val_accuracy' and they never match.
# NOTE(review): 'adam' uses the optimizer's default learning rate; fine-tuning
# unfrozen pretrained layers usually calls for a much smaller one — consider
# passing Adam(learning_rate=1e-5) here.
model1.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc')])

In [None]:
# Callbacks for the fine-tuning stage. The accuracy-based callbacks watch
# 'val_acc'; that string has to match the key under which Keras logs the
# validation accuracy, otherwise they never trigger.
# Lower the learning rate when 'val_acc' has stopped improving (patience 1).
reduce_lr = ReduceLROnPlateau(monitor = 'val_acc',patience = 1,verbose = 1)
# Stop training when 'val_acc' has stopped improving (patience 5).
early_stop = EarlyStopping(monitor = 'val_acc',patience = 5,verbose = 1)
# TensorBoard logging (disabled). NOTE: if re-enabled, `datetime.now()` has to
# become `datetime.datetime.now()` because the notebook does `import datetime`,
# and TensorBoard has to be imported from tensorflow.keras.callbacks.
# log = "/logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
# tensorboard = TensorBoard(log_dir=log,write_graph = True, update_freq = 'batch')
# Save only the best fine-tuned model (judged by 'val_acc') to /kaggle/working/NNL_finetune.h5.
chkp3 = ModelCheckpoint('/kaggle/working/NNL_finetune.h5',monitor='val_acc',verbose=1,save_best_only=True)
# Log the fine-tuning results to /kaggle/working/NNL_finetune.log.
csv_logger3 = CSVLogger('/kaggle/working/NNL_finetune.log')

In [None]:
%%time

# Fine-tuning run. Use the fine-tune logger and checkpoint (csv_logger3 / chkp3,
# writing NNL_finetune.log / NNL_finetune.h5) rather than the stage-1
# csv_logger1 / chkp1, which write to NNL.log / NNL.h5.
history1 = model1.fit(train_data, 
                    epochs=15,
                    validation_data = valid_data,
                    callbacks=[early_stop, reduce_lr, csv_logger3, chkp3])

In [None]:
model1.save('NNL_finetune.h5')

### **Visualize the accuracy and loss plots**

In [None]:
# def plot_accuracy(history,title):
#     plt.title(title)
#     plt.plot(history.history['acc'])
#     plt.plot(history.history['val_acc'])
#     plt.ylabel('accuracy')
#     plt.xlabel('epoch')
#     plt.legend(['train_accuracy', 'validation_accuracy'], loc='best')
#     plt.show()
# def plot_loss(history,title):
#     plt.title(title)
#     plt.plot(history.history['loss'])
#     plt.plot(history.history['val_loss'])
#     plt.ylabel('loss')
#     plt.xlabel('epoch')
#     plt.legend(['train_loss', 'validation_loss'], loc='best')
#     plt.show()


In [None]:
# plot_accuracy(history,'FOOD101-MobileNetv2')
# plot_loss(history,'FOOD101-MobileNetv2')

### **Preprocessing steps for PyTorch Model**

In [None]:
# train_transforms = torchvision.transforms.Compose([
#         torchvision.transforms.ColorJitter(brightness=0.1,contrast=0.1,saturation=0.1),
#         torchvision.transforms.RandomAffine(15),
#         torchvision.transforms.RandomHorizontalFlip(),
#         torchvision.transforms.RandomRotation(15),
#         torchvision.transforms.Resize((224,224)),
#         torchvision.transforms.ToTensor(),
#         torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
# ])
# valid_transforms = torchvision.transforms.Compose([
#         torchvision.transforms.Resize((224,224)),
#         torchvision.transforms.ToTensor(),
#         torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
# ])

In [None]:
# def get_device():
#     if torch.cuda.is_available():
#         return torch.device("cuda")
#     else:
#         return torch.device("cpu")
    
# def to_device(data, device):
#     if isinstance(data, (list,tuple)):
#         return [to_device(x, device) for x in data]
#     return data.to(device, non_blocking=True)

# class DeviceDataLoader():
#     def __init__(self, dl, device):
#         self.dl = dl
#         self.device = device
        
#     def __iter__(self):
#         for x in self.dl:
#             yield to_device(x, self.device)
            
#     def __len__(self):
#         return len(self.dl)

In [None]:
# device = get_device()
# device

In [None]:
# train_dataset = torchvision.datasets.ImageFolder('/train/',transform=train_transforms)
# valid_dataset = torchvision.datasets.ImageFolder('/test/',transform=valid_transforms)


In [None]:
# batch_size = 128
# train_loader = torch.utils.data.DataLoader(train_dataset,batch_size,shuffle=True,num_workers=0,pin_memory=True)
# valid_loader = torch.utils.data.DataLoader(valid_dataset,batch_size,shuffle=False,num_workers=0,pin_memory=True)

In [None]:
# train_dl = DeviceDataLoader(train_loader, device)
# val_dl = DeviceDataLoader(valid_loader, device)

# PyTorch Light Model

In [None]:
# mobilenet_v3_small = modelst.mobilenet_v3_small()

In [None]:
# model_l = torchvision.models.mobilenet_v2(pretrained=True)

In [None]:
# model_l

### The block below freezes the first 169 parameter tensors of the model so that they are not updated during training

In [None]:
# for i,param in enumerate(model_l.parameters()):
#     if i<169:
#         param.requires_grad=False

In [None]:
# model_l.fc = torch.nn.Sequential(
#     torch.nn.Dropout(0.5),
#     torch.nn.Linear(2048,101)
# )

In [None]:
# model_l = to_device(model_l, device)

### The summary function gives us the total number of layers in a model which helps us decide the number of layers we want to freeze.

In [None]:
# summary(model_l, (3, 224, 224))

### The block below helps us find a good learning rate by running a learning-rate range test and taking into account the steepest gradient of the loss

In [None]:
# from torch_lr_finder import LRFinder
# criterion = torch.nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(model_l.parameters(), lr=0.00001)
# lr_finder = LRFinder(model_l, optimizer, criterion, device="cuda")
# lr_finder.range_test(train_loader, end_lr=0.001, num_iter=25)
# lr_finder.plot()
# lr_finder.reset()

In [None]:
# %%time

# cuda = True
# epochs = 10
# model_name = '/kaggle/working/densenet161.pt'
# optimizer = torch.optim.Adam(model_l.parameters(),lr=3.83e-4,weight_decay=0.001)
# criterion = torch.nn.CrossEntropyLoss()
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min',factor=0.1,patience=1,verbose=True)

# # writer = SummaryWriter() # For Tensorboard
# early_stop_count=0
# ES_patience=5
# best = 0.0
# if cuda:
#     model_l.cuda()

# for epoch in range(epochs):
    
#     # Training
#     model_l.train()
#     correct = 0
#     train_loss = 0.0
#     tbar = tqdm(train_loader, desc = 'Training', position=0, leave=True)
#     for i,(inp,lbl) in enumerate(tbar):
#         optimizer.zero_grad()
#         if cuda:
#             inp,lbl = inp.cuda(),lbl.cuda()
#         out = model_l(inp)
#         loss = criterion(out,lbl)
#         train_loss += loss
#         out = out.argmax(dim=1)
#         correct += (out == lbl).sum().item()
#         loss.backward()
#         optimizer.step()
#         tbar.set_description(f"Epoch: {epoch+1}, loss: {loss.item():.5f}, acc: {100.0*correct/((i+1)*train_loader.batch_size):.4f}%")
#     train_acc = 100.0*correct/len(train_loader.dataset)
#     train_loss /= (len(train_loader.dataset)/batch_size)

#     # Validation
#     model_l.eval()
#     with torch.no_grad():
#         correct = 0
#         val_loss = 0.0
#         vbar = tqdm(valid_loader, desc = 'Validation', position=0, leave=True)
#         for i,(inp,lbl) in enumerate(vbar):
#             if cuda:
#                 inp,lbl = inp.cuda(),lbl.cuda()
#             out = model_l(inp)
#             val_loss += criterion(out,lbl)
#             out = out.argmax(dim=1)
#             correct += (out == lbl).sum().item()
#         val_acc = 100.0*correct/len(valid_loader.dataset)
#         val_loss /= (len(valid_loader.dataset)/batch_size)
#     print(f'\nEpoch: {epoch+1}/{epochs}')
#     print(f'Train loss: {train_loss}, Train Accuracy: {train_acc}')
#     print(f'Validation loss: {val_loss}, Validation Accuracy: {val_acc}\n')

#     scheduler.step(val_loss)

#     # write to tensorboard
#     #writer.add_scalar("Loss/train", train_loss, epoch)
#     #writer.add_scalar("Loss/val", val_loss, epoch)
#     #writer.add_scalar("Accuracy/train", train_acc, epoch)
#     #writer.add_scalar("Accuracy/val", val_acc, epoch)

#     if val_acc>best:
#         best=val_acc
#         torch.save( model_l,model_name)
#         early_stop_count=0
#         print('Accuracy Improved, model saved.\n')
#     else:
#         early_stop_count+=1

#     if early_stop_count==ES_patience:
#         print('Early Stopping Initiated...')
#         print(f'Best Accuracy achieved: {best:.2f}% at epoch:{epoch-ES_patience}')
#         print(f'Model saved as {model_name}')
#         break
#     #writer.flush()
# # writer.close()

# PyTorch Medium/Heavy Model

In [None]:
# model_t = resnet50(pretrained=True)

In [None]:
# model_t

In [None]:
# for i,param in enumerate(model_t.parameters()):
#     if i<169:
#         param.requires_grad=False

In [None]:
# model_t.fc = torch.nn.Sequential(
#     torch.nn.Dropout(0.5),
#     torch.nn.Linear(2048,101)
# )

In [None]:
# model_t = to_device(model_t, device)

In [None]:
# summary(model_t, (3, 224, 224))

#### From the summary above, we can see that there are a total of 175 layers, including the ones that we added; hence, we freeze the weights of the first 173 layers, as we only want the final layers' weights to be updated.

In [None]:
# torch.cuda.is_available()

In [None]:
# from torch_lr_finder import LRFinder
# criterion = torch.nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(model_t.parameters(), lr=0.00001)
# lr_finder = LRFinder(model_t, optimizer, criterion, device="cuda")
# lr_finder.range_test(train_loader, end_lr=0.001, num_iter=25)
# lr_finder.plot()
# lr_finder.reset()

In [None]:
# model_t.parameters()

In [None]:
# %%time

# cuda = True
# epochs = 10
# model_name = '/kaggle/working/resnet50.pt'
# optimizer = torch.optim.Adam(model_t.parameters(),lr=3.83e-4,weight_decay=0.001)
# criterion = torch.nn.CrossEntropyLoss()
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min',factor=0.1,patience=1,verbose=True)

# # writer = SummaryWriter() # For Tensorboard
# early_stop_count=0
# ES_patience=5
# best = 0.0
# if cuda:
#     model_t.cuda()

# for epoch in range(epochs):
    
#     # Training
#     model_t.train()
#     correct = 0
#     train_loss = 0.0
#     tbar = tqdm(train_loader, desc = 'Training', position=0, leave=True)
#     for i,(inp,lbl) in enumerate(tbar):
#         optimizer.zero_grad()
#         if cuda:
#             inp,lbl = inp.cuda(),lbl.cuda()
#         out = model_t(inp)
#         loss = criterion(out,lbl)
#         train_loss += loss
#         out = out.argmax(dim=1)
#         correct += (out == lbl).sum().item()
#         loss.backward()
#         optimizer.step()
#         tbar.set_description(f"Epoch: {epoch+1}, loss: {loss.item():.5f}, acc: {100.0*correct/((i+1)*train_loader.batch_size):.4f}%")
#     train_acc = 100.0*correct/len(train_loader.dataset)
#     train_loss /= (len(train_loader.dataset)/batch_size)

#     # Validation
#     model_t.eval()
#     with torch.no_grad():
#         correct = 0
#         val_loss = 0.0
#         vbar = tqdm(valid_loader, desc = 'Validation', position=0, leave=True)
#         for i,(inp,lbl) in enumerate(vbar):
#             if cuda:
#                 inp,lbl = inp.cuda(),lbl.cuda()
#             out = model_t(inp)
#             val_loss += criterion(out,lbl)
#             out = out.argmax(dim=1)
#             correct += (out == lbl).sum().item()
#         val_acc = 100.0*correct/len(valid_loader.dataset)
#         val_loss /= (len(valid_loader.dataset)/batch_size)
#     print(f'\nEpoch: {epoch+1}/{epochs}')
#     print(f'Train loss: {train_loss}, Train Accuracy: {train_acc}')
#     print(f'Validation loss: {val_loss}, Validation Accuracy: {val_acc}\n')

#     scheduler.step(val_loss)

#     # write to tensorboard
#     #writer.add_scalar("Loss/train", train_loss, epoch)
#     #writer.add_scalar("Loss/val", val_loss, epoch)
#     #writer.add_scalar("Accuracy/train", train_acc, epoch)
#     #writer.add_scalar("Accuracy/val", val_acc, epoch)

#     if val_acc>best:
#         best=val_acc
#         torch.save( model_t,model_name)
#         early_stop_count=0
#         print('Accuracy Improved, model saved.\n')
#     else:
#         early_stop_count+=1

#     if early_stop_count==ES_patience:
#         print('Early Stopping Initiated...')
#         print(f'Best Accuracy achieved: {best:.2f}% at epoch:{epoch-ES_patience}')
#         print(f'Model saved as {model_name}')
#         break
#     #writer.flush()
# # writer.close()

# References
1. https://keras.io/api/applications/
2. https://www.tensorflow.org/model_optimization/guide/quantization/training
3. https://www.kaggle.com/pranshu15/tensorflow-keras-mobilenetv2-77
4. https://pytorch.org/vision/stable/models.html
5. https://pypi.org/project/keras-adabound/