# Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os

from shutil import copy

from tensorflow.keras.utils import get_file
from tensorflow.keras.applications import EfficientNetB4
from tensorflow.keras.callbacks import EarlyStopping , ModelCheckpoint

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential , load_model
from tensorflow.keras.layers import Flatten , Dense , Conv2D , MaxPooling2D , Dropout , BatchNormalization , GlobalAveragePooling2D , DepthwiseConv2D

# Downloading the Dataset

In [2]:
def get_data_extract():
  if "food-101" in os.listdir():
    print("Dataset already exists")
  else:
    tf.keras.utils.get_file(
    'food-101.tar.gz',
    'http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz',
    cache_subdir='/content',
    extract=True,
    archive_format='tar',
    cache_dir=None
    )
    print("Dataset downloaded and extracted!")

In [None]:
# Download data and extract it to folder
get_data_extract()

Downloading data from http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz
 114630656/4996278331 [..............................] - ETA: 6:55

In [None]:
os.listdir("food-101")

In [None]:
os.listdir("food-101/meta")

In [None]:
print(f"The No of Classes in Dataset are : {len(os.listdir('/content/food-101/images'))}")

In [None]:
count = 0
for folder in os.listdir("food-101/images"):
  folder_path = os.path.join("food-101/images" , folder)
  print(f"The No of Images of Class : {folder} in Dataset are : {len(os.listdir(folder_path))}")
  count = count + 1000

#####Dataset Information

This Dataset has 101 Classes with 1000 images for each class



# Splitting the Data into Training and Testing Data

There are two files in meta folder train.txt and test.txt in which list of images is given which needed to be in training set and testing set

* Training Set includes 750 images for each class
* Testing Set incldes 250 images for each class

In [None]:
def prepare_data(filepath , src_file  , dest_file):
  classes_images = dict()
  for folder_name in os.listdir(src_file):
    classes_images[folder_name] = []
    

  with open(filepath , 'r') as txt:
    #Getting the paths for images
    paths = [read.strip() for read in txt.readlines()]

  for p in paths:
    food = p.split("/")
    classes_images[food[0]].append(food[1] + '.jpg')


  os.makedirs(dest_file , exist_ok = True)
  
  for food in classes_images.keys():
    print("\nCopying Images into " , food)

    if not os.path.exists(os.path.join("food" , food)):
      os.makedirs(os.path.join(dest_file , food))

    for i in classes_images[food]:
  
      copy(os.path.join(src_file , food , i) , os.path.join( dest_file, food , i))
      
    print(f"Copied {len(classes_images[food])} images into {food}")

In [None]:
# Testing Data
prepare_data("/content/food-101/meta/test.txt" ,"/content/food-101/images" , "food/test")

In [None]:
# Training Data
prepare_data("/content/food-101/meta/train.txt" , "/content/food-101/images" , "food/train")

# Model Building

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
teacher_base_model = EfficientNetB4(weights = "imagenet", include_top  = False ,  classes = 101 , input_shape = (380 , 380 , 3))
x = GlobalAveragePooling2D()(teacher_base_model.output)
x = tf.keras.layers.Dropout(0.2)(x)
teacher_outputs = Dense(101 , activation = "softmax")(x)
teacher_model = tf.keras.models.Model(teacher_base_model.input , teacher_outputs)

In [None]:
teacher_model.load_weights("/content/drive/MyDrive/Computer Vision/Knowledge_distillation/Models (1)/EfficientNetTLmodel_v2.14.h5")
teacher_model.trainable = False

In [None]:
Teacher_clf_layer = teacher_model.layers[-1]
Teacher_clf_layer

In [None]:
Teacher_clf_layer

In [None]:
teacher_model = tf.keras.Model(teacher_model.inputs, teacher_model.layers[-2].output, name='TeacherTransfer')
# teacher_model = tf.keras.Model(teacher_model.inputs, teacher_model.layers[-3].output, name='TeacherScratch')

In [None]:
teacher_model.summary()

In [None]:
base_model = tf.keras.applications.mobilenet_v2.MobileNetV2(input_shape=(160, 160, 3), include_top=False, weights='imagenet')
x = tf.keras.layers.Reshape(target_shape=(16, 20, 100))(base_model.output)
x = tf.keras.layers.MaxPool2D(pool_size=(2,2))(x)
x = tf.keras.layers.Conv2D(28, (1, 3), padding='valid', use_bias=False)(x)
features  = tf.keras.layers.Flatten()(x)
# Removing Dropout as you shouldnt have a dropout active on an output layer 
# x = tf.keras.layers.Dropout(0.1)(features)
outputs = Dense(101 , activation = "softmax")(features)

student_model = tf.keras.models.Model(base_model.input , outputs = [features , outputs])
student_model.trainable = True

# weights_path = "/content/drive/MyDrive/Computer Vision/Knowledge_distillation/Models (1)/Student_mobileNetV2_model/MobileNetScratch.05.h5"
# student_model.load_weights(weights_path)

In [None]:
student_model.summary()

In [None]:
student_model.layers[-1].set_weights(Teacher_clf_layer.weights)

In [None]:
student_model.layers[-1].weights[0] == Teacher_clf_layer.weights[0]

In [None]:
class Distill_model(tf.keras.Model):

    def __init__(self, student, teacher, name=None):
        super().__init__(name=name)

        self.student = student
        self.teacher = teacher
        self.teacher.trainable = False

        self.total_loss_tracker = tf.keras.metrics.Mean(name='loss')
        self.logits_loss_tracker = tf.keras.metrics.Mean(name='logits_loss')
        self.feature_loss_tracker = tf.keras.metrics.Mean(name='feature_loss')
        self.acc_tracker = tf.keras.metrics.Mean(name='accuracy')

        self.resize_input_for_student = tf.keras.layers.Resizing(160, 160)
        self.loss_weight = 0.5
        self.temperature = 3

    def set_loss_weight(self, w = 0.5):
        self.loss_weight = w

    def set_temp(self, t = 3):
        self.temperature = t

    @tf.function
    def call(self, inputs, training=False):
        student_features, logits = self.student(inputs, training=training)        
        return student_features, logits

    def compute_loss(self, logits, student_features, labels, teacher_features, training = False):
        logits_loss = tf.keras.losses.CategoricalCrossentropy()(labels, logits)
        
        # feature_loss = tf.keras.losses.KLDivergence()(
        #         (teacher_features / self.temperature),
        #         (student_features / self.temperature))


        # feature_loss = tf.keras.losses.CategoricalCrossentropy()(teacher_features, student_features)

        feature_loss = tf.keras.losses.MeanSquaredError()(
            (teacher_features / self.temperature), (student_features / self.temperature))

        loss = self.loss_weight * logits_loss + (1 - self.loss_weight) * feature_loss

        self.logits_loss_tracker.update_state(logits_loss)
        self.feature_loss_tracker.update_state(feature_loss)
        self.total_loss_tracker.update_state(loss)
        self.acc_tracker.update_state(labels, logits)
        
        return loss

    @tf.function
    def train_step(self, data):

        images, labels = data

        if images.shape[1:3] != (160, 160):
            student_images = self.resize_input_for_student(images)
        else:
            student_images = images

        student_features, logits = self(student_images)
        teacher_features = self.teacher(images, training=False)
        
        train_variables = self.trainable_variables
        loss = self.compute_loss(logits, student_features, labels, teacher_features, training = True)
        
        grads = tf.gradients(loss, train_variables)

        self.optimizer.apply_gradients(zip(grads, train_variables))

        return {m.name: m.result() for m in self.metrics}

    @tf.function
    def test_step(self, data):
        images, labels = data

        if images.shape[1:3] != (160, 160):
            student_images = self.resize_input_for_student(images)
        else:
            student_images = images

        student_features, logits = self(student_images)
        teacher_features = self.teacher(images, training=False)
        
        loss = self.compute_loss(logits, student_features, labels, teacher_features, training = False)


        return {m.name: m.result() for m in self.metrics}

In [None]:
model = Distill_model(student=student_model, 
                      teacher=teacher_model,
                      name='Distill_MobileNet_with_Teacher_Transfer')

In [None]:
model.build(input_shape=(None, 160, 160, 3))

In [None]:
opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer = opt)

In [None]:
model.summary()

In [None]:
ckpt_path = os.path.join("/content/drive/MyDrive/Computer Vision/Knowledge_distillation/Models")
# os.makedirs(ckpt_path , exist_ok = True)

my_callbacks = [EarlyStopping(patience = 3) ,
                ModelCheckpoint(os.path.join(ckpt_path, "MobileNet_Student_Distill_TL_{epoch:02d}.h5"), save_weights_only = True, save_best_only=False, verbose=1),
                tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=3, verbose=1, min_delta=0.01)
                ]

### Data Augementation

In [None]:
train_datagen = ImageDataGenerator(
                             rotation_range = 180,
                             shear_range = 0.2,
                             zoom_range = 0.2,
                             horizontal_flip = True ,
                             vertical_flip = True,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             validation_split=0.2 ,
                             rescale = 1./255
                             
)
test_datagen = ImageDataGenerator(rescale = 1./255)

In [None]:
train_generator = train_datagen.flow_from_directory(
    "/content/food/train" , 
    target_size = (380 , 380),
    class_mode = "categorical" ,
    batch_size = 64

)
test_generator = test_datagen.flow_from_directory(
    "/content/food/test" , 
    target_size = (380 , 380),
    class_mode = "categorical",
    batch_size = 64,
)

# Distilling with Ts

In [None]:
opt = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(optimizer = opt)
model.set_loss_weight(0.5)
model.set_temp(1)

In [None]:
history = model.fit(train_generator, 
                    epochs = 10,
                    callbacks  = my_callbacks,
                    validation_data = test_generator)

In [None]:
model.load_weights('/content/drive/MyDrive/Computer Vision/Knowledge_distillation/Models/MobileNet_Student_Distill_TS_02.h5')

In [None]:
opt = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(optimizer = opt)
model.set_loss_weight(0.8)
model.set_temp(1)

1.3606  0.5254

In [None]:
history = model.fit(train_generator, 
                    epochs = 10,
                    callbacks  = my_callbacks,
                    validation_data = test_generator,
                    initial_epoch=2)

In [None]:
model.load_weights('/content/drive/MyDrive/Computer Vision/Knowledge_distillation/Models/MobileNet_Student_Distill_TS_V2_03.h5')

In [None]:
opt = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(optimizer = opt)
model.set_loss_weight(0.8)
model.set_temp(1)


4/1184 - loss: 1.0236 - logits_loss: 1.1696 - feature_loss: 0.4396 - accuracy: 0.5681

In [None]:
history = model.fit(train_generator, 
                    epochs = 10,
                    callbacks  = my_callbacks,
                    validation_data = test_generator,
                    initial_epoch=3)

In [None]:
history = model.fit(train_generator, 
                    epochs = 15,
                    callbacks  = my_callbacks,
                    validation_data = test_generator,
                    initial_epoch=10)

# Distilling with TL

## V1

In [None]:
opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer = opt)
model.set_loss_weight(0.5)
model.set_temp(1)

In [None]:
model.summary()

Epoch 1/3
  26/1184 [..............................] - ETA: 43:22 - loss: 4.1153 - logits_loss: 7.9638 - feature_loss: 0.2668 - accuracy: 0.0090

In [None]:
history = model.fit(train_generator, 
                    epochs = 3,
                    callbacks  = my_callbacks,
                    validation_data = test_generator)

 Did not work --> acc = 1.5%, slightly higher than random guessing

## V2

Changing feature loss to MSE

In [None]:
images = next(train_generator)[0]
teacher_features = model.teacher(images)
student_features, logits = model.student(model.resize_input_for_student(images))

tf.keras.losses.MeanSquaredError()(teacher_features, student_features)

In [None]:
ckpt_path = os.path.join("/content/drive/MyDrive/Computer Vision/Knowledge_distillation/Models")
# os.makedirs(ckpt_path , exist_ok = True)

my_callbacks = [EarlyStopping(patience = 3) ,
                ModelCheckpoint(os.path.join(ckpt_path, "MobileNet_Student_Distill_TL_v2_{epoch:02d}.h5"), save_weights_only = True, save_best_only=False, verbose=1),
                tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=3, verbose=1, min_delta=0.01)
                ]

### .1

In [None]:
opt = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(optimizer = opt)
model.set_loss_weight(0.5)
model.set_temp(1)

Epoch 1/3
   7/1184 [..............................] - ETA: 44:18 - loss: 5.5273 - logits_loss: 5.7333 - feature_loss: 5.3214 - accuracy: 0.0092

In [None]:
history = model.fit(train_generator, 
                    epochs = 3,
                    callbacks  = my_callbacks,
                    validation_data = test_generator)

### .2

In [None]:
opt = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(optimizer = opt)
model.set_loss_weight(0.9)
model.set_temp(1)

In [None]:
# model.load_weights('/content/drive/MyDrive/Computer Vision/Knowledge_distillation/Models/MobileNet_Student_Distill_TL_v2_01.h5')

In [None]:
history = model.fit(train_generator, 
                    epochs = 5,
                    callbacks = my_callbacks,
                    validation_data = test_generator,
                    initial_epoch = 1)

### .3

In [None]:
model.load_weights('/content/drive/MyDrive/Computer Vision/Knowledge_distillation/Models (1)/MobileNet_Student_Distill_TL_05.h5')

In [None]:
opt = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(optimizer = opt)
model.set_loss_weight(0.9)
model.set_temp(1)

In [None]:
history = model.fit(train_generator, 
                    epochs = 10,
                    callbacks = my_callbacks,
                    validation_data = test_generator,
                    initial_epoch = 5)

In [None]:
history = model.fit(train_generator, 
                    epochs = 18,
                    callbacks = my_callbacks,
                    validation_data = test_generator,
                    initial_epoch = 10)

In [None]:
history = model.fit(train_generator, 
                    epochs = 22,
                    callbacks = my_callbacks,
                    validation_data = test_generator,
                    initial_epoch = 18)