<h1>Library Imports<h1>


In [1]:
!pip install wandb
# Common
import os
import time

# Google
from google.cloud import storage

# Model
import tensorflow as tf
from keras import Sequential, Model
from keras.layers import Dense, GlobalAvgPool2D, Conv2D, MaxPooling2D, Flatten, BatchNormalization
import keras

# Callbacks
from keras.callbacks import ModelCheckpoint, EarlyStopping

# Transfer Learning Models
from tensorflow.keras.applications import ResNet152V2
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.convnext import ConvNeXtBase, ConvNeXtSmall, ConvNeXtLarge
from tensorflow.keras.applications.densenet import DenseNet201, DenseNet121, DenseNet169
from tensorflow.python.keras import backend as K

# Weights and Biases
import wandb
from wandb.keras import WandbCallback

NUM_CHANNELS = 3
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
BATCH_SIZE = 4
TRAIN_TEST_SPLIT = 0.8
EPOCHS = 30
NUM_CLASSES = 120
input_shape = (IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS)
train_data = []
validation_data = []
n_steps_training = 0
n_steps_validation = 0



In [None]:
TENSORIZED_DATA_BUCKET_NAME="team-engai-dogs-tensorized"
client = storage.Client.from_service_account_json('secrets/data-service-account.json')
blobs = client.list_blobs(TENSORIZED_DATA_BUCKET_NAME, prefix='dog_breed_dataset/images/Images')
breed_directory_name = 'breed-data'

<h1>Data Pipeline Function Definitions<h1>


In [2]:
def parse_tfrecord_example(example_proto):
  parsed_example = tf.io.parse_single_example(example_proto, feature_description)

  # Image
  #image = tf.image.decode_jpeg(parsed_example['image'])
  image = tf.io.decode_raw(parsed_example['image'], tf.uint8)
  image.set_shape([NUM_CHANNELS * IMAGE_HEIGHT * IMAGE_WIDTH])
  image = tf.reshape(image, [IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS])

  # Label
  label = tf.cast(parsed_example['label'], tf.int64)
  label = tf.one_hot(label, num_classes)

  return image, label

# Normalize pixels
def normalize(image, label):
  image = image/255
  return image, label

def download_data(directory_name, download=True):

  if not os.path.exists(directory_name):
    os.mkdir(directory_name)

  class_to_image_files = {}

  for blob in blobs:
    image_file_name = blob.name.split('/')[-1]
    label = blob.name.split('/')[-2]
    if label not in class_to_image_files:
      class_to_image_files[label] = []
    class_to_image_files[label].append(image_file_name)
    if download:
      blob.download_to_filename(f'{directory_name}/{image_file_name}')

  feature_description = {
    'image': tf.io.FixedLenFeature([], tf.string),
    'height':tf.io.FixedLenFeature([], tf.int64),
    'width':tf.io.FixedLenFeature([], tf.int64),
    'channel':tf.io.FixedLenFeature([], tf.int64),
    'label': tf.io.FixedLenFeature([], tf.int64)
    }

  return feature_description, class_to_image_files

def build_data_pipeline(directory, starting_filename="tensorized", train_dataset_size=TRAIN_TEST_SPLIT):

  file_path = os.path.join(directory, ''.join([starting_filename, '*']))
  dataset_tfrecord = tf.data.Dataset.list_files('breed-data/tensorized*')
  n_files = dataset_tfrecord.cardinality().numpy()

  print(f'Loaded {n_files} files')

  train_dataset_size = int(TRAIN_TEST_SPLIT * n_files)
  val_dataset_size = n_files - train_dataset_size
  dataset_tfrecord = dataset_tfrecord.shuffle(n_files)
  train_data = dataset_tfrecord.take(train_dataset_size)
  validation_data = dataset_tfrecord.skip(train_dataset_size)


  # Read the tfrecord files
  train_data = train_data.flat_map(tf.data.TFRecordDataset)
  train_data = train_data.map(parse_tfrecord_example, num_parallel_calls=tf.data.AUTOTUNE)
  train_data = train_data.map(normalize, num_parallel_calls=tf.data.AUTOTUNE)
  train_data = train_data.batch(BATCH_SIZE)
  train_data = train_data.prefetch(buffer_size=tf.data.AUTOTUNE)
  train_data = train_data.repeat()

  # Read the tfrecord files
  validation_data = validation_data.flat_map(tf.data.TFRecordDataset)
  validation_data = validation_data.map(parse_tfrecord_example, num_parallel_calls=tf.data.AUTOTUNE)
  validation_data = validation_data.map(normalize, num_parallel_calls=tf.data.AUTOTUNE)
  validation_data = validation_data.batch(BATCH_SIZE)
  validation_data = validation_data.prefetch(buffer_size=tf.data.AUTOTUNE)
  validation_data = validation_data.repeat()

  n_steps_training = train_dataset_size // BATCH_SIZE
  n_steps_validation = val_dataset_size // BATCH_SIZE

  print(f"Number of training steps: {n_steps_training}")
  print(f"Number of validation steps: {n_steps_validation}")

  return train_data, validation_data, n_steps_training, n_steps_validation





In [3]:
import tensorflow as tf
import tensorflow_datasets as tfds

def tf_dataset_pipeline(train_test_split=TRAIN_TEST_SPLIT):

  n_images = 20580
  n_train_images = int(n_images * train_test_split)
  n_val_images = n_images - n_train_images

  ds = tfds.load('stanford_dogs', split='all', shuffle_files=True)

  ds = ds.shuffle(n_images)
  train_data = ds.take(n_train_images)
  validation_data = ds.skip(n_train_images)


  # Read the tfrecord files
  train_data = train_data.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
  train_data = train_data.batch(BATCH_SIZE)
  train_data = train_data.prefetch(buffer_size=tf.data.AUTOTUNE)
  train_data = train_data.cache()
  train_data = train_data.repeat()

  # Read the tfrecord files
  validation_data = validation_data.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
  validation_data = validation_data.batch(BATCH_SIZE)
  validation_data = validation_data.prefetch(buffer_size=tf.data.AUTOTUNE)
  validation_data = validation_data.cache()
  validation_data = validation_data.repeat()

  n_steps_training = n_train_images // BATCH_SIZE
  n_steps_validation = n_val_images // BATCH_SIZE

  print(f"Number of training steps: {n_steps_training}")
  print(f"Number of validation steps: {n_steps_validation}")

  return train_data, validation_data, n_steps_training, n_steps_validation

def preprocess(data):
  image, label = data['image'], data['label']
  image, label = resize_image(image, label)
  image, label = normalize(image, label)
  image, label = label_to_onehot(image, label)
  return image, label


def resize_image(image, label):
  image = tf.image.resize(image, [IMAGE_HEIGHT, IMAGE_WIDTH])
  return image, label

def normalize(image, label):
  image = tf.cast(image, tf.float32) / 255.
  return image, label

def label_to_onehot(image, label):
  depth = 120
  label = tf.one_hot(label, depth)
  return image, label






<h1>Model Building and Training Function Definitions<h1>



In [4]:
def build_teacher_model(num_classes, input_shape, base_architecture, name="DogNet-breed"):
  if base_architecture=="ResNet152v2":
    base_model = ResNet152V2(include_top=False, input_shape=input_shape, weights='imagenet')
    base_model.trainable = False # Freeze the Weights
  elif base_architecture=="ConNeXtBase":
    base_model = ConvNeXtBase(include_top=False, input_shape=input_shape, weights='imagenet')
    base_model.trainable = False # Freeze the Weights
  elif base_architecture=="ConNeXtLarge":
    base_model = ConvNeXtLarge(include_top=False, input_shape=input_shape, weights='imagenet')
    base_model.trainable = False # Freeze the Weights
  elif base_architecture=="DenseNet201":
    base_model = DenseNet201(include_top=False, input_shape=input_shape, weights='imagenet')
    base_model.trainable = False # Freeze the Weights


  # Model
  DogNet_breed = Sequential([
      base_model,
      GlobalAvgPool2D(),
      Dense(500, activation='relu'),
      Dense(300, activation='relu'),
      Dense(200, activation='relu'),
      Dense(200, activation='relu'),
      Dense(num_classes, activation='softmax')
  ], name=name)

  print(DogNet_breed.summary())

  return DogNet_breed


def train_model(model,
                name,
                architecture,
                train_data=train_data,
                validation_data=validation_data,
                n_steps_training=n_steps_training,
                n_steps_validation=n_steps_validation,
                epochs=EPOCHS,
                batch_size=BATCH_SIZE):

  model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
  wandb.init(
      project = "DogNet-breed",
      config = {
          #"learning_rate": 0.02,
          "epochs": epochs,
          "architecture": architecture,
          "batch_size": batch_size,
          "model_name": name
      },
      name = name
  )

  # Callbacks
  callbacks = [
      EarlyStopping(monitor='val_accuracy', patience=5, verbose=1, restore_best_weights=True),
      WandbCallback(monitor='val_accuracy')
  ]

  # Train
  start_time = time.time()
  model.fit(
      train_data,
      epochs=EPOCHS,
      validation_data=validation_data,
      callbacks=callbacks,
      verbose=1,
      steps_per_epoch=n_steps_training,
      validation_steps=n_steps_validation
      #shuffle=True
  )
  execution_time = (time.time() - start_time)/60.0
  wandb.config.update({"execution_time": execution_time})
  wandb.run.finish()

  return model


In [5]:
def build_student_model(num_classes, input_shape, base_architecture, name="DogNet-breed-student-NoTeacher"):
  if base_architecture=="ResNet50":
    base_model = ResNet50(include_top=False, input_shape=input_shape, weights='imagenet')
    base_model.trainable = False # Freeze the Weights
  elif base_architecture=="ConNextSmall":
    base_model = ConvNeXtSmall(include_top=False, input_shape=input_shape, weights='imagenet')
    base_model.trainable = False # Freeze the Weights
  elif base_architecture=="DenseNet121":
    base_model = DenseNet121(include_top=False, input_shape=input_shape, weights='imagenet')
    base_model.trainable = False # Freeze the Weights
  elif base_architecture=="DenseNet169":
    base_model = DenseNet169(include_top=False, input_shape=input_shape, weights='imagenet')
    base_model.trainable = False # Freeze the Weights


  # Model
  DogNet_breed = Sequential([
      base_model,
      GlobalAvgPool2D(),
      BatchNormalization(),
      Dense(500, activation='relu'),
      Dense(300, activation='relu'),
      Dense(200, activation='relu'),
      Dense(num_classes, activation='softmax')
  ], name=name)

  print(DogNet_breed.summary())

  return DogNet_breed



<h1>Build and Train Teacher Model<h1>

In [6]:
model_architecture = "ConNeXtLarge"
model_name = ''.join(["DogNet-breed-teacher", "-",model_architecture])
train_data, validation_data, n_steps_training, n_steps_validation = tf_dataset_pipeline(TRAIN_TEST_SPLIT)
DogNet_breed_teacher = build_teacher_model(NUM_CLASSES, input_shape, model_architecture)
K.clear_session()
DogNet_breed_teacher = train_model(
    DogNet_breed_teacher,
    model_name,
    model_architecture,
    train_data=train_data,
    validation_data=validation_data,
    n_steps_training=n_steps_training,
    n_steps_validation=n_steps_validation,
    epochs=EPOCHS
    )

Number of training steps: 4116
Number of validation steps: 1029
Model: "DogNet-breed"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 convnext_large (Functional  (None, 7, 7, 1536)        196230336 
 )                                                               
                                                                 
 global_average_pooling2d (  (None, 1536)              0         
 GlobalAveragePooling2D)                                         
                                                                 
 dense (Dense)               (None, 500)               768500    
                                                                 
 dense_1 (Dense)             (None, 300)               150300    
                                                                 
 dense_2 (Dense)             (None, 200)               60200     
                                                        

[34m[1mwandb[0m: Currently logged in as: [33mjuanp-heusser[0m ([33mengai[0m). Use [1m`wandb login --relogin`[0m to force relogin




Epoch 1/30
 945/4116 [=====>........................] - ETA: 8:08 - loss: 4.7390 - accuracy: 0.0175

KeyboardInterrupt: ignored

<h1>Build and Train Student Model Without Teacher<h1>

In [6]:
model_architecture = "ConNextSmall"
model_name = ''.join(["DogNet-breed-student-alone", "-", model_architecture])
BATCH_SIZE = 32
train_data, validation_data, n_steps_training, n_steps_validation = tf_dataset_pipeline(.8)
K.clear_session()
DogNet_breed_student = build_student_model(NUM_CLASSES, input_shape, model_architecture)
DogNet_breed_student = train_model(
    DogNet_breed_student,
    model_name,
    model_architecture,
    train_data, validation_data,
    n_steps_training, n_steps_validation
    )

Number of training steps: 514
Number of validation steps: 128
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/convnext/convnext_small_notop.h5
Model: "DogNet-breed-student-NoTeacher"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 convnext_small (Functional  (None, 7, 7, 768)         49454688  
 )                                                               
                                                                 
 global_average_pooling2d (  (None, 768)               0         
 GlobalAveragePooling2D)                                         
                                                                 
 batch_normalization (Batch  (None, 768)               3072      
 Normalization)                                                  
                                                                 
 dense (Dense)               (None, 500)               3

[34m[1mwandb[0m: Currently logged in as: [33mjuanp-heusser[0m ([33mengai[0m). Use [1m`wandb login --relogin`[0m to force relogin




Epoch 1/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.5s


Epoch 2/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.5s


Epoch 3/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.5s


Epoch 4/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.5s


Epoch 5/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s


Epoch 6/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s


Epoch 7/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.5s


Epoch 8/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s


Epoch 9/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s


Epoch 10/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s


Epoch 11/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s


Epoch 12/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s


Epoch 13/30
Epoch 14/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s


Epoch 15/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s


Epoch 16/30
Epoch 17/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s


Epoch 18/30
Epoch 19/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s


Epoch 20/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s


Epoch 21/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s


Epoch 22/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s


Epoch 23/30
Epoch 24/30
Epoch 25/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s


Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30

  saving_api.save_model(
[34m[1mwandb[0m: Adding directory to artifact (/content/wandb/run-20231120_154242-lwrq6o7o/files/model-best)... Done. 0.6s




VBox(children=(Label(value='4435.733 MB of 4435.733 MB uploaded (17.424 MB deduped)\r'), FloatProgress(value=1…

0,1
accuracy,▁▂▂▃▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇███████████
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
loss,█▇▆▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▂▃▃▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇█▇▇██████
val_loss,█▅▃▂▂▁▁▁▁▂▂▂▃▃▄▄▅▅▅▅▄▅▆▆▆▆▆██▇

0,1
accuracy,0.87719
best_epoch,29.0
best_val_accuracy,0.56323
epoch,29.0
loss,0.43624
val_accuracy,0.56323
val_loss,3.78758


<h1>Model Distillation<h1>

In [None]:
def load_model_from_wandb(artifact_name, project):

    try:
        wandb.login()
        run = wandb.init(project=project, job_type="load_model")
        artifact = run.use_artifact(artifact_name, type='model')
        artifact_dir = artifact.download()
        model = tf.keras.models.load_model(artifact_dir)
        run.finish()

        print("Model loaded successfully")
        return model

    except Exception as e:
        print(f"An error occurred while loading the model: {e}")
        return None


In [None]:
class Distiller(Model):
    def __init__(self, teacher, student):
        super(Distiller, self).__init__()
        self.teacher = teacher
        self.student = student

    def compile(self, optimizer, metrics, student_loss_fn, distillation_loss_fn, Lambda = 0.1, temperature=3):
      """
      optimizer: Keras optimizer for the student weights
      metrics: Keras metrics for evaluation
      student_loss_fn: Loss function of difference between student predictions and ground-truth
      distillation_loss_fn: Loss function of difference between soft student predictions and soft teacher predictions
      lambda: weight to student_loss_fn and 1-alpha to distillation_loss_fn
      temperature: Temperature for softening probability distributions. Larger temperature gives softer distributions.
      """
      super(Distiller, self).compile(optimizer=optimizer, metrics=metrics)
      self.student_loss_fn = student_loss_fn
      self.distillation_loss_fn = distillation_loss_fn

      #hyper-parameters
      self.Lambda = Lambda
      self.temperature = temperature

    def train_step(self, data):
        # Unpack data
        x, y = data

        # Forward pass of teacher (professor)
        teacher_predictions = self.teacher(x, training=False)

        with tf.GradientTape() as tape:
            # Forward pass of student
            student_predictions = self.student(x, training=True)

            # Compute losses
            student_loss = self.student_loss_fn(y, student_predictions)
            distillation_loss = self.distillation_loss_fn(
                tf.nn.softmax(teacher_predictions / self.temperature, axis=1),
                tf.nn.softmax(student_predictions / self.temperature, axis=1),
            )
            loss = self.Lambda * student_loss + (1 - self.Lambda) * distillation_loss

        # Compute gradients
        trainable_vars = self.student.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Update the metrics configured in `compile()`.
        self.compiled_metrics.update_state(y, student_predictions)

        # Return a dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update(
            {"student_loss": student_loss, "distillation_loss": distillation_loss}
        )
        return results

    def test_step(self, data):
        # Unpack the data
        x, y = data

        # Compute predictions
        y_prediction = self.student(x, training=False)

        # Calculate the loss
        student_loss = self.student_loss_fn(y, y_prediction)

        # Update the metrics.
        self.compiled_metrics.update_state(y, y_prediction)

        # Return a dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update({"student_loss": student_loss})
        return results

In [None]:
def train_model_distillation(teacher_model, student_model_name, student_model_architecture, train_data, validation_data, n_steps_training, n_steps_validation, training_params):
  ############################
  # Training Params
  ############################
  learning_rate = training_params["learning_rate"]
  Lambda = training_params["Lambda"]
  temperature = training_params["temperature"]
  epochs = training_params["epochs"]
  batch_size = training_params["batch_size"]


  # Free up memory
  K.clear_session()

  # Build Student model
  DogNet_breed_student_distillation = build_student_model(NUM_CLASSES, input_shape, model_architecture, student_model_name)

  # Build the distiller model
  distiller_model = Distiller(teacher=DogNet_breed_teacher, student=DogNet_breed_student_distillation)

  # Optimizer
  optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
  # Loss
  student_loss = keras.losses.CategoricalCrossentropy(from_logits=True)
  #distillation_loss = keras.losses.KLDivergence()
  distillation_loss = keras.losses.CategoricalCrossentropy(from_logits=False)

  # Compile
  distiller_model.compile(
      optimizer=optimizer,
      student_loss_fn=student_loss,
      distillation_loss_fn=distillation_loss,
      metrics=[keras.metrics.CategoricalAccuracy()],
      Lambda=Lambda,
      temperature=temperature
  )

  wandb.init(
      project = "DogNet-breed",
      config = {
          #"learning_rate": 0.02,
          "epochs": epochs,
          "architecture": model_architecture,
          "batch_size": batch_size,
          "model_name": student_model_name
      },
      name = student_model_name
  )

  callbacks = [
      EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True),
      #ModelCheckpoint(filepath=student_model_name+".keras", monitor='val_categorical_accuracy', verbose=1, save_best_only=True),
      WandbCallback(monitor='val_loss')
  ]


  # Distill teacher to student
  start_time = time.time()
  training_results = distiller_model.fit(
          train_data,
          validation_data=validation_data,
          epochs=EPOCHS,
          verbose=1,
          callbacks=callbacks,
          steps_per_epoch=n_steps_training,
          validation_steps=n_steps_validation)
  execution_time = (time.time() - start_time)/60.0
  print("Training execution time (mins)",execution_time)

  wandb.config.update({"execution_time": execution_time})
  wandb.run.finish()


 # # Get model training history
 # training_history = training_results.history

 # # Evaluate model
 # evaluation_results = evaluate_model(distiller_model,validation_data,
 #               training_history,execution_time,learning_rate, batch_size, epochs, optimizer,
 #               save=False,
 #               loss_metrics=["student_loss","distillation_loss","val_student_loss"],
 #               acc_metrics=["sparse_categorical_accuracy","val_sparse_categorical_accuracy"])


  return distiller_model


In [None]:
learning_rate = 0.001
Lambda = 0.75
temperature= 12
BATCH_SIZE = 32


model_architecture = "DenseNet121"
model_name = ''.join(["DogNet-breed-student-distilled", "-", model_architecture])

training_params = {
    "learning_rate": learning_rate,
    "Lambda": Lambda,
    "temperature": temperature,
    "epochs": EPOCHS,
    "batch_size": BATCH_SIZE
}


K.clear_session()
DogNet_breed_teacher = load_model_from_wandb(
    "model-DogNet-breed-teacher-ResNet152v2:v13",
    "DogNet-breed"
)
train_data, validation_data, n_steps_training, n_steps_validation = tf_dataset_pipeline(.8)
DogNet_breed_student_distilled = train_model_distillation(
    DogNet_breed_teacher,
    model_name,
    model_architecture,
    train_data, validation_data,
    n_steps_training,
    n_steps_validation,
    training_params
)

[34m[1mwandb[0m: Currently logged in as: [33mjuanp-heusser[0m ([33mengai[0m). Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Downloading large artifact model-DogNet-breed-teacher-ResNet152v2:v13, 247.76MB. 5 files... 
[34m[1mwandb[0m:   5 of 5 files downloaded.  
Done. 0:0:0.8


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Model loaded successfully
Number of training steps: 514
Number of validation steps: 128
Model: "DogNet-breed-student-distilled-DenseNet121"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 densenet121 (Functional)    (None, 7, 7, 1024)        7037504   
                                                                 
 global_average_pooling2d (  (None, 1024)              0         
 GlobalAveragePooling2D)                                         
                                                                 
 batch_normalization (Batch  (None, 1024)              4096      
 Normalization)                                                  
                                                                 
 dense (Dense)               (None, 500)               512500    
                                                                 
 dense_1 (Dense)             (None, 300)               150300    
  



Epoch 1/30


  output, from_logits = _get_logits(






Epoch 2/30



Epoch 3/30



Epoch 4/30



Epoch 5/30



Epoch 6/30



Epoch 7/30



Epoch 8/30



Epoch 9/30



Epoch 10/30



Epoch 11/30



Epoch 12/30



Epoch 13/30



Epoch 14/30



Epoch 15/30



Epoch 16/30



Epoch 17/30



Epoch 18/30



Epoch 19/30



Epoch 20/30



Epoch 21/30



Epoch 22/30



Epoch 23/30



Epoch 24/30



Epoch 25/30



Epoch 26/30



Epoch 27/30



Epoch 28/30



Epoch 29/30



Epoch 30/30



Training execution time (mins) 94.12780819336574


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
categorical_accuracy,▁▄▆▇▇▇▇▇▇█████████████████████
distillation_loss,█▆▆▆▆▃▄▆▇▄▆▆▇▇▂▃▆▇▅▃▅▇██▅▅▁▅▅▅
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
student_loss,█▅▅▄▂▂▃▂▄▁▂▁▅▁▁▃▂▇▂▁▁▁▂▁▂▁▁▁▂▁
val_categorical_accuracy,▁▄▅▄▅▆▆▆▇▇▇▇▇▇▇▇▇▇██▇█▇█▇▇████
val_student_loss,▄▃▂▂▃▂▂▁▂▂▂▁▃▃▃▄▄▃▅▆▇▄▄▇▅▇▆▄▅█

0,1
categorical_accuracy,0.98564
distillation_loss,4.78747
epoch,29.0
student_loss,0.00153
val_categorical_accuracy,0.92017
val_student_loss,1.85343


In [None]:
tf.keras.saving.save_model(DogNet_breed_student_distilled.student, model_name+'.hdf5')

  tf.keras.saving.save_model(DogNet_breed_student_distilled.student, model_name+'.hdf5')


In [None]:
train_data, validation_data, n_steps_training, n_steps_validation = tf_dataset_pipeline(TRAIN_TEST_SPLIT)


Number of training steps: 4116
Number of validation steps: 1029


In [None]:
model.compile(optimizer='Adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

loss, accuracy = model.evaluate(validation_data, steps=n_steps_validation)
print("Loss: ", loss)
print("Accuracy: ", accuracy)

Loss:  0.7964034080505371
Accuracy:  0.9122934937477112
