# Data loading

In [1]:
import tensorflow.compat.v2 as tf
import tensorflow_datasets as tfds
from typing import Union

tf.enable_v2_behavior()

import numpy as np
from PIL import Image
import csv
import os
# Location of the MHIST annotation table, relative to the current working directory.
path = os.path.join(os.path.abspath(''), "mhist_dataset", "annotations.csv")

# Columns of each annotation row:
#   file name (MHIST_zzz.png), HP/SSA, number of annotators (out of 7) who selected SSA, train/test
NUM_ANNOTATORS = 7

# newline='' is what the csv module asks for when handing it a file object, so
# that line endings inside quoted fields are interpreted by the reader itself.
with open(path, 'r', newline='') as file:
    csv_reader = csv.reader(file)
    next(csv_reader)  # skip header row
    # Blank lines come back from csv.reader as empty lists; drop them so the
    # column indexing below cannot fail on a trailing empty line.
    raw_data = [row for row in csv_reader if row]

# Split the rows into train and test and turn the SSA vote count into soft labels.
# Each stored entry is [file name, fraction of SSA votes, fraction of HP votes].
raw_train = []
raw_test = []

for elem in raw_data:
    # Any row whose last column is not exactly "train" goes to the test split.
    to_add = raw_train if elem[-1] == "train" else raw_test
    num_ssa = int(elem[2])
    num_hp = NUM_ANNOTATORS - num_ssa
    to_add.append([elem[0], num_ssa / NUM_ANNOTATORS, num_hp / NUM_ANNOTATORS])

del raw_data

def create_train_test(raw):
    """Load the images listed in ``raw`` and pair them with their label values.

    Args:
        raw: Sequence of entries indexed as ``[file_name, label_0, label_1]``.
            ``file_name`` is resolved inside ``mhist_dataset/images`` under the
            current working directory.

    Returns:
        Tuple ``(x, y)`` of float32 numpy arrays. ``x`` has shape
        ``(len(raw), 224, 224, 3)`` and holds the RGB pixel values divided by 255;
        ``y`` has shape ``(len(raw), 2)`` and holds ``label_0`` and ``label_1``
        of each entry, in that order.
    """
    x = np.zeros([len(raw), 224, 224, 3], dtype=np.float32)
    y = np.zeros([len(raw), 2], dtype=np.float32)

    # The image directory is identical for every entry, so resolve it only once.
    image_dir = os.path.join(os.path.abspath(''), "mhist_dataset", "images")

    for i, entry in enumerate(raw):
        file_path = os.path.join(image_dir, entry[0])
        # The context manager releases the file handle as soon as the pixels are
        # copied instead of leaving it open until garbage collection. Converting
        # to RGB keeps the three-channel layout for grayscale/RGBA/palette files.
        with Image.open(file_path) as image:
            x[i] = np.asarray(image.convert("RGB"), dtype=np.float32) / 255.0

        y[i, 0] = entry[1]
        y[i, 1] = entry[2]

    return x, y

# Materialise the image and label arrays for both splits.
train_x, train_y = create_train_test(raw_train)
test_x, test_y = create_train_test(raw_test)

# Number of samples in the test split.
TOTAL_TESTS = test_x.shape[0]

# Training configuration.
BATCH_SIZE = 32
INITIAL_EPOCHS = 10
FINE_TUNING_EPOCHS = 12
TEACHER_LEARNING_RATE = 1e-4
STUDENT_LEARNING_RATE = 1e-3

# Training pipeline: cache, shuffle with a buffer as large as the dataset,
# then batch (an incomplete final batch is dropped).
mhist_train = tf.data.Dataset.from_tensor_slices((train_x, train_y)).cache()
mhist_train = (
    mhist_train
    .shuffle(buffer_size=mhist_train.cardinality())
    .batch(BATCH_SIZE, drop_remainder=True)
)

# Test pipeline: cache and batch only; every sample is kept and order is preserved.
mhist_test = (
    tf.data.Dataset.from_tensor_slices((test_x, test_y))
    .cache()
    .batch(BATCH_SIZE)
)

# The numpy arrays are no longer referenced by name once the datasets exist.
del train_x, train_y, test_x, test_y


2023-11-01 11:25:47.611943: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-01 11:25:47.657160: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-01 11:25:47.657839: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm
2023-11-01 11:25:54.046462: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-01 11:25:54.046768: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please ma

# Model creation

In [2]:
# Teacher: ResNet50V2 backbone initialised with ImageNet weights, followed by a
# flatten -> ReLU -> 2-unit dense head that outputs raw logits.
# NOTE(review): Keras documents that the ResNet50V2 ImageNet weights expect
# inputs passed through its own preprocess_input; the loader in this notebook
# scales pixels to [0, 1] instead - confirm this is intended.
from tensorflow.keras.applications import ResNet50V2
resnet50v2 = ResNet50V2(
    include_top=False,
    weights='imagenet',
    input_tensor=tf.keras.layers.Input(shape=(224, 224, 3)),
)
cnn_model = tf.keras.Sequential([
    resnet50v2,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dense(2),  # no activation (logit output)
])

# Student: MobileNetV2 backbone with the same head. weights=None means the
# backbone starts from random initialisation; nothing is downloaded.
from tensorflow.keras.applications import MobileNetV2
mobnetv2 = MobileNetV2(
    include_top=False,
    weights=None,
    input_tensor=tf.keras.layers.Input(shape=(224, 224, 3)),
)
fc_model = tf.keras.Sequential([
    mobnetv2,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dense(2),  # no activation (logit output)
])


# Teacher loss function

In [3]:
@tf.function
def compute_teacher_loss(images, labels):
  """Compute the teacher's cross-entropy training loss for one batch.

  Args:
    images: Tensor representing a batch of images.
    labels: Tensor representing a batch of labels, one row per image with one
      value per class.

  Returns:
    Scalar loss Tensor: the batch mean of the softmax cross-entropy between
    `labels` and the logits produced by `cnn_model` in training mode.
  """
  teacher_logits = cnn_model(images, training=True)
  per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
      labels=labels, logits=teacher_logits)
  return tf.reduce_mean(per_example_loss)

# Student loss function

In [4]:
#@test {"output": "ignore"}

# Hyperparameters for distillation (need to be tuned).
# NOTE(review): compute_student_loss is a tf.function that reads ALPHA and
# DISTILLATION_TEMPERATURE as plain Python globals; such values are presumably
# fixed when the function is first traced, so re-run its defining cell after
# changing them - confirm.
ALPHA = 0.5 # weight of the distillation loss; the cross-entropy loss is weighted by (1 - ALPHA)
DISTILLATION_TEMPERATURE_ORIGINAL = 4
DISTILLATION_TEMPERATURE = DISTILLATION_TEMPERATURE_ORIGINAL # temperature that divides both teacher and student logits in distillation_loss

@tf.function
def distillation_loss(teacher_logits: tf.Tensor, student_logits: tf.Tensor,
                      temperature: Union[float, tf.Tensor]):
  """Compute the temperature-scaled distillation loss.

  Args:
    teacher_logits: Logits produced by the teacher for a batch.
    student_logits: Logits produced by the student for the same batch.
    temperature: Value both sets of logits are divided by before the softmax.

  Returns:
    Scalar loss Tensor: the batch mean of the softmax cross-entropy between the
    softened teacher probabilities and the softened student logits, multiplied
    by `temperature ** 2`.
  """
  softened_teacher_probs = tf.nn.softmax(teacher_logits / temperature)
  softened_student_logits = student_logits / temperature

  per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
      labels=softened_teacher_probs, logits=softened_student_logits)

  # Multiplying by temperature^2 is the usual distillation rescaling of the soft-target term.
  return tf.reduce_mean(per_example_loss) * temperature ** 2

@tf.function
def compute_student_loss(images, labels):
  """Compute the student's combined distillation and cross-entropy loss.

  Args:
    images: Tensor representing a batch of images.
    labels: Tensor representing a batch of labels, one row per image with one
      value per class.

  Returns:
    Scalar loss Tensor: `ALPHA` times the distillation loss against the
    teacher plus `(1 - ALPHA)` times the cross-entropy against `labels`.
  """
  student_logits = fc_model(images, training=True)

  # Soft-target term: the teacher (cnn_model) only supplies targets here, so
  # it is called in inference mode.
  teacher_logits = cnn_model(images, training=False)
  soft_target_loss = distillation_loss(
      teacher_logits, student_logits, DISTILLATION_TEMPERATURE)

  # Hard-target term: cross-entropy of the student against the dataset labels.
  hard_target_loss = tf.reduce_mean(
      tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=student_logits))

  return ALPHA * soft_target_loss + (1 - ALPHA) * hard_target_loss

# Training and Evaluation

In [5]:
from sklearn.metrics import f1_score

# Module-level accumulators for the F1 score. compute_num_correct appends one
# numpy array of class indices per evaluated batch to each list;
# train_and_evaluate concatenates them for f1_score and clears them per epoch.
predicted_classes = [] # list of numpy array for each batch. concatenated before use
true_classes = [] # same layout as predicted_classes, holding argmax of the labels

def compute_num_correct(model, images, labels):
  """Count the correctly classified images in a batch.

  As a side effect, the predicted and true class indices of the batch are
  appended (as numpy arrays) to the module-level `predicted_classes` and
  `true_classes` lists.

  Args:
    model: Instance of tf.keras.Model.
    images: Tensor representing a batch of images.
    labels: Tensor representing a batch of labels; the true class of each row
      is taken as its argmax.

  Returns:
    Scalar int32 Tensor with the number of correctly classified images.
  """
  logits = model(images, training=False)

  batch_predictions = tf.math.argmax(logits, axis=1)
  batch_truth = tf.math.argmax(labels, axis=1)

  # The lists are only mutated, never rebound, so no `global` statement is needed.
  predicted_classes.append(batch_predictions.numpy())
  true_classes.append(batch_truth.numpy())

  is_correct = tf.equal(batch_predictions, batch_truth)
  return tf.reduce_sum(tf.cast(is_correct, tf.int32))

def train_and_evaluate(model, compute_loss_fn, num_epochs, learning_rate):
  """Perform training and evaluation for a given model.

  Each epoch trains on every batch of `mhist_train` with Adam and then
  evaluates on every batch of `mhist_test`, printing accuracy and F1 score.

  Args:
    model: Instance of tf.keras.Model. Its trainable variables are the ones
      that get updated.
    compute_loss_fn: A function that computes the training loss given the
      images, and labels.
    num_epochs: Number of training epochs to run.
    learning_rate: Learning rate passed to the Adam optimizer.

  Returns:
    Scalar Tensor with the test accuracy (in percent) after the last epoch, or
    None when `num_epochs` is smaller than 1.
  """
  optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

  # Defined up front so the function still returns cleanly when no epoch runs.
  last_accuracy = None

  for epoch in range(1, num_epochs + 1):
    # Run training.
    print('Epoch {}: '.format(epoch), end='')
    for images, labels in mhist_train:
      with tf.GradientTape() as tape:
        loss_value = compute_loss_fn(images, labels)
      grads = tape.gradient(loss_value, model.trainable_variables)
      optimizer.apply_gradients(zip(grads, model.trainable_variables))

    # The F1 accumulators are module-level lists; an earlier run interrupted in
    # the middle of evaluation would leave stale batches behind, so start clean.
    predicted_classes.clear()
    true_classes.clear()

    # Run evaluation.
    num_correct = tf.constant(0, dtype=tf.int32)
    num_total = 0  # counted from the batches actually evaluated
    for images, labels in mhist_test:
      num_correct += compute_num_correct(model, images, labels)
      num_total += int(labels.shape[0])

    last_accuracy = num_correct / num_total * 100
    print("Class_accuracy: " + '{:.2f}%'.format(float(last_accuracy)), end=' ')

    # f1 print out. f1_score's default positive label is 1, i.e. the class
    # whose index is 1 after the argmax taken in compute_num_correct.
    print("f1: ", f1_score(np.concatenate(true_classes), np.concatenate(predicted_classes)))

    predicted_classes.clear() # reset for next epoch or end
    true_classes.clear()
  return last_accuracy


# Training Models

In [6]:
# Train the teacher (cnn_model) with its cross-entropy loss; the returned
# final-epoch accuracy is stored in `drop`.
drop = train_and_evaluate(cnn_model, compute_teacher_loss, INITIAL_EPOCHS, TEACHER_LEARNING_RATE)

Epoch 1: 

2023-11-01 11:25:57.414918: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1309593600 exceeds 10% of free system memory.


Class_accuracy: 68.47%f1:  0.7946666666666666
Epoch 2: Class_accuracy: 74.92%f1:  0.8283111422564821
Epoch 3: Class_accuracy: 79.53%f1:  0.8514115898959881
Epoch 4: Class_accuracy: 79.63%f1:  0.8539985326485693
Epoch 5: Class_accuracy: 81.17%f1:  0.8566978193146417
Epoch 6: Class_accuracy: 79.63%f1:  0.8531365313653136
Epoch 7: Class_accuracy: 81.17%f1:  0.8546603475513428
Epoch 8: Class_accuracy: 83.32%f1:  0.8743253662297609
Epoch 9: Class_accuracy: 82.50%f1:  0.8671328671328672
Epoch 10: Class_accuracy: 81.37%f1:  0.8639760837070253


In [7]:
# Train the student (fc_model) with the combined distillation + cross-entropy
# loss; compute_student_loss queries cnn_model for the soft targets.
drop = train_and_evaluate(fc_model, compute_student_loss, INITIAL_EPOCHS, STUDENT_LEARNING_RATE)

Epoch 1: Class_accuracy: 63.15%f1:  0.7741530740276036
Epoch 2: Class_accuracy: 36.85%f1:  0.0
Epoch 3: Class_accuracy: 63.15%f1:  0.7741530740276036
Epoch 4: 

# Train student from scratch

In [None]:
# Build the baseline student: a clone of fc_model's architecture that is
# trained without any distillation.
fc_model_no_distillation = tf.keras.models.clone_model(fc_model) # checked online. this does not copy the weights. fresh start

# your code start from here for step 7

#@test {"output": "ignore"}
@tf.function
def compute_plain_cross_entropy_loss(images, labels):
  """Compute the plain cross-entropy loss for given images and labels.

  Unlike compute_student_loss, no teacher is involved: the loss is only the
  softmax cross-entropy between `labels` and the logits produced by
  `fc_model_no_distillation` in training mode. The function is wrapped in
  tf.function like the other loss functions in this notebook.

  Args:
    images: Tensor representing a batch of images.
    labels: Tensor representing a batch of labels.

  Returns:
    Scalar loss Tensor.
  """
  student_logits = fc_model_no_distillation(images, training=True)
  per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
      labels=labels, logits=student_logits)
  return tf.reduce_mean(per_example_loss)

# Train the baseline student with the same epoch count and learning rate as
# the distilled student, so the two runs can be compared.
drop = train_and_evaluate(fc_model_no_distillation, compute_plain_cross_entropy_loss, INITIAL_EPOCHS, STUDENT_LEARNING_RATE)

TypeError: train_and_evaluate() missing 2 required positional arguments: 'num_epochs' and 'learning_rate'