In [None]:
import time
import numpy as np
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy, KLDivergence
from sklearn.metrics import classification_report, accuracy_score

# Load the MNIST train/test split and divide pixel values by 255 so the
# network inputs are floats rather than raw integer intensities.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train / 255.0
X_test = X_test / 255.0

# Teacher network: flatten each 28x28 image, then Dense 256 -> 128 -> 10,
# ending in a softmax so the model outputs class probabilities.
teacher_model = Sequential()
teacher_model.add(Flatten(input_shape=(28, 28)))
teacher_model.add(Dense(256, activation='relu'))
teacher_model.add(Dense(128, activation='relu'))
teacher_model.add(Dense(10, activation='softmax'))

# Integer class labels are used directly, hence the sparse cross-entropy loss.
teacher_model.compile(
    optimizer=Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

teacher_model.fit(X_train, y_train, batch_size=64, epochs=10, verbose=1)

# Distillation hyper-parameters. They are defined once so the teacher's soft
# targets and the student's softened predictions always use the same temperature.
DISTILL_ALPHA = 0.3         # weight of the hard-label cross-entropy term
DISTILL_TEMPERATURE = 3.0   # softmax temperature for teacher and student
PROB_FLOOR = 1e-7           # lower clip so log() stays finite for zero probabilities


def _soften_probabilities(probs, temperature):
    """Re-temper a batch of softmax probabilities.

    Both models end in a softmax layer, so only probabilities are available.
    log(p) equals the underlying logits up to a per-row constant and softmax is
    invariant to such a shift, so softmax(log(p) / T) is the temperature-T
    softmax of the logits. Dividing the probabilities themselves by T (as a
    plain softmax(p / T) would) yields an almost uniform distribution instead.

    Args:
        probs: array or tensor of class probabilities, classes on the last axis.
        temperature: positive scalar; larger values give softer distributions.

    Returns:
        float32 tensor with the same shape as `probs`.
    """
    probs = tf.cast(probs, tf.float32)
    log_probs = tf.math.log(tf.clip_by_value(probs, PROB_FLOOR, 1.0))
    return tf.nn.softmax(log_probs / temperature, axis=-1)


# One softened teacher distribution per training image, float32.
soft_targets = _soften_probabilities(teacher_model.predict(X_train), DISTILL_TEMPERATURE)

# Pack [hard label | teacher soft targets] into a single target array. Keras
# slices x and y together when it batches (and shuffles), so every batch of
# images arrives in the loss with the soft targets of exactly those images.
# Indexing the global `soft_targets` with range(batch_size) would instead
# always read the rows of the first few training samples.
y_train_distill = np.concatenate(
    [y_train.astype(np.float32)[:, np.newaxis], soft_targets.numpy()],
    axis=1,
)


def distillation_loss(y_true, y_pred):
    """Weighted sum of hard-label cross-entropy and teacher/student KL divergence.

    Args:
        y_true: packed targets with one more column than `y_pred`; column 0
            holds the integer class label (stored as float) and the remaining
            columns hold the teacher's softened class probabilities for the
            same sample (see `y_train_distill`).
        y_pred: student softmax probabilities, classes on the last axis.

    Returns:
        Scalar tensor:
        alpha * CE(label, y_pred) + (1 - alpha) * KL(teacher_soft || student_soft).

    Raises:
        ValueError: if the static widths show `y_true` is not in packed form.
    """
    alpha = DISTILL_ALPHA
    temperature = DISTILL_TEMPERATURE

    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)

    # Fail loudly when plain labels are passed: slicing them as if they were
    # packed would silently produce an empty soft-target tensor.
    num_classes = y_pred.shape[-1]
    packed_width = y_true.shape[-1]
    if (num_classes is not None and packed_width is not None
            and packed_width != num_classes + 1):
        raise ValueError(
            "distillation_loss expects y_true packed as [label | soft targets] "
            f"with {num_classes + 1} columns, got {packed_width}"
        )

    hard_labels = tf.cast(y_true[:, 0], tf.int32)
    batch_soft_targets = y_true[:, 1:]

    student_soft = _soften_probabilities(y_pred, temperature)

    hard_loss = SparseCategoricalCrossentropy()(hard_labels, y_pred)
    soft_loss = KLDivergence()(batch_soft_targets, student_soft)
    return alpha * hard_loss + (1 - alpha) * soft_loss


def hard_label_accuracy(y_true, y_pred):
    """Per-sample accuracy against the label stored in column 0 of packed `y_true`.

    The built-in 'accuracy' string metric cannot be used with packed targets,
    because it would interpret the whole packed row as the target.
    """
    hard_labels = tf.cast(y_true[:, 0], tf.int64)
    predicted = tf.argmax(y_pred, axis=-1)
    return tf.cast(tf.equal(hard_labels, predicted), tf.float32)


# Student network: same layout as the teacher with half-width hidden layers.
student_model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')
])
student_model.compile(optimizer=Adam(), loss=distillation_loss, metrics=[hard_label_accuracy])

start_time = time.time()
student_model.fit(X_train, y_train_distill, epochs=10, batch_size=64, verbose=1)
student_training_time = time.time() - start_time

def evaluate_model(model, X_test, y_test, name):
    """Score `model` on a test set, print a summary, and return the elapsed time.

    Args:
        model: object whose `predict(X_test)` returns per-class scores with
            classes on the last axis.
        X_test: test inputs passed straight to `model.predict`.
        y_test: true class labels compared against the arg-max predictions.
        name: label used as the prefix of every printed line.

    Returns:
        Wall-clock seconds spent in `predict` plus the arg-max step.
    """
    # Time only prediction and arg-max, not the metric computation below.
    tic = time.time()
    class_scores = model.predict(X_test)
    predicted_labels = np.argmax(class_scores, axis=-1)
    elapsed = time.time() - tic

    accuracy = accuracy_score(y_test, predicted_labels)

    print(f"\n{name} Accuracy: {accuracy:.4f}")
    print(f"{name} Inference Time: {elapsed:.2f} s")
    report = classification_report(y_test, predicted_labels)
    print(f"{name} Classification Report:\n{report}")
    return elapsed

# Evaluate both models on the same held-out test set and keep their timings.
teacher_inference_time = evaluate_model(
    model=teacher_model, X_test=X_test, y_test=y_test, name="Teacher Model"
)
student_inference_time = evaluate_model(
    model=student_model, X_test=X_test, y_test=y_test, name="Student Model"
)

In [None]:
import time
import numpy as np
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy, KLDivergence
from sklearn.metrics import classification_report, accuracy_score

# Load the MNIST train/test split and divide pixel values by 255 so the
# network inputs are floats rather than raw integer intensities.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train / 255.0
X_test = X_test / 255.0

# Teacher network: flatten each 28x28 image, then Dense 256 -> 128 -> 10,
# ending in a softmax so the model outputs class probabilities.
teacher_model = Sequential()
teacher_model.add(Flatten(input_shape=(28, 28)))
teacher_model.add(Dense(256, activation='relu'))
teacher_model.add(Dense(128, activation='relu'))
teacher_model.add(Dense(10, activation='softmax'))

# Integer class labels are used directly, hence the sparse cross-entropy loss.
teacher_model.compile(
    optimizer=Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

teacher_model.fit(X_train, y_train, batch_size=64, epochs=10, verbose=1)

# Distillation hyper-parameters. They are defined once so the teacher's soft
# targets and the student's softened predictions always use the same temperature.
DISTILL_ALPHA = 0.3         # weight of the hard-label cross-entropy term
DISTILL_TEMPERATURE = 3.0   # softmax temperature for teacher and student
PROB_FLOOR = 1e-7           # lower clip so log() stays finite for zero probabilities


def _soften_probabilities(probs, temperature):
    """Re-temper a batch of softmax probabilities.

    Both models end in a softmax layer, so only probabilities are available.
    log(p) equals the underlying logits up to a per-row constant and softmax is
    invariant to such a shift, so softmax(log(p) / T) is the temperature-T
    softmax of the logits. Dividing the probabilities themselves by T (as a
    plain softmax(p / T) would) yields an almost uniform distribution instead.

    Args:
        probs: array or tensor of class probabilities, classes on the last axis.
        temperature: positive scalar; larger values give softer distributions.

    Returns:
        float32 tensor with the same shape as `probs`.
    """
    probs = tf.cast(probs, tf.float32)
    log_probs = tf.math.log(tf.clip_by_value(probs, PROB_FLOOR, 1.0))
    return tf.nn.softmax(log_probs / temperature, axis=-1)


# One softened teacher distribution per training image, float32.
soft_targets = _soften_probabilities(teacher_model.predict(X_train), DISTILL_TEMPERATURE)

# Pack [hard label | teacher soft targets] into a single target array. Keras
# slices x and y together when it batches (and shuffles), so every batch of
# images arrives in the loss with the soft targets of exactly those images.
# Indexing the global `soft_targets` with range(batch_size) would instead
# always read the rows of the first few training samples.
y_train_distill = np.concatenate(
    [y_train.astype(np.float32)[:, np.newaxis], soft_targets.numpy()],
    axis=1,
)


def distillation_loss(y_true, y_pred):
    """Weighted sum of hard-label cross-entropy and teacher/student KL divergence.

    Args:
        y_true: packed targets with one more column than `y_pred`; column 0
            holds the integer class label (stored as float) and the remaining
            columns hold the teacher's softened class probabilities for the
            same sample (see `y_train_distill`).
        y_pred: student softmax probabilities, classes on the last axis.

    Returns:
        Scalar tensor:
        alpha * CE(label, y_pred) + (1 - alpha) * KL(teacher_soft || student_soft).

    Raises:
        ValueError: if the static widths show `y_true` is not in packed form.
    """
    alpha = DISTILL_ALPHA
    temperature = DISTILL_TEMPERATURE

    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)

    # Fail loudly when plain labels are passed: slicing them as if they were
    # packed would silently produce an empty soft-target tensor.
    num_classes = y_pred.shape[-1]
    packed_width = y_true.shape[-1]
    if (num_classes is not None and packed_width is not None
            and packed_width != num_classes + 1):
        raise ValueError(
            "distillation_loss expects y_true packed as [label | soft targets] "
            f"with {num_classes + 1} columns, got {packed_width}"
        )

    hard_labels = tf.cast(y_true[:, 0], tf.int32)
    batch_soft_targets = y_true[:, 1:]

    student_soft = _soften_probabilities(y_pred, temperature)

    hard_loss = SparseCategoricalCrossentropy()(hard_labels, y_pred)
    soft_loss = KLDivergence()(batch_soft_targets, student_soft)
    return alpha * hard_loss + (1 - alpha) * soft_loss


def hard_label_accuracy(y_true, y_pred):
    """Per-sample accuracy against the label stored in column 0 of packed `y_true`.

    The built-in 'accuracy' string metric cannot be used with packed targets,
    because it would interpret the whole packed row as the target.
    """
    hard_labels = tf.cast(y_true[:, 0], tf.int64)
    predicted = tf.argmax(y_pred, axis=-1)
    return tf.cast(tf.equal(hard_labels, predicted), tf.float32)


# Student network: same layout as the teacher with half-width hidden layers.
student_model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')
])
student_model.compile(optimizer=Adam(), loss=distillation_loss, metrics=[hard_label_accuracy])

start_time = time.time()
student_model.fit(X_train, y_train_distill, epochs=10, batch_size=64, verbose=1)
student_training_time = time.time() - start_time

print(f"Student Training Time: {student_training_time:.2f} s")

# Convert the trained student to a TensorFlow Lite flatbuffer. Only the
# converter's DEFAULT optimization flag is set; no representative dataset
# is supplied.
converter = tf.lite.TFLiteConverter.from_keras_model(student_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quantized_model = converter.convert()

# Persist the serialized model next to the notebook.
with open("student_model_quantized.tflite", mode="wb") as tflite_file:
    tflite_file.write(tflite_quantized_model)

print("Quantized Student Model converted to TensorFlow Lite and saved as student_model_quantized.tflite")

def evaluate_model(model, X_test, y_test, name):
    """Score `model` on a test set, print a summary, and return the elapsed time.

    Args:
        model: object whose `predict(X_test)` returns per-class scores with
            classes on the last axis.
        X_test: test inputs passed straight to `model.predict`.
        y_test: true class labels compared against the arg-max predictions.
        name: label used as the prefix of every printed line.

    Returns:
        Wall-clock seconds spent in `predict` plus the arg-max step.
    """
    # Time only prediction and arg-max, not the metric computation below.
    tic = time.time()
    class_scores = model.predict(X_test)
    predicted_labels = np.argmax(class_scores, axis=-1)
    elapsed = time.time() - tic

    accuracy = accuracy_score(y_test, predicted_labels)

    print(f"\n{name} Accuracy: {accuracy:.4f}")
    print(f"{name} Inference Time: {elapsed:.2f} s")
    report = classification_report(y_test, predicted_labels)
    print(f"{name} Classification Report:\n{report}")
    return elapsed

# Evaluate both models on the same held-out test set and keep their timings.
teacher_inference_time = evaluate_model(
    model=teacher_model, X_test=X_test, y_test=y_test, name="Teacher Model"
)
student_inference_time = evaluate_model(
    model=student_model, X_test=X_test, y_test=y_test, name="Student Model"
)