<a href="https://colab.research.google.com/github/dogukartal/IBM_AI_Labs/blob/main/Deep%20Learning%20with%20Keras%20and%20Tensorflow/Advanced%20Keras%20Techniques/Model_Optimization_Techniques.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Model Optimization
---

## Weight Initialization

In [None]:
from tensorflow.keras.initializers import HeNormal
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.models import Sequential

# Baseline classifier for 28x28 inputs: flatten, one ReLU hidden layer, then a
# 10-way softmax output.
model = Sequential([
    # Declare the input with an explicit Input object; passing `input_shape=`
    # to the first layer is the form Keras 3 warns against for Sequential models.
    Input(shape=(28, 28)),
    Flatten(),
    # He-normal weight initialization on the ReLU hidden layer.
    Dense(128, activation='relu', kernel_initializer=HeNormal()),
    Dense(10, activation='softmax'),
])

## Learning Rate Scheduling


In [None]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import LearningRateScheduler
import tensorflow as tf
from tensorflow.keras.optimizers import Adam

def scheduler(epoch, lr):
    """Return the learning rate to use for the given epoch.

    The rate is passed through unchanged while ``epoch < 10``; from epoch 10
    onward every call multiplies the incoming rate by ``exp(-0.1)``.

    Args:
        epoch: Epoch index supplied by the caller.
        lr: Learning rate currently in use.

    Returns:
        ``lr`` itself for the first ten epochs, otherwise the decayed rate as
        a plain Python ``float``.
    """
    # Function-scope import keeps this notebook cell self-contained.
    import math

    if epoch < 10:
        return lr
    # The decay factor is a scalar constant, so plain-Python math is enough;
    # no TensorFlow tensor has to be built and converted back on every epoch.
    return float(lr * math.exp(-0.1))

# Load MNIST. The second split returned by load_data() serves as the
# validation set for this notebook.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
y_val = y_test

# Convert the images to float32 and scale them by 1/255.
x_train = X_train.astype('float32') / 255
x_val = X_test.astype('float32') / 255

# Make the (samples, 28, 28) layout expected by the model explicit.
x_train = x_train.reshape(-1, 28, 28)
x_val = x_val.reshape(-1, 28, 28)

# categorical_crossentropy needs one-hot targets. The integer labels in
# y_train / y_val are deliberately left untouched because a later cell trains
# with sparse_categorical_crossentropy on them.
y_train_onehot = to_categorical(y_train, num_classes=10)
y_val_onehot = to_categorical(y_val, num_classes=10)

# Wrap the schedule function in the Keras callback that applies it each epoch.
lr_scheduler = LearningRateScheduler(scheduler)

# compile() needs an optimizer *instance* (or its string name), not the class.
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(
    x_train,
    y_train_onehot,
    validation_data=(x_val, y_val_onehot),
    epochs=20,
    # Pass the callback object, not the bare `scheduler` function.
    callbacks=[lr_scheduler],
)

## Batch Normalization


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization

# This cell's import list does not include Input, so bring it in here.
from tensorflow.keras.layers import Input

# Classifier with batch normalization applied to the flattened input and again
# to the hidden-layer activations.
model = Sequential([
    # Explicit Input object instead of `input_shape=` on the first layer, which
    # Keras 3 warns against for Sequential models.
    Input(shape=(28, 28)),
    Flatten(),
    BatchNormalization(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dense(10, activation='softmax'),
])

## Mixed Precision Training

In [None]:
from tensorflow.keras import mixed_precision

# Create the 'mixed_float16' policy object and install it as the global Keras
# mixed-precision policy.
# NOTE(review): presumably only layers created after this call pick up the
# policy, so models built earlier in the notebook keep their dtype — confirm.
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

## Model Pruning

In [None]:
import tensorflow_model_optimization as tfmot

# Short alias for the pruning wrapper provided by tensorflow_model_optimization.
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# Keyword arguments for the wrapper: a polynomial-decay sparsity schedule that
# goes from 50% to 80% sparsity between training steps 2000 and 4000.
pruning_params = dict(
    pruning_schedule=tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.50,
        final_sparsity=0.80,
        begin_step=2000,
        end_step=4000,
    ),
)

# Wrap the current `model` so it can be trained with the schedule above.
model_for_pruning = prune_low_magnitude(model, **pruning_params)

## Quantization

In [None]:
# Convert the current Keras `model` to TensorFlow Lite with the converter's
# default optimization set enabled.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Persist the converted model bytes to disk.
with open("quantized_model.tflite", mode="wb") as tflite_file:
    tflite_file.write(tflite_model)

## Knowledge Distillation

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
import numpy as np

def distillation_loss(teacher_logits, student_logits, temperature):
    """Return the mean cross-entropy between temperature-softened outputs.

    Both inputs are divided by ``temperature`` and passed through a softmax.
    The teacher distribution is used as the target and the student
    distribution as the prediction of a categorical cross-entropy, which is
    then averaged into a single scalar.

    Args:
        teacher_logits: Teacher model outputs for a batch.
        student_logits: Student model outputs for the same batch.
        temperature: Divisor applied to both outputs before the softmax.

    Returns:
        Scalar tensor holding the averaged cross-entropy.
    """
    soft_targets = tf.nn.softmax(teacher_logits / temperature)
    soft_predictions = tf.nn.softmax(student_logits / temperature)
    per_example_loss = tf.keras.losses.categorical_crossentropy(
        soft_targets, soft_predictions
    )
    return tf.reduce_mean(per_example_loss)

def train_student(student_model, teacher_model, x_train, y_train, x_val, y_val,
                  batch_size=32, epochs=10, temperature=3, optimizer=None):
    """Train ``student_model`` to match ``teacher_model`` with a distillation loss.

    For every full batch of ``x_train`` the teacher and student outputs are
    compared with ``distillation_loss`` and one gradient step is applied to the
    student's trainable variables. Samples left over after the last full batch
    (``len(x_train) % batch_size``) are not used.

    NOTE: ``distillation_loss`` applies a softmax to both model outputs, so the
    models are expected to return raw logits. A model whose last layer already
    applies softmax will have its outputs normalised twice.

    Args:
        student_model: Model whose trainable variables are updated.
        teacher_model: Model providing the target outputs; it is only called
            for inference and never updated.
        x_train: Training inputs; must support ``len()`` and slicing.
        y_train: Unused; kept so existing callers keep working.
        x_val: Unused; kept so existing callers keep working.
        y_val: Unused; kept so existing callers keep working.
        batch_size: Number of samples per gradient step.
        epochs: Number of passes over ``x_train``.
        temperature: Softening temperature forwarded to ``distillation_loss``.
        optimizer: Optimizer used for the updates. When ``None``, the
            optimizer attached to ``student_model`` is used if there is one,
            otherwise a new ``Adam`` optimizer with default settings is
            created.

    Returns:
        None. ``student_model`` is updated in place.
    """
    # The optimizer used to be read from an undefined global name; resolve it
    # explicitly instead.
    if optimizer is None:
        optimizer = getattr(student_model, 'optimizer', None)
    if optimizer is None:
        optimizer = optimizers.Adam()

    num_batches = len(x_train) // batch_size
    for epoch in range(epochs):
        for batch_idx in range(num_batches):
            start = batch_idx * batch_size
            x_batch = x_train[start:start + batch_size]

            # The teacher is evaluated outside the tape, so no gradients are
            # recorded for it.
            teacher_logits = teacher_model(x_batch)

            with tf.GradientTape() as tape:
                student_logits = student_model(x_batch)
                loss = distillation_loss(teacher_logits, student_logits, temperature)

            gradients = tape.gradient(loss, student_model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, student_model.trainable_variables))

            # Progress report every 100 batches; this must live inside the
            # batch loop so `batch_idx` and `loss` refer to the current step.
            if batch_idx % 100 == 0:
                print(f"Epoch {epoch + 1}, Batch {batch_idx + 1}/{num_batches}, Loss: {loss.numpy()}")

# Teacher network: flatten 28x28 inputs, one 128-unit ReLU hidden layer, and a
# 10-way softmax output.
teacher_model = models.Sequential()
teacher_model.add(layers.Input(shape=(28, 28)))
teacher_model.add(layers.Flatten())
teacher_model.add(layers.Dense(128, activation='relu'))
teacher_model.add(layers.Dense(10, activation='softmax'))

# Train the teacher on integer class labels (sparse cross-entropy).
teacher_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
teacher_model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=5)

# Assume the teacher model is already trained

# Student network: same layout as the teacher but with a smaller 64-unit
# hidden layer.
student_model = models.Sequential()
student_model.add(layers.Input(shape=(28, 28)))
student_model.add(layers.Flatten())
student_model.add(layers.Dense(64, activation='relu'))
student_model.add(layers.Dense(10, activation='softmax'))

# Compile with an explicit Adam instance and the sparse cross-entropy loss.
student_model.compile(
    optimizer=optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)