In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras
import os
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras import layers
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Conv2D,  MaxPool2D, Flatten, GlobalAveragePooling2D,  BatchNormalization, Layer, Add
from keras.callbacks import EarlyStopping
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import utils
from keras.models import Model

In [8]:
# Fetch the stock ResNet50 with its ImageNet weights and classification head,
# show the architecture, then persist it to disk so later cells can reload it.
model = ResNet50(weights='imagenet', include_top=True)
model.summary()
model.save(filepath='model.h5')

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_2[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                           



# قسمت اول

<div style="direction:rtl">
در این قسمت مدل resnet50 استفاده شده را لود می کنیم و با استفاده از آن، مدل را روی یک دیتاست از نوع cifar10 آموزش می دهیم.

در قسمت زیر لایه ورودی و خروجی تغییر کرده تا متناسب با داده های جدید و نیز لیبل های خروجی 10 تایی دیتاست شود.

همچنین تنها لایه آخر که fully connected است را به صورت trainable قرار داده ایم. بنابراین برای این مدل تنها وزن های لایه آخر آموزش میبیند. این کار باعث کاهش دقت مدل خواهد شد ولی در عین حال سرعت آموزش مدل را هم افزایش خواهد داد.

همانطور که مشخص است دقت مدل به تدریج در حال افزایش است ولی سرعت این افزایش زیاد نیست. 

این مدل در قسمت بعدی به عنوان teacher برای آموزش مدل student (resnet18) مورد استفاده قرار می گیرد.

</div>

In [9]:
# Reload the ImageNet ResNet50 saved to disk; this 224x224 instance only serves
# as the weight donor for the 32x32 rebuild below.
imagenet_resnet50 = tf.keras.models.load_model('model.h5')

# Rebuild the same architecture for CIFAR-10 sized inputs by patching the input
# layer's shape inside the serialized config.
model_config = imagenet_resnet50.get_config()
model_config['layers'][0]['config']['batch_input_shape'] = (None, 32, 32, 3)
model_pretrained = type(imagenet_resnet50).from_config(model_config, custom_objects={})

# from_config() recreates the architecture only -- the new layers start from
# fresh random weights. Copy the ImageNet weights across so the backbone that
# gets frozen below is actually pretrained. None of the weight tensors depend on
# the spatial input size, so the shapes match one-to-one. This must happen
# before `trainable` is changed so both models list their weights in the same
# order.
model_pretrained.set_weights(imagenet_resnet50.get_weights())

# Freeze every pretrained layer; only the new head added below is trained.
model_pretrained.trainable = False

# Replace the 1000-class ImageNet head (the last layer) with a 10-class softmax
# head attached to the output of the layer just before it.
# NOTE(review): inputs are fed as raw pixel values; the ImageNet weights were
# presumably trained with resnet50.preprocess_input applied -- confirm whether
# that preprocessing should be added.
penultimate_features = model_pretrained.layers[-2].output
fc_layer = Dense(10, activation="softmax")(penultimate_features)
model_resnet50_teacher = tf.keras.models.Model(inputs=model_pretrained.input, outputs=fc_layer)

model_resnet50_teacher.summary()



Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 38, 38, 3)    0           ['input_2[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 16, 16, 64)   9472        ['conv1_pad[0][0]']              
                                                                                                  
 conv1_bn (BatchNormalization)  (None, 16, 16, 64)   256         ['conv1_conv[0][0]']             
                                                                                            

In [10]:
# CIFAR-10 is returned as a (train, test) pair of (images, labels) tuples;
# it is downloaded on first use.
cifar10_splits = keras.datasets.cifar10.load_data()
(x_train, y_train), (x_test, y_test) = cifar10_splits

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [11]:
# One-hot encode the integer class labels for categorical cross-entropy.
# The class count is pinned instead of being inferred from the data, and the
# shape guard keeps the cell idempotent: re-running it must not encode labels
# that are already one-hot.
NUM_CLASSES = 10
if y_train.shape[-1] != NUM_CLASSES:
    y_train = utils.to_categorical(y_train, num_classes=NUM_CLASSES)
if y_test.shape[-1] != NUM_CLASSES:
    y_test = utils.to_categorical(y_test, num_classes=NUM_CLASSES)

In [12]:
# Fit the teacher on CIFAR-10 with Adam and categorical cross-entropy.
model_resnet50_teacher.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Stop once the training accuracy has not improved for 8 consecutive epochs.
callback = EarlyStopping(patience=8, monitor='accuracy')

model_resnet50_teacher.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_test, y_test),
    batch_size=2000,
    epochs=50,
    callbacks=[callback],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fdd801346a0>

In [13]:
!pip install image-classifiers==0.2.2
!pip install segmentation_models==1.0.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting image-classifiers==0.2.2
  Downloading image_classifiers-0.2.2-py2.py3-none-any.whl (72 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.0/73.0 KB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: image-classifiers
Successfully installed image-classifiers-0.2.2
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting segmentation_models==1.0.1
  Downloading segmentation_models-1.0.1-py3-none-any.whl (33 kB)
Collecting keras-applications<=1.0.8,>=1.0.7
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.7/50.7 KB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting image-classifiers==1.0.0
  Downloading image_classifiers-1.0.0-py3-none-any.whl (19 kB)
Collecting efficientnet==1.0.0
  Downloading e

# قسمت دوم

<div style="direction:rtl">
در اینجا یک مدل resnet18 آماده لود کرده و ورودی و خروجی آن را تغییر میدهیم تا برای استفاده از دیتاست cifar10 آماده شود.

سپس این مدل را با روش knowledge distillation روی داده های جدید آموزش میدهیم.
منبع مورد استفاده برای پیاده سازی این بخش آدرس زیر است:

https://keras.io/examples/vision/knowledge_distillation/

در اینجا مقادیر alpha و temperature به صورت تجربی با آزمون و خطا با مقادیر زیر بدست آمده است:

alpha = 0.5

temperature = 1

با بررسی اعداد مختلف مشاهده شد که دقت چندان بالا نمی رود و مدل به خوبی آموزش نمیبیند.


</div>

In [14]:
from classification_models.keras import Classifiers

# Look up the ResNet18 constructor (and the preprocessing function that comes
# with it), build it for 32x32 RGB inputs with ImageNet weights, and store it on
# disk for the cells that follow.
ResNet18, preprocess_input = Classifiers.get('resnet18')
model = ResNet18(input_shape=(32, 32, 3), weights='imagenet')
model.save(filepath='model2.h5')

Downloading data from https://github.com/qubvel/classification_models/releases/download/0.0.1/resnet18_imagenet_1000.h5




In [15]:
# Load the saved ResNet18 and attach a 10-class softmax head to the output of
# its second-to-last layer.
model_pretrained = tf.keras.models.load_model('model2.h5')

student_features = model_pretrained.layers[-2].output
fc_layer = Dense(10, activation="softmax")(student_features)
student_with_head = tf.keras.models.Model(inputs=model_pretrained.input, outputs=fc_layer)

# Round-trip through the config with the input shape set to 32x32x3.
# NOTE(review): from_config() rebuilds the architecture with freshly initialised
# weights, so the student does not keep the weights loaded from model2.h5 --
# confirm that a randomly initialised student is what is intended here.
model_config = student_with_head.get_config()
model_config['layers'][0]['config']['batch_input_shape'] = (None, 32, 32, 3)
model_resnet18_student = type(model).from_config(model_config, custom_objects={})

model_resnet18_student.summary()



Model: "model_5"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 data (InputLayer)              [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 bn_data (BatchNormalization)   (None, 32, 32, 3)    9           ['data[0][0]']                   
                                                                                                  
 zero_padding2d (ZeroPadding2D)  (None, 38, 38, 3)   0           ['bn_data[0][0]']                
                                                                                                  
 conv0 (Conv2D)                 (None, 16, 16, 64)   9408        ['zero_padding2d[0][0]']         
                                                                                            

In [16]:
class Distiller(keras.Model):
    """Train a student network against ground-truth labels and a teacher's soft targets.

    Adapted from https://keras.io/examples/vision/knowledge_distillation/.
    The reference implementation assumes both networks emit raw logits. Models
    that end in a softmax are detected at construction time and their outputs
    are mapped back to log-probabilities before temperature scaling, so the
    soft targets are not pushed through softmax twice (which would flatten them
    to a nearly uniform distribution).
    """

    def __init__(self, student, teacher):
        """Store both networks and record whether each one already outputs probabilities."""
        super().__init__()
        self.teacher = teacher
        self.student = student
        # Plain Python flags, resolved once; they select the code path when
        # train_step is traced.
        self._teacher_emits_probs = self._ends_in_softmax(teacher)
        self._student_emits_probs = self._ends_in_softmax(student)

    @staticmethod
    def _ends_in_softmax(model):
        """Return True when the last layer of `model` applies a softmax."""
        model_layers = getattr(model, "layers", None)
        if not model_layers:
            return False
        last_layer = model_layers[-1]
        if isinstance(last_layer, keras.layers.Softmax):
            return True
        activation = getattr(last_layer, "activation", None)
        return getattr(activation, "__name__", None) == "softmax"

    def _soften(self, predictions, are_probabilities):
        """Return the temperature-softened class distribution for `predictions`.

        Args:
            predictions: Model outputs, either logits or probabilities.
            are_probabilities: Python bool; True when `predictions` already went
                through a softmax.
        """
        logits = predictions
        if are_probabilities:
            # log(p) differs from the original logits only by a per-sample
            # constant, which softmax ignores. Clip to avoid log(0).
            logits = tf.math.log(tf.clip_by_value(predictions, 1e-7, 1.0))
        return tf.nn.softmax(logits / self.temperature, axis=1)

    def compile(
        self,
        optimizer,
        metrics,
        student_loss_fn,
        distillation_loss_fn,
        alpha=0.1,
        temperature=3,
    ):
        """ Configure the distiller.

        Args:
            optimizer: Keras optimizer for the student weights
            metrics: Keras metrics for evaluation
            student_loss_fn: Loss function of difference between student
                predictions and ground-truth
            distillation_loss_fn: Loss function of difference between soft
                student predictions and soft teacher predictions
            alpha: weight to student_loss_fn and 1-alpha to distillation_loss_fn
            temperature: Temperature for softening probability distributions.
                Larger temperature gives softer distributions.
        """
        super().compile(optimizer=optimizer, metrics=metrics)
        self.student_loss_fn = student_loss_fn
        self.distillation_loss_fn = distillation_loss_fn
        self.alpha = alpha
        self.temperature = temperature

    def train_step(self, data):
        """Run one optimisation step on the student and return the metric/loss dict."""
        # Unpack data
        x, y = data

        # Forward pass of teacher (outside the tape: the teacher is not updated)
        teacher_predictions = self.teacher(x, training=False)

        with tf.GradientTape() as tape:
            # Forward pass of student
            student_predictions = self.student(x, training=True)

            # Hard-label loss on the student's own predictions
            student_loss = self.student_loss_fn(y, student_predictions)

            # Compute scaled distillation loss from https://arxiv.org/abs/1503.02531
            # The magnitudes of the gradients produced by the soft targets scale
            # as 1/T^2, multiply them by T^2 when using both hard and soft targets.
            distillation_loss = (
                self.distillation_loss_fn(
                    self._soften(teacher_predictions, self._teacher_emits_probs),
                    self._soften(student_predictions, self._student_emits_probs),
                )
                * self.temperature**2
            )

            loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss

        # Compute gradients with respect to the student's weights only
        trainable_vars = self.student.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Update the metrics configured in `compile()`.
        self.compiled_metrics.update_state(y, student_predictions)

        # Return a dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update(
            {"student_loss": student_loss, "distillation_loss": distillation_loss}
        )
        return results

    def test_step(self, data):
        """Evaluate the student on one batch and return the metric/loss dict."""
        # Unpack the data
        x, y = data

        # Compute predictions
        y_prediction = self.student(x, training=False)

        # Calculate the loss
        student_loss = self.student_loss_fn(y, y_prediction)

        # Update the metrics.
        self.compiled_metrics.update_state(y, y_prediction)

        # Return a dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update({"student_loss": student_loss})
        return results

In [31]:
# Initialize and compile distiller
distiller = Distiller(student=model_resnet18_student, teacher=model_resnet50_teacher)
distiller.compile(
    optimizer=keras.optimizers.Adam(),
    # Labels are one-hot and the student outputs class probabilities, so the
    # metric has to compare argmax indices. keras.metrics.Accuracy() checks
    # exact element-wise equality and reports a meaningless value here. The
    # metric is named 'accuracy' so the EarlyStopping monitor below finds it.
    metrics=[keras.metrics.CategoricalAccuracy(name='accuracy')],
    student_loss_fn=keras.losses.CategoricalCrossentropy(),
    distillation_loss_fn=keras.losses.CategoricalCrossentropy(),
    alpha=0.5,
    temperature=1,
)

# Stop once the training accuracy has not improved for 8 consecutive epochs.
callback = keras.callbacks.EarlyStopping(monitor='accuracy', patience=8)
distiller.fit(x_train, y_train, epochs=50, batch_size=2000, validation_data=(x_test, y_test), callbacks=[callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50


<keras.callbacks.History at 0x7fdc48d96eb0>

# قسمت سوم

<div style="direction:rtl">
در این قسمت یک مدل resnet18 را از ابتدا روی cifar10 آموزش می دهیم. همانطور که مشخص است نتایج این قسمت بسیار خوب است و به دقت های بالایی رسیده است.

در این مدل هم تمامی وزن ها قابل آموزش هستند.
</div>

In [21]:
# Load the saved ResNet18 and attach a 10-class softmax head to the output of
# its second-to-last layer.
model_pretrained = tf.keras.models.load_model('model2.h5')

baseline_features = model_pretrained.layers[-2].output
fc_layer = Dense(10, activation="softmax")(baseline_features)
baseline_with_head = tf.keras.models.Model(inputs=model_pretrained.input, outputs=fc_layer)

# Rebuilding from the config (input fixed to 32x32x3) yields the same
# architecture with freshly initialised weights, i.e. this network is trained
# from scratch rather than from the weights stored in model2.h5.
model_config = baseline_with_head.get_config()
model_config['layers'][0]['config']['batch_input_shape'] = (None, 32, 32, 3)
model_resnet18_zero = type(model).from_config(model_config, custom_objects={})

model_resnet18_zero.summary()



Model: "model_7"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 data (InputLayer)              [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 bn_data (BatchNormalization)   (None, 32, 32, 3)    9           ['data[0][0]']                   
                                                                                                  
 zero_padding2d (ZeroPadding2D)  (None, 38, 38, 3)   0           ['bn_data[0][0]']                
                                                                                                  
 conv0 (Conv2D)                 (None, 16, 16, 64)   9408        ['zero_padding2d[0][0]']         
                                                                                            

In [22]:
# Fit the ResNet18 baseline on CIFAR-10 with Adam and categorical cross-entropy.
model_resnet18_zero.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Stop once the training accuracy has not improved for 8 consecutive epochs.
callback = EarlyStopping(patience=8, monitor='accuracy')

model_resnet18_zero.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_test, y_test),
    batch_size=2000,
    epochs=50,
    callbacks=[callback],
)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50


<keras.callbacks.History at 0x7fdd940e7520>

# قسمت چهارم

<div style="direction:rtl">
در این قسمت از همان مدل قسمت اول استفاده میکنیم. اما تمامی وزن های آن قابل آموزش هستند.

در اینجا سرعت یادگیری (میزان افزایش دقت در هر epoch) بسیار بیشتر شده و مدل با همان تعداد epoch قبلی بسیار بهتر آموزش می بیند.

در عین حال سرعت آموزش نسبت به قسمت اول کمتر است چون تعداد بیشتری وزن آموزش می بیند.
</div>

In [23]:
# model completely trainable
# Reload the ImageNet ResNet50 saved to disk; this 224x224 instance only serves
# as the weight donor for the 32x32 rebuild below.
imagenet_resnet50 = tf.keras.models.load_model('model.h5')

# Rebuild the same architecture for CIFAR-10 sized inputs by patching the input
# layer's shape inside the serialized config.
model_config = imagenet_resnet50.get_config()
model_config['layers'][0]['config']['batch_input_shape'] = (None, 32, 32, 3)
model_pretrained = type(imagenet_resnet50).from_config(model_config, custom_objects={})

# from_config() recreates the architecture only -- the new layers start from
# fresh random weights. Copy the ImageNet weights across so fine-tuning really
# starts from the pretrained network. None of the weight tensors depend on the
# spatial input size, so the shapes match one-to-one.
model_pretrained.set_weights(imagenet_resnet50.get_weights())

# Replace the 1000-class ImageNet head (the last layer) with a 10-class softmax
# head attached to the output of the layer just before it. Nothing is frozen:
# every layer stays trainable.
full_features = model_pretrained.layers[-2].output
fc_layer = Dense(10, activation="softmax")(full_features)
model_resnet50_full = tf.keras.models.Model(inputs=model_pretrained.input, outputs=fc_layer)

model_resnet50_full.summary()



Model: "model_8"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 38, 38, 3)    0           ['input_2[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 16, 16, 64)   9472        ['conv1_pad[0][0]']              
                                                                                                  
 conv1_bn (BatchNormalization)  (None, 16, 16, 64)   256         ['conv1_conv[0][0]']             
                                                                                            

In [24]:
# Fit the fully trainable ResNet50 on CIFAR-10 with Adam and categorical
# cross-entropy.
model_resnet50_full.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Stop once the training accuracy has not improved for 8 consecutive epochs.
callback = EarlyStopping(patience=8, monitor='accuracy')

model_resnet50_full.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_test, y_test),
    batch_size=2000,
    epochs=50,
    callbacks=[callback],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fdc93eeae80>