In [1]:
import configparser
import logging
import sys
from pathlib import Path

from tensorflow import keras

import shunt_connector

# Tunable knowledge-distillation parameters, read by the training cell below.
# temperature: forwarded as `temperature=` to
#   create_classification_distillation_model().
# distillation_strength: forwarded to custom_loss_metric.create_negative_sum_loss()
#   for every model output whose name contains 'd_k'.
temperature = 3.0
distillation_strength = 10.0


In [2]:
# Load the experiment configuration and build the connector from it.
# NOTE: the path is relative, so the notebook must be started from the
# directory that contains the `config/` folder.
config_path = Path("config", "dark_knowledge.cfg")
config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; an empty list means the config was not loaded.
if not config.read(config_path):
    raise FileNotFoundError(
        "Could not read configuration file: {}".format(config_path.resolve()))

connector = shunt_connector.ShuntConnector(config)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


In [3]:
# Build and evaluate everything the distillation training needs.
# The steps run strictly in the listed order; each name is a method on `connector`.
PIPELINE_STEPS = (
    "create_dataset",
    "create_original_model",
    "test_original_model",
    "create_shunt_model",
    "test_shunt_model",
    "create_final_model",
)

for step_name in PIPELINE_STEPS:
    getattr(connector, step_name)()


Create dataset
CIFAR10 was loaded successfully!

Create original model
MobileNetV3Small created successfully!
{'conv2d': 5137856.0, 'depthwise_conv2d': 1450240.0, 'total': 6588096.0}

Test original model
loss: 2.40339
categorical_crossentropy: 2.30259
categorical_accuracy: 0.10000

Create shunt model
Used dilation rates: [1, 1]

Test shunt model
Loss: 0.11851

Create final model


In [4]:
from shunt_connector.utils import create_distillation_trainings_model
from shunt_connector.utils import custom_callbacks
from shunt_connector.utils import custom_loss_metric


def _build_learning_rate_callback(train_params):
    """Return the learning-rate callback selected by ``train_params['learning_policy']``.

    Args:
        train_params: mapping with the final-model training settings. Only the
            keys needed by the selected policy are read.

    Returns:
        A callback instance for the 'two_cycles', 'plateau' or 'poly' policy.

    Raises:
        ValueError: if the configured policy is none of the three above.
    """
    policy = train_params['learning_policy']
    if policy == 'two_cycles':
        return custom_callbacks.LearningRateSchedulerCallback(
            epochs_first_cycle=train_params['epochs_first_cycle'],
            learning_rate_second_cycle=train_params['learning_rate_second_cycle'])
    if policy == 'plateau':
        return keras.callbacks.ReduceLROnPlateau(monitor='loss',
                                                 factor=train_params['factor'],
                                                 patience=train_params['patience'],
                                                 verbose=1,
                                                 mode='auto',
                                                 min_lr=1e-8)
    if policy == 'poly':
        return custom_callbacks.PolyLearningRateCallback(train_params['power'],
                                                         train_params['max_epochs'],
                                                         verbose=1)
    # Fail here with a clear message instead of a NameError when the callback
    # list is assembled further down.
    raise ValueError(
        "Unknown learning_policy {!r}; expected 'two_cycles', 'plateau' or 'poly'".format(policy))


train_params = connector.train_final_params
batch_size = train_params['batch_size']

# Build both artefact paths once; plain strings are passed to Keras consistently.
weights_path = str(Path(connector.folder_name_logging, "final_model_weights.h5"))
model_path = str(Path(connector.folder_name_logging, "final_model.h5"))

# learning rate strategy
callback_learning_rate = _build_learning_rate_callback(train_params)

# freezing strategy ('nothing' and any other value leave all layers untouched)
if train_params['freezing'] == 'freeze_before_shunt':
    first_shunt_location = connector.shunt_params['locations'][0]
    for i, layer in enumerate(connector.final_model.layers):
        if i < first_shunt_location:  # TODO: TEST THIS!!
            layer.trainable = False

with connector.activate_distribution_scope():
    # Distillation training model built from the final model and the original
    # model, using the notebook-level `temperature`.
    model_final_dist = create_distillation_trainings_model.create_classification_distillation_model(
        connector.final_model,
        connector.original_model,
        add_dark_knowledge=True,
        temperature=temperature)

    loss_dict = {'Student': 'categorical_crossentropy'}
    metric_dict = {'Student': ['accuracy']}
    callback_checkpoint = custom_callbacks.SaveNestedModelCallback('val_Student_accuracy',
                                                                   weights_path,
                                                                   'Student')

    # Every output whose name contains 'd_k' gets the negative-sum loss.
    # NOTE(review): 'd_k' presumably marks the dark-knowledge outputs - confirm
    # against create_classification_distillation_model.
    for output in model_final_dist.output:
        output_name = output.name.split('/')[0]  # cut off unimportant part
        if 'd_k' in output_name:
            loss_dict[output_name] = custom_loss_metric.create_negative_sum_loss(distillation_strength)

callbacks = [callback_checkpoint, callback_learning_rate]

with connector.distribute_strategy.scope():
    # `learning_rate` replaces the deprecated `lr` alias; the former `decay=0.0`
    # was a no-op default that newer Keras optimizers reject, so it is dropped.
    model_final_dist.compile(loss=loss_dict,
                             optimizer=keras.optimizers.SGD(learning_rate=train_params['base_learning_rate'],
                                                            momentum=0.9,
                                                            nesterov=False),
                             metrics=metric_dict)

history_final = model_final_dist.fit(connector.dataset_train.batch(batch_size),
                                     epochs=train_params['max_epochs'],
                                     steps_per_epoch=connector.dataset_props['len_train_data'] // batch_size,
                                     validation_data=connector.dataset_val.batch(batch_size),
                                     validation_steps=connector.dataset_props['len_val_data'] // batch_size,
                                     verbose=1,
                                     callbacks=callbacks)

# Load the weights file written by the checkpoint callback, then save the
# full final model next to it.
connector.final_model.load_weights(weights_path)

keras.models.save_model(connector.final_model, model_path)
logging.info('')
logging.info('Final model saved to {}'.format(connector.folder_name_logging))


AttributeError: module 'shunt_connector.utils' has no attribute 'create_distillation_trainings_model'

In [5]:
# Evaluate the final model and print the connector's summary.
# NOTE(review): this only reflects distillation training if the training cell
# above ran to completion; otherwise the final model is presumably evaluated
# with whatever weights it held after create_final_model() - confirm.
connector.test_final_model()
connector.print_summary()


Test final model
loss: 2.40509
categorical_crossentropy: 2.29756
categorical_accuracy: 0.10000
