In [1]:
!pip install matplotlib image_classifiers tqdm
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.applications import ResNet50V2
from keras.datasets import cifar100
from keras import Sequential, Input
from keras.layers import Dense, UpSampling2D, Dropout, RandomFlip, RandomTranslation, RandomRotation,RandomBrightness, RandomContrast, RandomZoom, GlobalAveragePooling2D
from keras.callbacks import EarlyStopping, CSVLogger, ModelCheckpoint
from keras.applications.resnet_v2 import preprocess_input
from keras.models import Model
from classification_models.keras import Classifiers
from keras.optimizers import Adam
from keras.activations import linear
from tqdm.notebook import tqdm
import os



2024-04-12 16:23:36.293209: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Training hyper-parameters and dataset dimensions shared by the cells below.
n_epoch, batch_size = 100, 64
taux_validation = 0.1  # fraction of the training images held out for validation
num_classes = 100
n_images = 50_000  # used for training; 10000 more for the test split

In [3]:
!mc cp s3/afeldmann/projet_cnam/modele_enseignant.keras /home/onyxia/work/projet_distillation_cnam/sauvegardes/modele_enseignant.keras
model_enseignant = Sequential([
    Input((224,224,3)),
    ResNet50V2(include_top=False, weights='imagenet', pooling="avg"),
    Dropout(0.25),
    Dense(256, activation="sigmoid", kernel_regularizer = tf.keras.regularizers.L1(0.001)),
    Dropout(0.5),
    Dense(num_classes, activation="softmax", kernel_regularizer = tf.keras.regularizers.L2(0.001))
])
# Keras 3.0 est buggé et le chargement direct ne marche pas ici, même si les poids sont bien enregistrés
model_enseignant.load_weights("/home/onyxia/work/projet_distillation_cnam/sauvegardes/modele_enseignant.keras")

model_enseignant.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

...nant.keras: 135.73 MiB / 135.73 MiB ┃▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓┃ 103.24 MiB/s 1s[0;22m[0m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m[m[32;1m

2024-04-12 16:23:45.460510: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1928] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13775 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:3b:00.0, compute capability: 7.5


In [4]:
def preprocessing(image, label):
    """Resize one image to 224x224 and one-hot encode its label.

    The label is expanded to ``num_classes`` entries with ``tf.one_hot`` and
    the leading axis of the result is squeezed away.
    """
    image_redimensionnee = tf.image.resize(image, (224, 224))
    encodage = tf.one_hot(label, depth=num_classes)
    return image_redimensionnee, tf.squeeze(encodage, axis=0)

# Random augmentation layers, chained into a single Sequential model.
couches_augmentation = [
    RandomFlip(mode="horizontal"),
    RandomTranslation(height_factor=0.2, width_factor=0.2),
    RandomRotation(factor=0.2),
    RandomZoom(height_factor=0.2),
    RandomContrast(factor=0.2),
    RandomBrightness(factor=0.2, value_range=(0, 1)),
]
augmentation_donnees_keras = Sequential(couches_augmentation)

def augmentation_donnees(image, label):
    """Apply the random augmentations to ``image``; ``label`` is returned unchanged.

    Pixels are divided by 255 before going through
    ``augmentation_donnees_keras`` (called with ``training=True``) and
    multiplied by 255 afterwards.
    """
    image_unitaire = image / 255.0
    image_augmentee = augmentation_donnees_keras(image_unitaire, training=True)
    return image_augmentee * 255.0, label

def preprocess_resnet(image, label):
    """Run ``preprocess_input`` on the image and pass the label through."""
    image_pretraitee = preprocess_input(image)
    return image_pretraitee, label

def train_val_split(train_dataset, validation_size, seed=None):
    """Randomly split ``(X, y)`` arrays into a training and a validation part.

    Args:
        train_dataset: Tuple ``(X_train, y_train)`` of arrays indexed along
            their first axis.
        validation_size: Number of samples placed in the validation part.
        seed: Optional seed for a reproducible permutation. With ``None``
            the global NumPy random state is used.

    Returns:
        ``((X_train, y_train), (X_val, y_val))`` where the validation part
        holds ``validation_size`` samples and the training part the rest.

    Raises:
        ValueError: If ``validation_size`` is negative or larger than the
            number of samples.
    """
    X_train, y_train = train_dataset
    n_samples = X_train.shape[0]
    if not 0 <= validation_size <= n_samples:
        raise ValueError(
            f"validation_size must be between 0 and {n_samples}, got {validation_size}"
        )
    # Derive the cut from the arguments rather than from a module-level
    # variable, so the function honours its own `validation_size` parameter.
    n_train = n_samples - validation_size
    if seed is None:
        indices = np.random.permutation(n_samples)
    else:
        indices = np.random.default_rng(seed).permutation(n_samples)
    train_idx, val_idx = indices[:n_train], indices[n_train:]
    return (X_train[train_idx, ...], y_train[train_idx, ...]), (X_train[val_idx, ...], y_train[val_idx, ...])

# Load CIFAR-100 as (images, labels) tuples for the train and test splits.
train_dataset, test_dataset = cifar100.load_data()

validation_size = int(n_images * taux_validation)
train_size = n_images - validation_size

train_dataset, validation_dataset = train_val_split(train_dataset, validation_size)

# Validation: fixed order, cached after preprocessing because it is replayed
# unchanged at the end of every epoch.
validation_dataset = (
    tf.data.Dataset.from_tensor_slices(validation_dataset)
    .map(preprocessing, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .map(preprocess_resnet, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .prefetch(tf.data.AUTOTUNE)
)

# Training: shuffle BEFORE resizing so that the shuffle buffer holds the
# un-resized samples instead of `train_size` float tensors of 224x224x3
# (which the previous cache + shuffle placement kept in memory twice).
# `shuffle` precedes `repeat` so each pass over the data is reshuffled
# without mixing samples across epoch boundaries. The resize is recomputed
# on the fly, in parallel, instead of being cached.
train_dataset = (
    tf.data.Dataset.from_tensor_slices(train_dataset)
    .shuffle(train_size)
    .repeat()
    .map(preprocessing, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .map(augmentation_donnees, num_parallel_calls=tf.data.AUTOTUNE)
    .map(preprocess_resnet, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

# Test: same preprocessing as validation, without caching.
test_dataset = (
    tf.data.Dataset.from_tensor_slices(test_dataset)
    .map(preprocessing, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .map(preprocess_resnet, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

In [5]:
def get_modele_logits(modele):
    """Build a model that outputs the logits of ``modele``.

    A new Dense layer named ``logits`` is created from the configuration of
    the last layer of ``modele`` with a linear activation, applied to the
    output of the second-to-last layer, and initialised with a copy of the
    last layer's weights. The resulting model is compiled with the accuracy
    metric only.
    """
    couche_sortie = modele.layers[-1]
    config_logits = couche_sortie.get_config()
    config_logits.update(activation=linear, name='logits')

    sortie_logits = Dense(**config_logits)(modele.layers[-2].output)
    modele_logits = Model(inputs=modele.inputs, outputs=[sortie_logits])

    # Copy (not share) the weights of the original output layer.
    poids_sortie = [variable.numpy() for variable in couche_sortie.weights]
    modele_logits.layers[-1].set_weights(poids_sortie)

    modele_logits.compile(metrics=['accuracy'])
    return modele_logits

@tf.function
def compte_bons(x, y):
    """Count the rows where ``x`` and ``y`` peak at the same index along axis 1.

    Returns the count as a scalar float32 tensor.
    """
    classes_x = tf.argmax(x, axis=1)
    classes_y = tf.argmax(y, axis=1)
    concordances = tf.cast(tf.equal(classes_x, classes_y), tf.float32)
    return tf.reduce_sum(concordances)

@tf.function
def softmax(logits, temp):
    """Temperature-scaled softmax along axis 1.

    Args:
        logits: Tensor of logits; normalisation is done over axis 1.
        temp: Temperature dividing the logits before exponentiation.

    Returns:
        Tensor of the same shape as ``logits`` whose rows sum to 1.
    """
    echelle = logits / temp
    # Subtract the row-wise maximum before exponentiating: the result is
    # mathematically unchanged, but exp() can no longer overflow to inf
    # (which would produce inf / inf = NaN for large logits).
    decalage = tf.stop_gradient(tf.reduce_max(echelle, axis=1, keepdims=True))
    expo = tf.exp(echelle - decalage)
    return expo / tf.reduce_sum(expo, axis=1, keepdims=True)

@tf.function
def ce(x, y, temp):
    """Cross-entropy ``-sum(x * log(y))`` over all elements, scaled by ``temp**2``.

    Args:
        x: Target distribution (the callers in this notebook pass one-hot
            labels or the teacher's softened probabilities).
        y: Predicted probabilities, same shape and dtype as ``x``.
        temp: Temperature; the summed loss is multiplied by ``temp**2``.

    Returns:
        Scalar tensor. The sum runs over every element, so the value is not
        averaged over the batch.
    """
    # xlogy(x, y) is x * log(y) but yields 0 wherever x == 0, even when
    # log(y) is infinite. Multiplying first and masking the NaN afterwards
    # only repaired the forward value: the gradient of the masked product
    # was still NaN and would poison the whole update.
    res = -tf.math.xlogy(x, y)
    # Kept as a guard so NaN entries coming from the inputs are still zeroed.
    res = tf.where(tf.math.is_nan(res), 0., res)
    res = tf.reduce_sum(res) * temp**2
    return res

def init_csv_log(fichier):
    """Create (or overwrite) the CSV log file and write its header row.

    Missing parent directories of ``fichier`` are created first. The header
    is ``epoch,accuracy,val_accuracy`` with no padding around the commas, so
    that strict CSV readers see the column names without a leading space.
    """
    dossier = os.path.dirname(fichier)
    if dossier:
        os.makedirs(dossier, exist_ok=True)
    with open(fichier, 'w') as file:
        file.write("epoch,accuracy,val_accuracy\n")
def append_csv_log(fichier, epoch, accuracy, val_accuracy):
    """Append one ``epoch,accuracy,val_accuracy`` row to the CSV log.

    ``epoch`` is written as an integer and both accuracies with two decimals.
    """
    with open(fichier, 'a') as journal:
        champs = (
            format(epoch, "d"),
            format(accuracy, ".2f"),
            format(val_accuracy, ".2f"),
        )
        journal.write(",".join(champs) + "\n")

def distillateur_kl(etudiant, enseignant, train_dataset, validation_dataset, temp, nom_modele, n_epoch, alpha, patience=5):
    """Distil ``enseignant`` into ``etudiant`` with a custom training loop.

    For every batch the loss is
    ``alpha * ce(labels, softmax(student, 1), 1)
    + (1 - alpha) * ce(softmax(teacher, temp), softmax(student, temp), temp)``
    and is minimised with Adam (learning rate 0.001) over the trainable
    variables of the student's logit model.

    Args:
        etudiant: Student model; its best version (by validation accuracy) is
            saved to ``sauvegardes/{nom_modele}_checkpoint.keras``.
        enseignant: Teacher model, only called with ``training=False``.
        train_dataset: Iterable of ``(X_batch, y_batch)``; ``train_size //
            batch_size`` batches are drawn from it per epoch, so it has to
            provide at least ``n_epoch`` times that many.
        validation_dataset: Dataset passed to ``etudiant.evaluate``.
        temp: Distillation temperature.
        nom_modele: Prefix of the checkpoint and CSV log file names.
        n_epoch: Maximum number of epochs.
        alpha: Weight of the hard-label term; ``1 - alpha`` weights the
            teacher term.
        patience: Early-stopping threshold: training stops at a
            non-improving epoch once more than ``patience`` non-improving
            epochs have already been counted since the last improvement of
            the validation loss.

    Returns:
        None. Results are written to the checkpoint and the CSV log.
    """
    etudiant_logit = get_modele_logits(etudiant)
    enseignant_logit = get_modele_logits(enseignant)
    adam = Adam(learning_rate=0.001)
    # The log and the checkpoint are both written here; make sure it exists.
    os.makedirs("sauvegardes", exist_ok=True)
    fichier_logs = f"sauvegardes/{nom_modele}_logs.csv"
    init_csv_log(fichier_logs)
    print("C'est parti pour la distillation !\n")
    val_accuracy_max = 0
    # Start at +inf so that the first epoch always counts as an improvement.
    # Starting at 0 meant the loss could never "improve" and training always
    # stopped after a fixed number of epochs.
    val_loss_min = float("inf")
    early_stop_count = 0
    train_dataset_iter = iter(train_dataset)
    n_batch = train_size // batch_size
    for epoch in range(n_epoch):
        print(f"Époque {epoch + 1} / {n_epoch}")
        barre_progression = tqdm(range(n_batch))
        bons_epoque = 0
        exemples_vus = 0
        accuracy = float("nan")  # stays NaN only if the epoch has no batch
        for _ in barre_progression:
            X_batch, y_batch = next(train_dataset_iter)
            enseignant_estim_logit = enseignant_logit(X_batch, training = False)
            enseignant_estim_softmax = softmax(enseignant_estim_logit, temp)
            with tf.GradientTape() as tape:
                etudiant_estim_logit = etudiant_logit(X_batch, training = True)
                etudiant_estim_softmax = softmax(etudiant_estim_logit, temp)
                etudiant_estim_softmax_1 = softmax(etudiant_estim_logit, 1)
                perte = alpha * ce(y_batch,etudiant_estim_softmax_1, 1) + (1-alpha) * ce(enseignant_estim_softmax,etudiant_estim_softmax, temp)
            grads = tape.gradient(perte, etudiant_logit.trainable_variables)
            adam.apply_gradients(zip(grads, etudiant_logit.trainable_variables))
            bons_epoque += compte_bons(etudiant_estim_softmax,y_batch).numpy()
            # Divide by the number of examples actually seen so far (the
            # previous `i * batch_size` was one batch short and undefined
            # on the first batch).
            exemples_vus += int(y_batch.shape[0])
            accuracy = bons_epoque / exemples_vus
            barre_progression.set_description(f"Accuracy {accuracy*100:.1f} %")
        # The logit model owns a *copy* of the student's output layer, so the
        # optimiser never touches the layer inside `etudiant`. Copy the
        # trained weights back before evaluating and saving the student.
        etudiant.layers[-1].set_weights(etudiant_logit.layers[-1].get_weights())
        # NOTE(review): what `val_loss` measures depends on how `etudiant`
        # was compiled by the caller — confirm it includes a real loss.
        val_loss, val_accuracy = etudiant.evaluate(validation_dataset)
        if val_accuracy > val_accuracy_max:
            val_accuracy_max = val_accuracy
            etudiant.save(f"sauvegardes/{nom_modele}_checkpoint.keras")
        # Log before the early-stopping test so the last epoch is recorded.
        append_csv_log(fichier_logs, epoch, accuracy, val_accuracy)
        print(f"Accuracy (train) : {accuracy:.4f} | Accuracy (val) : {val_accuracy:.4f}")
        if val_loss < val_loss_min:
            val_loss_min = val_loss
            early_stop_count = 0
        elif early_stop_count > patience:
            return
        else:
            early_stop_count += 1

In [6]:
def ResNet18():
    """Return a ResNet18 backbone followed by global average pooling.

    The backbone comes from ``Classifiers.get('resnet18')`` and is built for
    224x224x3 inputs with ``weights='imagenet'`` and no classification head.
    """
    # Classifiers.get also returns a preprocessing function, unused here.
    constructeur, _ = Classifiers.get('resnet18')
    base = constructeur((224, 224, 3), weights='imagenet', include_top=False)
    caracteristiques = GlobalAveragePooling2D()(base.output)
    return Model(inputs=base.input, outputs=caracteristiques)

def new_modele_resnet():
    """Build and compile a fresh student model on top of ``ResNet18()``.

    The head is: dropout 0.25, a 256-unit sigmoid Dense layer with L1
    regularisation, dropout 0.5 and a ``num_classes``-way softmax Dense layer
    with L2 regularisation.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential([
        Input((224,224,3)),
        ResNet18(),
        Dropout(0.25),
        Dense(256, activation="sigmoid", kernel_regularizer = tf.keras.regularizers.L1(0.001)),
        Dropout(0.5),
        Dense(num_classes, activation="softmax", kernel_regularizer = tf.keras.regularizers.L2(0.001))
    ])
    # Compile with the same loss as the teacher. With metrics only, the loss
    # reported by `evaluate()` cannot contain a classification term, yet the
    # distillation loop uses that value to decide on early stopping.
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [7]:
def distillation_resnet18(temp, alpha):
    """Distil the teacher into a new ResNet18 student and upload the results.

    Args:
        temp: Integer distillation temperature (formatted with ``:d`` in the
            output file names).
        alpha: Weight of the hard-label loss term; also encoded in the file
            names as ``int(alpha * 100)``.

    The best checkpoint is copied to ``sauvegardes/{nom_modele}.keras`` and
    then sent, together with the CSV log, to the S3 bucket with ``mc cp``.
    """
    tf.keras.backend.clear_session()
    modele = new_modele_resnet()
    nom_modele =  f"model_etudiant_t{temp:d}_a{int(alpha*100):d}"
    # Forward the caller's alpha: it used to be replaced by a hard-coded
    # 0.25, so runs named "..._a50" were actually trained with alpha = 0.25.
    distillateur_kl(modele, model_enseignant, train_dataset, validation_dataset, temp, nom_modele, n_epoch, alpha)
    wd = os.getcwd()
    os.system(f"cp {wd}/sauvegardes/{nom_modele}_checkpoint.keras {wd}/sauvegardes/{nom_modele}.keras")
    os.system(f"mc cp {wd}/sauvegardes/{nom_modele}.keras s3/afeldmann/projet_cnam/{nom_modele}.keras")
    os.system(f"mc cp {wd}/sauvegardes/{nom_modele}_logs.csv s3/afeldmann/projet_cnam/{nom_modele}_logs.csv")

In [8]:
# Distillation run: temperature 1, alpha 0.25.
distillation_resnet18(temp=1, alpha=0.25)

C'est parti pour la distillation !

Époque 1 / 100


  0%|          | 0/351 [00:00<?, ?it/s]

2024-04-12 16:24:08.192953: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8900
2024-04-12 16:24:20.456719: W external/local_tsl/tsl/framework/bfc_allocator.cc:487] Allocator (GPU_0_bfc) ran out of memory trying to allocate 98.00MiB (rounded to 102760448)requested by op Mul
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2024-04-12 16:24:20.456812: I external/local_tsl/tsl/framework/bfc_allocator.cc:1044] BFCAllocator dump for GPU_0_bfc
2024-04-12 16:24:20.456838: I external/local_tsl/tsl/framework/bfc_allocator.cc:1051] Bin (256): 	Total Chunks: 123, Chunks in use: 122. 30.8KiB allocated for chunks. 30.5KiB in use in bin. 13.6KiB client-requested in use in bin.
2024-04-12 16:24:20.456856: I external/local_tsl/tsl/framework/bfc_allocator.cc:1051] Bin (512): 	Total Chunks: 54, Chunks in use: 52.

ResourceExhaustedError: Exception encountered when calling BatchNormalization.call().

[1m{{function_node __wrapped__Mul_device_/job:localhost/replica:0/task:0/device:GPU:0}} failed to allocate memory [Op:Mul] name: [0m

Arguments received by BatchNormalization.call():
  • inputs=tf.Tensor(shape=(128, 14, 14, 1024), dtype=float32)
  • training=False
  • mask=None

In [None]:
# Distillation run: temperature 3, alpha 0.25.
distillation_resnet18(temp=3, alpha=0.25)

In [None]:
# Distillation run: temperature 8, alpha 0.25.
distillation_resnet18(temp=8, alpha=0.25)

In [None]:
# Distillation run: temperature 1, alpha 0.5.
distillation_resnet18(temp=1, alpha=0.5)

In [None]:
# Distillation run: temperature 3, alpha 0.5.
distillation_resnet18(temp=3, alpha=0.5)

In [None]:
# Distillation run: temperature 8, alpha 0.5.
distillation_resnet18(temp=8, alpha=0.5)

In [5]:
def graphiques_accuracy(temp, alpha, limite_reglage_fin=47):
    """Plot training and validation accuracy of a student from its CSV log.

    Args:
        temp: Integer temperature used in the log file name.
        alpha: Loss weight used in the log file name (as ``int(alpha*100)``).
        limite_reglage_fin: x position of the dashed vertical line labelled
            'Limite réglage fin'. Pass ``None`` to draw no line. The default
            keeps the position that was previously hard-coded.
    """
    nom_modele =  f"model_etudiant_t{temp:d}_a{int(alpha*100):d}"
    history = np.genfromtxt(f"sauvegardes/{nom_modele}_logs.csv", delimiter=",", names = True)
    # A log with a single data row is read as a 0-d record; make it 1-d so
    # that it can still be plotted.
    history = np.atleast_1d(history)

    fig, ax = plt.subplots()
    ax.plot(history['accuracy'], label='Entrainement')
    ax.plot(history['val_accuracy'], label='Validation')
    # The log read above belongs to a student model, not to the teacher.
    ax.set_title(f"Modèle étudiant (T={temp}, alpha={alpha})")
    ax.set_ylabel('Exactitude')
    ax.set_xlabel('Époque')
    if limite_reglage_fin is not None:
        ax.axvline(x=limite_reglage_fin, color='purple', ls='--', lw=2, label='Limite réglage fin')
    ax.legend(loc='best')
    plt.show()