# Tuning de InceptionV3

L'objectif de ce notebook est de réaliser le tuning des hyperparamètres du modèle InceptionV3, réseau identifié comme le meilleur candidat potentiel pour la mise au point d'un modèle de classification du caractère comestible d'un champignon à partir d'une image.
Les inputs de ce notebook sont :
- le dataset d'images nettoyé et le fichier .csv correspondant au dataset d'images qui sera utilisé pour les parties train et test,
- le dataset de validation avec le fichier .csv associé.



In [None]:
import os
import pandas as pd
from IPython.display import display
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import cv2
import time
import random
import seaborn as sns
from joblib import dump

import  keras
import tensorflow as tf # Utilisation de tensorflow v2.9.1
from tensorflow.keras.applications.resnet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense
from tensorflow.keras.layers import Activation
from tensorflow.keras.applications.efficientnet import EfficientNetB0
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.resnet_v2 import ResNet50V2
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras import optimizers
from tensorflow.python.keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger
from tensorflow.keras import backend as K
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
# Paths to the source folder and CSV files.
# images_dataset is used as the image directory by the generators; the three
# CSV files are read into df_train / df_test / df_val.
# NOTE(review): these are absolute paths on one Windows machine - adjust before running elsewhere.
images_dataset = r'C:\Users\renamedadmin\Documents\Formation_Datascience\Projet_Datascientest_Champignons\Dossier_technique\02_Pieces_constitutives\Dataset\FFD_images_dataset'
train_dataset = r'C:\Users\renamedadmin\Documents\Formation_Datascience\Projet_Datascientest_Champignons\Dossier_technique\02_Pieces_constitutives\Dataset\train_FFDataframe_full_undersampling.csv'
test_dataset = r'C:\Users\renamedadmin\Documents\Formation_Datascience\Projet_Datascientest_Champignons\Dossier_technique\02_Pieces_constitutives\Dataset\test_FFDataframe_full_undersampling.csv'
validation_dataset = r'C:\Users\renamedadmin\Documents\Formation_Datascience\Projet_Datascientest_Champignons\Dossier_technique\02_Pieces_constitutives\Dataset\val_FFDataframe_full.csv'

# Folder where the model checkpoints are written (used as the ModelCheckpoint directory).
save_models_results = r'C:\Users\renamedadmin\Documents\Formation_Datascience\Projet_Datascientest_Champignons\Dossier_technique\02_Pieces_constitutives\Dataset\Models_results'

In [None]:
# création de quelques fonctions utiles

# affichage des metriques (accuracy, loss) d'entrainement d'un modèle
def plot_scores(model, title):
    """Plot training curves (accuracy and loss) side by side.

    Args:
        model: object exposing a ``history`` mapping with the keys
            ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and ``'val_loss'``
            (presumably the History object returned by a Keras training
            call - confirm against callers).
        title: text placed as a left-aligned title on the right-hand panel.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    sns.set()
    plt.rcParams['figure.figsize'] = [14, 4]

    fig = plt.figure()
    fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0.3, hspace=0.3)

    # One row per panel: subplot position, train key, test key, y label, legend corner.
    panels = (
        (1, 'accuracy', 'val_accuracy', 'Accuracy', "lower right"),
        (2, 'loss', 'val_loss', 'Loss', "upper right"),
    )
    for position, train_key, test_key, y_label, legend_corner in panels:
        axis = fig.add_subplot(1, 2, position)
        axis.plot(model.history[train_key], label="train")
        axis.plot(model.history[test_key], label="test")
        axis.legend(loc=legend_corner)
        axis.set_xlabel('Epochs')
        axis.set_ylabel(y_label)

    # plt.title targets the current axes, i.e. the last panel added above.
    plt.title(title, loc="left")
    plt.show()
    
# affichage de la matrice de confusion du dataset de validation
def show_confusion_matrix(model):
    """Plot the confusion matrix of ``model`` on the validation data.

    Relies on module-level names that are not passed as arguments:
    ``val_generator`` and ``val_steps`` (fed to ``model.predict``) and
    ``df_val`` (its ``edible`` column supplies the true labels).
    NOTE(review): the predictions must line up row-for-row with
    ``df_val.edible`` - confirm that ``val_generator`` is not shuffled and
    that ``val_steps`` covers every row.

    Args:
        model: model exposing ``predict``; its ``name`` attribute (when
            present) is used in the plot title.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # Predict, then keep the index of the highest score of each prediction row.
    model_pred = model.predict(val_generator, steps=val_steps, verbose=1)
    y_pred = np.argmax(model_pred, axis=1)
    y_val = df_val.edible.to_list()
    confusion_mtx = confusion_matrix(y_val, y_pred)

    plt.rcParams['font.size'] = 20
    disp = ConfusionMatrixDisplay(confusion_matrix=confusion_mtx)
    disp.plot(cmap='Blues', values_format='d', xticks_rotation='horizontal', colorbar = False)
    # Use the model's name: interpolating the object itself printed its repr
    # (e.g. "<...Functional object at 0x...>") in the title.
    plt.title(f"Confusion matrix for {getattr(model, 'name', model)}")
    plt.ylabel('True label', fontsize = 20)
    plt.yticks(fontsize = 20)
    plt.xlabel('Predicted label', fontsize = 20)
    plt.xticks(fontsize = 20)
    plt.grid(False)
    plt.show()
    
# création d'une fonction permettant de compiler un modèle
def compile_model(model, optimizer, loss, metrics):
    """Compile ``model`` with the given training configuration.

    Args:
        model: object exposing a ``compile`` method accepting the keyword
            arguments ``optimizer``, ``loss`` and ``metrics``.
        optimizer: optimizer to use during training.
        loss: loss function to use during training, e.g. ``"loss"``.
        metrics: metrics to evaluate during training, e.g. ``["metrics"]``.

    Returns:
        None. The model is compiled in place.
    """
    compile_settings = {
        "optimizer": optimizer,
        "loss": loss,
        "metrics": metrics,
    }
    model.compile(**compile_settings)


In [None]:
# Load the train / test / validation dataframes.
df_train = pd.read_csv(train_dataset)
df_test = pd.read_csv(test_dataset)
df_val = pd.read_csv(validation_dataset)

# DataFrame.info() prints its summary and returns None, so it is called on its
# own instead of being passed to display() (which only rendered "None").
df_test.info()
df_val.info()
display(df_train.head())

# Number of images per class in each dataframe (value 0 is counted as
# inedible, value 1 as edible).
inedible = []
edible = []

dataframes = [df_train, df_test, df_val]
for dataframe in dataframes:
    # Comparing then summing gives 0 for a class that is absent, where
    # value_counts()[k] would raise.
    count_inedible = int((dataframe['edible'] == 0).sum())
    inedible.append(count_inedible)
    count_edible = int((dataframe['edible'] == 1).sum())
    edible.append(count_edible)

data = ['df_train', 'df_test', 'df_val']
edibility = {'inedible': inedible, 'edible': edible}

# Not used by the plot below; kept in case later cells read them.
colonnes = ['df_train', 'df_test', 'df_val']
sex_counts = {
    'inedible': inedible,
    'edible': edible
}

# Stacked bar chart: one bar per dataframe, one segment per category.
width = 0.6
fig, ax = plt.subplots()
bottom = np.zeros(len(data))
for i, j in edibility.items():
    p = ax.bar(data, j, width, label=i, bottom=bottom)
    bottom += j  # the next category is stacked on top of this one
    ax.bar_label(p, label_type='center')
ax.set_title('Number of images by category')
ax.legend(title = 'categories')

plt.show()


## Création du modèle à tuner

In [None]:
# Transfer learning: InceptionV3 backbone (ImageNet weights, no top, average
# pooling) with every layer frozen, followed by a 2-unit Dense layer and a softmax.
backbone = InceptionV3(weights='imagenet', include_top=False, pooling="avg")
for backbone_layer in backbone.layers:
    backbone_layer.trainable = False

logits = Dense(2)(backbone.layers[-1].output)
output = Activation('softmax')(logits)
TL_InceptionV3 = Model(inputs=backbone.input, outputs=output, name='TL_InceptionV3')
TL_InceptionV3.summary()



## Tuning de InceptionV3

### Learning_rate 

In [None]:
# Shared settings for the learning-rate experiments.
# NOTE(review): W and H look like the intended image width/height, but no
# visible cell reads them - confirm whether they should be passed to the generators.
W = H = 224
epochs = 15
SEED = 3
batch_size = 64


In [None]:
# Data generators for the training and test dataframes. Both apply the same
# preprocessing function and read the images from images_dataset.
# NOTE(review): no target_size is passed, so the Keras default applies and
# W / H are not used - confirm this is intended.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Labels are converted to strings before being used with class_mode="categorical".
df_train["edible"] = df_train["edible"].apply(str)
df_test["edible"] = df_test["edible"].apply(str)

# Arguments common to both generators.
common_flow_args = dict(directory=images_dataset,
                        x_col="filename",
                        y_col="edible",
                        class_mode="categorical",
                        batch_size=batch_size)

# Training data: shuffled with a fixed seed.
train_generator = train_datagen.flow_from_dataframe(dataframe=df_train,
                                                    shuffle=True,
                                                    seed=SEED,
                                                    **common_flow_args)

# Test data: generator defaults are kept for shuffle / seed.
test_generator = test_datagen.flow_from_dataframe(dataframe=df_test,
                                                  **common_flow_args)

In [None]:
# SGD optimizers for the learning-rate sweep. The digits in each name spell
# the rate: 01 -> 0.1, 001 -> 0.01, 0001 -> 0.001, 00001 -> 0.0001.
optimizer01 = optimizers.SGD(learning_rate=0.1)
optimizer001 = optimizers.SGD(learning_rate=0.01)
optimizer0001 = optimizers.SGD(learning_rate=0.001)
# Was 0.00001, which contradicted the variable name, the comment of the
# training cell and the "lr = 0.0001" label of the summary plot.
optimizer00001 = optimizers.SGD(learning_rate=0.0001)

# Loss and metric shared by all runs of the sweep.
loss = "categorical_crossentropy"
metrics = ["accuracy"]

In [None]:
# Compile the model with SGD, learning_rate = 0.1.
compile_model(TL_InceptionV3, optimizer01, loss, metrics)

# Callbacks: save the model whenever val_loss improves, and log the metrics
# of each epoch to a CSV file (relative path, appended to on re-runs).
checkpointer_InceptionV3 = ModelCheckpoint(filepath=os.path.join(save_models_results, "InceptionV3_lr01_bs64_SG_catcross.hdf5"),
                                           monitor='val_loss',
                                           save_best_only=True,
                                           mode='auto')
CSV_logger_InceptionV3 = CSVLogger(filename='logger_InceptionV3_lr01_bs64_SG_catcross.csv',
                                   separator=',',
                                   append=True)
callbacks_InceptionV3 = [checkpointer_InceptionV3, CSV_logger_InceptionV3]

# Train the model. Model.fit accepts generators directly; fit_generator is
# deprecated in TensorFlow 2.x and only forwards to fit.
start_time = time.time()
history_InceptionV3_lr01_bs64_SG_catcross = TL_InceptionV3.fit(train_generator,
                                                    epochs=epochs,
                                                    validation_data=test_generator,
                                                    validation_steps=len(df_test)//batch_size,
                                                    steps_per_epoch=len(df_train)//batch_size,
                                                    callbacks=callbacks_InceptionV3)

end_time = time.time()
print("Durée de l'entrainement :", end_time - start_time)

In [None]:
# Compile the model with SGD, learning_rate = 0.01.
# NOTE(review): compile() does not reset the weights, so this run continues
# from whatever TL_InceptionV3 learned in earlier cells - confirm this is
# intended for a fair comparison between learning rates.
compile_model(TL_InceptionV3, optimizer001, loss, metrics)

# Callbacks: save the model whenever val_loss improves, and log the metrics
# of each epoch to a CSV file (relative path, appended to on re-runs).
checkpointer_InceptionV3 = ModelCheckpoint(filepath=os.path.join(save_models_results, "InceptionV3_lr001_bs64_SG_catcross.hdf5"),
                                           monitor='val_loss',
                                           save_best_only=True,
                                           mode='auto')
CSV_logger_InceptionV3 = CSVLogger(filename='logger_InceptionV3_lr001_bs64_SG_catcross.csv',
                                   separator=',',
                                   append=True)
callbacks_InceptionV3 = [checkpointer_InceptionV3, CSV_logger_InceptionV3]

# Train the model. Model.fit accepts generators directly; fit_generator is
# deprecated in TensorFlow 2.x and only forwards to fit.
start_time = time.time()
history_InceptionV3_lr001_bs64_SG_catcross = TL_InceptionV3.fit(train_generator,
                                                    epochs=epochs,
                                                    validation_data=test_generator,
                                                    validation_steps=len(df_test)//batch_size,
                                                    steps_per_epoch=len(df_train)//batch_size,
                                                    callbacks=callbacks_InceptionV3)

end_time = time.time()
print("Durée de l'entrainement :", end_time - start_time)

In [None]:
# Compile the model with SGD, learning_rate = 0.001.
# NOTE(review): compile() does not reset the weights, so this run continues
# from whatever TL_InceptionV3 learned in earlier cells - confirm this is
# intended for a fair comparison between learning rates.
compile_model(TL_InceptionV3, optimizer0001, loss, metrics)

# Callbacks: save the model whenever val_loss improves, and log the metrics
# of each epoch to a CSV file (relative path, appended to on re-runs).
checkpointer_InceptionV3 = ModelCheckpoint(filepath=os.path.join(save_models_results, "InceptionV3_lr0001_bs64_SG_catcross.hdf5"),
                                           monitor='val_loss',
                                           save_best_only=True,
                                           mode='auto')
CSV_logger_InceptionV3 = CSVLogger(filename='logger_InceptionV3_lr0001_bs64_SG_catcross.csv',
                                   separator=',',
                                   append=True)
callbacks_InceptionV3 = [checkpointer_InceptionV3, CSV_logger_InceptionV3]

# Train the model. Model.fit accepts generators directly; fit_generator is
# deprecated in TensorFlow 2.x and only forwards to fit.
start_time = time.time()
history_InceptionV3_lr0001_bs64_SG_catcross = TL_InceptionV3.fit(train_generator,
                                                    epochs=epochs,
                                                    validation_data=test_generator,
                                                    validation_steps=len(df_test)//batch_size,
                                                    steps_per_epoch=len(df_train)//batch_size,
                                                    callbacks=callbacks_InceptionV3)

end_time = time.time()
print("Durée de l'entrainement :", end_time - start_time)

In [None]:
# Compile the model with the smallest learning rate of the sweep (optimizer00001).
# NOTE(review): compile() does not reset the weights, so this run continues
# from whatever TL_InceptionV3 learned in earlier cells - confirm this is
# intended for a fair comparison between learning rates.
compile_model(TL_InceptionV3, optimizer00001, loss, metrics)

# Callbacks: save the model whenever val_loss improves, and log the metrics
# of each epoch to a CSV file (relative path, appended to on re-runs).
checkpointer_InceptionV3 = ModelCheckpoint(filepath=os.path.join(save_models_results, "InceptionV3_lr00001_bs64_SG_catcross.hdf5"),
                                           monitor='val_loss',
                                           save_best_only=True,
                                           mode='auto')
CSV_logger_InceptionV3 = CSVLogger(filename='logger_InceptionV3_lr00001_bs64_SG_catcross.csv',
                                   separator=',',
                                   append=True)
callbacks_InceptionV3 = [checkpointer_InceptionV3, CSV_logger_InceptionV3]

# Train the model. Model.fit accepts generators directly; fit_generator is
# deprecated in TensorFlow 2.x and only forwards to fit.
start_time = time.time()
history_InceptionV3_lr00001_bs64_SG_catcross = TL_InceptionV3.fit(train_generator,
                                                    epochs=epochs,
                                                    validation_data=test_generator,
                                                    validation_steps=len(df_test)//batch_size,
                                                    steps_per_epoch=len(df_train)//batch_size,
                                                    callbacks=callbacks_InceptionV3)

end_time = time.time()
print("Durée de l'entrainement :", end_time - start_time)

In [None]:
# Summary of the learning-rate tuning: validation accuracy (left panel) and
# validation loss (right panel), one curve per run.
sns.set()
plt.rcParams['figure.figsize'] = [14, 4]

fig = plt.figure()
fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0.3, hspace=0.3)

# Two panels side by side.
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)

# Training histories paired with their legend labels.
lr_runs = [
    (history_InceptionV3_lr01_bs64_SG_catcross, "lr = 0.1"),
    (history_InceptionV3_lr001_bs64_SG_catcross, "lr = 0.01"),
    (history_InceptionV3_lr0001_bs64_SG_catcross, "lr = 0.001"),
    (history_InceptionV3_lr00001_bs64_SG_catcross, "lr = 0.0001"),
]

for axis, metric_key, legend_corner in ((ax1, 'val_accuracy', "lower right"),
                                        (ax2, 'val_loss', "upper right")):
    for run_history, run_label in lr_runs:
        axis.plot(run_history.history[metric_key], label=run_label)
    axis.legend(loc=legend_corner)
    axis.set_xlabel('Epochs')
    axis.set_ylabel(metric_key)

# plt.title targets the current axes, i.e. the right-hand panel.
plt.title("résultats du tuning du learning rate sur InceptionV3", loc="left")
plt.show()

### Batch_size 

In [None]:
# Shared settings for the batch-size experiments (batch_size itself is set
# in each training cell).
# NOTE(review): W and H look like the intended image width/height, but no
# visible cell reads them - confirm whether they should be passed to the generators.
W = H = 224
epochs = 15
SEED = 3


In [None]:
batch_size = 16

# Data generators for the training and test dataframes, rebuilt for this batch size.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Labels are converted to strings before being used with class_mode="categorical".
df_train["edible"] = df_train["edible"].apply(str)
df_test["edible"] = df_test["edible"].apply(str)

train_generator = train_datagen.flow_from_dataframe(df_train, images_dataset,
                                                    x_col="filename",
                                                    y_col="edible",
                                                    class_mode="categorical",
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    seed=SEED)

test_generator = test_datagen.flow_from_dataframe(df_test, images_dataset,
                                                  x_col="filename",
                                                  y_col="edible",
                                                  class_mode="categorical",
                                                  batch_size=batch_size)

# Training configuration: SGD with learning_rate = 0.001, categorical cross-entropy.
optimizer0001 = optimizers.SGD(learning_rate=0.001)
loss = "categorical_crossentropy"
metrics = ["accuracy"]

# Compile the model (learning_rate = 0.001, batch_size = 16).
# NOTE(review): compile() does not reset the weights, so this run continues
# from whatever TL_InceptionV3 learned in earlier cells - confirm this is intended.
compile_model(TL_InceptionV3, optimizer0001, loss, metrics)

# Callbacks: save the model whenever val_loss improves, and log the metrics
# of each epoch to a CSV file (relative path, appended to on re-runs).
checkpointer_InceptionV3 = ModelCheckpoint(filepath=os.path.join(save_models_results, "InceptionV3_lr0001_bs16_SG_catcross.hdf5"),
                                           monitor='val_loss',
                                           save_best_only=True,
                                           mode='auto')
CSV_logger_InceptionV3 = CSVLogger(filename='logger_InceptionV3_lr0001_bs16_SG_catcross.csv',
                                   separator=',',
                                   append=True)
callbacks_InceptionV3 = [checkpointer_InceptionV3, CSV_logger_InceptionV3]

# Train the model. Model.fit accepts generators directly; fit_generator is
# deprecated in TensorFlow 2.x and only forwards to fit.
start_time = time.time()
history_InceptionV3_lr0001_bs16_SG_catcross = TL_InceptionV3.fit(train_generator,
                                                    epochs=epochs,
                                                    validation_data=test_generator,
                                                    validation_steps=len(df_test)//batch_size,
                                                    steps_per_epoch=len(df_train)//batch_size,
                                                    callbacks=callbacks_InceptionV3)

end_time = time.time()
print("Durée de l'entrainement :", end_time - start_time)

In [None]:
batch_size = 32

# Data generators for the training and test dataframes, rebuilt for this batch size.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Labels are converted to strings before being used with class_mode="categorical".
df_train["edible"] = df_train["edible"].apply(str)
df_test["edible"] = df_test["edible"].apply(str)

train_generator = train_datagen.flow_from_dataframe(df_train, images_dataset,
                                                    x_col="filename",
                                                    y_col="edible",
                                                    class_mode="categorical",
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    seed=SEED)

test_generator = test_datagen.flow_from_dataframe(df_test, images_dataset,
                                                  x_col="filename",
                                                  y_col="edible",
                                                  class_mode="categorical",
                                                  batch_size=batch_size)

# Training configuration: SGD with learning_rate = 0.001, categorical cross-entropy.
optimizer0001 = optimizers.SGD(learning_rate=0.001)
loss = "categorical_crossentropy"
metrics = ["accuracy"]

# Compile the model (learning_rate = 0.001, batch_size = 32).
# NOTE(review): compile() does not reset the weights, so this run continues
# from whatever TL_InceptionV3 learned in earlier cells - confirm this is intended.
compile_model(TL_InceptionV3, optimizer0001, loss, metrics)

# Callbacks: save the model whenever val_loss improves, and log the metrics
# of each epoch to a CSV file (relative path, appended to on re-runs).
checkpointer_InceptionV3 = ModelCheckpoint(filepath=os.path.join(save_models_results, "InceptionV3_lr0001_bs32_SG_catcross.hdf5"),
                                           monitor='val_loss',
                                           save_best_only=True,
                                           mode='auto')
CSV_logger_InceptionV3 = CSVLogger(filename='logger_InceptionV3_lr0001_bs32_SG_catcross.csv',
                                   separator=',',
                                   append=True)
callbacks_InceptionV3 = [checkpointer_InceptionV3, CSV_logger_InceptionV3]

# Train the model. Model.fit accepts generators directly; fit_generator is
# deprecated in TensorFlow 2.x and only forwards to fit.
start_time = time.time()
history_InceptionV3_lr0001_bs32_SG_catcross = TL_InceptionV3.fit(train_generator,
                                                    epochs=epochs,
                                                    validation_data=test_generator,
                                                    validation_steps=len(df_test)//batch_size,
                                                    steps_per_epoch=len(df_train)//batch_size,
                                                    callbacks=callbacks_InceptionV3)

end_time = time.time()
print("Durée de l'entrainement :", end_time - start_time)

In [None]:
batch_size = 128

# Data generators for the training and test dataframes, rebuilt for this batch size.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Labels are converted to strings before being used with class_mode="categorical".
df_train["edible"] = df_train["edible"].apply(str)
df_test["edible"] = df_test["edible"].apply(str)

train_generator = train_datagen.flow_from_dataframe(df_train, images_dataset,
                                                    x_col="filename",
                                                    y_col="edible",
                                                    class_mode="categorical",
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    seed=SEED)

test_generator = test_datagen.flow_from_dataframe(df_test, images_dataset,
                                                  x_col="filename",
                                                  y_col="edible",
                                                  class_mode="categorical",
                                                  batch_size=batch_size)

# Training configuration: SGD with learning_rate = 0.001, categorical cross-entropy.
optimizer0001 = optimizers.SGD(learning_rate=0.001)
loss = "categorical_crossentropy"
metrics = ["accuracy"]

# Compile the model (learning_rate = 0.001, batch_size = 128).
# NOTE(review): compile() does not reset the weights, so this run continues
# from whatever TL_InceptionV3 learned in earlier cells - confirm this is intended.
compile_model(TL_InceptionV3, optimizer0001, loss, metrics)

# Callbacks: save the model whenever val_loss improves, and log the metrics
# of each epoch to a CSV file (relative path, appended to on re-runs).
checkpointer_InceptionV3 = ModelCheckpoint(filepath=os.path.join(save_models_results, "InceptionV3_lr0001_bs128_SG_catcross.hdf5"),
                                           monitor='val_loss',
                                           save_best_only=True,
                                           mode='auto')
CSV_logger_InceptionV3 = CSVLogger(filename='logger_InceptionV3_lr0001_bs128_SG_catcross.csv',
                                   separator=',',
                                   append=True)
callbacks_InceptionV3 = [checkpointer_InceptionV3, CSV_logger_InceptionV3]

# Train the model. Model.fit accepts generators directly; fit_generator is
# deprecated in TensorFlow 2.x and only forwards to fit.
start_time = time.time()
history_InceptionV3_lr0001_bs128_SG_catcross = TL_InceptionV3.fit(train_generator,
                                                    epochs=epochs,
                                                    validation_data=test_generator,
                                                    validation_steps=len(df_test)//batch_size,
                                                    steps_per_epoch=len(df_train)//batch_size,
                                                    callbacks=callbacks_InceptionV3)

end_time = time.time()
print("Durée de l'entrainement :", end_time - start_time)

In [None]:
# Summary of the batch-size tuning: validation accuracy (left panel) and
# validation loss (right panel), one curve per batch size.
sns.set()
plt.rcParams['figure.figsize'] = [14,4]

# Create the figure.
fig = plt.figure()

plt.gcf().subplots_adjust(left = 0, bottom = 0, right = 1, top = 1, wspace = 0.3, hspace = 0.3)
# Create the two panels.
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)

# Plot the validation accuracy of each run. The batch_size = 64 curve reuses
# the history recorded during the learning-rate sweep at lr = 0.001.
ax1.plot(history_InceptionV3_lr0001_bs16_SG_catcross.history['val_accuracy'], label = "batch_size = 16")
ax1.plot(history_InceptionV3_lr0001_bs32_SG_catcross.history['val_accuracy'], label = "batch_size = 32")
ax1.plot(history_InceptionV3_lr0001_bs64_SG_catcross.history['val_accuracy'], label = "batch_size = 64")
ax1.plot(history_InceptionV3_lr0001_bs128_SG_catcross.history['val_accuracy'], label = "batch_size = 128")

ax1.legend(loc = "lower right")
ax1.set_xlabel('Epochs')
ax1.set_ylabel('val_accuracy')

# Plot the validation loss of each run.
ax2.plot(history_InceptionV3_lr0001_bs16_SG_catcross.history['val_loss'], label = "batch_size = 16")
ax2.plot(history_InceptionV3_lr0001_bs32_SG_catcross.history['val_loss'], label = "batch_size = 32")
ax2.plot(history_InceptionV3_lr0001_bs64_SG_catcross.history['val_loss'], label = "batch_size = 64")
ax2.plot(history_InceptionV3_lr0001_bs128_SG_catcross.history['val_loss'], label = "batch_size = 128")
ax2.legend(loc = "upper right")
ax2.set_xlabel('Epochs')
ax2.set_ylabel('val_loss')
# The title previously said "learning rate" (copied from the previous summary
# cell); this figure compares batch sizes.
plt.title("résultats du tuning du batch size sur InceptionV3", loc = "left")
plt.show()

###  Loss_function

In [None]:
batch_size = 128

# Data generators for the training and test dataframes, rebuilt for this batch size.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Labels are converted to strings before being used with class_mode="categorical".
df_train["edible"] = df_train["edible"].apply(str)
df_test["edible"] = df_test["edible"].apply(str)

train_generator = train_datagen.flow_from_dataframe(df_train, images_dataset,
                                                    x_col="filename",
                                                    y_col="edible",
                                                    class_mode="categorical",
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    seed=SEED)

test_generator = test_datagen.flow_from_dataframe(df_test, images_dataset,
                                                  x_col="filename",
                                                  y_col="edible",
                                                  class_mode="categorical",
                                                  batch_size=batch_size)

# Training configuration: SGD with learning_rate = 0.001 and hinge loss.
# NOTE(review): "hinge" is applied to one-hot labels and softmax outputs here;
# Keras also provides "categorical_hinge" - confirm which one is intended.
optimizer0001 = optimizers.SGD(learning_rate=0.001)
loss = "hinge"
metrics = ["accuracy"]

# Compile the model (learning_rate = 0.001, batch_size = 128).
# NOTE(review): compile() does not reset the weights, so this run continues
# from whatever TL_InceptionV3 learned in earlier cells - confirm this is intended.
compile_model(TL_InceptionV3, optimizer0001, loss, metrics)

# Callbacks: save the model whenever val_loss improves, and log the metrics
# of each epoch to a CSV file (relative path, appended to on re-runs).
checkpointer_InceptionV3 = ModelCheckpoint(filepath=os.path.join(save_models_results, "InceptionV3_lr0001_bs128_SG_hinge.hdf5"),
                                           monitor='val_loss',
                                           save_best_only=True,
                                           mode='auto')
CSV_logger_InceptionV3 = CSVLogger(filename='logger_InceptionV3_lr0001_bs128_SG_hinge.csv',
                                   separator=',',
                                   append=True)
callbacks_InceptionV3 = [checkpointer_InceptionV3, CSV_logger_InceptionV3]

# Train the model. Model.fit accepts generators directly; fit_generator is
# deprecated in TensorFlow 2.x and only forwards to fit.
start_time = time.time()
history_InceptionV3_lr0001_bs128_SG_hinge = TL_InceptionV3.fit(train_generator,
                                                    epochs=epochs,
                                                    validation_data=test_generator,
                                                    validation_steps=len(df_test)//batch_size,
                                                    steps_per_epoch=len(df_train)//batch_size,
                                                    callbacks=callbacks_InceptionV3)

end_time = time.time()
print("Durée de l'entrainement :", end_time - start_time)

In [None]:
# Summary of the loss-function tuning: validation accuracy (left panel) and
# validation loss (right panel), one curve per loss function.
sns.set()
plt.rcParams['figure.figsize'] = [14, 4]

fig = plt.figure()
fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0.3, hspace=0.3)

# Two panels side by side.
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)

# Training histories paired with their legend labels.
loss_runs = [
    (history_InceptionV3_lr0001_bs128_SG_catcross, "loss_function = categorical_crossentropy"),
    (history_InceptionV3_lr0001_bs128_SG_hinge, "loss_function = hinge"),
]

for axis, metric_key in ((ax1, 'val_accuracy'), (ax2, 'val_loss')):
    for run_history, run_label in loss_runs:
        axis.plot(run_history.history[metric_key], label=run_label)
    axis.legend(loc="right")
    axis.set_xlabel('Epochs')
    axis.set_ylabel(metric_key)

# plt.title targets the current axes, i.e. the right-hand panel.
plt.title("résultats du tuning de la loss_function sur InceptionV3", loc="left")
plt.show()

### Optimizer 

In [None]:
# Training configuration: Adam with learning_rate = 0.001 and hinge loss.
# The generators and batch_size left by the preceding cell are reused.
optimizer0001 = optimizers.Adam(learning_rate=0.001)
loss = "hinge"
metrics = ["accuracy"]

# Compile the model (Adam, learning_rate = 0.001).
# NOTE(review): compile() does not reset the weights, so this run continues
# from whatever TL_InceptionV3 learned in earlier cells - confirm this is
# intended for a fair comparison between optimizers.
compile_model(TL_InceptionV3, optimizer0001, loss, metrics)

# Callbacks: save the model whenever val_loss improves, and log the metrics
# of each epoch to a CSV file (relative path, appended to on re-runs).
checkpointer_InceptionV3 = ModelCheckpoint(filepath=os.path.join(save_models_results, "InceptionV3_lr0001_bs128_Adam_hinge.hdf5"),
                                           monitor='val_loss',
                                           save_best_only=True,
                                           mode='auto')
CSV_logger_InceptionV3 = CSVLogger(filename='logger_InceptionV3_lr0001_bs128_Adam_hinge.csv',
                                   separator=',',
                                   append=True)
callbacks_InceptionV3 = [checkpointer_InceptionV3, CSV_logger_InceptionV3]

# Train the model. Model.fit accepts generators directly; fit_generator is
# deprecated in TensorFlow 2.x and only forwards to fit.
start_time = time.time()
history_InceptionV3_lr0001_bs128_Adam_hinge = TL_InceptionV3.fit(train_generator,
                                                    epochs=epochs,
                                                    validation_data=test_generator,
                                                    validation_steps=len(df_test)//batch_size,
                                                    steps_per_epoch=len(df_train)//batch_size,
                                                    callbacks=callbacks_InceptionV3)

end_time = time.time()
print("Durée de l'entrainement :", end_time - start_time)

In [None]:
# Training configuration: RMSprop with learning_rate = 0.001 and hinge loss.
# The generators and batch_size left by the preceding cells are reused.
optimizer0001 = optimizers.RMSprop(learning_rate=0.001)
loss = "hinge"
metrics = ["accuracy"]

# Compile the model (RMSprop, learning_rate = 0.001). The original comment
# mentioned batch_size = 32, but no batch size is set in this cell and the
# output files are named bs128.
# NOTE(review): compile() does not reset the weights, so this run continues
# from whatever TL_InceptionV3 learned in earlier cells - confirm this is
# intended for a fair comparison between optimizers.
compile_model(TL_InceptionV3, optimizer0001, loss, metrics)

# Callbacks: save the model whenever val_loss improves, and log the metrics
# of each epoch to a CSV file (relative path, appended to on re-runs).
checkpointer_InceptionV3 = ModelCheckpoint(filepath=os.path.join(save_models_results, "InceptionV3_lr0001_bs128_RMSprop_hinge.hdf5"),
                                           monitor='val_loss',
                                           save_best_only=True,
                                           mode='auto')
CSV_logger_InceptionV3 = CSVLogger(filename='logger_InceptionV3_lr0001_bs128_RMSprop_hinge.csv',
                                   separator=',',
                                   append=True)
callbacks_InceptionV3 = [checkpointer_InceptionV3, CSV_logger_InceptionV3]

# Train the model. Model.fit accepts generators directly; fit_generator is
# deprecated in TensorFlow 2.x and only forwards to fit.
start_time = time.time()
history_InceptionV3_lr0001_bs128_RMSprop_hinge = TL_InceptionV3.fit(train_generator,
                                                    epochs=epochs,
                                                    validation_data=test_generator,
                                                    validation_steps=len(df_test)//batch_size,
                                                    steps_per_epoch=len(df_train)//batch_size,
                                                    callbacks=callbacks_InceptionV3)

end_time = time.time()
print("Durée de l'entrainement :", end_time - start_time)

In [None]:
# Summary figure for the optimizer tuning: one curve per optimizer run,
# validation accuracy on the left subplot and validation loss on the right.
sns.set()
plt.rcParams['figure.figsize'] = [14, 4]

# Create the figure and let the subplots span its whole area.
fig = plt.figure()
fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0.3, hspace=0.3)

# Create the two side-by-side subplots.
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)

# (legend label, training history) for each run being compared.
optimizer_runs = (
    ("optimizer = SGD", history_InceptionV3_lr0001_bs128_SG_hinge),
    ("optimizer = Adam", history_InceptionV3_lr0001_bs128_Adam_hinge),
    ("optimizer = RMSprop", history_InceptionV3_lr0001_bs128_RMSprop_hinge),
)

# (axes, history key plotted on it, legend position).
panels = (
    (ax1, 'val_accuracy', "lower right"),
    (ax2, 'val_loss', "upper right"),
)

# Draw every run on each subplot, then label that subplot.
for axis, metric, legend_position in panels:
    for run_label, run in optimizer_runs:
        axis.plot(run.history[metric], label=run_label)
    axis.legend(loc=legend_position)
    axis.set_xlabel('Epochs')
    axis.set_ylabel(metric)

# NOTE(review): plt.title applies to the current axes (presumably the
# right-hand subplot, the last one created), not to the whole figure.
plt.title("résultats du tuning de l'optimizer sur InceptionV3", loc="left")
plt.show()

## Conclusions
Le tuning des hyperparamètres du modèle InceptionV3 permet d'effectuer les choix suivants pour l'entraînement du meilleur modèle dans le cas de la classification sur le caractère comestible de champignons :
- batch_size = 128
- optimizer = RMSprop avec un learning_rate de 0.001
- loss_function = "hinge"

In [None]:
# Save the tuning results for the Streamlit app.
# Each entry maps the file stem (written as "<stem>.joblib" under
# save_models_results) to the training history stored under the same name.
# Keeping name and object side by side in one table means a file name can no
# longer drift away from the variable it is supposed to hold.
# NOTE(review): these are whatever the training calls returned; if they are
# Keras History objects, dumping only their `.history` dict may be lighter -
# confirm what the Streamlit app expects to load before changing this.
histories_to_save = {
    # Runs whose names differ by the "lr" tag.
    'history_InceptionV3_lr01_bs64_SG_catcross': history_InceptionV3_lr01_bs64_SG_catcross,
    'history_InceptionV3_lr001_bs64_SG_catcross': history_InceptionV3_lr001_bs64_SG_catcross,
    'history_InceptionV3_lr0001_bs64_SG_catcross': history_InceptionV3_lr0001_bs64_SG_catcross,
    'history_InceptionV3_lr00001_bs64_SG_catcross': history_InceptionV3_lr00001_bs64_SG_catcross,
    # Runs whose names differ by the "bs" tag.
    'history_InceptionV3_lr0001_bs16_SG_catcross': history_InceptionV3_lr0001_bs16_SG_catcross,
    'history_InceptionV3_lr0001_bs32_SG_catcross': history_InceptionV3_lr0001_bs32_SG_catcross,
    'history_InceptionV3_lr0001_bs128_SG_catcross': history_InceptionV3_lr0001_bs128_SG_catcross,
    # Run with the "hinge" tag instead of "catcross".
    'history_InceptionV3_lr0001_bs128_SG_hinge': history_InceptionV3_lr0001_bs128_SG_hinge,
    # Runs whose names differ by the optimizer tag.
    'history_InceptionV3_lr0001_bs128_Adam_hinge': history_InceptionV3_lr0001_bs128_Adam_hinge,
    'history_InceptionV3_lr0001_bs128_RMSprop_hinge': history_InceptionV3_lr0001_bs128_RMSprop_hinge,
}

for history_name, history in histories_to_save.items():
    path_save_name = os.path.join(save_models_results, f"{history_name}.joblib")
    # The third positional argument of joblib.dump is the compression level;
    # it is passed by keyword here so the intent is visible.
    dump(history, path_save_name, compress=3)

