<a href="https://colab.research.google.com/github/bdgouthiere/Infonuagique-TP3/blob/main/TP2_V_0_7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installation des packages

In [None]:
!pip install rembg

# Import des librairies

In [None]:
import os
import sys
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from cv2 import imread, imwrite, resize, cvtColor, INTER_AREA, COLOR_BGR2RGBA
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator

from tensorflow import float32
from keras.applications.efficientnet import preprocess_input
from keras.applications import VGG16, ResNet50, InceptionV3
from keras import layers, Sequential, Model
from keras.layers import Dense, Activation, BatchNormalization, Input, Dropout
from keras.optimizers import Adam
from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau


from rembg import remove
from PIL import Image

from pickle import dump, HIGHEST_PROTOCOL
from google.colab import drive
# Mount Google Drive at /content/drive. The default model, data and dataset
# folders used by the training class below all live under
# /content/drive/MyDrive, so this has to run before the class is instantiated.
drive.mount('/content/drive')

Class Animal Classification


# Main Code




In [None]:
from genericpath import exists
class AnimalsClassificationEntrainement:
  """Training pipeline for an animal image classifier built on a pretrained ResNet50.

  Typical usage is ``preparation()`` followed by ``entrainement_modele()`` and
  ``affichage_resultat_entrainement()``.

  Keyword Args:
    remove_background (bool): when True, every prepared image also gets a
      background-removed PNG copy (via ``rembg``) and that copy is used for
      training. Defaults to False.
    dossier_modeles (str): folder receiving the model checkpoints.
    dossier_donnees (str): folder receiving the prepared (resized/padded) images,
      one sub-folder per label.
    dossier_dataset (str): folder holding the raw dataset, one sub-folder per label.
    fichier_labels (str): pickle file receiving the label -> index mapping.
      Defaults to ``label_dict.pickle`` inside ``dossier_modeles``.
  """

  def __init__(self, **kwargs):
    self.IMAGE_TAILLE: tuple = (224, 224)   # target_size handed to the Keras generators
    self.BATCH_TAILLE: int = 16
    self.OUTPUT_SIZE: tuple = (224, 224)    # canvas size (height, width) of the prepared images
    self.REMOVE_BACKGROUND: bool = kwargs.get("remove_background", False)
    self.dossier_modeles: str = kwargs.get("dossier_modeles", "/content/drive/MyDrive/Data/TP2/Models-3-WithBG")
    self.dossier_donnees: str = kwargs.get("dossier_donnees", "/content/drive/MyDrive/Data/TP2/Data")
    self.dossier_dataset: str = kwargs.get("dossier_dataset", "/content/drive/MyDrive/Data/TP2/Dataset/animals-4")
    # Derived from dossier_modeles so that a custom model folder also receives
    # the label file; with the default folder this is the same path as before.
    self.fichier_labels: str = kwargs.get("fichier_labels", os.path.join(self.dossier_modeles, "label_dict.pickle"))
    self._init_dossiers()
    self.df: pd.DataFrame = None
    self.df_train = None
    self.df_validation = None
    self.train_images = None
    self.validation_images = None
    self.callbacks: list = []
    self.fichier_meilleure_precision: str = os.path.join(self.dossier_modeles, "best-model-accuracy.h5")
    self.fichier_meilleure_validation: str = os.path.join(self.dossier_modeles, "best-model-val-accuracy.h5")
    self.modele = None
    self.history = None
    self.nbr_epochs = 50

  def preparation(self) -> None:
    """Run every step required before training.

    Creates the folders, prepares the images, splits them into train and
    validation sets, builds the generators, saves the label mapping, builds the
    model and registers the callbacks.
    """
    self._init_dossiers()
    self.df = self._chargement_images()
    self._repartition_donnees()
    self._donnees_generation_configuration()
    self._enregistrement_labels()
    self._configuration_modele()
    self._callbacks()

  def _init_dossiers(self) -> None:
    """Create the model, data and dataset folders when they are missing."""
    for chemin in (self.dossier_modeles, self.dossier_donnees, self.dossier_dataset):
      self._dossier_creation(chemin)

  def _dossier_creation(self, chemin: str) -> None:
    """Create ``chemin`` (including missing parents); no-op when it already exists."""
    # makedirs + exist_ok handles nested paths and avoids the check-then-create race.
    os.makedirs(chemin, exist_ok=True)

  def _traitement_image(self, chemin_image: str, dossier: str):
    """Prepare one dataset image and return the path of the prepared file.

    The image is scaled to fit inside ``OUTPUT_SIZE`` while keeping its aspect
    ratio, centred on a black canvas and written to
    ``<dossier_donnees>/<dossier>/``. Files that were already prepared are reused.
    When ``REMOVE_BACKGROUND`` is set, a background-removed PNG is produced as
    well and its path is returned instead.

    Args:
      chemin_image: path of the source image.
      dossier: label (sub-folder name) the image belongs to.

    Returns:
      The path of the prepared image, or None when the source is not a file or
      cannot be decoded.
    """
    # Create the label folder when it does not exist yet
    dossier_images = os.path.join(self.dossier_donnees, dossier)
    self._dossier_creation(dossier_images)

    # Skip anything that is not a regular file
    if not os.path.isfile(chemin_image):
      return None

    nom_fichier: str = os.path.basename(chemin_image)
    chemin_image_redimensionnee: str = os.path.join(dossier_images, nom_fichier)

    # Only resize when the prepared image is not already on disk
    if not os.path.isfile(chemin_image_redimensionnee):
      image = imread(chemin_image)
      if image is None:
        # imread returns None (no exception) for corrupt or non-image files
        print(f"Image illisible ignorée: {chemin_image}")
        return None

      hauteur_cible, largeur_cible = self.OUTPUT_SIZE
      hauteur, largeur = image.shape[:2]
      ratio = min(hauteur_cible / hauteur, largeur_cible / largeur)
      # Clamp to [1, target]: a very elongated image would otherwise round
      # one side down to 0 pixels, which resize rejects.
      nouvelle_largeur = min(largeur_cible, max(1, int(largeur * ratio)))
      nouvelle_hauteur = min(hauteur_cible, max(1, int(hauteur * ratio)))
      image_redimensionnee = resize(image, (nouvelle_largeur, nouvelle_hauteur))

      # Centre the resized image on a black canvas
      nouvelle_image = np.zeros((hauteur_cible, largeur_cible, 3), dtype=np.uint8)
      x = (largeur_cible - nouvelle_largeur) // 2
      y = (hauteur_cible - nouvelle_hauteur) // 2
      nouvelle_image[y:y + nouvelle_hauteur, x:x + nouvelle_largeur] = image_redimensionnee
      imwrite(chemin_image_redimensionnee, nouvelle_image)

    if self.REMOVE_BACKGROUND:
      racine = os.path.splitext(nom_fichier)[0]
      chemin_image_redimensionnee_png = os.path.join(dossier_images, racine + ".png")
      if chemin_image_redimensionnee_png == chemin_image_redimensionnee:
        # The source is already a .png: without a distinct name the resized file
        # would be mistaken for the background-removed one and never processed.
        chemin_image_redimensionnee_png = os.path.join(dossier_images, racine + "-nobg.png")

      # Only remove the background when the PNG is not already on disk
      if not os.path.isfile(chemin_image_redimensionnee_png):
        with Image.open(chemin_image_redimensionnee) as image_source:
          sans_fond = remove(image_source, alpha_matting=True, alpha_matting_foreground_threshold=120)
          sans_fond = sans_fond.convert('RGB')
          sans_fond.save(chemin_image_redimensionnee_png)
      return chemin_image_redimensionnee_png  # image without its background

    return chemin_image_redimensionnee  # image with its background

  def _chargement_images(self):
    """Prepare every image of the dataset folder.

    Each sub-folder of ``dossier_dataset`` is treated as one label. Entries that
    are not folders are ignored. Folders and files are visited in sorted order
    so that the seeded train/validation split is reproducible.

    Returns:
      A DataFrame with the columns ``imgpath`` (prepared image path) and
      ``labels`` (sub-folder name).
    """
    data: dict = {'imgpath': [], 'labels': []}
    for folder in sorted(os.listdir(self.dossier_dataset)):
      folder_path: str = os.path.join(self.dossier_dataset, folder)
      if not os.path.isdir(folder_path):
        # Stray files next to the label folders would make listdir fail
        continue
      print(f"Chargement du dossier: {folder_path}")
      for image_name in sorted(os.listdir(folder_path)):
        image_path: str = os.path.join(folder_path, image_name)
        resized_image_path = self._traitement_image(image_path, folder)
        if resized_image_path:
          data['imgpath'].append(resized_image_path)
          data['labels'].append(folder)

    return pd.DataFrame(data)

  def _repartition_donnees(self):
    """Split ``self.df`` into 80 % training and 20 % validation rows.

    Raises:
      ValueError: when no image has been loaded.
    """
    if self.df is None or self.df.empty:
      raise ValueError(f"No image found in dataset folder: {self.dossier_dataset}")

    self.df_train, self.df_validation = train_test_split(self.df, train_size=0.8, random_state=128)
    self.df_train = self.df_train.reset_index(drop=True)
    self.df_validation = self.df_validation.reset_index(drop=True)
    for partition in (self.df_train, self.df_validation):
      print(partition[['imgpath', 'labels']].head())
      print(partition.shape)

  def _donnees_generation_configuration(self) -> None:
    """Build the Keras generators for the training and validation DataFrames.

    Augmentation (flip, zoom, rotation, shear) is applied to the training images
    only, so validation metrics are measured on undistorted images. Both
    generators receive the same explicit class list so that their label indices
    always agree, even when one split lacks a class.
    """
    # NOTE(review): preprocess_input is imported from keras.applications.efficientnet,
    # which the Keras documentation describes as a pass-through placeholder, while
    # the model is a ResNet50. Left unchanged because inference code must apply
    # the same preprocessing as training - confirm before switching to
    # keras.applications.resnet50.preprocess_input.
    generateur_entrainement = ImageDataGenerator(
      preprocessing_function=preprocess_input,
      horizontal_flip=True,
      zoom_range=0.2,
      rotation_range=20,
      shear_range=0.2
    )
    generateur_validation = ImageDataGenerator(preprocessing_function=preprocess_input)

    # Same ordering Keras uses when it infers the classes itself (sorted labels)
    classes = sorted(set(self.df_train['labels']) | set(self.df_validation['labels']))

    self.train_images = generateur_entrainement.flow_from_dataframe(
      dataframe=self.df_train,
      x_col='imgpath',
      y_col='labels',
      classes=classes,
      target_size=self.IMAGE_TAILLE,
      color_mode='rgb',
      class_mode='categorical',
      batch_size=self.BATCH_TAILLE,
      shuffle=True,
      seed=42
    )

    self.validation_images = generateur_validation.flow_from_dataframe(
      dataframe=self.df_validation,
      x_col='imgpath',
      y_col='labels',
      classes=classes,
      target_size=self.IMAGE_TAILLE,
      color_mode='rgb',
      class_mode='categorical',
      batch_size=self.BATCH_TAILLE,
      shuffle=False  # evaluation does not depend on the order
    )

  def _enregistrement_labels(self) -> None:
    """Pickle the label -> index mapping of the training generator to ``fichier_labels``."""
    dossier_parent = os.path.dirname(self.fichier_labels)
    if dossier_parent:
      self._dossier_creation(dossier_parent)
    with open(self.fichier_labels, 'wb') as fichier:
      dump(self.train_images.class_indices, fichier, protocol=HIGHEST_PROTOCOL)

  def _configuration_modele(self) -> None:
    """Build and compile the classifier: ResNet50 backbone plus a small dense head.

    The backbone is fine-tuned (trainable) except for its BatchNormalization
    layers, which stay frozen; it is also called with ``training=False``.
    """
    forme_entree = (*self.IMAGE_TAILLE, 3)
    pretrained_resnet = ResNet50(
      input_shape=forme_entree,
      include_top=False,  # the pretrained output layer is not needed
      weights='imagenet',
      pooling='max'
    )

    # Size of the one-hot labels produced by the generators
    num_classes = len(self.train_images.class_indices)

    # Empty Sequential kept from the original graph: it holds no layer
    sequence = Sequential()
    inputs = Input(shape=forme_entree, name='inputLayer')
    x = sequence(inputs)
    pretrain_out = pretrained_resnet(x, training=False)
    x = Dense(256)(pretrain_out)
    x = Activation(activation="relu")(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = Dense(num_classes)(x)

    outputs = Activation(activation="softmax", dtype=float32, name='activationLayer')(x)

    self.modele = Model(inputs=inputs, outputs=outputs)

    # Fine-tuning: the whole backbone is trainable except BatchNormalization.
    # (The former per-layer freeze was overridden by this assignment and had no effect.)
    pretrained_resnet.trainable = True
    for layer in pretrained_resnet.layers:
      if isinstance(layer, BatchNormalization):
        layer.trainable = False

    # Show the trainable flag of the first 10 backbone layers
    for layer in pretrained_resnet.layers[:10]:
      print(layer.name, layer.trainable)

    self.modele.compile(
      optimizer=Adam(0.00001),  # fine-tuning needs a very small learning rate
      loss='categorical_crossentropy',
      metrics=['accuracy']
    )
    # summary() prints by itself and returns None; wrapping it in print() added a stray "None"
    self.modele.summary()

  def _callbacks(self) -> None:
    """Register the training callbacks: two checkpoints, early stopping and LR reduction."""
    self.callbacks = [
      # Best model by training accuracy
      ModelCheckpoint(filepath=self.fichier_meilleure_precision, verbose=1, monitor='accuracy', save_best_only=True),
      # Best model by validation accuracy
      ModelCheckpoint(filepath=self.fichier_meilleure_validation, verbose=1, monitor='val_accuracy', save_best_only=True),
      # Stop after 5 epochs without val_loss improvement
      EarlyStopping(monitor="val_loss", patience=5),
      # Patience must be shorter than EarlyStopping's: with the same patience
      # training stopped on the very epoch the learning rate was lowered.
      ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, mode='min'),
    ]

  def entrainement_modele(self):
    """Train the model, store the Keras history in ``self.history`` and save the final weights.

    Raises:
      RuntimeError: when ``preparation()`` has not been run first.
    """
    if self.modele is None or self.train_images is None or self.validation_images is None:
      raise RuntimeError("preparation() must be called before entrainement_modele()")

    self.history = self.modele.fit(
      self.train_images,
      steps_per_epoch=len(self.train_images),
      validation_data=self.validation_images,
      validation_steps=len(self.validation_images),
      epochs=self.nbr_epochs,
      callbacks=self.callbacks
    )
    # NOTE(review): relative path, outside dossier_modeles - confirm this is intended.
    self.modele.save_weights('./checkpoints/my_checkpoint2')

  def affichage_resultat_entrainement(self):
    """Plot training/validation loss and accuracy per epoch, marking the best epochs.

    Raises:
      RuntimeError: when the model has not been trained yet.
    """
    if self.history is None:
      raise RuntimeError("entrainement_modele() must be called before affichage_resultat_entrainement()")

    historique = self.history.history
    tr_acc = historique['accuracy']
    tr_loss = historique['loss']
    val_acc = historique['val_accuracy']
    val_loss = historique['val_loss']
    index_loss = np.argmin(val_loss)
    val_lowest = val_loss[index_loss]
    index_acc = np.argmax(val_acc)
    acc_highest = val_acc[index_acc]
    epochs = list(range(1, len(tr_acc) + 1))
    loss_label = f'best epoch= {index_loss + 1}'
    acc_label = f'best epoch= {index_acc + 1}'

    # Plot training history
    plt.figure(figsize=(20, 8))
    plt.style.use('fivethirtyeight')

    plt.subplot(1, 2, 1)
    plt.plot(epochs, tr_loss, 'r', label='Training loss')
    plt.plot(epochs, val_loss, 'g', label='Validation loss')
    plt.scatter(index_loss + 1, val_lowest, s=150, c='blue', label=loss_label)
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epochs, tr_acc, 'r', label='Training Accuracy')
    plt.plot(epochs, val_acc, 'g', label='Validation Accuracy')
    plt.scatter(index_acc + 1, acc_highest, s=150, c='blue', label=acc_label)
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    # The original referenced plt.tight_layout without calling it (no effect)
    plt.tight_layout()
    plt.show()

# Full training run with the default configuration (default Drive folders,
# background kept).
act = AnimalsClassificationEntrainement()
# Prepare the images, generators, label file, model and callbacks.
act.preparation()
# Train the model; the Keras history is stored on the instance.
act.entrainement_modele()
# Plot the loss and accuracy curves from the stored history.
act.affichage_resultat_entrainement()


Chargement du dossier: /content/drive/MyDrive/Data/TP2/Dataset/animals-4/zebra
Chargement du dossier: /content/drive/MyDrive/Data/TP2/Dataset/animals-4/wombat
Chargement du dossier: /content/drive/MyDrive/Data/TP2/Dataset/animals-4/woodpecker
Chargement du dossier: /content/drive/MyDrive/Data/TP2/Dataset/animals-4/turtle
Chargement du dossier: /content/drive/MyDrive/Data/TP2/Dataset/animals-4/squirrel
Chargement du dossier: /content/drive/MyDrive/Data/TP2/Dataset/animals-4/starfish
Chargement du dossier: /content/drive/MyDrive/Data/TP2/Dataset/animals-4/whale
Chargement du dossier: /content/drive/MyDrive/Data/TP2/Dataset/animals-4/turkey
Chargement du dossier: /content/drive/MyDrive/Data/TP2/Dataset/animals-4/tiger
Chargement du dossier: /content/drive/MyDrive/Data/TP2/Dataset/animals-4/swan
Chargement du dossier: /content/drive/MyDrive/Data/TP2/Dataset/animals-4/wolf
Chargement du dossier: /content/drive/MyDrive/Data/TP2/Dataset/animals-4/squid
Chargement du dossier: /content/drive/My

KeyboardInterrupt: 



---

END

---



---

