In [None]:
# Mount Google Drive at /content/drive; later cells read the weights and CSV files from there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Téléchargement de la base de données

In [None]:
# Pour récupérer le nombre de classes du training dataset
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Clone the repository that provides the abeilles-cap500/ image folders (train/, val/, test/) read below.
!git clone https://github.com/fabiopereira59/abeilles-cap500

Cloning into 'abeilles-cap500'...
remote: Enumerating objects: 24878, done.[K
remote: Counting objects: 100% (6126/6126), done.[K
remote: Compressing objects: 100% (6121/6121), done.[K
remote: Total 24878 (delta 2), reused 6126 (delta 2), pack-reused 18752[K
Receiving objects: 100% (24878/24878), 242.93 MiB | 17.13 MiB/s, done.
Resolving deltas: 100% (5/5), done.
Checking out files: 100% (24081/24081), done.


# Chargement des données

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Parameters
IMG_SIZE = 224 # images are resized to IMG_SIZE x IMG_SIZE, the input shape given to ResNet

In [None]:
# Load the training and validation splits. Both share the same label, batch and
# image-size settings; only the training split disables shuffling.
_COMMON_DATASET_ARGS = dict(
    labels='inferred',
    label_mode='categorical',
    batch_size=16,
    image_size=(IMG_SIZE, IMG_SIZE),
)

train_ds = keras.utils.image_dataset_from_directory(
    directory='abeilles-cap500/train/',
    shuffle=False,
    **_COMMON_DATASET_ARGS,
)

validation_ds = keras.utils.image_dataset_from_directory(
    directory='abeilles-cap500/val/',
    **_COMMON_DATASET_ARGS,
)

Found 14917 files belonging to 71 classes.
Found 1832 files belonging to 71 classes.


In [None]:
# Class names exposed by the training dataset; later cells index this list with label positions.
CLASS_NAMES = train_ds.class_names
NB_CLASSES = len(CLASS_NAMES)

In [None]:
import numpy as np
import tensorflow as tf
# Materialise the validation split as NumPy arrays, then apply the ResNet preprocessing.
# The sample count is taken from the dataset instead of a hard-coded literal, so the
# buffers always match the content of the validation directory.
nb_val = len(validation_ds.file_paths)
x_val = np.zeros((nb_val, IMG_SIZE, IMG_SIZE, 3))
y_val = np.zeros((nb_val, NB_CLASSES))

# Images and labels are copied from the same batch in a single pass. validation_ds is
# created without `shuffle=`, so Keras' default shuffling applies and two separate
# passes would not be aligned.
ind_data = 0
for bx, by in validation_ds.as_numpy_iterator():
  x_val[ind_data:ind_data+bx.shape[0]] = bx
  y_val[ind_data:ind_data+bx.shape[0]] = by
  ind_data += bx.shape[0]
assert ind_data == nb_val, "validation buffers were not completely filled"

x_val = tf.keras.applications.resnet.preprocess_input(x_val)

# Prédiction

In [None]:
# ResNet101 backbone built without its top classification layers (include_top=False).
conv_base = keras.applications.resnet.ResNet101(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    pooling=None,
    # NOTE(review): Keras documents `classes` as only relevant with include_top=True;
    # presumably it has no effect here — confirm.
    classes=NB_CLASSES,
)

# Classifier: backbone -> global average pooling -> L2-regularised softmax layer with NB_CLASSES outputs.
model = keras.Sequential(
    [
        conv_base,
        layers.GlobalAveragePooling2D(),
        layers.Dense(NB_CLASSES, kernel_regularizer=tf.keras.regularizers.L2(1e-4), activation='softmax')
    ]
)

# Load the saved weights from the Drive checkpoint path; the returned status object is the cell output.
model.load_weights('/content/drive/MyDrive/Stage2A/cap500/ResNet101/ResNet101_HierarchicalLossV2/Poids/best_model_hierarchicaloss')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet101_weights_tf_dim_ordering_tf_kernels_notop.h5


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f3087f55350>

In [None]:
import pandas as pd
import numpy as np

# Taxonomy table; the loop below reads its "genus", "subfamily" and "family" columns row by row.
hierarchie = pd.read_csv("/content/drive/MyDrive/Stage2A/hierarchie_especes71.csv")

species = hierarchie["species"].unique()
nb_species = len(species)

genus = list(hierarchie["genus"].unique())
nb_genus = len(genus)

family = list(hierarchie["family"].unique())
nb_family = len(family)

subfamily = list(hierarchie["subfamily"].unique())
nb_subfamily = len(subfamily)

# NOTE(review): `data` is not read anywhere in this cell any more (the unused per-row
# `nb_images` lookup was removed); the load is kept in case other cells rely on it.
data = pd.read_csv("/content/drive/MyDrive/Stage2A/liste_classes_71.csv")

# Binary membership matrices, one per taxonomic step: entry [parent, child] is set to 1
# when a row of the table links `child` to `parent`.
species_to_genus = np.zeros((nb_genus, nb_species))
genus_to_subfamily = np.zeros((nb_subfamily, nb_genus))
subfamily_to_family = np.zeros((nb_family, nb_subfamily))

# NOTE(review): column i of species_to_genus corresponds to row i of the CSV. This
# presumably has to match the model's class order (CLASS_NAMES) and assumes one row
# per species — confirm.
for i in range(nb_species):
  # species -> genus
  genus_species = hierarchie.at[i, "genus"]
  ind_genus = genus.index(genus_species)
  species_to_genus[ind_genus, i] = 1

  # genus -> subfamily
  subfamily_species = hierarchie.at[i, "subfamily"]
  ind_subfamily = subfamily.index(subfamily_species)
  genus_to_subfamily[ind_subfamily, ind_genus] = 1

  # subfamily -> family
  family_species = hierarchie.at[i, "family"]
  ind_family = family.index(family_species)
  subfamily_to_family[ind_family, ind_subfamily] = 1

In [None]:
from numpy.ma.core import transpose
from keras import backend as K
import math
import tensorflow as tf

# Loss function definition
def Hierarchicaloss(species_to_genus, genus_to_subfamily, subfamily_to_family, batch_size, alpha=0.1):
    """Build a loss that sums weighted cross-entropies over four taxonomic levels.

    The species-level targets and predictions are projected successively to the
    genus, subfamily and family levels by multiplying with the three matrices. A
    categorical cross-entropy is computed at every level and scaled by
    ``exp(-alpha * level)`` with level 0 (species) to 3 (family).

    Args:
        species_to_genus: matrix applied to species-level vectors to get genus-level ones.
        genus_to_subfamily: matrix applied to genus-level vectors to get subfamily-level ones.
        subfamily_to_family: matrix applied to subfamily-level vectors to get family-level ones.
        batch_size: divisor applied to the summed per-level losses.
        alpha: decay rate of the per-level weight.

    Returns:
        A function ``HIERARCHICAL_loss(y_true, y_pred)`` usable as a Keras loss.
    """

    def level_weight(level):
        # Weight of a taxonomic level: exp(-alpha * level).
        return math.exp(-alpha * level)

    def project(membership, scores):
        # Computes transpose(membership @ transpose(scores)), i.e. one projected row per sample.
        return K.transpose(tf.raw_ops.MatMul(a=membership, b=scores, transpose_b=True))

    def weighted_crossentropy(level, y_true, y_pred):
        return level_weight(level) * K.categorical_crossentropy(y_true, y_pred)

    def HIERARCHICAL_loss(y_true, y_pred):
        # Level 0: species. Cast to float64 to match the dtype of the projected levels.
        species_term = tf.cast(weighted_crossentropy(0, y_true, y_pred), tf.float64)

        # Level 1: genus. Inputs are cast to float64 before the matrix product.
        true_genus = project(species_to_genus, tf.cast(y_true, tf.float64))
        pred_genus = project(species_to_genus, tf.cast(y_pred, tf.float64))
        genus_term = weighted_crossentropy(1, true_genus, pred_genus)

        # Level 2: subfamily, derived from the genus-level vectors.
        true_subfamily = project(genus_to_subfamily, true_genus)
        pred_subfamily = project(genus_to_subfamily, pred_genus)
        subfamily_term = weighted_crossentropy(2, true_subfamily, pred_subfamily)

        # Level 3: family, derived from the subfamily-level vectors.
        true_family = project(subfamily_to_family, true_subfamily)
        pred_family = project(subfamily_to_family, pred_subfamily)
        family_term = weighted_crossentropy(3, true_family, pred_family)

        return (species_term + genus_term + subfamily_term + family_term)/batch_size

    # Return a function
    return HIERARCHICAL_loss

In [None]:
# Instantiate the hierarchical loss from the three membership matrices (batch_size=16, alpha=0.5).
loss=[Hierarchicaloss(species_to_genus, genus_to_subfamily, subfamily_to_family, batch_size=16, alpha=0.5)]

In [None]:
# Compile with SGD + momentum, the hierarchical loss, and accuracy / precision / recall metrics.
sgd_optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3, momentum=0.9)
tracked_metrics = ['categorical_accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
model.compile(optimizer=sgd_optimizer, loss=loss, metrics=tracked_metrics)

In [None]:
# Model outputs (softmax scores, one row per image) for the preprocessed validation images.
y_pred_val = model.predict(x_val)

In [None]:
import tensorflow
from tensorflow.keras.metrics import Accuracy
# Top-1 species accuracy on the validation split (x_val / y_val).
test_accuracy = Accuracy()

prediction = tensorflow.argmax(y_pred_val, axis=1, output_type=tensorflow.int32)
print(prediction)
# Keras metrics take their arguments as (y_true, y_pred).
test_accuracy.update_state(np.argmax(y_val, axis=1), prediction)

# The figure is computed on validation samples, so it is labelled as such.
print("Validation set accuracy: {:.3%}".format(test_accuracy.result()))

tf.Tensor([12 61 36 ... 61 40 49], shape=(1832,), dtype=int32)
Test set accuracy: 86.245%


In [None]:
# Show the one-hot validation targets followed by the predicted scores.
print(y_val, y_pred_val, sep="\n")

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[[1.8988131e-07 4.0246320e-07 9.9053630e-04 ... 6.0490155e-09
  1.0440071e-07 3.7943482e-07]
 [2.8941504e-04 1.1063877e-05 1.1680530e-03 ... 1.2901146e-06
  2.2328723e-07 1.3338813e-07]
 [6.3271941e-06 6.0010592e-07 1.6446731e-05 ... 1.8462208e-06
  1.2251907e-05 1.8338653e-06]
 ...
 [2.7829394e-06 3.6050099e-06 1.8594823e-03 ... 2.8446277e-07
  4.7238530e-07 4.0825807e-08]
 [4.6376732e-05 9.6714018e-07 3.1675006e-06 ... 3.1019375e-04
  3.8496878e-06 5.1433841e-07]
 [2.6284815e-07 9.5516439e-09 2.2737699e-07 ... 9.5484964e-08
  1.4772080e-08 5.2734097e-09]]


In [None]:
import pandas as pd
def analyse_accuracy_hierarchie(y_pred, y_true, hierarchie, class_names=None):
  """Print top-1 accuracy at the species, genus, family and subfamily levels.

  A sample counts as correct at a given level when the predicted species equals
  the true species, or when both species have the same value in that column of
  `hierarchie`.

  Args:
    y_pred: per-sample class scores; the predicted class is the argmax along axis 1.
    y_true: per-sample target vectors; the true class is the argmax along axis 1.
    hierarchie: DataFrame indexed by species name with "genus", "family" and
      "subfamily" columns.
    class_names: sequence mapping a class index to its species name. Defaults to
      the notebook-level CLASS_NAMES, which keeps existing three-argument calls working.

  Raises:
    ValueError: if there are no samples, or if `y_pred` and `y_true` differ in length.
    KeyError: if a mispredicted sample involves a species missing from `hierarchie`.
  """
  if class_names is None:
    class_names = CLASS_NAMES

  nb_samples = len(y_pred)
  if nb_samples == 0:
    raise ValueError("analyse_accuracy_hierarchie needs at least one sample")
  if len(y_true) != nb_samples:
    raise ValueError("y_pred and y_true must contain the same number of samples")

  prediction = np.argmax(y_pred, axis=1)
  ground_truth = np.argmax(y_true, axis=1)

  # Exact species matches are correct at every level, so only the mispredicted
  # (true species, predicted species) pairs need a taxonomy lookup.
  mismatches = []
  for true_idx, pred_idx in zip(ground_truth, prediction):
    true_species = class_names[true_idx]
    pred_species = class_names[pred_idx]
    if true_species != pred_species:
      mismatches.append((true_species, pred_species))
  nb_exact = nb_samples - len(mismatches)

  def level_accuracy(level):
    # Exact matches plus mispredictions that still share the same `level` value.
    nb_same_parent = sum(
        1 for true_species, pred_species in mismatches
        if hierarchie.at[true_species, level] == hierarchie.at[pred_species, level]
    )
    return (nb_exact + nb_same_parent) / nb_samples

  accuracy = nb_exact / nb_samples
  accuracy_genus = level_accuracy("genus")
  accuracy_family = level_accuracy("family")
  accuracy_subfamily = level_accuracy("subfamily")

  print("Accuracy : " + str(accuracy))
  print("Accuracy genus : " + str(accuracy_genus))
  print("Accuracy family : " + str(accuracy_family))
  print("Accuracy subfamily : " + str(accuracy_subfamily))

In [None]:
# Reload the taxonomy table keyed by species name so rows can be looked up by class name.
hierarchie = pd.read_csv(
    '/content/drive/MyDrive/Stage2A/hierarchie_especes71.csv',
    index_col="species",
)

In [None]:
# Per-level accuracy report for the validation predictions.
analyse_accuracy_hierarchie(y_pred_val, y_val, hierarchie)

Accuracy : 0.8624454148471615
Accuracy genus : 0.9143013100436681
Accuracy family : 0.9301310043668122
Accuracy subfamily : 0.9263100436681223


In [None]:
# Load the test split from abeilles-cap500/test/ in batches of 16 images resized to
# IMG_SIZE x IMG_SIZE, with labels inferred from the directory structure.
test_ds = keras.utils.image_dataset_from_directory(
    directory='abeilles-cap500/test/',
    labels='inferred',
    label_mode='categorical',
    batch_size=16,
    image_size=(IMG_SIZE, IMG_SIZE))

Found 1832 files belonging to 71 classes.


In [None]:
import numpy as np
import tensorflow as tf
# Materialise the test split as NumPy arrays, then apply the ResNet preprocessing.
# The sample count is taken from the dataset instead of a hard-coded literal, so the
# buffers always match the content of the test directory.
nb_test = len(test_ds.file_paths)
x_test = np.zeros((nb_test, IMG_SIZE, IMG_SIZE, 3))
y_test = np.zeros((nb_test, NB_CLASSES))

# Images and labels are copied from the same batch in a single pass. test_ds is
# created without `shuffle=`, so Keras' default shuffling applies and two separate
# passes would not be aligned.
ind_data = 0
for bx, by in test_ds.as_numpy_iterator():
  x_test[ind_data:ind_data+bx.shape[0]] = bx
  y_test[ind_data:ind_data+bx.shape[0]] = by
  ind_data += bx.shape[0]
assert ind_data == nb_test, "test buffers were not completely filled"

x_test = tf.keras.applications.resnet.preprocess_input(x_test)

In [None]:
# Model outputs (softmax scores, one row per image) for the preprocessed test images.
y_pred_test = model.predict(x_test)

In [None]:
# Per-level accuracy report for the test predictions.
analyse_accuracy_hierarchie(y_pred_test, y_test, hierarchie)

Accuracy : 0.8493449781659389
Accuracy genus : 0.9082969432314411
Accuracy family : 0.9328602620087336
Accuracy subfamily : 0.9252183406113537
