# <!-- TITLE --> __```Exploration des données + Data Viz```__
<img width="1000px" height="400px" src="images/out.png"></img>

####  __```Importations des modules Python```__

In [None]:
%matplotlib inline 

In [None]:
import random
import pandas as pd  
import numpy  as np 
import seaborn as sns 
import tensorflow as tf 
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from sklearn.preprocessing import LabelEncoder
import cv2


#### __```Importations des modules Python externes```__

In [None]:
from modules_python.file_extraction.extraction import read_plant_datasets
from modules_python.config.cmap import color_map as cmap
from modules_python.config.plot_images import plot as plot_img
from modules_python.image_processing.SemanticImage import SemanticImage
from modules_python.plots.plot import hist_hist_plot, hist_bar_plot, hist_pie_plot, boxplot
from modules_python.image_processing.preprocessing import  filter_selection
from modules_python.image_processing.data_aug import data_augmenter_v1 as data_aug
from modules_python.image_processing.ImageSeg import ImageSegmentation
from modules_python.image_processing.tools import change_bg
from modules_python.image_processing.preprocessing import Semantic_Image_Plus_Data_Augment
from modules_python.models.cnn.layers import convNet_m1
from modules_python.models.cnn.layers import compilation 
from modules_python.image_processing.data_aug import data_augmenter_v2



#### __```Liste des Couleurs disponibles```__

In [None]:
# Names of all CSS4 colours known to matplotlib.
python_colors = list(mcolors.CSS4_COLORS)

#### __```Liste des Couleurs de mappage```__

In [None]:
# Colour-map list returned by the project's `color_map` helper (imported above as `cmap`).
python_colors_map = cmap().get_cmap_list()

#### __```Définir le chemin d'accès du dataset```__

In [None]:
# Dataset root folder. Machine-specific: set your own path here.
# Raw string: the previous literal contained the invalid escape sequence "\D",
# which triggers a warning on recent Python versions. The value is unchanged.
PATH = r"C:\Users\amib\Documents\Py_Projects\DS_project\especes"

#### __```Extraction & Lecture des données```__

- Plus de détails sur les différents paramètres [ici](https://github.com/amiehe-essomba/Plant_Seedlings_ds_Project/blob/Plant_Seedlings/details.md)
- Source des données : [Kaggle V2 Plant Seedlings Dataset](https://www.kaggle.com/datasets/vbookshelf/v2-plant-seedlings-dataset)

In [None]:
# Number of plant species to load.
samples         = 12

# Step between two consecutive species indices.
pas             = 1

# Indices of the species to extract (samples = 12 and pas = 1 selects the whole dataset).
type_indexes    = list(range(0, samples, pas))

# Filter type applied to the images.
channel_type    = "RGR2-LAB"

# Resizing windows.
reshape         = [(128, 128)]

# Format of the returned data.
return_as       = "dict"

# Verbosity level.
verbose         = 1

# Whether to add light intensity on the 3 channels.
add_contrast    = False

# Start extracting the files.
DATA = read_plant_datasets(
    path=PATH,
    reshape=reshape,
    return_as=return_as,
    verbose=verbose,
    type_indexes=type_indexes,
    channel_type=channel_type,
    add_contrast=add_contrast,
)

#### __```Chargement des données```__

In [None]:
# Image size selected for this run (first resizing window).
shape           = reshape[0]

# DATA is indexed by a "<shape[0]>x<shape[1]>" key; work on a copy of that entry.
data            = DATA["{}x{}".format(shape[0], shape[1])].copy()

In [None]:
# Legend entries: the plant names.
legend          = data['feature_names']

# One colour per plant species of the dataset.
colors          = ["blue", 'orange', 'darkgreen', 'darkred', 'm', "c", "lime", "k", "y", "violet", "gold", "darkblue" ]
# Alternatively, the colours can be drawn at random:
#colors = random.sample(python_colors, samples) 

#### __```Visualisation des différentes espèces de plantes en RGB et sous Infra-Rouge```__

In [None]:
"""
Section de visualisation sur le canal 1.
à Savoir que, pour une bonne représentation des coleurs toutes les images ont 3 canaux (RGB2-LAB)
"""

# Position of the single image displayed for every species.
index           = 10

# Colour channel to display (axis=1).
channel         = 1

# Index of the (2D) colour map in `python_colors_map`.
id_cmap         = 10

# Title colour for each of the 12 species.
# NOTE(review): this rebinds `colors`, replacing the per-species palette defined in the previous cell.
colors          = ['k'] * 12

# Image kinds {X : RGR2-LAB, images : real images}; one figure per kind.
for s in ['images', 'X']:
    type_img        = s

    plot_img(
        data        = data,                         # both
        index       = index,                        # both
        channel     = channel,                      # both
        colors      = colors,                       # both (title colours)
        legend      = legend,                       # both
        type_img    = type_img,                     # both
        cmap        = python_colors_map[id_cmap],   # bottom figure (RGR2-LAB)
        save        = True,                         # bottom figure (RGR2-LAB)
        fig_name    = "rgb.png"                     # top figure (RGB)
        )


#### __```Analyses des Images```__
- *[Initialisation des paramètres]()*

In [None]:
# Indices of the plant species kept for the analysis plots
id_sel      = [1, 3, 5, 7, 9, 11] # alternative selection: [0, 2, 4, 6, 8, 10]
# Legend entries restricted to the selected species
sub_lengend = [legend[q] for q in id_sel]
# Colours for the selected species (one per entry of id_sel)
# alternative: random.sample(python_colors, k=len(id_sel))
sub_colors  = ["darkblue", 'darkorange', 'green', 'red', 'm', 'skyblue']


- *[Hauteur & Largeur (n_H x n_L)]()*

In [None]:

# Heights (n_H) of the selected species
Hauteus     = [data['height'][i] for i in id_sel]
# Widths (n_L) of the selected species
Largeurs    = [data['width'][i] for i in id_sel] 
# Both series grouped for the two-panel plot
X           = [Hauteus, Largeurs]
# Panel titles
titles      = ["Histogramme des hauteurs", "Histogrammes de Largeurs"]
# x-axis labels
xlabel      = ['Hauteur', "Largeur"]
# y-axis labels
ylabel      = ["Population", "Population"]
# Figure size
figsize     = (12, 3)
# Bounding-box coordinates, one entry per panel
coord   : dict = {"x" : [[10, 250], [10, 250]], 
                        "y" : [[0, 450], [0, 450]], "xmin":[0, 0], "ymin":[0, 0],
                        "xmax" : [250, 250], "ymax" :  [450, 450]}

# Side-by-side histograms of heights and widths ("both" = applies to both panels)
hist_hist_plot(
    X           = X,                # both 
    legend      = sub_lengend,      # both 
    title       = titles,           # both 
    xlabel      = xlabel,           # both 
    ylabel      = ylabel,           # both 
    colors      = sub_colors,       # both
    figsize     = figsize,          # both 
    bonding_box = True,             # both 
    annot       = True,             # both 
    text        = True,             # both
    coord       = coord,            # both
    y_lim       = [-5, 500],        # both 
    bins        = 8,                # both
    save        = False
    )

- *[Pixellisations & Répartition d'espèces]()*

| __Noms d'espèces__                    | __Nombre de plantes par espèce__ | __RGBA (RGB + canal alpha)__  |__[Total]()__|
|---------------------------------------|----------------------------------|-----------------------------|------|
| __Black-grass__                       | __309__               | __3__|
| __Charlock__                          | __452__               | __0__|
| __Cleavers__                          | __335__               | __0__|
| __Common Chickweed__                  | __713__               | __0__|
| __Common wheat__                      | __253__               | __0__|
| __Fat Hen__                           | __538__               | __0__|
| __Loose Silky-bent__                  | __762__               | __21__|
| __Maize__                             | __257__               | __0__|
| __Scentless Mayweed__                 | __607__               | __0__|
| __Shepherd's Purse__                  | __274__               | __0__|
| __Small-flowered Cranesbill__         | __576__               | __0__|
| __Sugar beet__                        | __463__               | __0__|
|  __[Total]()__                        | __[5539]()__                     | __[24]()__      |__[12 espèces]()__|

- Valeurs Statistiques

|__Min__        | __Max__     | __Mean__         | __Med__        | __std__       | __Q1__         | __Q3__       | __IQ__        |
|---------------|-------------|------------------|----------------|---------------|----------------|--------------|---------------|
|  __[253]()__  | __[762]()__ | __[462.6]()__    | __[457.5]()__  | __[179.3]()__ | __[300.3]()__  |__[583.8]()__ | __[283.5]()__ |



* Voir le [Boxplot](#boxplot) ci-dessous



- [Boxplot](#boxplot)

In [None]:
# Number of images of every species, as a Series, summarised by a box plot.
num   = pd.Series(data = data['number_of_images'], name='spaces') 
boxplot(X=num, save=False)

In [None]:
# Number of images for each selected species (as a NumPy array)
num_of_images   = pd.Series(data = [data['number_of_images'][q] for q in id_sel ], name='spaces').values
# Number of images for all the species (as a NumPy array)
gama            = pd.Series(data = data['number_of_images'], name='spaces').values
# Pixel values of the selected species
Pixels          = [data['pixels'][q] for q in id_sel]
# Panel titles
titles          = ["Histogramme de pixelisation", "Nombre de plantes par espèce"]
# x-axis labels
xlabel          = ["Pixels (Mpx)", "Encodage"]
# y-axis labels
ylabel          = ["Population", "Population"]
# Both series grouped for the two-panel plot
X               = [Pixels, num_of_images]

# Pixel histogram (fig 1) next to the bar chart of images per species (fig 2)
hist_bar_plot(
    X           = X,                        # both
    figsize     = figsize,                  # both
    colors      = sub_colors,               # both
    legend      = sub_lengend,              # both
    xlabel      = xlabel,                   # fig 1 
    ylabel      = ylabel,                   # fig 1
    titles      = titles,                   # fig 1
    rot         = 45,                       # fig 2
    bb_box      = {"x" : 0.5, "y" : 600},   # fig 1
    legends     = legend,                   # fig 2
    y_lim       = [[-5, 750], [-5, 770]] ,  # both
    encoding    = True,                     # fig 2
    sort        = True,                     # fig 2
    rev         = True,                     # fig 2
    bar_bbox    = (1.1, 0.6, 0.5, 0.5),     # fig 2
    gama        = gama,                     # fig 2
    c           = 'k',                      # fig 2
    bins        = 8,                        # fig 1
    width       = 0.5,                      # fig 2
    save        = False
    )

- *[Longueur/Hauteur & Canaux RGBA (RGB + canal alpha)]()*

In [None]:
def x_y(widths, heights, size : int = 3):
    """Return the element-wise width / height ratios of the first `size` groups.

    Args:
        widths: indexable collection; `widths[i]` holds the widths of group i.
        heights: indexable collection; `heights[i]` holds the matching heights.
        size: number of leading groups to process.

    Returns:
        A list of `size` NumPy arrays, the i-th being `widths[i] / heights[i]`.
    """
    ratios = []
    for group in range(size):
        group_widths = np.array(widths[group])
        group_heights = np.array(heights[group])
        ratios.append(group_widths / group_heights)
    return ratios

In [None]:
# Bounding-box position used by the first panel
bb_box              = {"x":1.02, "y":600}
# Width / height ratios of the selected species
X_Y                 = x_y(heights=Hauteus, widths=Largeurs, size=len(sub_lengend))
# 'sobels' entries for a fixed subset of species
# NOTE(review): this subset [0, 1, 3, 6, 9, 11] differs from id_sel, yet the same sub_colors are passed below - confirm
Sobels              = [data['sobels'][q] for q in [0, 1, 3, 6, 9, 11]]
# Legend entries matching the Sobels subset
Sobel_legends       = [legend[q] for q in [0, 1, 3, 6, 9, 11]]
# Both series grouped for the two-panel plot
X                   = [X_Y, Sobels]

# Narrower figure when every 'sobels' entry is zero (assumes the entries are numbers - TODO confirm)
if      sum(Sobels) == 0: figsize_=(6, 3)
else:   figsize_    = (12, 3)

# Ratio histogram (fig 1) next to a pie chart of the 'sobels' entries (fig 2)
# NOTE(review): both `vline=True` and `v_line=False` are passed - confirm which one the helper reads
hist_pie_plot(
    X               = X,                    # both
    legend          = sub_lengend,          # fig 1
    colors          = sub_colors,           # both
    bb_box          = bb_box,               # fig 1
    figsize         = figsize_,             # fig 1
    vline           = True,                 # fig 1 (show vertical line)
    xlabel          = ['Ratio (L/H)', ""],  # fig 1
    ylabel          = ['Population', ""],   # fig 1
    y_lim           = [-5, 800],            # fig 1
    x_lim           = [0.99, 1.1],          # fig 1
    Sobel_legends   = Sobel_legends,        # fig 2
    radius          = 1.2,                  # fig 2 (circle radius)
    explode_id      = [1],                  # fig 2 
    titles          = ['Hitogramme (L/H)'], # fig 1
    v_line          = False,                # fig 1
    save            = False,
    pctdistance     = 0.80
)

- *[Histogrammes de couleurs en RGR2-LAB( 3 canaux )]()*

In [None]:
# RGR2-LAB image number `index` of each of the 12 species, as float32 copies.
images = [
    data['X'][m][index].astype("float32").copy()
    for m in range(12)
]

# Colour histograms along the 3 axes [0, 1, 2] for every species.
filter_selection(
    img=images,
    names=legend,
    select_index=list(range(12)),
    figsize=(15, 24),
    bins=20,
)
plt.show()

#### __```Segmentation d'Images Sémantiques (SIS) : différentes étapes```__
Comment effectuer la [Segmentation d'Images Sémantiques]()
* [REF. 1](https://fr.wikipedia.org/wiki/Segmentation_d%27image)
* [REF. 2](https://nanonets.com/blog/semantic-image-segmentation-2020/)
* [REF. 3](https://towardsdatascience.com/semantic-segmentation-popular-architectures-dff0a75f39d0)

In [None]:
# Value thresholds of the background (see the colour histogram of channel 1)
threshold   = [0, 0.465]

# Upper bound for black on a [0, 255] scale, for the 3 channels
upper_color = [30, 30, 30]

# Lower bound for black on a [0, 255] scale, for the 3 channels
lower_color = [0, 0, 0]

# Radius used for dilation and erosion
radius      = 1

# Segmentation method used
method      = "numpy"
# (the images are visualised in the next cell)

In [None]:
# Colour map used to render the segmentation figures.
# NOTE(review): this rebinds `cmap`, imported at the top as the `color_map` helper;
# re-running the earlier `cmap().get_cmap_list()` cell after this one is likely to fail - TODO rename.
cmap = python_colors_map[id_cmap]
# Background option forwarded to SemanticImage.
bg='all'
# Runs the project's SemanticImage helper on image `index` for the species listed in id_sel.
SemanticImage(
        data        = data,
        index       = index,
        channel     = channel,
        threshold   = threshold,
        upper_color = upper_color,
        lower_color = lower_color,
        legend      = legend,
        radius      = radius,
        method      = method,
        bg          = bg,
        id_sel      = id_sel, 
        deep_mask   = True,
        kernel      = (2, 2),
        cmap        = cmap,
        figsize     = (15, 12),
        value       = [1, 1, 1]
    )

In [None]:
# Species whose image number `index` is segmented.
feature_index   = 1
# Segment one image: `img` is its RGR2-LAB version, `src` the matching source image.
# NOTE(review): the result is used below as an image (fed to the augmenter and plt.imshow) - confirm the return type.
# NOTE(review): `shape` is rebound to (2, 2) in a later cell; run this cell before that one.
_image_seg_     = ImageSegmentation(
        img=data['X'][feature_index][index].astype(np.float32).copy(),
        src=data['images'][feature_index][index].astype(np.float32).copy(),
        threshold=threshold, 
        radius=radius,
        shape=shape,
        axis=channel,
        method=method
    )

In [None]:
# Instantiate the project's `data_augmenter_v1` (imported above as `data_aug`); it is called on images below.
Data_aug = data_aug()

In [None]:
# 3 x 6 grid: the segmented image first, followed by 17 augmented variants of it.
# (The former inner `for j in range(1)` loop ran exactly once and only added 0 to the panel index.)
plt.figure(figsize=(15, 6))
for i in range(18):
    plt.subplot(3, 6, i + 1)
    if i == 0:
        # First panel: the untouched segmented image, with a leading batch axis.
        XX_ = tf.expand_dims(_image_seg_, 0)
        plt.title("img originale", color="red")
    else:
        # Other panels: a fresh augmentation, reshaped to a batch of one image.
        XX_ = Data_aug(_image_seg_)
        XX_ = XX_.numpy().reshape((1, reshape[0][0], reshape[0][1], 3))

    plt.axis("off")
    plt.imshow(XX_[0])

#plt.savefig("./images/img_aug.png")

In [None]:
# Parameters of the segmentation + augmentation pipeline run in the next cell.
sep             = 12                                # number of leading species kept by the slices below
srcs            = data['images'][:sep]              # source images
imgs            = data['X'][:sep]                   # filtered (RGR2-LAB) images
target          = data['target'][:sep]              # targets
threshold       = threshold                         # no-op rebind: same thresholds as in the segmentation cell
shape           = (2, 2)                            # dilation + erosion kernel; NOTE(review): rebinds the image-size `shape` used by earlier cells
radius          = 2                                 # kernel radius
dil_and_er      = False                             # True when the deep mask is applied
color           = "img"                           # output variant; also part of the best-model file name
formats         = "128x128"                         # size tag of the processed data; also part of the best-model file name
feature_names   = data['feature_names'][:sep]       # class names
paths           = data['paths'][:sep]               # image paths
resize_dim      = reshape[0] + (3,)                 # target dimension when resizing
resize          = False                             # whether to resize
max_per_class   = 3000                              # number of images per class
use_same_samples=False

In [None]:
# Run the segmentation + augmentation pipeline with the parameters defined in the previous cell.
new_data_transform = Semantic_Image_Plus_Data_Augment(
    imgs=imgs,
    srcs=srcs,
    target=target,
    feature_names=feature_names,
    threshold=threshold,
    shape=shape,
    radius=radius,
    color=color,
    upper_color=upper_color,
    lower_color=lower_color,
    paths=paths,
    max_per_class=max_per_class,
    resize=resize,
    resize_dim=resize_dim,
    use_same_samples=use_same_samples,
    contrast=False,
)

In [None]:
# Draw one random sample from the transformed dataset for a visual sanity check.
# (`random` is already imported in the imports cell at the top of the notebook.)
x_w = new_data_transform['X:img']
x_b = new_data_transform['X:black']
# Sample within the actual number of images instead of a hard-coded 36000
# (presumably 12 classes x max_per_class = 3000), so the cell keeps working
# when those settings change.
idd = random.sample(range(len(x_w)), k=1)
#x = [x_b[idd], x_w[idd]]
x = x_w[idd]
t = new_data_transform['target'][idd]
t_name = new_data_transform['feature_names'][idd]

In [None]:
# Quick check: shape and value range of the drawn sample, and shape of the full image array.
#x[0].shape, x[0].min(), x[0].max(), x[1].min(), x[1].max(), x_b.shape
x.shape, x.min(), x.max(), x_w.shape

In [None]:

# Show the drawn sample, titled "<class name> = <target>".
# NOTE(review): both panels display the same image x[0].
fig, axes = plt.subplots(1, 2, figsize=(4, 2))
for i, ax in enumerate(axes):
    ax.imshow(x[0])
    ax.axis("off")
    ax.set_title(f'{t_name[0]} = {t[0]}', fontsize="small")

In [None]:
# Split the transformed images and their targets with the project's helper.
data_split = compilation.split_data(
    X=new_data_transform['X:img'],
    y=new_data_transform["target"],
    test_size=0.10,
    normalize=False,
)

# Unpack the train / test / dev subsets returned by the helper.
X_train, y_train = data_split['X_train'], data_split['y_train']
X_test, y_test = data_split['X_test'], data_split['y_test']
X_dev, y_dev = data_split["X_dev"], data_split['y_dev']

In [None]:
# Value ranges of the test and train inputs (the split above was made with normalize=False).
X_test.max(), X_train.max(), X_test.min(), X_train.min()

In [None]:
def _conv_stage(X, filters, kernel_size, conv_strides, pool_strides, dropout_rate):
    """Apply one Conv2D -> ReLU -> BatchNorm -> MaxPool(2x2) -> Dropout stage to tensor `X`."""
    X = tf.keras.layers.Conv2D(
        filters=filters, kernel_size=kernel_size,
        kernel_initializer="glorot_uniform",
        strides=conv_strides, padding="valid"
        )(X)
    X = tf.keras.layers.ReLU()(X)
    # Normalise over the channel axis (channels-last layout).
    X = tf.keras.layers.BatchNormalization(axis=3)(X)
    X = tf.keras.layers.MaxPooling2D(
        pool_size=(2, 2), strides=pool_strides, padding="valid"
        )(X)
    # Randomly drop activations to limit over-fitting.
    return tf.keras.layers.Dropout(rate=dropout_rate)(X)


def conv(input_shape : tuple = (128, 128, 3), classes : int = 12):
    """Build the 6-stage convolutional classifier.

    Six Conv2D -> ReLU -> BatchNorm -> MaxPool -> Dropout stages, followed by
    Flatten -> Dense(4096, relu) -> Dropout(0.5) -> Dense(classes, softmax).

    Args:
        input_shape: shape of one input image, (height, width, channels).
            The default used to be the 2-D (128, 128), which cannot be fed to
            Conv2D / BatchNormalization(axis=3); it now includes the channel axis.
        classes: number of output classes.

    Returns:
        An uncompiled `tf.keras.models.Model`.
    """
    # (filters, kernel size, conv strides, max-pool strides, dropout rate) for each stage.
    # NOTE(review): stage 4 uses 126 filters (not 128); kept as-is, confirm whether intentional.
    stages = (
        (32,  (9, 9), (2, 2), (2, 2), 0.4),
        (64,  (7, 7), (1, 1), (1, 1), 0.4),
        (128, (3, 3), (1, 1), (1, 1), 0.3),
        (126, (3, 3), (1, 1), (1, 1), 0.2),
        (384, (3, 3), (2, 2), (1, 1), 0.4),
        (512, (3, 3), (1, 1), (1, 1), 0.5),
    )

    # Input layer.
    inputs = tf.keras.layers.Input(shape=input_shape)

    X = inputs
    for filters, kernel_size, conv_strides, pool_strides, dropout_rate in stages:
        X = _conv_stage(
            X,
            filters=filters,
            kernel_size=kernel_size,
            conv_strides=conv_strides,
            pool_strides=pool_strides,
            dropout_rate=dropout_rate,
        )

    # Flatten the feature maps for the dense head.
    X = tf.keras.layers.Flatten()(X)

    # Fully connected layer with 4096 units, followed by 50 % dropout.
    X = tf.keras.layers.Dense(units=4096,
                              activation=tf.keras.activations.relu)(X)
    X = tf.keras.layers.Dropout(rate=0.5)(X)

    # Classification layer: one softmax output per class.
    outputs = tf.keras.layers.Dense(units=classes, activation=tf.keras.activations.softmax)(X)

    return tf.keras.models.Model(inputs=inputs, outputs=outputs)

In [None]:
# NOTE(review): these layers come from the standalone `keras` package while the models
# in this notebook are assembled with `tf.keras`; confirm both resolve to the same Keras installation.
from keras.layers import Dense, Flatten, AveragePooling2D, MaxPool2D, GlobalAveragePooling2D, Dropout
from keras.layers import Input, Conv2D, Add, BatchNormalization, ReLU, ZeroPadding2D, MaxPooling2D

# Short aliases for the weight initializers.
random_uniform = tf.keras.initializers.random_uniform
constant = tf.keras.initializers.constant
glorot_uniform = tf.initializers.glorot_uniform

def identity_block(X : np.ndarray, filters : list = [], init=glorot_uniform, channels : int = 2):
    """Bottleneck residual block whose shortcut is the unmodified input.

    Main path: Conv 1x1 -> BN -> ReLU -> Conv (channels x channels, 'same') -> BN -> ReLU
    -> Conv 1x1 -> BN; the input is then added back and a final ReLU is applied.

    Args:
        X: input tensor (channels-last: batch normalisation is applied on axis 3).
        filters: three filter counts, one per convolution. Required in practice:
            the empty default raises IndexError. Since the input is added to the
            main path, filters[2] is expected to match the channel count of X.
        init: unused; every convolution is initialised with random_uniform(seed=0).
        channels: kernel size of the middle convolution (despite its name).

    Returns:
        The output tensor of the block.
    """
    # Keep the input for the skip connection.
    shortcut = X

    # First component: 1x1 convolution.
    X = Conv2D(filters=filters[0], kernel_size=(1, 1), strides=(1,1),
                     padding='valid', kernel_initializer=random_uniform(seed=0))(X)
    X = BatchNormalization(axis=3)(X)
    X = ReLU()(X)

    # Second component: (channels x channels) convolution; 'same' padding keeps the spatial size.
    X = Conv2D(filters=filters[1], kernel_size=(channels, channels), strides=(1,1),
                                        padding='same', kernel_initializer=random_uniform(seed=0))(X)
    X = BatchNormalization(axis=3)(X)
    X = ReLU()(X)

    # Third component: 1x1 convolution, no activation before the addition.
    X = Conv2D(filters=filters[2], kernel_size=(1,1), strides=(1,1),
                     padding='valid', kernel_initializer=random_uniform(seed=0))(X)
    X = BatchNormalization(axis=3)(X)

    # Skip connection followed by the final activation.
    X = Add()([shortcut, X])
    X = ReLU()(X)

    return X

def convolutional_block(X : np.ndarray, filters : list = [], init=random_uniform, channels : int =2, s : int = 1):
    """Bottleneck residual block whose shortcut is a strided 1x1 convolution.

    Main path: Conv 1x1 (stride s) -> BN -> ReLU -> Conv (channels x channels, 'same')
    -> BN -> ReLU -> Conv 1x1 -> BN.
    Shortcut path: Conv 1x1 (stride s, filters[2] filters) -> BN.
    Both paths are added and passed through a final ReLU.

    Args:
        X: input tensor (channels-last: batch normalisation is applied on axis 3).
        filters: three filter counts, one per main-path convolution.
        init: unused; every convolution is initialised with random_uniform(seed=0).
        channels: kernel size of the middle convolution (despite its name).
        s: stride of the first main-path convolution and of the shortcut convolution.

    Returns:
        The output tensor of the block.
    """
    block_input = X

    # Main path, first component: strided 1x1 convolution.
    main = Conv2D(
        filters=filters[0], kernel_size=(1, 1), strides=(s, s),
        padding='valid', kernel_initializer=random_uniform(seed=0),
    )(block_input)
    main = BatchNormalization(axis=3)(main)
    main = ReLU()(main)

    # Main path, second component: (channels x channels) convolution with 'same' padding.
    main = Conv2D(
        filters=filters[1], kernel_size=(channels, channels), strides=(1, 1),
        padding='same', kernel_initializer=random_uniform(seed=0),
    )(main)
    main = BatchNormalization(axis=3)(main)
    main = ReLU()(main)

    # Main path, third component: 1x1 convolution, no activation before the addition.
    main = Conv2D(
        filters=filters[2], kernel_size=(1, 1), strides=(1, 1),
        padding='valid', kernel_initializer=random_uniform(seed=0),
    )(main)
    main = BatchNormalization(axis=3)(main)

    # Shortcut path: project the input so it can be added to the main path.
    shortcut = Conv2D(
        filters=filters[2], kernel_size=(1, 1), strides=(s, s),
        padding='valid', kernel_initializer=random_uniform(seed=0),
    )(block_input)
    shortcut = BatchNormalization(axis=3)(shortcut)

    merged = Add()([shortcut, main])
    return ReLU()(merged)

def ResNet16(input_shape : tuple = (128, 128, 3), classes : int = 6, training : bool = False, pad : tuple = (3, 3)):
    """
    Build a reduced ResNet-style classifier from bottleneck residual blocks.

    Architecture actually built:
    ZEROPAD -> CONV2D(64, 3x3) -> BATCHNORM -> RELU -> MAXPOOL
    -> CONVBLOCK[64, 64, 256] (s=1) -> IDBLOCK
    -> CONVBLOCK[128, 128, 512] (s=2) -> IDBLOCK
    -> CONVBLOCK[512, 512, 2048] (s=2) -> IDBLOCK
    -> MAXPOOL -> FLATTEN -> DENSE(2048, relu) -> DROPOUT(0.5) -> DENSE(classes, softmax)

    Compared with the ResNet50 layout, only one identity block follows each
    convolutional block, and the [256, 256, 1024] stage is omitted.

    Arguments:
    input_shape -- shape of the images of the dataset
    classes -- integer, number of classes
    training -- unused
    pad -- zero-padding applied to the input before the first convolution

    Returns:
    model -- a Model() instance in Keras
    """

    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape)

    # Zero-Padding
    X = ZeroPadding2D(padding=pad)(X_input)

    # Stage 1: stem convolution
    X = Conv2D(filters=64, kernel_size=(3, 3), strides = (1, 1), kernel_initializer = glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis = 3)(X)
    X = ReLU()(X)
    X = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X)

    # Stage 2: 256 output channels, stride 1
    X = convolutional_block(X=X, channels = 3, filters = [64, 64, 256], s = 1)
    X = identity_block(X=X, channels=3, filters=[64, 64, 256])

    # Stage 3: 512 output channels, stride 2
    X = convolutional_block(X=X, channels = 3, filters = [128,128,512], s = 2)
    X = identity_block(X=X, channels=3, filters=[128,128,512])

    # (The [256, 256, 1024] stage of the reference layout is intentionally left out.)

    # Stage 4: 2048 output channels, stride 2
    X = convolutional_block(X=X, channels = 3, filters = [512, 512, 2048], s = 2)
    X = identity_block(X=X, channels=3, filters=[512, 512, 2048])

    # Max pooling is used here in place of the usual average pooling
    X = MaxPooling2D(pool_size=(2, 2))(X)

    # Output head
    X = Flatten()(X)
    X = Dense(2048, activation='relu', kernel_initializer = glorot_uniform(seed=0))(X)
    X = Dropout(rate=0.5)(X)
    X = Dense(classes, activation='softmax', kernel_initializer = glorot_uniform(seed=0))(X)

    # Create model
    model = tf.keras.models.Model(inputs = X_input, outputs = X)

    return model

In [None]:
# Model input shape: the selected image size plus 3 channels.
input_shape         = reshape[0] + (3, )
# Number of output classes.
n_classes           = 12
# Batch sizes and number of epochs (batch sizes are the defaults of `fit_` below).
batch_size_train    = 128
batch_size_test     = 64
epochs              = 60
# Build the (uncompiled) CNN defined by `conv` above.
model_cnn           = conv(input_shape = input_shape, classes=n_classes)

In [None]:
# Print the layer-by-layer summary of the model.
model_cnn.summary()

In [None]:
# Compile through the project's helper with the "cce" loss key, the 'adam' optimizer key and accuracy as metric.
model_cnn = compilation.compile(model=model_cnn, Loss="cce", Op='adam', scoring=['accuracy'])

In [None]:

def backup(epoch):
    """Create the Keras callbacks used while training.

    Args:
        epoch: unused.

    Returns:
        A 5-tuple: (TensorBoard logger, per-epoch ModelCheckpoint,
        best-model ModelCheckpoint, EarlyStopping, ReduceLROnPlateau).
        The best-model file name is built from the globals `color` and `formats`.
    """
    keras_callbacks = tf.keras.callbacks

    # TensorBoard logs, with weight histograms at every epoch.
    tensorboard = keras_callbacks.TensorBoard(
        log_dir="./embeding_models/logs", histogram_freq=1
    )

    # Checkpoint path; plain (non-f) string, so the `{epoch:04d}` placeholder
    # reaches ModelCheckpoint unformatted.
    every_epoch_checkpoint = keras_callbacks.ModelCheckpoint(
        filepath="./embeding_models/model-{epoch:04d}.h5", verbose=1
    )

    # Second checkpoint that only keeps the model with the best validation accuracy.
    best_checkpoint = keras_callbacks.ModelCheckpoint(
        filepath=f"./embeding_models/best-{color}-{formats}-model.h5",
        monitor="val_accuracy",
        verbose=1,
        save_best_only=True,
    )

    # Early stopping on validation accuracy (patience 20, min_delta 0.01).
    early_stopping = keras_callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=20,
        min_delta=0.01,
        verbose=1,
        restore_best_weights=False,
    )

    # Learning-rate reduction (factor 0.1) on validation-accuracy plateaus.
    reduce_lr = keras_callbacks.ReduceLROnPlateau(
        monitor="val_accuracy",
        factor=0.1,
        patience=5,
        cooldown=3,
        min_delta=0.001,
        verbose=1,
    )

    return tensorboard, every_epoch_checkpoint, best_checkpoint, early_stopping, reduce_lr

def compile_(
        model ,
        Loss : str = "cce", 
        Op   : str = "sgd", 
        scoring = ['accuracy']
        ):
    """Compile `model` in place and return it.

    Args:
        model: object exposing a Keras-style `compile(loss=, optimizer=, metrics=)`.
        Loss: "cce" selects CategoricalCrossentropy; any other value selects
            SparseCategoricalCrossentropy.
        Op: 'sgd' selects SGD with momentum 0.9; any other value selects Adam.
        scoring: metrics forwarded to `model.compile`.

    Returns:
        The same `model` object, compiled.
    """
    if Loss == "cce":
        loss_fn = tf.losses.CategoricalCrossentropy()
    else:
        loss_fn = tf.losses.SparseCategoricalCrossentropy()

    if Op == 'sgd':
        optimizer = tf.optimizers.SGD(momentum=0.9)
    else:
        optimizer = tf.optimizers.Adam()

    model.compile(loss=loss_fn, optimizer=optimizer, metrics=scoring)
    return model

def fit_(
        model, 
        X_train : np.ndarray, 
        X_test  : np.ndarray, 
        y_train : np.ndarray, 
        y_test  : np.ndarray, 
        augment : bool = True, 
        epochs  : int = 40, 
        batch_size_train : int = batch_size_train,
        batch_size_test_or_val : int = batch_size_test,
        subset : str = 'test',
        use_callbacks : bool = True,
        ):
    """Train ``model`` on ``X_train`` / ``y_train`` and return it.

    Parameters
    ----------
    model :
        Compiled Keras model; it is trained in place.
    X_train, y_train :
        Training images and labels.
    X_test, y_test :
        Held-out images and labels, used as validation data when
        ``subset == 'test'``.
    augment : bool
        When exactly ``True``, batches come from ``data_augmenter_v2()``
        generators fitted on the data; otherwise the arrays are used directly.
    epochs : int
        Number of epochs passed to ``model.fit``.
    batch_size_train, batch_size_test_or_val : int
        Batch sizes for the training and the validation stream.
    subset : str
        ``'test'`` validates on the test data, ``'validation'`` validates on
        data drawn from the training arrays. Any other value prints a message
        and returns ``model`` untrained.
    use_callbacks : bool
        When true, the callbacks produced by ``backup`` are attached.

    Returns
    -------
    The ``model`` object that was passed in.
    """
    # Reject an unknown mode before any callback or generator is built.
    if subset not in ('test', 'validation'):
        print("subset should be 'test' or 'validation'")
        return model

    # One callbacks list shared by every training branch (None disables them).
    callbacks = list(backup(epoch=epochs)) if use_callbacks else None

    if augment is True:
        datagen_train = data_augmenter_v2()
        datagen_test = data_augmenter_v2()
        # compute quantities required for featurewise normalization
        # (std, mean, and principal components if ZCA whitening is applied)
        datagen_train.fit(X_train)
        datagen_test.fit(X_test)

        train_stream = datagen_train.flow(
            X_train, y_train, batch_size=batch_size_train, subset='training')

        if subset == 'validation':
            # Validation batches are taken from the training arrays.
            val_stream = datagen_test.flow(
                X_train, y_train, batch_size=batch_size_test_or_val, subset=subset)
        else:
            # NOTE(review): only the generator's "validation" subset of the test
            # arrays is requested here; how large it is depends on
            # data_augmenter_v2 — confirm this is intended.
            val_stream = datagen_test.flow(
                X_test, y_test, batch_size=batch_size_test_or_val, subset="validation")

        # fits the model on batches with real-time data augmentation
        model.fit(
            train_stream,
            validation_data=val_stream,
            epochs=epochs,
            callbacks=callbacks,
        )
    else:
        # do not use data augmentation
        if subset == 'validation':
            # NOTE(review): this validates on the training arrays themselves, so
            # val_* metrics mirror the training metrics — confirm this is intended.
            validation_data = (X_train, y_train)
        else:
            validation_data = (X_test, y_test)

        model.fit(
            X_train, y_train,
            batch_size=batch_size_train,
            epochs=epochs,
            validation_data=validation_data,
            verbose=1,
            callbacks=callbacks,
        )

    return model

In [None]:
# Train the CNN on the raw arrays (no augmentation), validating on the test
# split, with the callbacks from backup() attached.
model_cnn = fit_(
            model       = model_cnn,
            X_train     = X_train,
            X_test      = X_test,
            y_train     = y_train,
            y_test      = y_test,
            augment     = False,
            epochs      = epochs,
            batch_size_test_or_val  = batch_size_test,
            batch_size_train        = batch_size_train,
            use_callbacks           = True,
            subset      ="test"
        )

In [None]:
# Turn the per-epoch metrics recorded during training into a table.
# NOTE(review): assumes model_cnn.history is the History object left by the fit call above
history = model_cnn.history
df = pd.DataFrame(history.history)
display(df.head())
# Save the table as a semicolon-separated file; the name depends on the notebook-level `color` variable.
df.to_csv(f'./DataSet/history_{color}_.csv', sep=';')

In [None]:
# Learning curves: loss on the left panel, accuracy on the right, training vs validation.
fig, axes = plt.subplots(1,2, figsize=(12, 3), sharex=True)
df[['loss', 'val_loss']].plot(ax=axes[0], marker='o', title="epoch loss for seedling plants")
df[['accuracy', 'val_accuracy']].plot(ax=axes[1], marker='o', title="epoch accuracy for seedling plants")

# Both panels share the same x-axis label.
for i in range(2):
    axes[i].set_xlabel("epochs")

axes[0].set_ylabel("loss function")
axes[1].set_ylabel("accuracy")
# Fixed accuracy ticks from 0 to 1 in steps of 0.1 (tick positions and tick labels are the same values).
axes[1].set_yticks(np.arange(0, 1.1, 0.1).round(1), np.arange(0, 1.1, 0.1).round(1))
plt.show()

In [None]:
# Reload the best-model checkpoint; the path is the one written by the save_best_only callback in backup().
best_cnn_model = tf.keras.models.load_model(f"./embeding_models/best-{color}-{formats}-model.h5")

In [None]:
# Score the reloaded checkpoint on the training set.
# `evaluate` is used instead of `fit`: fitting here would train the checkpoint
# for one more epoch, so the reported numbers would no longer describe the saved model.
train_scores = best_cnn_model.evaluate(X_train, y_train, verbose=0, return_dict=True)
print(f"loss train = {np.round( train_scores['loss'], 4 )}\nscoring train = {np.round( train_scores['accuracy'], 4)}")

In [None]:
# Score the reloaded checkpoint on the test set.
# `evaluate` is used instead of `fit`: fitting on (X_test, y_test) would train
# the model on the test data and leak it into every prediction made afterwards.
test_scores = best_cnn_model.evaluate(X_test, y_test, verbose=0, return_dict=True)
print(f"loss test = {np.round( test_scores['loss'], 4 )}\nscoring test = {np.round( test_scores['accuracy'], 4)}")

In [None]:
# Model outputs for the test images; later cells take the argmax over the last axis to get class indices.
y_test_pred = best_cnn_model.predict(X_test)


In [None]:
def metric_evaluations(y_pred : np.ndarray, y_test : np.ndarray, feature_names : list, average : str="micro", shwo_roc : bool = False ):
    """Compute classification metrics from predicted scores and one-hot targets.

    Parameters
    ----------
    y_pred : np.ndarray
        Predicted scores, shape ``(n_samples, n_classes)``.
    y_test : np.ndarray
        One-hot encoded true labels with the same shape as ``y_pred``.
    feature_names : list
        Class names, indexed by class id; used in the report and ROC legend.
    average : str
        Averaging mode forwarded to the F1, precision and recall scores.
    shwo_roc : bool
        When true, draw one ROC curve per class.

    Returns
    -------
    dict
        Keys: ``confusion_matrix``, ``classification_report``, ``roc``
        (``[fpr, tpr, threshold]``, each a dict keyed by class id), ``auc``,
        ``precision``, ``recall``, ``f1-score``, ``mask`` (True where the
        prediction is wrong), ``confusion_matrix_error`` and ``max_scores``.
    """
    from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
    from sklearn.metrics import f1_score, precision_score, recall_score
    import matplotlib.pyplot as plt 

    y_pred = np.asarray(y_pred)
    y_test = np.asarray(y_test)

    # The class count comes from the data instead of a hard-coded 12.
    n_classes = y_pred.shape[-1]
    class_ids = np.arange(n_classes)

    y_pred_arg      = np.argmax(y_pred, axis=-1)
    y_test_arg      = np.argmax(y_test, axis=-1)
    y_scores        = np.max(y_pred, axis=-1, keepdims=False)
    fpr             = dict()
    tpr             = dict()
    air_under_curve = dict()
    threshold       = dict()

    if shwo_roc:
        plt.figure(figsize=(8, 4))

    # One-vs-rest ROC curve and AUC for every class.
    for i in range(n_classes):
        fpr[i], tpr[i], threshold[i] = roc_curve(y_test[:, i], y_pred[:, i])
        air_under_curve[i] = auc(fpr[i], tpr[i])
        if shwo_roc:
            plt.plot(fpr[i], tpr[i], label='%s %d (AUC = %0.2f)' % (feature_names[i], i, air_under_curve[i]))

    if shwo_roc:
        plt.plot([0, 1], [0, 1], color='k', ls='--')
        plt.legend(loc="best", fontsize="small", ncol=2)
        plt.xlabel("1 - specificity (fpr)")
        plt.ylabel("sensitivity (tpr)")
        plt.title("Receiver Operating characteristic (ROC) Curves for seedling classification")
        plt.show()

    # Passing `labels` keeps every matrix n_classes x n_classes with row/column k
    # meaning class k, even when a class is absent from the inputs.
    cm = confusion_matrix(y_true=y_test_arg, y_pred=y_pred_arg, labels=class_ids)
    cr = classification_report(y_true=y_test_arg, y_pred=y_pred_arg,
                               labels=class_ids, target_names=feature_names)
    fs = f1_score(y_true=y_test_arg, y_pred=y_pred_arg, average=average)
    pr = precision_score(y_true=y_test_arg, y_pred=y_pred_arg, average=average)
    rs = recall_score(y_true=y_test_arg, y_pred=y_pred_arg, average=average)

    # Confusion matrix restricted to the misclassified samples.
    mask = y_test_arg != y_pred_arg

    y_test_arg_error = y_test_arg[mask]
    y_pred_arg_error = y_pred_arg[mask]

    cm_error = confusion_matrix(y_true=y_test_arg_error, y_pred=y_pred_arg_error, labels=class_ids)

    data = {
        "confusion_matrix" : cm ,
        "classification_report" : cr,
        "roc" : [fpr, tpr, threshold],
        "auc" : air_under_curve,
        "precision" : pr,
        "recall" : rs,
        "f1-score" : fs,
        "mask" : mask,
        "confusion_matrix_error" : cm_error,
        "max_scores" : y_scores
    }

    return data

In [None]:
# Compute every evaluation metric for the test predictions, without drawing the ROC curves.
metrics = metric_evaluations(y_pred=y_test_pred, y_test=y_test, feature_names=feature_names, shwo_roc=False)

In [None]:
# The report is a pre-formatted string, so print() keeps its column layout.
print(metrics['classification_report'])

In [None]:
# Unpack the per-class ROC dictionaries and the two confusion matrices for plotting.
fpr, tpr, thresholds = metrics['roc']
cm, cm_error = metrics['confusion_matrix'], metrics['confusion_matrix_error']

In [None]:
# Side-by-side heatmaps: all predictions on the left, misclassified samples only on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 4))
# fmt="4d" prints each count as an integer padded to width 4.
sns.heatmap(data=cm, annot=True, cmap="plasma", ax=axes[0], fmt="4d")
sns.heatmap(data=cm_error, annot=True, cmap="plasma", ax=axes[1], fmt="4d")
for i in range(2):
    axes[i].set_xlabel('Predicted Labels')
    axes[i].set_ylabel('True Labels')

axes[0].set_title("Confusion matrix of Prediction : seedling plants")
axes[1].set_title("Confusion matrix of Errors")
plt.show()

In [None]:
def image_transform(
        imgs            : np.ndarray, 
        dim             : tuple     = (64, 64), 
        channel         : int       = 1, 
        radius          : float     = 1, 
        threshold       : list      = [0, 0.46],
        method          : str       = "numpy",
        img_type        : str       = "black",
        show_img        : bool      = False,
        labels          : list      = []
        ):
    """Resize images, segment them through a LAB channel and return one variant.

    Parameters
    ----------
    imgs :
        Iterable of image arrays; an image with 4 channels is first converted
        with ``rgba2rgb``.
    dim : tuple
        Output ``(height, width)`` passed to ``skimage.transform.resize``.
    channel : int
        Index of the normalised LAB channel handed to ``get_mask``.
    radius, threshold, method :
        Forwarded unchanged to ``get_mask``.
    img_type : str
        Variant to return: ``"black"`` (resized image multiplied by the mask),
        ``"white"`` (result of ``change_bg``), ``"mask"``, ``"orig"`` (resized
        image) or ``"LAB"`` (normalised LAB channel 1).
    show_img : bool
        When true, display the selected variant of every image.
    labels : list
        Optional titles, one per image, used when ``show_img`` is true.

    Returns
    -------
    np.ndarray
        Stacked images: ``(n, dim[0], dim[1], 3)`` for the 3-channel variants,
        ``(n, dim[0], dim[1])`` for ``"mask"`` and ``"LAB"``.

    Raises
    ------
    ValueError
        If ``img_type`` is not one of the five supported names.

    Notes
    -----
    ``cv2`` and the notebook-level variables ``lower_color`` / ``upper_color``
    must be defined before calling this function.
    """
    from skimage.transform import resize 
    from skimage.color import rgba2rgb
    from modules_python.image_processing.tools import get_mask, change_bg
    import matplotlib.pyplot as plt 

    valid_types = ("black", "white", "mask", "orig", "LAB")
    if img_type not in valid_types:
        raise ValueError(f"img_type must be one of {valid_types}, got {img_type!r}")

    # Create the figure only when something will be drawn; squeeze=False keeps
    # `axes` indexable when a single image is given.
    n_imgs = len(imgs)
    axes = None
    if show_img and n_imgs > 0:
        _, grid = plt.subplots(1, n_imgs, figsize=(2 * n_imgs, 2), squeeze=False)
        axes = grid[0]

    all_imgs = []

    for i, img in enumerate(imgs):
        if img.ndim == 3 and img.shape[-1] == 4:
            img = rgba2rgb(img)

        img_resize          = resize(img, output_shape=dim)
        # NOTE(review): the conversion code is BGR->LAB; confirm the channel
        # order of the inputs matches what the model was trained with.
        img_lab             = cv2.cvtColor(img_resize, cv2.COLOR_BGR2LAB)
        # Rescale each LAB channel and clip it to [0, 1].
        img_lab[:, :, 0]    = np.clip(img_lab[:, :, 0] / 255.0, 0, 1)
        img_lab[:, :, 1]    = np.clip((img_lab[:, :, 1] + 128) / 255.0, 0, 1)
        img_lab[:, :, 2]    = np.clip((img_lab[:, :, 2] + 128) / 255.0, 0, 1)
        X                   = img_lab.astype(dtype=np.float32).copy() 
        mask                = get_mask(img=X[..., channel], threshold=threshold, radius=radius, method=method)
        mask                = mask * 1.

        # Apply the mask to each colour channel of the resized image.
        img_black           = img_resize.astype(dtype=np.float32).copy()
        img_black[..., 0]   = img_black[..., 0] * mask * 1.
        img_black[..., 1]   = img_black[..., 1] * mask * 1.
        img_black[..., 2]   = img_black[..., 2] * mask * 1.

        # change_bg is always called, as before, because it receives a view of
        # img_black and may modify it — its implementation is not visible here.
        img_white           = img_black.reshape((1, dim[0], dim[1], 3))
        img_white           = change_bg(imgs=img_white, lower_color=lower_color, upper_color=upper_color, value=[1, 1, 1])

        variants = {
            "black" : img_black,
            "white" : img_white[0],
            "mask"  : mask,
            "orig"  : img_resize,
            "LAB"   : img_lab[..., 1],
        }
        selected = variants[img_type]

        if axes is not None:
            axes[i].imshow(selected, interpolation="nearest", cmap="plasma")
            if labels : axes[i].set_title(label=labels[i])
            axes[i].axis('off')

        # Only the 3-channel variants are forced to (H, W, 3); "mask" and "LAB"
        # are single-channel and stay 2-D.
        if selected.ndim == 3:
            selected = selected.reshape((dim[0], dim[1], 3))

        all_imgs.append(selected)

    if axes is not None:
        plt.show()

    return np.array(all_imgs)

In [None]:
def url_img_read( urls : list, show_img : bool = False, url_labels : list = [], timeout : float = 30.0):
    """Download images from ``urls`` and return them as float32 arrays in [0, 1].

    Parameters
    ----------
    urls : list
        Image URLs to fetch, in order.
    show_img : bool
        When true, display each downloaded image in a one-row figure.
    url_labels : list
        Optional titles, one per URL, used when ``show_img`` is true.
    timeout : float
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    list
        One array per successfully downloaded image. A URL that fails (non-200
        status or any exception) is reported with ``print`` and skipped, so the
        list can be shorter than ``urls``.
    """
    from PIL import Image
    import requests
    from io import BytesIO
    import numpy as np 
    import time

    all_imgs = []

    # Create the figure only when something will be drawn; squeeze=False keeps
    # `axes` indexable when a single URL is given.
    n_urls = len(urls)
    axes = None
    if show_img and n_urls > 0:
        _, grid = plt.subplots(1, n_urls, figsize=(2 * n_urls, 2), squeeze=False)
        axes = grid[0]

    # Time the whole download loop; `start` is defined even for an empty list.
    start = time.time()

    for i, url in enumerate(urls):
        try:
            # A timeout prevents an unresponsive host from blocking the notebook.
            response = requests.get(url, timeout=timeout)
            if response.status_code == 200:
                # Decode the payload and scale pixel values by 1/255.
                image_data = BytesIO(response.content)
                image = np.array(Image.open(image_data)).astype(np.float32) / 255 
                if axes is not None:
                    axes[i].imshow(image, interpolation="nearest", cmap="plasma")
                    if url_labels: axes[i].set_title(label=url_labels[i])
                    axes[i].axis('off')

                all_imgs.append(image)
            else:
                print(f"Failed to retrieve image. Status code: {response.status_code}")
        except Exception as e:
            # Best effort: report the failure and continue with the next URL.
            print(f"An error occurred: {str(e)}")

    end = time.time()
    if axes is not None:
        plt.show()
    print(f"response time : {np.round( end-start, 4 )}s")

    return all_imgs

In [None]:
# Three external seedling photos used as an out-of-dataset sanity check.
urls = [
"https://www.nexles.com/articles/wp-content/uploads/2019/07/Sinapis-arvensis-small-plant-1.jpg",
"https://media.sciencephoto.com/image/c0065348/400wm/C0065348-Cleavers_seedling.jpg",
"https://www.clemson.edu/cafls/research/weeds/weed-id-bio/broadleaf-weeds-parent/broadleaf-weed-seedlings/chickweed-seedling.jpg"
]
# Expected class of each URL, in the same order.
url_labels = ['Charlock', "Cleavers", 'Common Chickweed']

# NOTE(review): a failed download is skipped, so IMG can be shorter than url_labels
IMG = url_img_read(urls=urls, show_img=True, url_labels=url_labels)

In [None]:
# Apply the masking pipeline to the downloaded photos and keep the masked ("black") variant.
# NOTE(review): `reshape` is a notebook-level variable not defined in the visible cells; reshape[0] is used as the (height, width) target — confirm
IMG_TRAN = image_transform(imgs=IMG, img_type='black', show_img=True, threshold=[0, 0.46], radius=0.6, dim=reshape[0], labels=url_labels)

In [None]:
# Classify the downloaded photos and tabulate predicted vs expected class with the top score.
y_pred_1 = best_cnn_model.predict(IMG_TRAN)
y_pred_1_argmax = np.argmax(y_pred_1, axis=-1)
# All three columns must have the same length, i.e. every URL must have produced an image.
pred_dict = {"Predicted Label" : [feature_names[i] for i in y_pred_1_argmax], "True Label" : url_labels, "Score" : list(y_pred_1.max(axis=1)) }
pd.DataFrame(pred_dict)

In [None]:
# Display the class names and the target array as the cell output.
feature_names, target

In [None]:
import tensorboard

In [None]:
decoder = tf.keras.applications.mobilenet_v3 
process_input = decoder.preprocess_input 


class Model_Transfer:
    """Transfer-learning classifier built on an ImageNet-pretrained MobileNetV3Large.

    Typical use: ``build_model()`` -> ``compile()`` -> ``fit()`` -> ``predict()``.
    """

    def __init__(self) -> None:
        self.model = None      # set by build_model()
        self.history = None    # set by fit()

    def _require_model(self):
        """Return the built model or fail with a clear message."""
        if self.model is None:
            raise RuntimeError("Model_Transfer: call build_model() first")
        return self.model

    def build_model(self, shape : tuple = (128, 128, 3), classes : int  = 12) -> "tf.keras.Model":
        """Build the network: preprocessing -> MobileNetV3Large -> dense head.

        Parameters
        ----------
        shape : tuple
            Input image shape ``(height, width, channels)``.
        classes : int
            Number of output classes of the final softmax layer.

        Returns
        -------
        The Keras model, also stored in ``self.model``.
        """
        base_model = tf.keras.applications.\
                MobileNetV3Large(input_shape=shape, weights="imagenet", include_top=False) 
        # Fine-tune the whole backbone. `trainable` is a layer attribute, not a
        # call argument.
        base_model.trainable = True

        inputs = tf.keras.layers.Input(shape=shape)

        # Preprocessing belongs before the backbone, not after it.
        # NOTE(review): MobileNetV3 is created with its default built-in
        # preprocessing, which presumably expects pixel values in [0, 255] —
        # confirm the range of the images fed to this model.
        X = process_input(inputs)
        X = base_model(X)
        X = tf.keras.layers.BatchNormalization(axis=3)(X)
        X = tf.keras.layers.GlobalAveragePooling2D()(X)
        X = tf.keras.layers.Dropout(rate=0.8)(X)
        # NOTE(review): this Dense layer has no activation, so the head is two
        # stacked linear layers — confirm whether a non-linearity was intended.
        X = tf.keras.layers.Dense(units=4096)(X)
        X = tf.keras.layers.Dropout(rate=0.5)(X)
        X = tf.keras.layers.Dense(units=classes)(X)
        X = tf.keras.layers.Activation(activation="softmax")(X)

        outputs = X
        self.model = tf.keras.models.Model(inputs=inputs, outputs=outputs)

        return self.model 
    
    def compile(self, learning_rate : float = 1e-3, beta_1  :float = 0.9, beta_2 : float = 0.999, loss : str = 'cce') -> None:
        """Compile the model with Adam and a cross-entropy loss.

        ``loss == "cce"`` selects categorical cross-entropy; any other value
        selects sparse categorical cross-entropy.
        """
        model = self._require_model()

        # The network ends with a softmax, so the loss receives probabilities
        # and must not treat them as logits.
        if loss == "cce":
            loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
        else:
            loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

        model.compile(
            optimizer=tf.keras.optimizers.\
                Adam(learning_rate=learning_rate, beta_1 = beta_1, beta_2=beta_2),
            loss=loss_fn,
            metrics=['accuracy'])
    
    def fit(self, X_train, y_train, X_test, y_test, train_bacth_size : int = 128, epochs : int = 1) -> tuple:
        """Train on augmented batches and validate on batches drawn from the test data.

        Parameters
        ----------
        X_train, y_train, X_test, y_test :
            Training and held-out images and labels.
        train_bacth_size : int
            Batch size of the training stream.
        epochs : int
            Number of epochs; the default of 1 matches the previous behavior.

        Returns
        -------
        tuple
            ``(history, train_generator, test_generator)``.
        """
        model = self._require_model()

        X_train_gen = data_augmenter_v2()
        X_test_gen  = data_augmenter_v2()

        X_train_gen.fit(X_train, augment=True)
        X_test_gen.fit(X_test, augment=True)

        train_flow = X_train_gen.flow(
            x = X_train, y = y_train, batch_size=train_bacth_size, seed = 3, subset="training"
        )
        val_flow = X_test_gen.flow(
            x=X_test, y=y_test, batch_size=32, subset='validation', seed=3
        )

        # The step count is derived from the number of samples, not from the
        # first image. `validation_split` is dropped because explicit
        # validation data is supplied.
        self.history = model.fit(
                        train_flow,
                        validation_data=val_flow,
                        epochs = epochs,
                        steps_per_epoch = max(1, len(X_train) // train_bacth_size),
                        verbose = 1,
                        callbacks=None
                    )
        
        return self.history, X_train_gen, X_test_gen
    
    def predict(self,  X_gen, top : int = 2, decoding : bool = False, decode = None):
        """Predict on the first batch yielded by ``X_gen``.

        Parameters
        ----------
        X_gen :
            Iterable yielding ``(image_batch, label_batch)`` pairs.
        top : int
            Number of top classes requested from ``decode.decode_predictions``.
        decoding : bool
            When true, also return decoded predictions.
        decode :
            Object exposing ``decode_predictions``; required when ``decoding``
            is true.

        Returns
        -------
        tuple
            ``(y_pred, y_pred_decode, label_batch, model)``; ``y_pred_decode``
            is ``None`` unless ``decoding`` is true.
        """
        model = self._require_model()
        if decoding and decode is None:
            raise ValueError("decode must be provided when decoding=True")

        # NOTE(review): kept from the original — this freezes the model as a
        # side effect of predicting; confirm it is wanted.
        model.trainable = False
        y_pred_decode        = None 

        image_batch, label_batch = next(iter(X_gen))
        # Preprocessing is part of the model graph, so the raw batch is passed.
        y_pred = model.predict(image_batch)

        # Model.predict already returns a NumPy array (no .numpy() needed).
        if decoding : y_pred_decode = np.array( decode.decode_predictions(y_pred, top=top))

        return (y_pred, y_pred_decode, label_batch, self.model)

In [None]:
# Display the class names as the cell output.
feature_names

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf



# NOTE(review): autocorrelation_plot needs the series to analyse as its first argument, so this call raises TypeError as written; pass the intended series or remove the cell
pd.plotting.autocorrelation_plot()

In [None]:
def plot_components(data, model, images=None, ax=None,
                    thumb_frac=0.05, cmap='gray_r', prefit = False):
    """Scatter a 2-D projection of ``data`` and optionally overlay image thumbnails.

    Parameters
    ----------
    data :
        Samples to project, or the projection itself when ``prefit`` is true.
    model :
        Object exposing ``fit_transform``; unused when ``prefit`` is true.
    images :
        Optional sequence of images, one per sample, drawn as thumbnails.
    ax :
        Axes to draw on; the current axes are used when omitted.
    thumb_frac : float
        Minimum spacing between thumbnails, as a fraction of the largest
        extent of the projection.
    cmap : str
        Colormap used for the thumbnails.
    prefit : bool
        When true, ``data`` is plotted as is, without calling ``model``.
    """
    if ax is None:
        ax = plt.gca()

    proj = data if prefit else model.fit_transform(data)
    ax.plot(proj[:, 0], proj[:, 1], '.b')

    if images is not None:
        # offsetbox is not imported at notebook level, so import it where it is used.
        from matplotlib import offsetbox

        min_dist_2 = (thumb_frac * max(proj.max(0) - proj.min(0))) ** 2
        # Seed with a point outside the data so the first sample is always shown.
        shown_images = np.array([2 * proj.max(0)])
        for i in range(data.shape[0]):
            dist = np.sum((proj[i] - shown_images) ** 2, 1)
            if np.min(dist) < min_dist_2:
                # Skip points that are too close to a thumbnail already drawn.
                continue
            shown_images = np.vstack([shown_images, proj[i]])
            imagebox = offsetbox.AnnotationBbox(
                offsetbox.OffsetImage(images[i], cmap=cmap),
                                      proj[i])
            ax.add_artist(imagebox)

def plot_reduced_image(image, selector, shape=(62, 47)):
    """Show a feature-selected image at full size, with dropped pixels set to 0.

    Parameters
    ----------
    image :
        1-D sequence holding only the pixels kept by ``selector``, in order.
    selector :
        Object whose ``get_support()`` returns a boolean mask over the flattened
        full image (True = pixel kept).
    shape : tuple
        ``(height, width)`` of the full image; defaults to the previously
        hard-coded 62 x 47.
    """
    n_pixels = int(np.prod(shape))
    mask = np.asarray(selector.get_support(), dtype=bool)[:n_pixels]

    # Scatter the kept pixels into a zero canvas in one vectorised assignment
    # instead of growing an array element by element.
    fullimg = np.zeros(n_pixels)
    fullimg[mask] = np.asarray(image)[:int(mask.sum())]

    plt.imshow(fullimg.reshape(shape), cmap = 'gray')

In [None]:
def plot_word_cloud(text, masque, background_color = "black"):
    """Draw a word cloud of ``text`` shaped by the image stored at ``masque``.

    Parameters
    ----------
    text :
        Text handed to ``WordCloud.generate``.
    masque :
        Path of the mask image; converted with ``str`` before being opened.
    background_color : str
        Background colour of the cloud.

    Notes
    -----
    ``Image``, ``WordCloud`` and ``stop_words`` are read from the notebook
    namespace and must be defined before this function is called.
    """
    # Load the mask picture as an array.
    mask_path = str(masque)
    mask_coloring = np.array(Image.open(mask_path))

    # Configure the cloud layer.
    cloud_options = dict(
        background_color=background_color,
        max_words=200,
        stopwords=stop_words,
        mask=mask_coloring,
        max_font_size=50,
        random_state=42,
    )
    cloud = WordCloud(**cloud_options)

    # Generate the cloud and display it.
    plt.figure(figsize= (20,10))
    cloud.generate(text)
    plt.imshow(cloud)
    plt.show()

# NOTE(review): `text` is not defined in the visible cells and "iron.jpg" is a relative path — confirm both exist before running this cell
plot_word_cloud(text, "iron.jpg")