**Deep Learning Classification**

Steps:
- Data preparation: create the patches and associate each patch with its corresponding ground truth
- Data preprocessing: standardization of the patches
- Create the U-Net model
- Split the data between training, validation and test sets
- Train the models: two distinct models can be trained, one on NDVI images and the other on multispectral images
- Test the model: the model can be tested on other spatial or temporal data to evaluate its robustness
- Compare the results obtained

**1) Data Preparation**

In [2]:
#!pip install -r requirements.txt
from tqdm import tqdm
import subprocess
from PIL import Image
import numpy as np
import rasterio as rio
import pandas as pd
import os
from torchvision import transforms
from torchvision.utils import save_image

from utils import *
from unet_whole_model import *
from dataset import *

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [3]:
# Folder holding the source rasters; every file found here is treated as one date.
image_dir = "./image_PeelRiver/"
# Root folder for the generated image patches (one "date<k>" sub-folder per source file).
output_dir = "./PeelRiver/"
# Folder holding the labelled ground-truth raster.
gt_dir = "./gt_PeelRiver/"
# Folder receiving the ground-truth patches.
gt_output_dir = "./PeelRiver/gt/"
# Band indexes handed to rasterio's read() in open_image; the arrays that come
# back are unpacked, in this order, as blue, green, red, nir.
# NOTE(review): confirm that band 3 of the source rasters really is blue.
band_order = [3, 2, 1, 4] 
# Side length, in pixels, of the square patches.
patch_size = 256


Functions to create patches from the images

In [63]:
def open_image(image_path):
    """Read four bands of a raster file and return them as separate arrays.

    The bands to read, and their order, come from the notebook-level
    ``band_order`` list.

    Args:
        image_path: Path of the raster file to open with rasterio.

    Returns:
        A ``(blue, green, red, nir)`` tuple, i.e. the arrays returned by
        ``read(band_order)`` in that order.
        NOTE(review): the names assume ``band_order`` lists the blue band
        first - confirm against the sensor's band layout.
    """
    with rio.open(image_path) as src:
        bands = src.read(band_order)

    # Unpacking enforces that exactly four bands were requested.
    blue, green, red, nir = bands
    return blue, green, red, nir

def extract_patches(blue, green, red, nir, size=None):
    """Cut four co-registered bands into non-overlapping square patches.

    Patches are taken on a regular grid starting at the top-left corner,
    row by row. Rows and columns that do not fill a complete patch at the
    bottom/right edges are dropped. NaN values are replaced by 0 (via
    ``np.nan_to_num``); no other rescaling is applied here.

    Args:
        blue, green, red, nir: 2-D arrays; the grid is derived from the
            shape of ``blue`` and applied unchanged to the other bands.
        size: Side length of a patch in pixels. Defaults to the
            notebook-level ``patch_size`` when omitted, which keeps the
            original four-argument call working.

    Returns:
        A ``(blue_patches, green_patches, red_patches, nir_patches)`` tuple
        of arrays, each stacking the patches of one band along axis 0. When
        the image is smaller than one patch the arrays are empty.

    Raises:
        ValueError: If the patch size is not strictly positive.
    """
    if size is None:
        size = patch_size
    if size <= 0:
        raise ValueError("patch size must be a positive number of pixels")

    height, width = blue.shape
    # Top-left corners of every complete patch, in row-major order.
    origins = [
        (row, col)
        for row in range(0, height - size + 1, size)
        for col in range(0, width - size + 1, size)
    ]

    def _tile(band):
        # Same grid for every band so patch k always covers the same pixels.
        return np.array([
            np.nan_to_num(band[row:row + size, col:col + size])
            for row, col in origins
        ])

    return _tile(blue), _tile(green), _tile(red), _tile(nir)

def save_patch_as_png(patch_data, output_path):
    """Rescale a 4-band patch to 8 bits and write it as a 4-channel TIFF.

    Despite its historical name, this function always writes TIFF. The four
    bands are stored in a single image by using Pillow's 4-channel 'CMYK'
    mode as a container.

    The patch is standardized (zero mean, unit variance over the whole
    patch), stretched to [0, 1], scaled to 0-255 and cast to uint8. The
    first and third channels are then swapped before writing.

    A patch whose values are all identical has zero variance; it is written
    as an all-zero image instead of dividing by zero.

    Args:
        patch_data: Array whose last axis holds the 4 bands. The caller in
            this notebook passes a (rows, cols, 4) stack.
        output_path: Destination file path.
    """
    lowest = np.min(patch_data)
    highest = np.max(patch_data)

    if highest == lowest:
        # Constant patch: std is 0, so standardizing would produce NaN.
        scaled = np.zeros(patch_data.shape, dtype=float)
    else:
        standardized = (patch_data - np.mean(patch_data)) / np.std(patch_data)
        floor = np.min(standardized)
        scaled = (standardized - floor) / (np.max(standardized) - floor)

    image_uint8 = (scaled * 255).astype('uint8')

    # Swap channels 0 and 2, leave channels 1 and 3 in place.
    patch_data_rgb = image_uint8[..., [2, 1, 0, 3]]

    # 'CMYK' is only used because it is a 4-channel 8-bit mode.
    image = Image.fromarray(patch_data_rgb, mode='CMYK')
    image.save(output_path, format='TIFF')


In [64]:
# Source rasters to process. os.listdir returns names in arbitrary order, so
# the listing is sorted to make the date index given to each file reproducible.
# NOTE(review): the index is chronological only if the file names sort by
# acquisition date - confirm the naming scheme.
image_files = sorted(os.listdir(image_dir))

for date, image_file in enumerate(image_files):
    image_path = os.path.join(image_dir, image_file)

    # Read the four bands, then tile each of them on the same grid.
    blue, green, red, nir = open_image(image_path)
    blue_patches, green_patches, red_patches, nir_patches = extract_patches(blue, green, red, nir)

    # One output folder per source file: <output_dir>/date<k>/
    image_name = f'date{date}'
    image_output_dir = os.path.join(output_dir, image_name)
    os.makedirs(image_output_dir, exist_ok=True)

    # Patch numbering starts at 1.
    band_patches = zip(blue_patches, green_patches, red_patches, nir_patches)
    for i, (blue_patch, green_patch, red_patch, nir_patch) in enumerate(band_patches, 1):
        output_patch_path = os.path.join(image_output_dir, f"patch_{date}_{i}.tif")

        # Stack the four bands along a trailing axis: (rows, cols, 4).
        patch_data = np.stack((blue_patch, green_patch, red_patch, nir_patch), axis=2)

        # Rescale and write the patch (TIFF, despite the helper's name).
        save_patch_as_png(patch_data, output_patch_path)
        

In [78]:
def open_gt(gt_path):
    """Return band 1 of the ground-truth raster stored at ``gt_path``.

    Args:
        gt_path: Path of the raster file to open with rasterio.

    Returns:
        The array produced by ``read(1)`` on the opened dataset.
    """
    with rio.open(gt_path) as src:
        return src.read(1)

def extract_gt_patches(gt, patch_size):
    """Cut a 2-D ground-truth array into non-overlapping square patches.

    Patches are taken on a regular grid from the top-left corner, row by
    row. Incomplete patches at the bottom/right edges are left out.

    Args:
        gt: 2-D array to tile.
        patch_size: Side length of a patch in pixels.

    Returns:
        An array stacking all complete patches along axis 0.

    Raises:
        ValueError: If either dimension of ``gt`` is smaller than
            ``patch_size``.
    """
    height, width = gt.shape

    if min(height, width) < patch_size:
        raise ValueError("La taille du patch est plus grande que les dimensions de l'image")

    # Stopping at (dimension - patch_size) keeps only the complete patches.
    tiles = [
        gt[top:top + patch_size, left:left + patch_size]
        for top in range(0, height - patch_size + 1, patch_size)
        for left in range(0, width - patch_size + 1, patch_size)
    ]
    return np.array(tiles)

def save_gt_patch_as_png(patch_data, output_path):
    """Write a ground-truth patch as an 8-bit grayscale image.

    Values are multiplied by 255 and cast to uint8 before saving.
    NOTE(review): this presumably expects a 0/1 (or boolean) mask so that
    the output is 0 or 255 - verify against the label raster.

    No explicit format is passed to Pillow, so the file format follows the
    extension of ``output_path`` rather than the "png" in the function name.

    Args:
        patch_data: 2-D array holding the mask for one patch.
        output_path: Destination file path.
    """
    scaled = patch_data * 255
    as_bytes = scaled.astype('uint8')

    # 'L' is Pillow's single-channel 8-bit grayscale mode.
    Image.fromarray(as_bytes, mode='L').save(output_path)

In [79]:
# Tile the labelled raster with the same patch size as the images.
groundtruth_path = os.path.join(gt_dir, 'labelled_image_PeelRiver.tif')

# Load the ground truth (first band only) and cut it into patches.
gt = open_gt(groundtruth_path)
patches = extract_gt_patches(gt, patch_size)

# Nothing in the visible cells creates this folder, so make sure it exists
# before writing into it.
os.makedirs(gt_output_dir, exist_ok=True)

# Patch numbering starts at 1, matching the image patches.
for i, patch in enumerate(patches, 1):
    output_patch_path = os.path.join(gt_output_dir, f"gt_13_{i}.tif")

    # Written as TIFF because of the .tif extension, despite the helper's name.
    save_gt_patch_as_png(patch, output_patch_path)

**2) Loading all the patches**

We store the patches in pytorch datasets.

In [4]:
file_path = "./PeelRiver"

# Folder of every data source, keyed by name: the 13 date folders
# (date0 ... date12) first, then the ground-truth folder.
data_paths = {}
for date in range(13):
    key = f"date{date}"
    data_paths[key] = f"{file_path}/{key}"

data_paths["gt"] = f"{file_path}/gt"
print(data_paths)

{'date0': './PeelRiver/date0', 'date1': './PeelRiver/date1', 'date2': './PeelRiver/date2', 'date3': './PeelRiver/date3', 'date4': './PeelRiver/date4', 'date5': './PeelRiver/date5', 'date6': './PeelRiver/date6', 'date7': './PeelRiver/date7', 'date8': './PeelRiver/date8', 'date9': './PeelRiver/date9', 'date10': './PeelRiver/date10', 'date11': './PeelRiver/date11', 'date12': './PeelRiver/date12', 'gt': './PeelRiver/gt'}


In [6]:
# Load the content of every folder listed in data_paths, keeping the same keys.
# load_paths_data comes from one of the star-imported project modules; the cell
# below indexes each returned item with 'patch_path'.
all_data = {key : load_paths_data(path) for key, path in data_paths.items()}

Loading data: 100%|██████████| 99/99 [00:00<00:00, 98607.48it/s]
Loading data: 100%|██████████| 99/99 [00:00<00:00, 42799.02it/s]
Loading data: 100%|██████████| 99/99 [00:00<00:00, 48702.33it/s]
Loading data: 100%|██████████| 99/99 [00:00<00:00, 49633.77it/s]
Loading data: 100%|██████████| 99/99 [00:00<00:00, 49479.99it/s]
Loading data: 100%|██████████| 99/99 [00:00<00:00, 44953.57it/s]
Loading data: 100%|██████████| 99/99 [00:00<00:00, 47799.71it/s]
Loading data: 100%|██████████| 99/99 [00:00<00:00, 99291.27it/s]
Loading data: 100%|██████████| 99/99 [00:00<00:00, 41947.28it/s]
Loading data: 100%|██████████| 99/99 [00:00<00:00, 98420.50it/s]
Loading data: 100%|██████████| 99/99 [00:00<00:00, 99243.81it/s]
Loading data: 100%|██████████| 99/99 [00:00<00:00, 97979.26it/s]
Loading data: 100%|██████████| 99/99 [00:00<00:00, 49462.31it/s]
Loading data: 100%|██████████| 99/99 [00:00<00:00, 99220.09it/s]


In [7]:
from collections import defaultdict

# Patch position (1-based) -> paths of that patch in every folder of all_data,
# in the iteration order of all_data.
# NOTE(review): the position is the rank of the entry in the loaded list, not
# the number embedded in the file name - this relies on load_paths_data
# returning every folder in the same patch order.
grouped_paths = defaultdict(list)

for date, patches in all_data.items():
    # Normalise Windows separators so paths look the same on every platform.
    normalized = (info['patch_path'].replace('\\', '/') for info in patches)
    for patch_index, patch_path in enumerate(normalized, start=1):
        grouped_paths[patch_index].append(patch_path)

print(grouped_paths)

defaultdict(<class 'list'>, {1: ['./PeelRiver/date0/patch_0_1.tif', './PeelRiver/date1/patch_1_1.tif', './PeelRiver/date2/patch_2_1.tif', './PeelRiver/date3/patch_3_1.tif', './PeelRiver/date4/patch_4_1.tif', './PeelRiver/date5/patch_5_1.tif', './PeelRiver/date6/patch_6_1.tif', './PeelRiver/date7/patch_7_1.tif', './PeelRiver/date8/patch_8_1.tif', './PeelRiver/date9/patch_9_1.tif', './PeelRiver/date10/patch_10_1.tif', './PeelRiver/date11/patch_11_1.tif', './PeelRiver/date12/patch_12_1.tif', './PeelRiver/gt/gt_13_1.tif'], 2: ['./PeelRiver/date0/patch_0_2.tif', './PeelRiver/date1/patch_1_2.tif', './PeelRiver/date2/patch_2_2.tif', './PeelRiver/date3/patch_3_2.tif', './PeelRiver/date4/patch_4_2.tif', './PeelRiver/date5/patch_5_2.tif', './PeelRiver/date6/patch_6_2.tif', './PeelRiver/date7/patch_7_2.tif', './PeelRiver/date8/patch_8_2.tif', './PeelRiver/date9/patch_9_2.tif', './PeelRiver/date10/patch_10_2.tif', './PeelRiver/date11/patch_11_2.tif', './PeelRiver/date12/patch_12_2.tif', './PeelRiv

**Creating batches:** when you train a deep learning model, you don't usually run each sample individually through the neural network. Instead, you divide the samples into small groups called batches and run each batch through the network at once.
We first choose to create 99 batches, each batch containing 13 patches of the same place at different times.

TODO: maybe this grouping could be implemented in the dataset.py file.

In [8]:
# Split every group of paths into image paths and ground-truth paths, keyed by
# patch position.
images_paths_dict = defaultdict(list)
groundtruths_paths_dict = defaultdict(list)

for index, paths in grouped_paths.items():
    # A path is treated as ground truth when it contains "gt".
    gt_paths = [path for path in paths if "gt" in path]
    image_paths = [path for path in paths if "gt" not in path]

    if image_paths:
        images_paths_dict[index].extend(image_paths)

    # Repeat each ground-truth path once per image of the group so both lists
    # have the same length. The count is derived from the data instead of a
    # hard-coded 13, so it follows the number of dates actually loaded.
    if gt_paths and image_paths:
        for gt_path in gt_paths:
            groundtruths_paths_dict[index].extend([gt_path] * len(image_paths))


In [9]:
# Build the dataset from the per-position image and ground-truth path lists.
# PatchesDataset comes from one of the star-imported project modules; the third
# argument is presumably a dataset/site name - verify against its definition.
dataset = PatchesDataset(images_paths_dict, groundtruths_paths_dict, "PeelRiver")

In [15]:
# Baseline run: train a model on the dataset, then plot its history.
# Model comes from one of the star-imported project modules.
modelname1 = "no_augmentation_model"
lr = 8e-4  # learning rate handed to Model
n_epochs = 15  # number of epochs handed to train()
no_augmentation_model = Model(modelname1, lr = lr)
# NOTE(review): the recorded output of this cell shows a negative loss and an
# F1 of 0 at epoch 0 before the run was interrupted - worth checking the loss
# and metric definitions in the model module.
no_augmentation_model.train(dataset, num_epochs= n_epochs)
no_augmentation_model.plot_history()

From initial model:
Epoch: 0/15 Train_loss: -3.03322, Train_F1: 0.00000, Train_Accuracy: 0.053899262717407, Val_loss: -2.72663, Val_F1: 0.00000, Val_Accuracy: 0.04187


KeyboardInterrupt: 