# Notebook to help the main come to life 

In [None]:
########## IMPORTS ##########

# common
import rasterio
import numpy as np
import matplotlib.pyplot as plt

# OTSU
from skimage.filters import threshold_otsu 

# filtering 
from scipy.ndimage import uniform_filter
from skimage.measure import label, regionprops 

# shapefile_to_mask
import geopandas as gpd
from rasterio.features import rasterize

# test & display
from pprint import pprint
import itertools
import traceback

In [None]:
## BANDS RECUPERATION ##

# TODO

# === paths to both images (pre = img1, post = img2) ===

path_img1 = r"C:\Users\gbonlieu\Documents\herramienta\change_detection_tool\data\preprocessed\t6\t6_pre_sigma.tif"
path_img2 = r"C:\Users\gbonlieu\Documents\herramienta\change_detection_tool\data\preprocessed\t6\t6_post_sigma.tif"

# === read all bands and the rasterio metadata of each image ===

with rasterio.open(path_img1) as src1:  # pre image
    # img1 has shape (nb_bands, height, width); profile1 holds the metadata
    img1, profile1 = src1.read(), src1.profile

with rasterio.open(path_img2) as src2:  # post image
    img2, profile2 = src2.read(), src2.profile

## TODO

# Tests

# === quick print for verification ===
print("Image 1 :", img1.shape, "bands =", profile1["count"])
print("Image 2 :", img2.shape, "bands =", profile2["count"])
print("Number of NaN :", np.isnan(img1).sum())
print("Number of NaN :", np.isnan(img1).sum())


In [None]:
########## NODATA HANDLING ##########
 
def to_nan(arr: np.ndarray, nodata_values=(-9999, -32768, -3.4028235e38)) -> np.ndarray:
    """
    Replace NoData values by NaN.

    Parameters
    ----------
    arr : np.ndarray
        Input array (any dtype).
    nodata_values : tuple of numbers, optional
        Values to treat as NoData and convert to NaN.

    Returns
    -------
    np.ndarray
        Float32 array with the same shape as `arr`, where all nodata_values
        have been replaced by NaN.
    """
    out = arr.astype("float32", copy=True) # we cast to float32 so that NaN is representable

    for nd in nodata_values:
        np.putmask(out, out == nd, np.nan) # np.putmask(array, mask, value) replaces array[mask] by value

    return out

# TODO 

# Replace the NoData sentinels of both images by NaN (float32 copies)
img1, img2 = to_nan(img1), to_nan(img2)

# TODO

# Tests

# === quick print for verification ===
print("Image 1 :", img1.shape, "bands =", profile1["count"])
print("Image 2 :", img2.shape, "bands =", profile2["count"])
print("Number of NaN :", np.isnan(img1).sum())


In [None]:
########## DIMENSIONS ALIGNMENT BY PADDING ##########


def pad_right(img: np.ndarray, ncols: int = 1, fill_value: float = np.nan) -> np.ndarray:
    """
    Append `ncols` columns filled with `fill_value` on the RIGHT of an image.

    Parameters
    ----------
    img : np.ndarray
        3D (C, H, W) image (rasterio returns 3D arrays even for one band, C=1).
    ncols : int, optional
        Number of columns to append (default 1).
    fill_value : float, optional
        Value written in the new pixels (NaN by default).

    Returns
    -------
    np.ndarray
        New array of shape (C, H, W + ncols).

    Raises
    ------
    ValueError
        If `img` is not 3D.
    """
    if img.ndim != 3:
        raise ValueError("img must be 3D (C, H, W)")

    untouched = (0, 0)  # no padding on the band and height axes
    return np.pad(
        img,
        pad_width=(untouched, untouched, (0, ncols)),
        mode="constant",
        constant_values=fill_value,
    )


def pad_bottom(img: np.ndarray, nrows: int = 1, fill_value: float = np.nan) -> np.ndarray:
    """
    Add one or several rows of pixels at the BOTTOM of an image.

    Parameters
    ----------
    img : np.ndarray
        3D (C, H, W) image (rasterio returns 3D arrays even for one band, C=1).
    nrows : int, optional
        Number of rows to add (default 1).
    fill_value : float, optional
        Value used to fill the new pixels (NaN by default).

    Returns
    -------
    np.ndarray
        New image of shape (C, H + nrows, W).

    Raises
    ------
    ValueError
        If `img` is not 3D.
    """
    if img.ndim != 3:
        # same wording as pad_right so both helpers report the error identically
        raise ValueError("img must be 3D (C, H, W)")

    # padding on the height axis only: (C), (H), (W)
    pad_width = ((0, 0), (0, nrows), (0, 0))

    return np.pad(img, pad_width=pad_width, mode="constant", constant_values=fill_value)


def _profile_values_differ(value1, value2) -> bool:
    """Tell whether two profile entries differ, treating NaN and NaN as equal."""
    float_types = (float, np.floating)
    if (isinstance(value1, float_types) and isinstance(value2, float_types)
            and np.isnan(value1) and np.isnan(value2)):
        # NaN != NaN is always True, yet two profiles that both declare
        # nodata=NaN are compatible
        return False
    return bool(value1 != value2)


def align_by_padding(img1: np.ndarray, profile1: dict, img2: np.ndarray, profile2: dict, fill_value: float = np.nan, check_max_diff: bool = True) -> tuple[np.ndarray, np.ndarray]:
    """
    Align two images by adding columns (to the RIGHT) and/or rows (at the BOTTOM) to the smaller one so that both have the same size (H, W).

    The profiles are not modified here, otherwise the affine transform would
    also have to be updated, which is not trivial. Later in the pipeline the
    result is cropped back to the original size of img1.

    Parameters
    ----------
    img1 : np.ndarray     First image, 3D (C, H, W).
    profile1 : dict       Rasterio profile for the first image.
    img2 : np.ndarray     Second image, 3D (C, H, W).
    profile2 : dict       Rasterio profile for the second image.
    fill_value : float, optional      Value used for padding (NaN by default).
    check_max_diff : bool, optional   If True, raise an error if the initial height/width difference is greater than 1 row or 1 column.

    Returns
    -------
    img1_out : np.ndarray First image after padding (if any).
    img2_out : np.ndarray Second image after padding (if any).

    Raises
    ------
    ValueError
        If a key of `profile1` (other than the size-related ones) is missing
        from `profile2` or has a different value, or if `check_max_diff` is
        True and the sizes differ by more than one row/column.
    """

    # ----- Check that the two profiles are identical EXCEPT for keys directly or indirectly related to size -----

    keys_to_ignore = {"transform", "height", "width", "blockxsize", "blockysize"}

    for key in profile1:
        if key in keys_to_ignore:
            continue
        if key not in profile2:
            raise ValueError(
                f"[ERROR] align_by_padding: key '{key}' is missing in the second profile.")
        if _profile_values_differ(profile1[key], profile2[key]):
            raise ValueError(
                f"[ERROR] align_by_padding: profiles differ on key '{key}'.\n"
                f"profile1[{key!r}] = {profile1[key]!r}\n"
                f"profile2[{key!r}] = {profile2[key]!r}\n"
                "Images are not spatially compatible for spatial alignment."
            )

    # ----- Size differences (positive => img2 larger than img1) -----
    dH = img2.shape[-2] - img1.shape[-2]
    dW = img2.shape[-1] - img1.shape[-1]

    if check_max_diff and (abs(dH) > 1 or abs(dW) > 1):
        raise ValueError(f"Size difference greater than 1 row/column: dH={dH}, dW={dW}")

    img1_out, img2_out = img1, img2

    # ----- Align height (rows): pad the shorter image at the BOTTOM -----
    if dH > 0:
        img1_out = pad_bottom(img1_out, nrows=dH, fill_value=fill_value)
    elif dH < 0:
        img2_out = pad_bottom(img2_out, nrows=-dH, fill_value=fill_value)

    # ----- Align width (columns): pad the narrower image to the RIGHT -----
    if dW > 0:
        img1_out = pad_right(img1_out, ncols=dW, fill_value=fill_value)
    elif dW < 0:
        img2_out = pad_right(img2_out, ncols=-dW, fill_value=fill_value)

    return img1_out, img2_out



# TODO 

# Pad the smaller image (bottom/right) so that both share the same (H, W)
img1, img2 = align_by_padding(img1=img1, profile1=profile1, img2=img2, profile2=profile2)

# TODO

# Tests

# === quick print for verification ===
print("Image 1 :", img1.shape, "bands =", profile1["count"])
print("Image 2 :", img2.shape, "bands =", profile2["count"])
print("Number of NaN :", np.isnan(img1).sum())

In [None]:
########## CLIPPING AND NORMALIZATION OF BANDS ########## 

def clip_percentiles(img: np.ndarray, p_low: float = 1, p_high: float = 99) -> np.ndarray:
    """
    Clamp each band of `img` to its own [p_low, p_high] percentile range.

    Percentiles are computed with np.nanpercentile, so NaN pixels are ignored
    when estimating the bounds and remain NaN in the output.

    Input: array (nb_bands, height, width)
    Output: float array of the same shape
    """
    clipped = np.empty_like(img, dtype=float)

    for idx, band in enumerate(img):
        # bounds are estimated per band, never across bands
        lower_bound = np.nanpercentile(band, p_low)
        upper_bound = np.nanpercentile(band, p_high)
        clipped[idx] = np.clip(band, lower_bound, upper_bound)

    return clipped


def normalize_band(arr: np.ndarray) -> np.ndarray:
    """
    Min-max normalize a SINGLE band to [0, 1].

    NaN pixels are ignored when computing the min and max (np.nanmin /
    np.nanmax) and stay NaN in the output, including in the degenerate case
    below.

    Parameters
    ----------
    arr : np.ndarray
        One band.

    Returns
    -------
    np.ndarray
        Normalized band of the same shape. If all valid pixels share the same
        value, every valid pixel maps to 0.
    """
    arr_min = np.nanmin(arr)
    arr_max = np.nanmax(arr)

    if arr_max == arr_min:
        # Constant band: avoid 0/0. NaN pixels must remain NaN so that NoData
        # is not silently turned into a valid 0.
        return np.where(np.isnan(arr), np.nan, 0.0)

    return (arr - arr_min) / (arr_max - arr_min)


def normalize_image(img: np.ndarray) -> np.ndarray:
    """
    Normalize a multi-band image band by band.

    normalize_band is called once per band so that each band is scaled with
    its own min/max instead of a range shared across bands.
    """
    scaled = np.empty_like(img, dtype=float)  # float container, same shape as img
    for idx, band in enumerate(img):
        scaled[idx] = normalize_band(band)
    return scaled

## TODO 

# Clip the outliers of each band, then rescale each band with normalize_image
img1, img2 = (normalize_image(clip_percentiles(image)) for image in (img1, img2))

## TODO

# Test

val_min, val_max = np.nanmin(img1), np.nanmax(img1)
print(f"Value min : {val_min}")
print(f"Value max : {val_max}")



In [None]:
########## DISSIMILARITY MATRIX ##########

def dissimilarity(img1: np.ndarray, img2: np.ndarray) -> np.ndarray:
    """
    Per-pixel dissimilarity between two images with bands normalized to [0, 1].

    For each pixel, the Euclidean distance between the two band vectors is
    computed and divided by sqrt(nb_bands), the largest distance possible
    when every band lies in [0, 1]. Hence 0 = identical pixels and
    1 = maximally different pixels.

    Parameters
    ----------
    img1, img2 : np.ndarray
        Images of identical shape (bands, height, width).

    Returns
    -------
    np.ndarray
        2D array (height, width). A pixel is NaN as soon as one band is NaN
        in either image (plain np.sum propagates NaN).

    Raises
    ------
    ValueError
        If the two images do not have the same shape.
    """

    # Basic verification
    if img1.shape != img2.shape:
        raise ValueError("Both images must have the same shape (bands, height, width)")

    n_bands = img1.shape[0]

    # Vectorized Euclidean distance: sum of squared differences over the band axis
    dist = np.sqrt(np.sum((img1 - img2) ** 2, axis=0))

    # Scale by the maximum reachable distance so that the result spans [0, 1]
    return dist / np.sqrt(n_bands)

## TODO

dist = dissimilarity(img1, img2)

## TODO

# Test

# dissimilarity() sums over bands with np.sum, so a pixel is NaN as soon as
# one band is NaN in either image; hence the NaN-aware min/max below.
val_min = np.nanmin(dist)
val_max = np.nanmax(dist)
print(f"Value min : {val_min}")
print(f"Value max : {val_max}")
print("Number of NaN :", np.isnan(dist).sum())

figdist, axdist = plt.subplots(figsize=(7,5))
imgdist = axdist.imshow(dist, cmap="gray")
axdist.set_title("Dissimilarity")
figdist.colorbar(imgdist, ax=axdist, label="Pixel values")

# ---- Export the dissimilarity map as a single-band uint8 GeoTIFF ----
output_path = r"C:\Users\gbonlieu\Documents\herramienta\explore\dis.tif"

# Reuse the metadata already loaded for the pre image (profile1). The raster
# is not re-read here: that would overwrite the normalized img1 with raw data.
dist_profile = profile1.copy()
dist_profile.update(dtype="uint8", count=1, nodata=0)  # same export convention as main_dtod_0

# Crop back to the size described by profile1 (dist may hold one padded row/column)
dist_cropped = dist[: profile1["height"], : profile1["width"]]

# NaN cannot be stored in uint8: map it to 0 before scaling [0, 1] -> [0, 255]
data_to_write = (np.clip(np.nan_to_num(dist_cropped, nan=0.0), 0.0, 1.0) * 255).astype("uint8")

with rasterio.open(output_path, "w", **dist_profile) as dst:
    dst.write(data_to_write, 1)


In [None]:
########## TILING ##########


def tile_image_2d(img2d: np.ndarray, n: int, fill_value=np.nan):
    """
    Split a 2D image (H, W) into n x n tiles of equal size.

    If H or W is not a multiple of n, the image is padded with `fill_value`
    at the bottom/right. As a consequence, the quality of the analysis along
    the bottom and right borders is strongly degraded.

    Parameters
    ----------
    img2d : np.ndarray
        2D image (H, W).
    n : int
        Number of tiles along each axis (>= 1).
    fill_value : optional
        Value used for the padding (NaN by default).

    Returns
    -------
    tiles : np.ndarray
        Array of shape (n, n, tile_height, tile_width); tiles[i, j] is the
        2D tile at row i, column j.
    meta : dict
        Keys tile_height, tile_width, pad_bottom, pad_right (used to rebuild
        the image).

    Raises
    ------
    ValueError
        If `img2d` is not 2D or if `n` is smaller than 1.
    """
    if img2d.ndim != 2:
        raise ValueError("img2d must be 2D (H, W)")
    if n < 1:
        raise ValueError(f"n must be >= 1, got {n}")

    H, W = img2d.shape

    # Tile sizes: ceil so that nothing is lost (padding if necessary)
    tile_height = int(np.ceil(H / n))
    tile_width = int(np.ceil(W / n))

    # Amount of padding needed to reach exactly n * tile size
    pad_rows = tile_height * n - H
    pad_cols = tile_width * n - W

    # Bottom/right padding
    img_pad = np.pad(img2d,
                     pad_width=((0, pad_rows), (0, pad_cols)),
                     mode="constant",
                     constant_values=fill_value)

    # Reshape to (n, tile_height, n, tile_width), then swap the two middle
    # axes to get (n, n, tile_height, tile_width)
    tiles = img_pad.reshape(n, tile_height, n, tile_width).transpose(0, 2, 1, 3)
    meta = dict(tile_height=tile_height, tile_width=tile_width, pad_bottom=pad_rows, pad_right=pad_cols)

    return tiles, meta

## TODO 

# n=1: the whole dissimilarity map is handled as a single tile
tiles, meta = tile_image_2d(dist, n=1)

## TODO


# Test

# Display the top-left tile of a 10 x 10 tiling
tile11 = tile_image_2d(dist, n=10)[0][0, 0]
figtile1, axtile1 = plt.subplots(figsize=(7, 5))
imgtile1 = axtile1.imshow(tile11, cmap="gray")
axtile1.set_title("Dissimilarity tile 11")
figtile1.colorbar(imgtile1, ax=axtile1, label="Pixel Values")


In [None]:
########## THRESHOLDING ##########

def otsu_tile(img: np.ndarray, k: float = 1.0, nan_as_bg: bool = True, foreground: str = "high"):
    """
    Global Otsu on a 2D image (values expected in [0, 1]).

    Parameters
    ----------
    img : np.ndarray
        2D image. NaN allowed.
    k : float
        Threshold scaling factor (not applied in the degenerate case where
        all finite values are identical).
    nan_as_bg : bool
        If True, non-finite pixels (NaN/inf) become background (False).
        If False, non-finite pixels become object (True).
    foreground : {"high", "low"}
        "high" -> pixels >= threshold are set to True ;
        "low"  -> pixels <= threshold are set to True.
        Here "change" is the object, so usually "high".

    Returns
    -------
    mask : np.ndarray bool
        Binary mask.
    thr : float
        Otsu threshold computed on finite values (scaled by k).

    Raises
    ------
    ValueError
        If `img` is not 2D, contains no finite value, or if `foreground` is
        neither "high" nor "low".
    """
    if img.ndim != 2:
        raise ValueError("Image must be 2D")

    # only finite values take part in the threshold estimation
    finite = np.isfinite(img)
    valid = img[finite]
    if valid.size == 0:
        # all NaN: no usable threshold
        raise ValueError("Cannot compute Otsu threshold: the image contains only NaN values.")

    vmin, vmax = float(valid.min()), float(valid.max())
    if vmin == vmax:
        # degenerate case: all identical -> threshold = that value
        thr = vmin
    else:
        thr = k * float(threshold_otsu(valid))

    if foreground == "high":
        mask = img >= thr
    elif foreground == "low":
        mask = img <= thr
    else:
        raise ValueError("foreground must be 'high' or 'low'")

    # Comparisons with NaN are always False, so non-finite pixels have to be
    # assigned explicitly: background when nan_as_bg, object otherwise.
    mask = np.where(finite, mask, not nan_as_bg)

    return mask.astype(bool), thr



def apply_otsu_to_tiles(tiles: np.ndarray, k: float = 1.0, *, nan_as_bg: bool = True, foreground: str = "high"):
    """
    Threshold every tile of a (n_rows, n_cols, tile_h, tile_w) array with otsu_tile.

    Parameters
    ----------
    tiles : np.ndarray
        4D array of tiles.
    k, nan_as_bg, foreground
        Forwarded unchanged to otsu_tile for each tile.

    Returns
    -------
    masks : (n_rows, n_cols, tile_h, tile_w) bool: Binary mask for each tile
    thr_grid : (n_rows, n_cols) float32: Threshold per tile

    Raises
    ------
    ValueError
        If `tiles` is not 4D.
    """
    if tiles.ndim != 4:
        raise ValueError("tiles must have shape (n_rows, n_cols, tile_h, tile_w)")

    grid_shape = tiles.shape[:2]
    masks = np.empty_like(tiles, dtype=bool)
    thr_grid = np.empty(grid_shape, dtype=np.float32)

    # np.ndindex walks the tile grid in row-major order: (0, 0), (0, 1), ...
    for cell in np.ndindex(*grid_shape):
        tile_mask, tile_thr = otsu_tile(tiles[cell], k, nan_as_bg=nan_as_bg, foreground=foreground)
        masks[cell] = tile_mask
        thr_grid[cell] = tile_thr

    return masks, thr_grid




## TODO

# One Otsu threshold per tile, with the default k / nan_as_bg / foreground
masks, thr_grid = apply_otsu_to_tiles(tiles=tiles)

## TODO

# Test

# Threshold the single tile extracted in the tiling test and display it
tile11s, thr = otsu_tile(img=tile11)
print(f"Thresholds : {thr_grid}")
figtile1s, axtile1s = plt.subplots(figsize=(7, 5))
imgtile1s = axtile1s.imshow(tile11s, cmap="gray")
axtile1s.set_title("Dissimilarity tile 11 thresholded")
figtile1s.colorbar(imgtile1s, ax=axtile1s, label="Pixel values")



In [None]:
########## REASSEMBLY ##########

def assemble_tiles_to_image(masks_4d: np.ndarray, meta: dict | None = None) -> np.ndarray:  # meta: dict | None = None, this notation is possible since python 3.11
    """
    Stitches a 4D array (n_rows, n_cols, tile_h, tile_w) back into a single 2D image.

    If `meta` is provided (with pad_bottom and pad_right), the padding added during tiling is removed.

    Parameters
    ----------
    masks_4d : np.ndarray bool
        One mask (2D array) per tile, shape: (n_rows, n_cols, tile_h, tile_w)
    meta : dict, optional
        Dictionary returned by tile_image_2d, dict(tile_height=tile_height, tile_width=tile_width,
        pad_bottom=pad_bottom, pad_right=pad_right)

    Returns
    -------
    mask_2d : np.ndarray bool
        Global 2D mask (without padding if meta is provided)
    """
    if masks_4d.ndim != 4:
        raise ValueError("masks_4d must have shape (n_rows, n_cols, tile_h, tile_w)")

    n_r, n_c, tile_h, tile_w = masks_4d.shape

    # Put tiles back in original order so they can be merged with reshape
    mask_padded = masks_4d.transpose(0, 2, 1, 3).reshape(n_r * tile_h, n_c * tile_w)

    # Remove padding if meta is provided
    if meta is not None:
        pad_bottom = int(meta.get("pad_bottom", 0))  # meta.get("pad_bottom", 0) reads the value if it exists, else 0 by default (no padding)
        pad_right = int(meta.get("pad_right", 0))    # same here
        if pad_bottom or pad_right:  # If at least one of the two is non-zero, we crop
            mask_padded = mask_padded[: mask_padded.shape[0] - pad_bottom,
                                      : mask_padded.shape[1] - pad_right]

    return mask_padded.astype(bool)


## TODO

# Stitch the per-tile masks together and crop the tiling padding
final = assemble_tiles_to_image(masks_4d=masks, meta=meta)

## TODO

# Tests

figfin, axfin = plt.subplots(figsize=(7, 5))
imgfin = axfin.imshow(final, cmap="gray")
axfin.set_title("Final")
figfin.colorbar(imgfin, ax=axfin, label="Pixel values")



In [None]:
########## FILTERING ##########



def filter_dense_regions(mask: np.ndarray,
                          win_size: int = 30,
                          d: float = 0.5,
                          min_area: int = 3000, closing: bool = False) -> np.ndarray:
    """
    Filters a binary mask to keep only the zones:
      - located in a locally dense white region
      - with a sufficient size (minimum area)

    Parameters
    ----------
    mask : np.ndarray bool 0/1
        Binary image (True/1 = white).
    win_size : int
        Side of the square window used for the local density (in pixels).
    d : float
        Minimum density of white in the window (0-1).
    min_area : int
        Minimum area (in pixels) for kept connected components.
    closing : bool
        If True, keep only pixels both dense AND originally white.
        If False, every pixel of a dense zone is kept.

    Returns
    -------
    filt : np.ndarray uint8
        Filtered binary mask (0/1), same shape as `mask`. uint8 rather than
        bool because the result is later written to a GeoTIFF.
    """

    # Float 0/1 array so that the moving average is a density
    mask_float = mask.astype(float)

    # 1) Local density of white in a window of size win_size
    # mode="nearest" duplicates edge pixels to limit border artifacts
    local_mean = uniform_filter(mask_float, size=win_size, mode="nearest")
    dense_mask = local_mean >= d  # pixels inside a dense zone

    # Optionally restrict to pixels originally white
    core = dense_mask & (mask_float > 0.5) if closing else dense_mask

    # 2) Filter by connected-component size
    # connectivity=2 -> 8-neighborhood (diagonals included); label 0 is background
    labels = label(core, connectivity=2)

    # Pixel count of every label in one pass, instead of scanning the whole
    # image once per component
    areas = np.bincount(labels.ravel(), minlength=1)
    keep = areas >= min_area
    keep[0] = False  # background is never kept

    # Lookup: each pixel receives the keep flag of its own label
    return keep[labels].astype(np.uint8)

## TODO

# Keep only large components located in locally dense areas
filt = filter_dense_regions(final, win_size=30, d=0.5, min_area=3000, closing=False)

## TODO

# Tests

figfilt, axfilt = plt.subplots(figsize=(7, 5))
imgfilt = axfilt.imshow(filt, cmap="gray")
axfilt.set_title("Filtered")
figfilt.colorbar(imgfilt, ax=axfilt, label="Pixel values")


In [None]:
########## MAIN_DTOD_0 ##########
 
def main_dtod_0(path_img1: str, path_img2: str, n: int, k: float = 1.0, closing: bool = False, p: int = 30, d: float = 0.5, a: int = 4000, out_path: str | None = None) -> tuple[dict, np.ndarray]:
    """
    Run the whole change-detection chain on a pair of rasters.

    Steps: read both rasters, NoData -> NaN, alignment by padding, percentile
    clipping and per-band normalization, dissimilarity map, n x n tiling,
    Otsu thresholding per tile, reassembly, density/area filtering, crop back
    to the size of the first image.

    Parameters
    ----------
    path_img1, path_img2 : str
        Paths to the first (reference) and second rasters.
    n : int
        Number of tiles per axis, passed to tile_image_2d.
    k : float
        Threshold scaling factor, passed to apply_otsu_to_tiles.
    closing : bool
        Passed to filter_dense_regions (`closing`).
    p : int
        Window size passed to filter_dense_regions (`win_size`).
    d : float
        Minimum density passed to filter_dense_regions (`d`).
    a : int
        Minimum area passed to filter_dense_regions (`min_area`).
    out_path : str, optional
        If out_path is not None, writes the raster at the given path out_path
        (mask multiplied by 255, uint8).

    Returns
    -------
    profile
        Copy of the first image's profile updated with dtype="uint8",
        nodata=0, count=1.
    filt : np.ndarray
        Filtered mask as returned by filter_dense_regions, cropped to the
        height/width of the first image.
    """

    # ==== bands loading ====
    with rasterio.open(path_img1) as src1:
        img1, profile1 = src1.read(), src1.profile

    with rasterio.open(path_img2) as src2:
        img2, profile2 = src2.read(), src2.profile

    # ==== NaN handling and padding ====
    img1, img2 = align_by_padding(to_nan(img1), profile1, to_nan(img2), profile2)

    # ==== clipping and normalization ====
    img1 = normalize_image(clip_percentiles(img1))
    img2 = normalize_image(clip_percentiles(img2))

    # ==== dissimilarity, tiling, thresholding, reassembly, filtering ====
    dist = dissimilarity(img1, img2)
    tiles, meta = tile_image_2d(dist, n)
    masks, _thr_grid = apply_otsu_to_tiles(tiles, k)
    final = assemble_tiles_to_image(masks, meta)
    filt = filter_dense_regions(final, p, d, a, closing)

    # Crop back to the initial size of img1 so that the mask matches the
    # affine transform kept in the profile (updating the transform for the
    # padded size would be harder to get right)
    filt = filt[: profile1["height"], : profile1["width"]]

    # Only the entries that changed are updated in the output profile
    profile = profile1.copy()
    profile.update(dtype="uint8", nodata=0, count=1)

    # ==== writing ====
    if out_path is not None:
        with rasterio.open(out_path, "w", **profile) as dst:
            # explicit uint8 cast so that no boolean array is written to the tif
            dst.write(filt.astype("uint8") * 255, 1)

    return profile, filt



# TODO

profile, filt = main_dtod_0(path_img1, path_img2, n=1, k=1, p=27, d=0.5, a=3000) # out_path=r"C:\Users\gbonlieu\Documents\code_python_outil\outil_detection_changement\ouputs\output_vrac\t3bis_out\test2.tif")

# TODO

# Test

# Second full run (10 x 10 tiles), only to print the height of its output mask
print(f"Height: {main_dtod_0(path_img1, path_img2, 10)[1].shape[-2]}")
pprint(profile)

# --- display ---
fig, axes = plt.subplots(figsize=(12, 6))

# Filtered image
im1 = axes.imshow(filt, cmap="gray")
axes.set_title("Filtered")
fig.colorbar(im1, ax=axes, label="Pixel Values")

plt.tight_layout()




In [None]:
########## PERFORMANCE METRICS ##########

########## SHAPEFILE TO MASK ##########

def shapefile_to_mask(shp_path: str, ref_raster_path: str, out_path: str | None = None) -> np.ndarray:
    """
    Rasterizes a polygon shapefile into a binary mask (0/1),
    aligned on a reference raster.

    Parameters
    ----------
    shp_path : str
        Path to the shapefile (polygons).
    ref_raster_path : str
        Reference GeoTIFF providing size, transform and CRS.
    out_path : str, optional
        If not None, the mask is also saved as a GeoTIFF at this path
        (values multiplied by 255 for visibility).

    Returns
    -------
    np.ndarray
        2D mask, dtype uint8, 1 inside the polygons and 0 elsewhere.

    Raises
    ------
    ValueError
        If the shapefile holds no usable geometry, or if its CRS differs
        from the CRS of the reference raster.
    """

    # 1) Read reference raster
    with rasterio.open(ref_raster_path) as src:
        ref_transform = src.transform
        ref_crs = src.crs
        out_shape = (src.height, src.width)
        profile = src.profile

    # 2) Read shapefile
    gdf = gpd.read_file(shp_path)

    if gdf.empty or gdf.geometry.isna().all() or gdf.geometry.is_empty.all():
        raise ValueError(
            "[ERROR] shapefile contains no valid geometry "
            "(0 entities, null or empty geometries)."
        )

    # 3) Ensure both CRS match (no automatic reprojection: the user is asked
    #    to reproject; gdf.to_crs(ref_crs) would be the automatic alternative)
    if gdf.crs != ref_crs:
        raise ValueError(
            f"[ERROR] CRS of shapefile ({gdf.crs}) differs from raster CRS ({ref_crs}).\n"
            f"Please reproject your shapefile to ({ref_crs}) before continuing."
        )

    # 4) Prepare geometries for rasterization
    # rasterize() expects (geometry, value) pairs; every polygon is burned with 1.
    # Null AND empty geometries are skipped: the check above only rejects the
    # case where ALL geometries are unusable, and rasterize() does not accept
    # empty shapes.
    shapes = [
        (geom, 1)
        for geom in gdf.geometry
        if geom is not None and not geom.is_empty
    ]

    # 5) Rasterization -> mask 0/1
    mask = rasterize(shapes=shapes, out_shape=out_shape, transform=ref_transform, fill=0, dtype="uint8")

    # 6) Optional save as GeoTIFF
    if out_path is not None:
        out_profile = profile.copy()  # leave the profile read from the source untouched
        out_profile.update(dtype="uint8", count=1, nodata=0)
        with rasterio.open(out_path, "w", **out_profile) as dst:
            dst.write(mask.astype("uint8") * 255, 1)  # x255 for visibility, written on band 1

    return mask



########## METRICS ##########

def confusion_from_masks(y_true: np.ndarray, y_pred: np.ndarray) -> tuple[int, int, int, int]:
    """
    Computes TP, TN, FP, FN between two binary masks (0/1 or bool).

    Parameters
    ----------
    y_true : array-like
        Reference mask (ground truth); any non-zero value counts as positive.
    y_pred : array-like
        Predicted mask; any non-zero value counts as positive.

    Returns
    -------
    tp, tn, fp, fn : int
        Pixel counts as plain Python integers.

    Raises
    ------
    ValueError
        If the two masks do not have the same shape.
    """

    # Convert first so that lists and other array-likes are accepted too
    yt = np.asarray(y_true).astype(bool)
    yp = np.asarray(y_pred).astype(bool)

    # ---- Safety check: shapes must match ----
    # If y_true was generated with shapefile_to_mask(shp_path, ref_raster_path=path_img1),
    # path_img1 being the first image given to the main, both masks should
    # have the same size.
    if yt.shape != yp.shape:
        raise ValueError(
            f"[ERROR] Masks have different shapes: y_true{yt.shape}, y_pred{yp.shape}."
        )

    tp = int(np.count_nonzero(yt & yp))
    tn = int(np.count_nonzero(~yt & ~yp))
    fp = int(np.count_nonzero(~yt & yp))
    fn = int(np.count_nonzero(yt & ~yp))

    return tp, tn, fp, fn

def metrics_from_masks(y_true: np.ndarray, y_pred: np.ndarray) -> dict:
    """
    Compare a predicted mask with a reference mask.

    Returns a dict with the confusion counts ("tp", "tn", "fp", "fn") and the
    scores "mcc", "kappa", "F1", "precision", "recall" and "accuracy".
    A small epsilon is added to the denominators to avoid divisions by zero.
    """
    counts = confusion_from_masks(y_true, y_pred)
    # floats avoid integer overflow in the products below (e.g. the MCC denominator)
    tp, tn, fp, fn = (float(count) for count in counts)
    total = tp + tn + fp + fn

    eps = 1e-9  # guards every division against a zero denominator

    precision = tp / (tp + fp + eps)
    recall = tp / (tp + fn + eps)
    f1 = 2 * precision * recall / (precision + recall + eps)
    accuracy = (tp + tn) / (total + eps)

    # Matthews Correlation Coefficient
    mcc_numerator = (tp * tn) - (fp * fn)
    mcc_denominator = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn) + eps)
    mcc = mcc_numerator / mcc_denominator

    # Cohen's Kappa: observed agreement (= accuracy) against chance agreement
    observed = accuracy
    by_chance = ((tp + fp) * (tp + fn) + (tn + fp) * (tn + fn)) / (total**2 + eps)
    kappa = (observed - by_chance) / (1 - by_chance + eps)

    report = {
        "tp": int(tp),
        "tn": int(tn),
        "fp": int(fp),
        "fn": int(fn),
        "mcc": float(mcc),
        "kappa": float(kappa),
        "F1": float(f1),
        "precision": float(precision),
        "recall": float(recall),
        "accuracy": float(accuracy)
    }
    return report


# Tests

shp_path = r"C:\Users\gbonlieu\Documents\herramienta\change_detection_tool\data\preprocessed\t9\t9_grd_truth\t9_grd_truth.shp"
ref_raster_path = r"C:\Users\gbonlieu\Documents\herramienta\change_detection_tool\data\preprocessed\t9\pre.tif"
out_path = r"C:\Users\gbonlieu\Documents\codepythonoutil\outil_detection_changement\ouputs\output_vrac\t3bis_out\testshp.tif"

# Rasterize the ground truth on the grid of the reference raster
truth = shapefile_to_mask(shp_path=shp_path, ref_raster_path=ref_raster_path) # out_path=out_path)
nb_whites = truth.sum()
nb_blacks = truth.size - nb_whites
print (f"Number of whites: {nb_whites}\nNumber of blacks: {nb_blacks}")
fig, ax = plt.subplots(figsize=(6, 6))
im = ax.imshow(
    truth,
    cmap="gray",
    interpolation="nearest",  # no smoothing
    vmin=0,                   # values 0/1 clearly separated
    vmax=1,
)
ax.set_title("Truth")
fig.colorbar(im, ax=ax, label="Pixel Values")


# NOTE(review): path_img1/path_img2 are whatever an earlier cell defined; the
# shapes only match if they describe the same area as ref_raster_path - confirm.
res = metrics_from_masks(truth, main_dtod_0(path_img1, path_img2, n=1, k=1.18, p=25, d=0.38, a=2889)[1])
for (k, v) in res.items():
    print(k, ":", v)



In [None]:
########## MAIN_UNION0 ##########


def main_union0(
    configs: list[dict],
    shp_path: str,
):
    """
    Run main_dtod_0 once per configuration and merge the masks by logical OR.

    The reference images (path_img1) of all configurations must share the
    same CRS, transform, width and height. The ground truth is rasterized on
    the first reference image, metrics are computed on the union mask, and
    the union is displayed with the metrics and the parameters of each run.

    Each element of `configs` must contain at least:
      - "path_img1": str
      - "path_img2": str
      - "n": int
      - "k": float
      - "p": float
      - "d": float
      - "a": float
      - "closing": bool

    Parameters
    ----------
    configs : list[dict]
        List of dictionaries describing each run.
    shp_path : str
        Path to the ground-truth shapefile.

    Returns
    -------
    dict
        {
            "union_mask": np.ndarray (uint8, 0/1),
            "metrics": dict returned by metrics_from_masks,
            "configs": list[dict] (the configurations used)
        }

    Raises
    ------
    ValueError
        If `configs` is empty or if a reference image does not match the
        profile of the first one.
    """

    if not configs:
        raise ValueError("The 'configs' list is empty: nothing to process.")

    # --- 1) Rasterize ground truth on the first image ---
    mask_ref = shapefile_to_mask(
        shp_path=shp_path,
        ref_raster_path=configs[0]["path_img1"],
    )

    cfg_keys = ("path_img1", "path_img2", "n", "k", "p", "d", "a", "closing")
    geo_keys = ("crs", "transform", "width", "height")

    ref_profile = None       # geo-referencing of the first reference image
    union_mask_bool = None   # running OR of all masks
    params_text_lines = []   # one legend line per run

    for idx, cfg in enumerate(configs, start=1):
        path_img1, path_img2, n, k, p, d, a, closing = (cfg[key] for key in cfg_keys)

        # --- 2) Check profiles (CRS, transform, size) ---
        with rasterio.open(path_img1) as src:
            profile = {
                "crs": src.crs,
                "transform": src.transform,
                "width": src.width,
                "height": src.height,
            }

        if ref_profile is None:
            ref_profile = profile
        elif any(profile[key] != ref_profile[key] for key in geo_keys):
            raise ValueError(
                f"[ERROR] Profile of image {path_img1} "
                f"does not match the reference profile.\n"
                f"Reference profile : {ref_profile}\n"
                f"Current profile   : {profile}"
            )

        # --- 3) Apply main_dtod_0 with this configuration ---
        # main_dtod_0 returns (profile, mask): only the mask is needed here
        result = main_dtod_0(
            path_img1,
            path_img2,
            n,
            k=k,
            closing=closing,
            p=p,
            d=d,
            a=a,
        )[1]

        # 0/1 -> False/True
        mask_bool = (np.asarray(result) != 0)

        # --- 4) Logical union of masks ---
        union_mask_bool = mask_bool if union_mask_bool is None else union_mask_bool | mask_bool

        # One line of text describing this configuration (for the legend)
        params_text_lines.append(
            f"Run {idx}: n={n}, k={k:.2f}, p={p:.1f}, d={d:.2f}, a={a:.0f}, closing={closing}"
        )

    # --- 5) Convert to uint8 (0/1) ---
    union_mask = union_mask_bool.astype(np.uint8)

    # --- 6) Compute global metrics on the union mask ---
    mets = metrics_from_masks(mask_ref, union_mask)

    # --- 7) Display ---
    h, w = union_mask.shape
    fig, ax = plt.subplots(figsize=(w / 100, h / 100))

    ax.imshow(union_mask, cmap="gray")
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title("Union of binary masks", fontsize=11)

    # Metrics text, placed just below the image
    txt_metrics = (
        f"MCC={mets['mcc']:.2f}  "
        f"F1={mets['F1']:.2f}  "
        f"P={mets['precision']:.2f}  "
        f"R={mets['recall']:.2f}  "
        f"Acc={mets['accuracy']:.2f}"
    )
    ax.text(0.5, -0.08, txt_metrics, transform=ax.transAxes, ha="center", va="top", fontsize=8)

    # Parameters text, one line per run, below the metrics
    txt_params = "\n".join(params_text_lines)
    ax.text(0.5, -0.25, txt_params, transform=ax.transAxes, ha="center", va="top", fontsize=7)

    plt.tight_layout()
    plt.show()

    return {
        "union_mask": union_mask,
        "metrics": mets,
        "configs": configs,
    }


In [None]:
########## SINGLE TEST & DISPLAY ##########

"""
Small test script to run main_dtod_0() on two rasters and print the result.
"""


def test_main_display(path_img1: str, path_img2: str, shp_path: str,
                      n: int, k: float = 1.0, closing: bool = False,
                      p: int = 30, d: float = 0.5, a: int = 4000,
                      out_path: str | None = None):
    """
    Run main_dtod_0() once and plot its mask annotated with parameters and scores.

    Parameters
    ----------
    path_img1, path_img2 : str
        Paths of the two rasters handed to main_dtod_0(). `path_img1` is also
        the reference raster on which the ground truth is rasterized.
    shp_path : str
        Path of the ground-truth shapefile.
    n, k, closing, p, d, a, out_path
        Forwarded unchanged to main_dtod_0().

    Returns
    -------
    None
        The figure is shown and the raster profile is pretty-printed.
    """
    # Prediction first, then the reference mask, then the scores.
    profile, predicted = main_dtod_0(
        path_img1, path_img2, n,
        k=k, closing=closing, p=p, d=d, a=a, out_path=out_path,
    )
    reference = shapefile_to_mask(shp_path, ref_raster_path=path_img1)
    scores = metrics_from_masks(reference, predicted)

    # Figure geometry: one inch per 100 raster pixels.
    width_px, height_px = profile['width'], profile['height']
    _, axis = plt.subplots(figsize=(width_px / 100, height_px / 100))
    axis.imshow(predicted, cmap="gray")

    # Title carries the parameter set of this run.
    run_params = f"n={n}, k={k}, p={p}, d={d}, a={a}, closing={closing}"
    axis.set_title("Result mask\n\n" + run_params, fontsize=11)

    # Caption under the image: every metric with three decimals.
    labelled_keys = (
        ("MCC", "mcc"),
        ("F1", "F1"),
        ("P", "precision"),
        ("R", "recall"),
        ("Acc", "accuracy"),
    )
    caption = "   ".join(
        f"{label}={scores[key]:.3f}" for label, key in labelled_keys
    )
    axis.text(
        0.5,
        -0.08,
        caption,
        transform=axis.transAxes,
        ha="center",
        va="top",
        fontsize=10,
    )

    plt.show()

    print("Full profile :")
    pprint(profile)


# Inputs of the single-run test: the two rasters and the ground-truth shapefile.
path_img1 = r"C:\Users\gbonlieu\Documents\codepythonoutil\outil_detection_changement\data\raw\testnotebooks\t3bis\t3bis_pre_varVV.tif"
path_img2 = r"C:\Users\gbonlieu\Documents\codepythonoutil\outil_detection_changement\data\raw\testnotebooks\t3bis\t3bis_0208_varVV.tif"
shp_path = r"C:\Users\gbonlieu\Documents\codepythonoutil\outil_detection_changement\data\raw\testnotebooks\t3bis\t3bis_grd_truth\t3bis_grdtruth_0204shp\t3bis_grdtruth_0204.shp"

# NOTE(review): out_path is defined here but not passed to the call below,
# so test_main_display() runs with its default out_path=None.
out_path = r"C:\Users\gbonlieu\Documents\codepythonoutil\outil_detection_changement\ouputs\output_vrac\t3bis_out\main_out.tif"

test_main_display(
    path_img1,
    path_img2,
    shp_path,
    n=1,
    k=1.27,
    closing=False,
    p=27,
    d=0.35,
    a=3000,
)


In [None]:
########## 2D TEST & DISPLAY  ##########



def test_parametres_2D(
    path_img1: str, path_img2: str,
    idx1: int,
    idx2: int,
    list1: list,
    list2: list,
    closing: bool = False,
    n: int = 2,
    k: float = 1.15,
    p: int = 30,
    d: float = 0.5,
    a: int = 4000,
    shp_path: str | None = None,
):
    """
    Run main_dtod_0() on every combination of TWO parameters and plot the results.

    The figure is a grid with one row per value of `list1` and one column per
    value of `list2`; each cell shows the output of main_dtod_0() for that pair.

    Parameters
    ----------
    path_img1, path_img2 : str
        Paths of the two rasters handed to main_dtod_0(). `path_img1` is also
        used for the figure geometry and as reference raster for the ground truth.
    idx1, idx2 : int
        Indices (1 to 5, different from each other) selecting the two explored
        parameters:
        1 -> n, 2 -> k, 3 -> p, 4 -> d, 5 -> a.
    list1, list2 : list
        Values tested for the parameters selected by `idx1` and `idx2`.
        They must be numeric because the subplot titles format them with `.2f`.
    closing : bool, optional
        Forwarded to main_dtod_0().
    n, k, p, d, a : optional
        Values used for the parameters that are NOT explored.
    shp_path : str or None, optional
        Ground-truth shapefile. When given, the metrics returned by
        metrics_from_masks() are written under each subplot.

    Returns
    -------
    None
        The figure is displayed with plt.show().

    Raises
    ------
    ValueError
        If `idx1 == idx2`, if an index is not one of 1..5, or if `list1` or
        `list2` is empty.
    """

    # ---- Mapping index → parameter name ----
    index_to_name = {1: "n", 2: "k", 3: "p", 4: "d", 5: "a"}

    # ---- Basic checks (before any expensive I/O) ----
    if idx1 == idx2:
        raise ValueError("idx1 and idx2 must be different (from 1 to 5).")

    for idx in (idx1, idx2):
        # membership test also rejects in-range non-integers such as 2.5
        if idx not in index_to_name:
            raise ValueError("Indices must be between 1 and 5.")

    # len() rather than truthiness so that array-like inputs are accepted too
    if len(list1) == 0 or len(list2) == 0:
        raise ValueError("list1 and list2 must each contain at least one value.")

    name1 = index_to_name[idx1]
    name2 = index_to_name[idx2]

    # ---- Base parameter values ----
    base_params = {"n": n, "k": k, "p": p, "d": d, "a": a}

    # ---- Ground truth mask (optional) ----
    mask_ref = None
    if shp_path is not None:
        mask_ref = shapefile_to_mask(shp_path=shp_path, ref_raster_path=path_img1)

    # ---- Read raster shape for figure geometry ----
    with rasterio.open(path_img1) as src:
        W = src.width
        H = src.height

    # ---- Grid: rows follow list1, columns follow list2 ----
    nrows = len(list1)
    ncols = len(list2)

    # squeeze=False keeps `axes` 2D even with a single row or column
    fig, axes = plt.subplots(
        nrows,
        ncols,
        squeeze=False,
        figsize=(ncols * (W / 100), nrows * (H / 100)),
    )

    # ---- Main loop over combinations (same order as product(list1, list2)) ----
    for row, val1 in enumerate(list1):
        for col, val2 in enumerate(list2):
            ax = axes[row, col]

            # start with base parameters, then override the two explored ones
            params = base_params.copy()
            params[name1] = val1
            params[name2] = val2

            try:
                # call main algorithm; element 1 of its return value is the mask to display
                result = main_dtod_0(
                    path_img1,
                    path_img2,
                    n=params["n"],
                    k=params["k"],
                    closing=closing,
                    p=params["p"],
                    d=params["d"],
                    a=params["a"],
                )[1]

                ax.imshow(result, cmap="gray")

                # title with the two explored parameters
                ax.set_title(
                    f"{name1}={val1:.2f}, {name2}={val2:.2f}, closing={closing}",
                    fontsize=15,
                )

                # metrics if ground truth provided
                if mask_ref is not None:
                    mets = metrics_from_masks(mask_ref, result)
                    txt = (
                        f"MCC={mets['mcc']:.3f} "
                        f"F1={mets['F1']:.3f} "
                        f"P={mets['precision']:.3f} "
                        f"R={mets['recall']:.3f} "
                        f"Acc={mets['accuracy']:.3f} "
                    )
                    ax.text(0.5, -0.08, txt, transform=ax.transAxes, ha="center", va="top", fontsize=15)

            except Exception as e:
                # best effort: report the failing combination and keep exploring the others
                print("\n=== ERROR DURING PARAMETER COMBINATION ===")
                print(f"Combination : {name1}={val1}, {name2}={val2}")
                print("Message court :", e)
                print("\nTraceback complet :")
                traceback.print_exc()
                # hide the frame so a failed cell is not mistaken for an empty result
                ax.axis("off")

    plt.tight_layout()
    plt.show()

# Tests
    
# Inputs of the 2D exploration: the two rasters and the ground-truth shapefile.
path_img1 = r"C:\Users\gbonlieu\Documents\codepythonoutil\outil_detection_changement\data\raw\testnotebooks\t3bis\t3bis_pre_varVV.tif"
path_img2 = r"C:\Users\gbonlieu\Documents\codepythonoutil\outil_detection_changement\data\raw\testnotebooks\t3bis\t3bis_0208_varVV.tif"
shp_path = r"C:\Users\gbonlieu\Documents\codepythonoutil\outil_detection_changement\data\raw\testnotebooks\t3bis\t3bis_grd_truth\t3bis_grdtruth_0204shp\t3bis_grdtruth_0204.shp"

# Candidate values per parameter; only list_d and list_k are used by the call below.
list_n = list(map(int, np.linspace(1, 7, 7)))
list_k = list(np.linspace(1, 1.4, 5))
list_p = list(map(round, np.linspace(10, 40, 10)))
list_d = list(np.linspace(0.3, 0.55, 5))
list_a = list(map(round, np.linspace(500, 4500, 30)))

# Explore d (index 4) along the rows and k (index 2) along the columns.
test_parametres_2D(
    path_img1, path_img2,
    idx1=4, idx2=2,
    list1=list_d, list2=list_k,
    closing=False,
    n=1, p=27.0, a=2900,
    shp_path=shp_path,
)

In [None]:
########## GRID SEARCH ##########

def grid_search_best_params(
    path_img1: str,
    path_img2: str,
    list_n: list[int],
    list_k: list[float],
    list_p: list[int],
    list_d: list[float],
    list_a: list[int],
    closing: bool = False,
    shp_path: str | None = None,
    top: int = 3,
    mcc_ref: float = 0.7
):
    """
    Explores all possible parameter combinations (n, k, p, d, a),
    evaluates metrics against ground truth, and:

      - returns the top 3 tuples for MCC, precision (with MCC>mcc_ref), recall (with MCC>mcc_ref)
      - displays a 3*3 figure: top 3 MCC, top 3 Precision, top 3 Recall

    Each subplot shows:
      - the binary output of main_dtod(...)
      - title: values of n, k, p, d, a
      - text under image: MCC, F1, P, R, Acc
    """

    if shp_path is None:
        raise ValueError("shp_path cannot be None: ground truth is required ")

    # ----- 1) Ground truth rasterized on reference image -----
    mask_ref = shapefile_to_mask(shp_path=shp_path, ref_raster_path=path_img1)

    # ----- 2) Read raster to get dimensions -----
    with rasterio.open(path_img1) as src:
        w = src.width
        h = src.height

    # ----- 3) Build combinations -----
    all_combos = list(itertools.product(list_n, list_k, list_p, list_d, list_a))

    perf = []  # perf is a list of dict, each dict stores params, metrics, result

    # ----- 4) Loop over all combinations -----
    for (n, k, p, d, a) in all_combos:
        try:
            # main algorithm call
            result = main_dtod_0(
                path_img1,
                path_img2,
                n=n,
                k=k,
                closing=closing,
                p=p,
                d=d,
                a=a
            )[1]

            mets = metrics_from_masks(mask_ref, result)

            perf.append({
                "params": {"n": n, "k": k, "p": p, "d": d, "a": a},
                "metrics": mets,
                "result": result,
            })

        except Exception as e:
            # display error message for this combination
            print(f"[WARNING] Skipped combination (n={n}, k={k}, p={p}, d={d}, a={a}) "
                  f"due to error: {e}")
            continue

    if not perf:  # list perf empty
        raise RuntimeError("No valid combination produced a result.")

    # ----- 5) Sort by each metric -----
    def sort_by(metric_name: str):
        '''
        sorted sorts perf, it needs a key= to know what to compare on 
        lambda takes a dict r from perf and returns r["metrics"][metric_name] (in our case mcc or precision or recall)
        perf is thus sorted by values of metric_name (mcc, precision, recall)
        ''' 
        return sorted(perf, key=lambda r: r["metrics"][metric_name], reverse=True)
    
    sorted_mcc = sort_by("mcc")
    sorted_prec = sort_by("precision")
    sorted_rec = sort_by("recall")

    # keep top X
    top_mcc = sorted_mcc[:top]
    top_prec = [r for r in sorted_prec if r["metrics"]["mcc"] >= mcc_ref][:top]
    top_rec = [r for r in sorted_rec if r["metrics"]["mcc"] >= mcc_ref][:top]


    # ----- 6) Display figure (3×3: top MCC / top Precision / top Recall) -----
    fig, axes = plt.subplots(3, top, figsize=(top * (w / 100), 3 * (h / 100)))

    axes = np.atleast_2d(axes)  # ensure 2D even for top=1

    def show_row(row_idx: int, selected_results, row_title: str):
        '''
        displays the result of each line (line 1: mcc, line 2: precision, line 3: recall)
        selected_results will thus be top_mcc, top_prec, top_prec
        '''
        for i, res in enumerate(selected_results):  # enumerate() lets you iterate over a list while keeping the current index
            ax = axes[row_idx, i]
            img = res["result"]
            params = res["params"]
            mets = res["metrics"]

            ax.imshow(img, cmap="gray")

            ax.set_title(f"{row_title} #{i+1}\n"
                f"n={params['n']}, k={params['k']:.2f}, p={params['p']}, d={params['d']:.2f}, a={params['a']}",
                fontsize=15)

            txt = (
                f"MCC={mets['mcc']:.3f} "
                f"F1={mets['F1']:.3f} "
                f"P={mets['precision']:.3f} "
                f"R={mets['recall']:.3f} "
                f"Acc={mets['accuracy']:.3f}"
            )

            ax.text(
                0.5,
                -0.08,
                txt,
                transform=ax.transAxes,
                ha="center",
                va="top",
                fontsize=15,
            )

        # hide empty axes
        for j in range(len(selected_results), top):
            axes[row_idx, j].axis("off")

    show_row(0, top_mcc, "Top MCC")
    show_row(1, top_prec, "Top Precision")
    show_row(2, top_rec, "Top Recall")

    plt.tight_layout()
    plt.show()

    # ----- 7) Returned values: best tuples -----
    return {"top_mcc": top_mcc, "top_precision": top_prec, "top_recall": top_rec}

# Tests

# Inputs of the grid search: the two rasters and the ground-truth shapefile.
path_img1 = r"C:\Users\gbonlieu\Documents\codepythonoutil\outil_detection_changement\data\raw\testnotebooks\t3bis\t3bis_pre_varVV.tif"
path_img2 = r"C:\Users\gbonlieu\Documents\codepythonoutil\outil_detection_changement\data\raw\testnotebooks\t3bis\t3bis_0208_varVV.tif"

shp_path = r"C:\Users\gbonlieu\Documents\codepythonoutil\outil_detection_changement\data\raw\testnotebooks\t3bis\t3bis_grd_truth\t3bis_grdtruth_0204shp\t3bis_grdtruth_0204.shp"

# Candidate values per parameter; the search explores their Cartesian product.
list_n = list(map(int, np.linspace(1, 3, 3)))
list_k = list(np.linspace(1, 1.4, 5))
list_p = list(map(round, np.linspace(20, 40, 4)))
list_d = list(np.linspace(0.2, 0.6, 4))
list_a = list(map(round, np.linspace(3000, 3000, 1)))

best = grid_search_best_params(
    path_img1,
    path_img2,
    list_n=list_n,
    list_k=list_k,
    list_p=list_p,
    list_d=list_d,
    list_a=list_a,
    closing=False,
    shp_path=shp_path,
    top=3,
    mcc_ref=0.6,
)

# Example: inspect the best-ranked combination for MCC.
best_mcc_1 = best["top_mcc"][0]
print("Best MCC :", best_mcc_1["metrics"]["mcc"])
print("Params :", best_mcc_1["params"])
