# Living Earth for Land Degradation
L4 layers pre-processing
---

This notebook provides an interactive workflow to preprocess L4 layers, i.e. Environmental Descriptors (ED), for the implementation of the Living Earth for Land Degradation pipeline.


Authors: Audrey Lambiel, Mona Bonnier, Carole Planque, Gregory Giuliani

contact: audrey.lambiel@unige.ch

----
**Inputs**
- Rules JSON : defines per-layer preprocessing.
- AOI file : contains min_x, max_x, min_y, max_y (in degrees), resolution (meters), and crs (EPSG code).
- Input folder: contains source rasters or vectors referenced by the rules JSON.
- Output folder: destination for generated GeoTIFF files.

**What it does**

- *Setting up paths and parameters* (rules JSON, AOI file, input and output directories)
- *Parsing the Area of Interest (AOI)* (extent, resolution, CRS)
- *Applying preprocessing* per layer according to the JSON rules (see table hereafter)
- *Visualizing outputs* with appropriate color maps for continuous and categorical data

| input type       |preprocess       |description       |  output type   |
|---     |---    |---    |---    |
|  cat     |  None   |  No preprocess needed   | cat   |
|  cat     |  reclassification |  Reclassified according to a dictionary |cat    |
|  con     |  None |  No preprocess needed   |con    |
|  con     |  reclassification  |  Reclassified according to a dictionary |cat    |
|  con     |  formula  |  Apply a numpy formula, e.g. multiplied by a factor   |con    |
|  con     |  focal  |  Apply a focal function, i.e. sum   |con    |


**How it works**

Users should first carefully read 'l4_layers_LE4LD_modified.py' and modify it if needed (e.g. change the number of classes or the type of a layer, categorical or continuous, ...). To do so, use the **custom_l4_layers_LE4LD_modified.ipynb** notebook. 

Once this step is done, make sure that the rules file (specified under 'json_path') is up to date, or create it if needed. 

Then:
- update the paths and parameters in the setup below
- run the AOI parsing cell
- run the processing cell to execute all layer rules
- inspect the plots for each output to verify preprocess results

**Requirements**

Please ensure the following Python packages are installed in your environment:

- `numpy`
- `rasterio`
- `geopandas`
- `pyproj`
- `matplotlib`
- `earthpy`
- `scipy`

> If you use Conda, a typical environment might be created with:
```bash
conda create -n le4ld python=3.13.9 numpy rasterio geopandas pyproj matplotlib earthpy scipy
conda activate le4ld
```

---
## Set up


In [None]:
# modules and libraries
import os
import re
import json
import numpy as np
import geopandas as gpd
import rasterio
from pyproj import Transformer
from rasterio.transform import from_origin
from rasterio.features import rasterize
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import earthpy.plot as ep
from scipy.ndimage import convolve

In [None]:
# User configuration: edit the values below before running the notebook.
# Rules JSON loaded by the processing cell (one entry per layer).
json_path = "C:/monalisa/rules.json"  # Update as needed
# AOI text file passed to parse_aoi().
aoi_path = "C:/monalisa/aoi/Asterousia.txt"  # Update as needed
# Folder prepended to each rule's "filename_with_ext" to locate the source layer.
input_path = "C:/monalisa/input_test" # Update as needed
# Folder where the "<layer_name>.tif" outputs are written.
output_path = "C:/monalisa/l4_asterousia"  # Update as needed
# Value recorded as nodata in the exported rasters.
nodata_val = 0

# Ensure output directory exists
os.makedirs(output_path, exist_ok=True)

---
## Process


In [None]:
# Define custom functions
def parse_aoi(aoi_file):
    """
    Parse an AOI text file and return its extent in the target CRS.

    The file is read line by line. Recognised lines start with:
      - ``min_x``, ``max_x``, ``min_y``, ``max_y``: ``key: value`` in degrees
      - ``resolution``: one or two numbers (integers or decimals); the sign is
        dropped, and a single number is used for both axes
      - ``crs``: any text containing ``EPSG:<code>``

    The extent corners are reprojected from EPSG:4326 to the CRS read from
    the file.

    Parameters:
        aoi_file: path to the AOI text file.

    Returns:
        (extent_m, resolution, crs) where extent_m is a dict with keys
        min_x, max_x, min_y, max_y in target-CRS units, resolution is a list
        of positive floats and crs is the "EPSG:<code>" string.
        On any failure a message is printed and (None, None, None) is
        returned, so callers must check for None.
    """
    extent_keys = ("min_x", "max_x", "min_y", "max_y")
    extent = {}
    resolution = None
    crs = None

    try:
        with open(aoi_file, "r") as f:
            for line in f:
                line = line.strip()
                bound = next((k for k in extent_keys if line.startswith(k)), None)
                if bound is not None:
                    extent[bound] = float(line.split(":")[1])
                elif line.startswith("resolution"):
                    # Only look at the value part, and keep decimals together
                    # ("10.5" must not be read as 10 and 5).
                    value_part = line.split(":", 1)[-1]
                    nums = re.findall(r"-?\d+(?:\.\d+)?", value_part)
                    resolution = [abs(float(n)) for n in nums]  # Ensure positive values
                    if len(resolution) == 1:
                        # A single value means square pixels.
                        resolution = resolution * 2
                elif line.startswith("crs"):
                    match = re.search(r'EPSG:\d+', line)
                    if match:
                        crs = match.group(0)

        # Every bound is required, not just "at least one".
        missing_bounds = [k for k in extent_keys if k not in extent]
        if missing_bounds or not resolution or not crs:
            raise ValueError("AOI file is missing required fields (extent, resolution, or CRS).")

        # Convert extent from degrees to the target CRS using pyproj
        transformer = Transformer.from_crs("EPSG:4326", crs, always_xy=True)
        min_x_m, min_y_m = transformer.transform(extent["min_x"], extent["min_y"])
        max_x_m, max_y_m = transformer.transform(extent["max_x"], extent["max_y"])
        extent_m = {"min_x": min_x_m, "max_x": max_x_m, "min_y": min_y_m, "max_y": max_y_m}

        return extent_m, resolution, crs

    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller decide.
        print(f"AOI parsing failed: {e}")
        return None, None, None


def process_cat_reclass(layer_name, rule, preprocess, nodata_val, layer_path, output_path):
    """
    Reclassify a categorical raster according to the JSON rule.

    Band 1 of ``layer_path`` is read, each pixel value is looked up in
    ``rule["original_classes"]`` (value -> class name) and then in
    ``rule["reclass_dict"]`` (class name -> new class label such as "D120").
    The integer written to the output is the label without its first
    character. Pixels with no mapping keep ``nodata_val``.

    Parameters:
        layer_name: name of the layer; the output is "<output_path>/<layer_name>.tif".
        rule: rule dict with "original_classes" and "reclass_dict".
        preprocess: preprocess name, used only in the printed message.
        nodata_val: value written for nodata / unmapped pixels.
        layer_path: path of the source raster.
        output_path: destination folder.

    Returns:
        Path of the written int16 GeoTIFF.
    """
    original_classes = {int(k): v for k, v in rule["original_classes"].items()}
    reclass_dict = rule["reclass_dict"]
    out_path = f"{output_path}/{layer_name}.tif"

    # Source pixel value -> new class label (nodata_val when the class is not remapped)
    value_to_new = {
        val: reclass_dict.get(name, nodata_val)
        for val, name in original_classes.items()
    }

    with rasterio.open(layer_path) as src:
        band = src.read(1)
        if src.nodata is not None:
            band[band == src.nodata] = nodata_val

        # Start from nodata_val (not 0) so unmapped pixels are really flagged
        # as nodata even when nodata_val is non-zero.
        reclass_int = np.full(band.shape, nodata_val, dtype="int16")
        for val in np.unique(band[~np.isnan(band)]):
            new_cls = value_to_new.get(int(val), nodata_val)
            if isinstance(new_cls, str):
                # "D120" -> 120
                reclass_int[band == val] = int(new_cls[1:])

        meta = src.meta.copy()
        meta.update({"dtype": "int16", "count": 1, "nodata": nodata_val})

        with rasterio.open(out_path, "w", **meta) as dst:
            dst.write(reclass_int, 1)

    print(f"{layer_name} processed ({preprocess}) and exported.")
    return out_path


def process_con_reclass(layer_name, rule, preprocess, nodata_val, layer_path, output_path):
    """
    Reclassify a continuous raster into classes according to the JSON rule.

    Each entry of ``rule["reclass_rules"]`` has a "class" label (e.g. "D120")
    and inclusive "min" / "max" bounds; a bound given as "None" (or JSON null)
    is open. Rules are applied in order, so a later rule overrides an earlier
    one where ranges overlap. The integer written to the output is the label
    without its first character.

    Pixels equal to the source nodata value are never classified and are
    written as ``nodata_val``, as are pixels matched by no rule.

    Parameters:
        layer_name: name of the layer; the output is "<output_path>/<layer_name>.tif".
        rule: rule dict with "reclass_rules".
        preprocess: preprocess name, used only in the printed message.
        nodata_val: value written for nodata / unclassified pixels.
        layer_path: path of the source raster.
        output_path: destination folder.

    Returns:
        Path of the written single-band int16 GeoTIFF.
    """
    rules_list = rule["reclass_rules"]
    out_path = f"{output_path}/{layer_name}.tif"

    def _is_open(bound):
        # The rules file spells an open bound as the string "None"; accept null too.
        return bound is None or bound == "None"

    with rasterio.open(layer_path) as src:
        data = src.read(1)

        # Keep source nodata pixels out of every range test; otherwise they
        # would be classified like ordinary values.
        if src.nodata is not None:
            valid = data != src.nodata
        else:
            valid = np.ones(data.shape, dtype=bool)

        reclass_int = np.full(data.shape, nodata_val, dtype="int16")
        for rule_entry in rules_list:
            cls = rule_entry["class"]
            low, high = rule_entry["min"], rule_entry["max"]
            open_low, open_high = _is_open(low), _is_open(high)
            if open_low and open_high:
                # No bound at all: the rule selects nothing.
                continue

            mask = valid.copy()
            if not open_low:
                mask &= data >= low
            if not open_high:
                mask &= data <= high
            if not mask.any():
                continue

            # "D120" -> 120; a non-string class cannot be coded and stays nodata.
            reclass_int[mask] = int(cls[1:]) if isinstance(cls, str) else nodata_val

        meta = src.meta.copy()
        # count=1: only one band is written, whatever the source holds.
        meta.update(dtype="int16", count=1, nodata=nodata_val)

        with rasterio.open(out_path, "w", **meta) as dst:
            dst.write(reclass_int, 1)

    print(f"{layer_name} processed ({preprocess}) and exported.")
    return out_path


def process_con_formula(layer_name, rule, preprocess, nodata_val, layer_path, output_path):
    """
    Apply the numpy formula defined in the JSON rule to a continuous raster.

    ``rule["expression"]`` is evaluated with ``np`` (numpy) and ``data``
    (band 1 as a float array) in scope and must return an array of the same
    shape as ``data``. Pixels that are nodata in the source are written as
    ``nodata_val`` instead of the formula result.

    Parameters:
        layer_name: name of the layer; the output is "<output_path>/<layer_name>.tif".
        rule: rule dict with "expression".
        preprocess: preprocess name, used only in the printed message.
        nodata_val: value written for nodata pixels and recorded as nodata.
        layer_path: path of the source raster.
        output_path: destination folder.

    Returns:
        Path of the written single-band float32 GeoTIFF, or None when the
        rule has no expression.

    Raises:
        ValueError: if the expression result does not have the raster's shape.
    """
    expression = rule.get("expression")
    if not expression:
        print(f"{layer_name} has no formula expression, skipped.")
        return None

    out_path = f"{output_path}/{layer_name}.tif"
    with rasterio.open(layer_path) as src:
        raw = src.read(1)

        # Remember nodata pixels before the formula changes their value.
        if src.nodata is None:
            nodata_mask = np.zeros(raw.shape, dtype=bool)
        elif np.isnan(src.nodata):
            nodata_mask = np.isnan(raw)
        else:
            nodata_mask = raw == src.nodata

        data = raw.astype(float)

        # SECURITY NOTE: eval() executes arbitrary Python taken from the rules
        # file. Only run this with rules files you trust.
        result = eval(expression, {"np": np, "data": data})

        # np.array always copies, so the nodata write below cannot touch `data`.
        result = np.array(result, dtype="float32")
        if result.shape != data.shape:
            raise ValueError(
                f"{layer_name}: expression result has shape {result.shape}, "
                f"expected {data.shape}."
            )
        result[nodata_mask] = nodata_val

        meta = src.meta.copy()
        # count=1: only one band is written, whatever the source holds.
        meta.update(dtype="float32", count=1, nodata=nodata_val)

        with rasterio.open(out_path, "w", **meta) as dst:
            dst.write(result, 1)

    print(f"{layer_name} processed ({preprocess}) and exported.")
    return out_path


def process_con_focal(layer_name, rule, preprocess, nodata_val, layer_path, output_path):
    """
    Apply a focal sum (moving-window sum) to a continuous raster.

    The window is a square of ``rule["window_m2"]`` square metres; its side in
    pixels is ``sqrt(window_m2) / rule["resolution_m"]`` rounded to the
    nearest integer (at least 1). Source nodata pixels count as 0 in the sum,
    as do positions outside the raster.

    Parameters:
        layer_name: name of the layer; the output is "<output_path>/<layer_name>.tif".
        rule: rule dict with "resolution_m" and "window_m2".
        preprocess: preprocess name, used only in the printed message.
        nodata_val: value recorded as nodata in the output.
        layer_path: path of the source raster.
        output_path: destination folder.

    Returns:
        Path of the written single-band float32 GeoTIFF.

    Raises:
        ValueError: if "resolution_m" or "window_m2" is missing or not positive.
    """
    resolution = rule.get("resolution_m")
    window_area = rule.get("window_m2")
    if resolution is None or window_area is None:
        raise ValueError(f"{layer_name}: focal rule requires 'resolution_m' and 'window_m2'.")
    if resolution <= 0 or window_area <= 0:
        raise ValueError(f"{layer_name}: 'resolution_m' and 'window_m2' must be positive.")

    side_length = np.sqrt(window_area)
    # A window smaller than one pixel would give an empty kernel; use 1 pixel.
    window_size = max(1, int(np.round(side_length / resolution)))

    out_path = f"{output_path}/{layer_name}.tif"
    with rasterio.open(layer_path) as src:
        data = src.read(1).astype(float)
        if src.nodata is not None:
            data[data == src.nodata] = 0

        kernel = np.ones((window_size, window_size), dtype=float)
        result = convolve(data, kernel, mode="constant", cval=0.0)

        meta = src.meta.copy()
        # count=1: only one band is written, whatever the source holds.
        meta.update(dtype="float32", count=1, nodata=nodata_val)

        with rasterio.open(out_path, "w", **meta) as dst:
            dst.write(result.astype("float32"), 1)

    print(f"{layer_name} processed ({preprocess}, window={window_area} m²) and exported.")
    return out_path


def process_vector_con_focal(layer_name, rule, preprocess, nodata_val, layer_path, output_path, extent, resolution, crs):
    """
    Rasterize vector data (points or lines) and compute a density with a focal sum.

    The vector layer is reprojected to ``crs`` and burned onto a grid built
    from ``extent`` and ``resolution``. Values landing in the same pixel are
    added together. The grid is then summed over a moving window of
    ``rule["window_m2"]`` square metres and divided by that area.

    Parameters:
        layer_name: name of the layer; the output is "<output_path>/<layer_name>.tif".
        rule: rule dict with "window_m2".
        preprocess: preprocess name (not used here).
        nodata_val: value recorded as nodata in the output.
        layer_path: path of the vector file.
        output_path: destination folder.
        extent: dict with min_x, max_x, min_y, max_y in ``crs`` units.
        resolution: [pixel_size_x, pixel_size_y]; signs are ignored.
        crs: target CRS, e.g. "EPSG:32635".

    Returns:
        Path of the written single-band float32 GeoTIFF.

    Raises:
        ValueError: if the window, the resolution or the resulting grid size is
            invalid, or if the geometry type is not Point / LineString /
            MultiLineString.
    """
    resolution_x, resolution_y = (abs(r) for r in resolution)
    if resolution_x == 0 or resolution_y == 0:
        raise ValueError("Resolution must be non-zero.")

    window_area = rule.get("window_m2")
    if window_area is None or window_area <= 0:
        raise ValueError(f"{layer_name}: focal rule requires a positive 'window_m2'.")

    # Validate the grid before any file is read.
    width = int((extent["max_x"] - extent["min_x"]) / resolution_x)
    height = int((extent["max_y"] - extent["min_y"]) / resolution_y)
    if width <= 0 or height <= 0:
        raise ValueError(f"Invalid grid size: width={width}, height={height}. Check AOI extent and resolution.")
    transform = from_origin(extent["min_x"], extent["max_y"], resolution_x, resolution_y)

    # Read vector data; missing or empty geometries cannot be burned.
    gdf = gpd.read_file(layer_path).to_crs(crs)
    geoms = [geom for geom in gdf.geometry if geom is not None and not geom.is_empty]

    if not geoms:
        # Nothing to burn: density is zero everywhere.
        base_raster = np.zeros((height, width), dtype="float32")
    else:
        # As before, the first geometry decides how the whole layer is weighted.
        first_type = geoms[0].geom_type
        if first_type == "Point":
            # Each point contributes 1
            shapes = [(geom, 1) for geom in geoms]
        elif first_type in ["LineString", "MultiLineString"]:
            # Each line contributes its length in meters
            # NOTE(review): the full length is burned into every pixel the line
            # crosses, so the windowed sum likely overestimates line length;
            # confirm the intended line-density definition.
            shapes = [(geom, geom.length) for geom in geoms]
        else:
            raise ValueError("Unsupported geometry type for focal density.")

        # Local import: MergeAlg is not among the file-level imports.
        from rasterio.enums import MergeAlg

        # Rasterize: initial grid with counts or lengths. MergeAlg.add makes
        # features sharing a pixel accumulate; the default (replace) would
        # keep only the last one burned.
        base_raster = rasterize(
            shapes=shapes,
            out_shape=(height, width),
            transform=transform,
            fill=0,
            dtype="float32",
            merge_alg=MergeAlg.add,
        )

    # Apply focal statistics for density; kernel rows follow y, columns follow x.
    side_length = np.sqrt(window_area)
    window_cols = max(1, int(np.round(side_length / resolution_x)))
    window_rows = max(1, int(np.round(side_length / resolution_y)))
    kernel = np.ones((window_rows, window_cols), dtype=float)
    result = convolve(base_raster, kernel, mode="constant", cval=0.0)

    # Convert to density per m2
    result = result / window_area

    # Save raster
    out_path = f"{output_path}/{layer_name}.tif"
    meta = {
        "driver": "GTiff",
        "height": height,
        "width": width,
        "count": 1,
        "dtype": "float32",
        "crs": crs,
        "transform": transform,
        "nodata": nodata_val
    }
    with rasterio.open(out_path, "w", **meta) as dst:
        dst.write(result.astype("float32"), 1)

    print(f"{layer_name} processed (vector focal density, window={window_area} m²) and exported.")
    return out_path


def plot_raster(raster_path, ltype):
    """
    Display band 1 of a raster with a colormap suited to its type.

    - ltype "con": continuous colormap (viridis) with a colorbar.
    - ltype "cat": categorical colormap (black for 0, RdYlGn_r for the class
      codes 100 to 200) with a legend listing the values present in the raster.

    Any other ltype shows an empty figure.

    Parameters:
        raster_path: path of the raster to plot.
        ltype: "con" or "cat".
    """
    with rasterio.open(raster_path) as src:
        data = src.read(1)

    plt.figure(figsize=(8,6))

    if ltype =="con":
        cmap = "viridis"
        plt.imshow(data, cmap=cmap)
        plt.colorbar(label="value")
        plt.title(f"{os.path.basename(raster_path)}", fontsize = 14)

    elif ltype=="cat":
        # Distinct values are only needed for the legend, so compute them here
        # (np.unique returns them sorted) rather than for every raster.
        unique_values = np.unique(data)

        # create a normalized color ramp for LDDSI classes (from D100 to D200)
        classes = [f"D{v}" for v in range(100, 201, 1)]
        classes = ["0"] + classes
        palette = plt.colormaps["RdYlGn_r"]
        palette = palette(np.linspace(0, 1, len(classes) - 1))
        colors_list = [np.array([0, 0, 0, 1])] + list(palette)  # 0 = black
        cmap = colors.ListedColormap(colors_list, name="lddsi_colors")
        classes_to_value = {cls: int(cls[1:]) for cls in classes if cls != "0"}
        classes_to_value["0"] = 0
        bounds = sorted(classes_to_value.values())
        norm = colors.BoundaryNorm(bounds, cmap.N)
        class_names_list = [str(v) for v in unique_values]
        
        img = plt.imshow(data, cmap=cmap, norm=norm)
        ep.draw_legend(img, titles=class_names_list)
        plt.title(f"{os.path.basename(raster_path)}", fontsize=14)

    plt.axis("off")
    plt.show()

In [None]:
# Parse AOI file
extent, resolution, crs = parse_aoi(aoi_path)
if extent is None or resolution is None or crs is None:
    raise ValueError("Failed to parse AOI file. Check its format.")

# Main process: load the per-layer rules; on failure report it and process nothing.
try:
    with open(json_path, "r") as f:
        rules = json.load(f)
except (OSError, ValueError) as e:
    # OSError: file missing/unreadable; ValueError covers json.JSONDecodeError.
    print(f"Failed to load JSON rules: {e}")
    rules = {}

# File extensions handled as vector layers by the focal branch.
vector_extensions = (".shp", ".gpkg", ".geojson")

for layer_name, rule in rules.items():
    preprocess = rule.get("preprocess", "None")
    if preprocess is None:
        # JSON null means the same as the string "None".
        preprocess = "None"
    ltype = rule.get("type")

    # Built in two steps: nesting the same quote type inside an f-string is a
    # SyntaxError before Python 3.12.
    filename = rule.get("filename_with_ext")
    if not filename:
        print(f"{layer_name}: no 'filename_with_ext' in rule, skipped.")
        continue
    layer_path = f"{input_path}/{filename}"

    if not os.path.exists(layer_path):
        print(f"Layer path not found: {layer_path}")
        continue

    if preprocess == "None":
        # No preprocessing: copy band 1, only harmonising the nodata value.
        out_path = f"{output_path}/{layer_name}.tif"
        with rasterio.open(layer_path) as src:
            data = src.read(1)
            meta = src.meta.copy()
            if src.nodata is not None:
                data[data == src.nodata] = nodata_val
            # count=1: only one band is written, whatever the source holds.
            meta.update({"nodata": nodata_val, "count": 1})
            with rasterio.open(out_path, "w", **meta) as dst:
                dst.write(data, 1)
        print(f"{layer_name} skipped (no preprocessing) but exported.")
        plot_raster(out_path, ltype)
        continue

    if ltype == "cat" and preprocess == "reclassification":
        path = process_cat_reclass(layer_name, rule, preprocess, nodata_val, layer_path, output_path)
        plot_raster(path, ltype="cat")
    elif ltype == "con" and preprocess == "reclassification":
        path = process_con_reclass(layer_name, rule, preprocess, nodata_val, layer_path, output_path)
        # Reclassifying a continuous layer yields class codes, so plot it as categorical.
        plot_raster(path, ltype="cat")
    elif ltype == "con" and preprocess == "formula":
        path = process_con_formula(layer_name, rule, preprocess, nodata_val, layer_path, output_path)
        if path:
            plot_raster(path, ltype="con")
    elif ltype == "con" and preprocess == "focal":
        if layer_path.lower().endswith(vector_extensions):
            # Vector focal density
            path = process_vector_con_focal(layer_name, rule, preprocess, nodata_val, layer_path, output_path, extent, resolution, crs)
        else:
            # Raster focal
            path = process_con_focal(layer_name, rule, preprocess, nodata_val, layer_path, output_path)
        plot_raster(path, ltype="con")
    else:
        print(f"{layer_name}: Unknown combination type={ltype}, preprocess={preprocess}")
