## LEOHS Apply Harmonization XGB Tool
This script applies harmonization equations to either a single multiband Landsat image or a directory containing multiband Landsat images.

https://doi.org/10.1080/10106049.2025.2538108

Coded by Galen Richardson with the assistance of ChatGPT 5 on 2/4/2026.

This code can also skip applying XGB harmonization to water pixels; to do so, set `Water_mask=True`.

In [5]:
#python imports
from pathlib import Path
import numpy as np, rasterio, pickle, gc
from joblib import Parallel, delayed
import xgboost as xgb

In [6]:
def _load_xgb_models(harm_dir, model_fns):
    """Unpickle one model per file name in ``model_fns`` from ``harm_dir``, closing every file."""
    models = []
    for fn in model_fns:
        # SECURITY NOTE: unpickling executes arbitrary code. Only point harm_dir at
        # trusted LEOHS output, never at files from an unknown source.
        with open(harm_dir / fn, "rb") as fh:
            models.append(pickle.load(fh))
    return models


def _xgb_predict(model, X):
    """Predict with either a raw ``xgb.Booster`` or an estimator whose ``predict`` takes an array."""
    # A raw Booster also has .predict, so hasattr() cannot tell the two apart;
    # it needs its input wrapped in a DMatrix.
    if isinstance(model, xgb.Booster):
        return model.predict(xgb.DMatrix(X))
    return model.predict(X)


def _non_water_mask(pixels):
    """Return a boolean vector that is True where NDWI < 0 (pixel treated as non-water).

    ``pixels`` is the (n_pixels, 6) matrix of one block; column 1 is read as
    green (band 2) and column 3 as NIR (band 4).
    """
    green = pixels[:, 1]
    nir = pixels[:, 3]
    # Stay in the input precision: in float16 the 1e-10 guard underflows to 0
    # and nearly equal green/NIR values collapse to NDWI == 0.
    with np.errstate(divide="ignore", invalid="ignore"):
        ndwi = (green - nir) / (green + nir + np.float32(1e-10))
    # NaN compares False, so such pixels are not flagged as non-water here;
    # the caller's validity mask already excludes non-finite pixels.
    return ndwi < 0


def _harmonize_block(pixels, nod, models, parallel, Water_mask, eps_zero, clip01):
    """Harmonize one flattened block and return it as a (6, n_pixels) float32 array.

    Pixels that are non-finite, equal to ``nod`` in any band, or (when
    ``eps_zero`` is not None) within ``eps_zero`` of zero in every band are
    written as ``nod``. With ``Water_mask`` the remaining pixels with
    NDWI >= 0 are copied through unchanged; all other remaining pixels are
    replaced by the model predictions, clipped to [0, 1] when ``clip01``.
    """
    n = pixels.shape[0]
    valid = np.isfinite(pixels).all(1) & (pixels != nod).all(1)
    if eps_zero is not None:
        # All-zero pixels are treated as fill. Excluding them before the water
        # split keeps them nodata in both modes (their NDWI is 0, so they would
        # otherwise be passed through as "water").
        valid &= ~(np.abs(pixels) <= eps_zero).all(1)
    out = np.full((6, n), nod, np.float32)
    if Water_mask:
        non_water = _non_water_mask(pixels)
        water = valid & ~non_water   # valid pixels with NDWI >= 0: pass through
        good = valid & non_water     # only non-water pixels go to XGB
        if water.any():
            out[:, water] = pixels[water].T
    else:
        good = valid
    if good.any():
        Xg = pixels[good]
        preds = parallel(delayed(_xgb_predict)(m, Xg) for m in models)
        for band, y in enumerate(preds):
            y = np.asarray(y, np.float32)
            if clip01:
                y = np.clip(y, 0.0, 1.0)  # clip ONLY predictions
            out[band, good] = y
    return out


def apply_harmonization_xgb(in_path, harm_dir, out_prefix="2L8XGB_", Water_mask=False):
    """Apply the six per-band XGB harmonization models to 6-band GeoTIFF(s).

    For every input raster a float32, 6-band output named
    ``out_prefix + <input name>`` is written next to the input, block by block.

    Args:
        in_path: A single raster file, or a directory whose ``*.tif`` /
            ``*.tiff`` files are all processed. In directory mode, files whose
            name already starts with ``out_prefix`` are skipped so that a
            re-run does not harmonize earlier outputs a second time.
        harm_dir: Folder containing the six pickled models
            (``L7toL8_BXGB.pkl`` ... ``L7toL8_SWIR2XGB.pkl``).
        out_prefix: Non-empty prefix for the output file name.
        Water_mask: When True, valid pixels with NDWI >= 0 are copied through
            unchanged instead of being harmonized.

    Raises:
        ValueError: If ``out_prefix`` is empty or an input does not have
            exactly 6 bands.
    """
    model_fns = ("L7toL8_BXGB.pkl","L7toL8_GXGB.pkl","L7toL8_RXGB.pkl","L7toL8_NIRXGB.pkl","L7toL8_SWIR1XGB.pkl","L7toL8_SWIR2XGB.pkl")
    nod_override, eps_zero, clip01 = -1.0, 1e-6, True
    n_jobs, backend = -1, "threading"
    compress, zstd_level, predictor = "zstd", 9, 2

    if not out_prefix:
        # An empty prefix makes the output path equal the input path, which
        # would be opened for writing while it is still being read.
        raise ValueError("out_prefix must be non-empty; otherwise the output would overwrite the input file")

    in_path, harm_dir = Path(in_path), Path(harm_dir)
    is_dir = in_path.is_dir()
    if is_dir:
        found = sorted(in_path.glob("*.tif")) + sorted(in_path.glob("*.tiff"))
        tifs = [fp for fp in found if not fp.name.startswith(out_prefix)]
        skipped = len(found) - len(tifs)
    else:
        tifs, skipped = [in_path], 0

    models = _load_xgb_models(harm_dir, model_fns)
    print(f"[inputs] {'dir' if is_dir else 'file'}: {in_path} | {len(tifs)} tif(s)")
    if skipped:
        print(f"[inputs] skipped {skipped} file(s) already starting with '{out_prefix}'")

    # One worker pool for the whole run instead of a new pool per block.
    with Parallel(n_jobs=n_jobs, backend=backend) as parallel:
        for fp in tifs:
            out_fp = fp.with_name(out_prefix + fp.name)
            with rasterio.open(fp) as src:
                if src.count != 6:
                    raise ValueError(f"Expected 6 bands, got {src.count}: {fp}")
                nod = src.nodata if src.nodata is not None else nod_override
                prof = src.profile.copy()
                prof.update(count=6, dtype="float32", nodata=nod, BIGTIFF="YES")
                if compress:
                    by, bx = src.block_shapes[0]
                    prof.update(compress=compress, tiled=True, blockxsize=bx, blockysize=by)
                    if str(compress).lower() == "zstd":
                        prof.update(zstd_level=zstd_level)
                    if predictor is not None:
                        prof.update(predictor=predictor)
                print(f"\n[file] {fp.name} -> {out_fp.name}")
                print(f"[info] {src.width}x{src.height} nod={nod} block={src.block_shapes[0]}")
                total_blocks = sum(1 for _ in src.block_windows(1))
                # Report progress roughly every 10% of the blocks.
                step, next_mark, last_pct = max(1, total_blocks//10), 0, -10
                with rasterio.open(out_fp, "w", **prof) as dst:
                    for wi, (_, win) in enumerate(src.block_windows(1), 1):
                        X = src.read([1, 2, 3, 4, 5, 6], window=win).astype(np.float32, copy=False)
                        # Flatten to (n_pixels, 6): one row per pixel, one column per band.
                        out = _harmonize_block(X.reshape(6, -1).T, nod, models, parallel,
                                               Water_mask, eps_zero, clip01)
                        dst.write(out.reshape(6, win.height, win.width), window=win)
                        # Block arrays hold no reference cycles, so they are freed
                        # when rebound; no explicit gc.collect() is needed per block.
                        del X, out
                        if wi >= next_mark:
                            pct10 = min(100, (int((wi/total_blocks)*100)//10)*10)
                            if pct10 > last_pct:
                                print(f"[progress] {pct10}%  blocks={wi}/{total_blocks}")
                                last_pct = pct10
                            next_mark += step
                print(f"Wrote={out_fp}")

In [None]:
# Example run: harmonize one Landsat image (a directory path works as well).
Landsat_image_or_dir = r'E:\GIS\Lichen_work_master\Landsat_Datasets_models\HBL\c_ALBHBL_Med5_2018_2022ETM.tif'
# The harmonization folder is the LEOHS output that contains the XGB models.
harmonization_folder = r'E:\GIS\Lichen_work_master\Harmonization\HBL_AOI'
apply_harmonization_xgb(
    Landsat_image_or_dir,
    harmonization_folder,
    Water_mask=False,
)

[inputs] file: E:\GIS\Lichen_work_master\Landsat_Datasets_models\HBL\c_ALBHBL_Med5_2018_2022ETM.tif | 1 tif(s)

[file] c_ALBHBL_Med5_2018_2022ETM.tif -> 2L8XGB_c_ALBHBL_Med5_2018_2022ETM.tif
[info] 44575x31734 nod=-1.0 block=(512, 512)
[progress] 0%  blocks=1/5456
