In [None]:
import rasterio
import numpy as np

# Rewrite a raster as float64 with an explicit nodata value, then reopen the
# result and print summary statistics over the valid pixels.
in_path = "../data/raw/images/S2H_2024_2024_07_29.tif"
out_path = "../data/raw/images/S2H_2024_2024_07_29_nodata.tif"

# Sentinel written into every invalid pixel and declared as the output's
# nodata value. Kept in one place so the profile and the fill cannot drift.
NODATA = -9999.0

with rasterio.open(in_path) as src:
    profile = src.profile
    profile.update(dtype="float64", nodata=NODATA)
    # Nodata value declared by the input file, or None when it declares none.
    src_nodata = src.nodata
    data = src.read().astype(np.float64)

# Invalid pixels: negative values plus anything non-finite (NaN, +inf, -inf).
mask = (data < 0) | ~np.isfinite(data)

# The output profile replaces the input's nodata declaration, so pixels that
# carried the input's nodata value must be converted as well; otherwise a
# non-negative source nodata would silently turn into valid data.
# A NaN source nodata is already covered by the non-finite test above.
if src_nodata is not None and not np.isnan(src_nodata):
    mask |= data == src_nodata

fixed = np.where(mask, NODATA, data)

# The input is already closed here: only the in-memory array is needed.
with rasterio.open(out_path, "w", **profile) as dst:
    dst.write(fixed)

print("Saved:", out_path)

# Verify the output
with rasterio.open(out_path) as src:
    data = src.read(masked=True)  # masked read hides pixels equal to nodata
    print(f"\nStatistics (excluding nodata={src.nodata}):")
    print(f"Min: {data.min()}")
    print(f"Max: {data.max()}")
    print(f"Mean: {data.mean()}")
    print(f"Valid pixels: {data.count()}")
    # getmaskarray always yields a full boolean array, even when the masked
    # array carries the scalar `nomask` instead of a per-pixel mask.
    print(f"Nodata pixels: {np.ma.getmaskarray(data).sum()}")