## anthromes for estimating Land Use Land Cover from 1940-70 in angola

source: HYDE database

In [8]:
"""
Reproject HYDE Anthrome .ASC files (5 arc-min) to EPSG:32733 (1 km)
and clip to Angola boundary.
"""

import os
import zipfile
import numpy as np
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.mask import mask
import geopandas as gpd

# -------------------------------------------------------------------
# 1. Paths and setup
# -------------------------------------------------------------------
hyde_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/HYDEdata/anthromes/zip"
tmp_extract = os.path.join(hyde_dir, "unzipped")
os.makedirs(tmp_extract, exist_ok=True)

angola_gpkg = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/angola_soil_gpkg_stuff/angola_boundaries_32733.gpkg"
out_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/hyde_angola_anthromes"
os.makedirs(out_dir, exist_ok=True)

years = [1940, 1950, 1960, 1970]

# -------------------------------------------------------------------
# 2. Load Angola boundary
# -------------------------------------------------------------------
angola = gpd.read_file(angola_gpkg)
angola = angola.to_crs("EPSG:32733")
angola_geom = [angola.union_all()]  # for rasterio.mask

# -------------------------------------------------------------------
# 3. Process each HYDE Anthrome file
# -------------------------------------------------------------------
for year in years:
    zip_path = os.path.join(hyde_dir, f"{year}AD_anthromes.zip")
    if not os.path.exists(zip_path):
        print(f"⚠️ Missing ZIP for {year}: {zip_path}")
        continue

    print(f"📂 Extracting {year}...")

    # Extract zip
    with zipfile.ZipFile(zip_path, "r") as z:
        z.extractall(tmp_extract)

    # Find the ASC file inside the extracted folder
    asc_path = None
    for root, _, files in os.walk(tmp_extract):
        for f in files:
            if f.endswith(".asc"):
                asc_path = os.path.join(root, f)
                break
        if asc_path:
            break

    if not asc_path:
        print(f"⚠️ No .asc file found in {zip_path}")
        continue

    print(f"📄 Found ASC: {os.path.basename(asc_path)}")

    # -------------------------------------------------------------------
    # 3.1 Open ASC and assign CRS = EPSG:4326
    # -------------------------------------------------------------------
    with rasterio.open(asc_path, driver="AAIGrid") as src:
        src_data = src.read(1)
        src_data = np.where(src_data < 0, np.nan, src_data)  # mask negatives
        src_transform = src.transform
        src_crs = "EPSG:4326"

        # -------------------------------------------------------------------
        # 3.2 Define 1 km resolution target grid in UTM33S
        # -------------------------------------------------------------------
        target_crs = "EPSG:32733"
        target_res = 1000  # 1 km pixels

        transform, width, height = calculate_default_transform(
            src_crs, target_crs,
            src.width, src.height,
            *src.bounds,
            resolution=target_res
        )

        # -------------------------------------------------------------------
        # 3.3 Reproject to 1 km UTM33S grid
        # -------------------------------------------------------------------
        reprojected = np.empty((height, width), dtype=np.float32)
        reproject(
            source=src_data,
            destination=reprojected,
            src_transform=src_transform,
            src_crs=src_crs,
            dst_transform=transform,
            dst_crs=target_crs,
            resampling=Resampling.nearest,
        )

        # -------------------------------------------------------------------
        # 3.4 Write temporary file and clip to Angola
        # -------------------------------------------------------------------
        tmp_tif = os.path.join(out_dir, f"tmp_reproj_{year}.tif")
        meta = src.meta.copy()
        meta.update({
            "driver": "GTiff",
            "height": height,
            "width": width,
            "transform": transform,
            "crs": target_crs,
            "dtype": "float32",
            "nodata": -9999,
        })

        with rasterio.open(tmp_tif, "w", **meta) as tmp_dst:
            tmp_dst.write(reprojected, 1)

        with rasterio.open(tmp_tif) as tmp_src:
            out_image, out_transform = mask(
                tmp_src, angola_geom, crop=True, nodata=-9999
            )
            out_meta = tmp_src.meta.copy()
            out_meta.update({
                "height": out_image.shape[1],
                "width": out_image.shape[2],
                "transform": out_transform,
                "nodata": -9999,
                "compress": "lzw",
            })

        out_tif = os.path.join(out_dir, f"anthromes_angola_{year}_1km.tif")
        with rasterio.open(out_tif, "w", **out_meta) as dest:
            dest.write(out_image)

        os.remove(tmp_tif)

        print(f"✅ Saved: {out_tif}")

print("🎉 All done — Anthrome rasters now 1 km, EPSG:32733, and clipped to Angola.")


📂 Extracting 1940...
📄 Found ASC: anthromes1940AD.asc
✅ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/hyde_angola_anthromes/anthromes_angola_1940_1km.tif
📂 Extracting 1950...
📄 Found ASC: anthromes1940AD.asc
✅ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/hyde_angola_anthromes/anthromes_angola_1950_1km.tif
📂 Extracting 1960...
📄 Found ASC: anthromes1940AD.asc
✅ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/hyde_angola_anthromes/anthromes_angola_1960_1km.tif
📂 Extracting 1970...
📄 Found ASC: anthromes1940AD.asc
✅ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/hyde_angola_anthromes/anthromes_angola_1970_1km.tif
🎉 All done — Anthrome rasters now 1 km, EPSG:32733, and clipped to Angola.


In [9]:
## look up table for later

import pandas as pd

anthrome_df = pd.DataFrame([
    (11, "Urban"),
    (12, "Dense settlements"),
    (21, "Village, Rice"),
    (22, "Village, Irrigated"),
    (23, "Village, Rainfed"),
    (24, "Village, Pastoral"),
    (31, "Croplands, residential irrigated"),
    (32, "Croplands, residential rainfed"),
    (33, "Croplands, populated"),
    (34, "Croplands, pastoral"),
    (41, "Rangeland, residential"),
    (42, "Rangeland, populated"),
    (43, "Rangeland, remote"),
    (51, "Semi-natural woodlands, residential"),
    (52, "Semi-natural woodlands, populated"),
    (53, "Semi-natural woodlands, remote"),
    (54, "Semi-natural treeless and barren lands"),
    (61, "Wild, remote - woodlands"),
    (62, "Wild, remote - treeless & barren"),
    (63, "Wild, remote - ice"),
    (70, "No definition")
], columns=["anthrome_code", "anthrome_class"])


## LULC 

In [1]:
"""
Reproject HYDE Anthrome .ASC files (5 arc-min) to EPSG:32733 (1 km)
and clip to Angola boundary.
"""

import os
import zipfile
import numpy as np
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.mask import mask
import geopandas as gpd

# -------------------------------------------------------------------
# 1. Paths and setup
# -------------------------------------------------------------------
hyde_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/HYDEdata/baseline/zip"
tmp_extract = os.path.join(hyde_dir, "unzipped")
os.makedirs(tmp_extract, exist_ok=True)

angola_gpkg = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/angola_soil_gpkg_stuff/angola_boundaries_32733.gpkg"
out_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde"
os.makedirs(out_dir, exist_ok=True)

years = [1940, 1950, 1960, 1970]

# -------------------------------------------------------------------
# 2. Load Angola boundary
# -------------------------------------------------------------------
angola = gpd.read_file(angola_gpkg)
angola = angola.to_crs("EPSG:32733")
angola_geom = [angola.union_all()]  # for rasterio.mask

# -------------------------------------------------------------------
# 3. Process each HYDE LULC file
# -------------------------------------------------------------------
for year in years:
    zip_path = os.path.join(hyde_dir, f"{year}AD_lu.zip")
    if not os.path.exists(zip_path):
        print(f"⚠️ Missing ZIP for {year}: {zip_path}")
        continue

    print(f"\n📂 Extracting {year}...")
    with zipfile.ZipFile(zip_path, "r") as z:
        z.extractall(tmp_extract)

    # -------------------------------------------------------------------
    # 3.0 Clean macOS hidden files before proceeding
    # -------------------------------------------------------------------
    for f in os.listdir(tmp_extract):
        if f.startswith("._"):
            os.remove(os.path.join(tmp_extract, f))

    # -------------------------------------------------------------------
    # 3.1 Find all relevant .asc files
    # -------------------------------------------------------------------
    asc_files = []
    for root, _, files in os.walk(tmp_extract):
        for f in files:
            if f.endswith(".asc") and not f.startswith("._"):
                asc_files.append(os.path.join(root, f))

    if not asc_files:
        print(f"⚠️ No valid .asc files found in {zip_path}")
        continue

    print(f"🗺️ Found {len(asc_files)} .asc files for {year}")

    # -------------------------------------------------------------------
    # 3.2 Process each .asc file (e.g. cropland, grazing, pasture, etc.)
    # -------------------------------------------------------------------
    for asc_path in asc_files:
        var_name = os.path.splitext(os.path.basename(asc_path))[0]
        print(f"   🧩 Processing: {var_name}")

        # -------------------------------------------------------------------
        # Open ASC and assign CRS = EPSG:4326
        # -------------------------------------------------------------------
        with rasterio.open(asc_path, driver="AAIGrid") as src:
            src_data = src.read(1)
            src_data = np.where(src_data < 0, np.nan, src_data)
            src_transform = src.transform
            src_crs = "EPSG:4326"

            # -------------------------------------------------------------------
            # Define 1 km target grid in UTM33S
            # -------------------------------------------------------------------
            target_crs = "EPSG:32733"
            target_res = 1000  # 1 km pixels

            transform, width, height = calculate_default_transform(
                src_crs, target_crs,
                src.width, src.height,
                *src.bounds,
                resolution=target_res
            )

            # -------------------------------------------------------------------
            # Reproject to 1 km UTM33S grid
            # -------------------------------------------------------------------
            reprojected = np.empty((height, width), dtype=np.float32)
            reproject(
                source=src_data,
                destination=reprojected,
                src_transform=src_transform,
                src_crs=src_crs,
                dst_transform=transform,
                dst_crs=target_crs,
                resampling=Resampling.nearest,
            )

            # -------------------------------------------------------------------
            # Write temporary file and clip to Angola
            # -------------------------------------------------------------------
            tmp_tif = os.path.join(out_dir, f"tmp_reproj_{var_name}_{year}.tif")
            meta = src.meta.copy()
            meta.update({
                "driver": "GTiff",
                "height": height,
                "width": width,
                "transform": transform,
                "crs": target_crs,
                "dtype": "float32",
                "nodata": -9999,
            })

            with rasterio.open(tmp_tif, "w", **meta) as tmp_dst:
                tmp_dst.write(reprojected, 1)

        # -------------------------------------------------------------------
        # Clip to Angola
        # -------------------------------------------------------------------
        with rasterio.open(tmp_tif) as tmp_src:
            out_image, out_transform = mask(
                tmp_src, angola_geom, crop=True, nodata=-9999
            )
            out_meta = tmp_src.meta.copy()
            out_meta.update({
                "height": out_image.shape[1],
                "width": out_image.shape[2],
                "transform": out_transform,
                "nodata": -9999,
                "compress": "lzw",
            })

        out_tif = os.path.join(out_dir, f"{var_name}_angola_{year}_1km.tif")
        with rasterio.open(out_tif, "w", **out_meta) as dest:
            dest.write(out_image)

        os.remove(tmp_tif)
        print(f"      ✅ Saved: {out_tif}")

print("\n🎉 All done — Anthrome rasters now 1 km, EPSG:32733, and clipped to Angola.")


FileNotFoundError: ❌ No .asc files found in HYDE directory

In [2]:
"""
Resume HYDE LULC processing:
Reprojects and clips only rasters that are not yet in the output folder.
"""

import os
import numpy as np
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.mask import mask
import geopandas as gpd

# -------------------------------------------------------------------
# 1. Paths
# -------------------------------------------------------------------
hyde_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde"
angola_gpkg = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/angola_soil_gpkg_stuff/angola_boundaries_32733.gpkg"
out_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde_clipped"
os.makedirs(out_dir, exist_ok=True)

# -------------------------------------------------------------------
# 2. Load Angola boundary
# -------------------------------------------------------------------
angola = gpd.read_file(angola_gpkg)
angola = angola.to_crs("EPSG:32733")
angola_geom = [angola.union_all()]

# -------------------------------------------------------------------
# 3. Find input and already processed files
# -------------------------------------------------------------------
tif_files = [os.path.join(root, f)
             for root, _, files in os.walk(hyde_dir)
             for f in files if f.endswith(".tif")]

if not tif_files:
    raise FileNotFoundError("❌ No .tif files found in HYDE directory")

existing_outputs = {os.path.splitext(f)[0].replace("_angola_1km", "") 
                    for f in os.listdir(out_dir) if f.endswith(".tif")}

print(f"📁 Found {len(existing_outputs)} already processed files")

# Filter out files that are already done
to_process = []
for tif_path in tif_files:
    var_name = os.path.splitext(os.path.basename(tif_path))[0]
    if var_name not in existing_outputs:
        to_process.append(tif_path)

print(f"🗺️ Found {len(to_process)} rasters left to process")

if not to_process:
    print("✅ All rasters already processed. Nothing to do.")
else:
    print("🚀 Continuing with remaining rasters...")

# -------------------------------------------------------------------
# 4. Process remaining rasters
# -------------------------------------------------------------------
for tif_path in to_process:
    fname = os.path.basename(tif_path)
    var_name = os.path.splitext(fname)[0]
    out_tif = os.path.join(out_dir, f"{var_name}_angola_1km.tif")

    print(f"\n🧩 Processing: {var_name}")

    try:
        with rasterio.open(tif_path) as src:
            src_crs = src.crs or "EPSG:4326"
            target_crs = "EPSG:32733"
            target_res = 1000

            # Reproject
            transform, width, height = calculate_default_transform(
                src_crs, target_crs,
                src.width, src.height, *src.bounds,
                resolution=target_res
            )

            reprojected = np.empty((height, width), dtype=np.float32)
            reproject(
                source=rasterio.band(src, 1),
                destination=reprojected,
                src_transform=src.transform,
                src_crs=src_crs,
                dst_transform=transform,
                dst_crs=target_crs,
                resampling=Resampling.nearest,
            )

            # Save temporary
            meta = src.meta.copy()
            meta.update({
                "driver": "GTiff",
                "height": height,
                "width": width,
                "transform": transform,
                "crs": target_crs,
                "dtype": "float32",
                "nodata": -9999,
            })

            tmp_path = os.path.join(out_dir, f"tmp_{var_name}.tif")
            with rasterio.open(tmp_path, "w", **meta) as tmp_dst:
                tmp_dst.write(reprojected, 1)

        # Clip
        with rasterio.open(tmp_path) as tmp_src:
            out_image, out_transform = mask(
                tmp_src, angola_geom, crop=True, nodata=-9999
            )
            out_meta = tmp_src.meta.copy()
            out_meta.update({
                "height": out_image.shape[1],
                "width": out_image.shape[2],
                "transform": out_transform,
                "nodata": -9999,
                "compress": "lzw",
            })

        with rasterio.open(out_tif, "w", **out_meta) as dest:
            dest.write(out_image)

        os.remove(tmp_path)
        print(f"✅ Saved: {out_tif}")

    except Exception as e:
        print(f"❌ Error processing {var_name}: {e}")
        continue

print("\n🎉 Done resuming HYDE LULC processing.")


📁 Found 0 already processed files
🗺️ Found 98 rasters left to process
🚀 Continuing with remaining rasters...

🧩 Processing: tmp_reproj_ir_rice1940AD_1960
✅ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde_clipped/tmp_reproj_ir_rice1940AD_1960_angola_1km.tif

🧩 Processing: cropland1940AD_angola_1940_1km
✅ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde_clipped/cropland1940AD_angola_1940_1km_angola_1km.tif

🧩 Processing: ._cropland1940AD_angola_1940_1km
❌ Error processing ._cropland1940AD_angola_1940_1km: '/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde/._cropland1940AD_angola_1940_1km.tif' not recognized as being in a supported file format.

🧩 Processing: ._tmp_reproj_ir_rice1940AD_1960
❌ Error processing ._tmp_reproj_ir_rice1940AD_1960: '/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde/._tmp_reproj_ir_rice1940AD_1960.tif' not recognized as being in a supported file format.

🧩 Processing: grazing1940AD_angola_1940_1km
✅ Saved: 

In [10]:
"""
HYDE 1960 LU processing:
- Unzips only 1960 LU zip
- Reprojects and clips rasters to Angola boundary
"""

import os
import zipfile
import numpy as np
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.mask import mask
import geopandas as gpd

# -------------------------------------------------------------------
# 1. Paths
# -------------------------------------------------------------------
hyde_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/HYDEdata/baseline"
zip_1960 = os.path.join(hyde_dir, "zip", "1960AD_lu.zip")
out_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde_clipped"
os.makedirs(out_dir, exist_ok=True)

angola_gpkg = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/angola_soil_gpkg_stuff/angola_boundaries_32733.gpkg"

# -------------------------------------------------------------------
# 2. Load Angola boundary
# -------------------------------------------------------------------
angola = gpd.read_file(angola_gpkg)
angola = angola.to_crs("EPSG:32733")
angola_geom = [angola.union_all()]

# -------------------------------------------------------------------
# 3. Unzip 1960 LU
# -------------------------------------------------------------------
unzip_dir = os.path.join(hyde_dir, "1960_lu")
os.makedirs(unzip_dir, exist_ok=True)

with zipfile.ZipFile(zip_1960, "r") as zip_ref:
    zip_ref.extractall(unzip_dir)
    print(f"📦 Unzipped 1960 LU → {unzip_dir}")

# -------------------------------------------------------------------
# 4. Collect .tif (or .asc) files
# -------------------------------------------------------------------
raster_files = []
for root, _, files in os.walk(unzip_dir):
    for f in files:
        if f.endswith(".tif") or f.endswith(".asc"):
            if not f.startswith("._"):
                raster_files.append(os.path.join(root, f))

if not raster_files:
    raise FileNotFoundError("❌ No raster files found after unzipping 1960 LU!")

# -------------------------------------------------------------------
# 5. Already processed outputs
# -------------------------------------------------------------------
existing_outputs = {
    os.path.splitext(f)[0].replace("_angola_1km", "")
    for f in os.listdir(out_dir) if f.endswith(".tif")
}
print(f"📁 Found {len(existing_outputs)} already processed rasters")

# -------------------------------------------------------------------
# 6. Process 1960 rasters
# -------------------------------------------------------------------
for raster_path in raster_files:
    fname = os.path.basename(raster_path)
    var_name = os.path.splitext(fname)[0]
    out_tif = os.path.join(out_dir, f"{var_name}_angola_1km.tif")

    # Skip if already exists
    if os.path.exists(out_tif):
        print(f"⏩ Skipping (already processed): {var_name}")
        continue

    # Convert ASC to temporary TIF if needed
    if raster_path.endswith(".asc"):
        tmp_tif = raster_path.replace(".asc", "_tmp.tif")
        with rasterio.open(raster_path) as src:
            profile = src.meta.copy()
            profile.update(driver="GTiff")
            with rasterio.open(tmp_tif, "w", **profile) as dst:
                dst.write(src.read(1), 1)
        raster_path = tmp_tif
        print(f"🧾 Converted: {fname} → temporary GeoTIFF")

    print(f"\n🧩 Processing: {var_name}")

    try:
        with rasterio.open(raster_path) as src:
            src_crs = src.crs or "EPSG:4326"
            target_crs = "EPSG:32733"
            target_res = 1000

            # Reproject
            transform, width, height = calculate_default_transform(
                src_crs, target_crs,
                src.width, src.height, *src.bounds,
                resolution=target_res
            )

            reprojected = np.empty((height, width), dtype=np.float32)
            reproject(
                source=rasterio.band(src, 1),
                destination=reprojected,
                src_transform=src.transform,
                src_crs=src_crs,
                dst_transform=transform,
                dst_crs=target_crs,
                resampling=Resampling.nearest,
            )

            # Save temporary reprojected raster
            meta = src.meta.copy()
            meta.update({
                "driver": "GTiff",
                "height": height,
                "width": width,
                "transform": transform,
                "crs": target_crs,
                "dtype": "float32",
                "nodata": -9999,
            })
            tmp_reproj = os.path.join(out_dir, f"tmp_{var_name}.tif")
            with rasterio.open(tmp_reproj, "w", **meta) as tmp_dst:
                tmp_dst.write(reprojected, 1)

        # Clip to Angola
        with rasterio.open(tmp_reproj) as tmp_src:
            out_image, out_transform = mask(
                tmp_src, angola_geom, crop=True, nodata=-9999
            )
            out_meta = tmp_src.meta.copy()
            out_meta.update({
                "height": out_image.shape[1],
                "width": out_image.shape[2],
                "transform": out_transform,
                "nodata": -9999,
                "compress": "lzw",
            })

        with rasterio.open(out_tif, "w", **out_meta) as dest:
            dest.write(out_image)

        os.remove(tmp_reproj)
        if "_tmp.tif" in raster_path:
            os.remove(raster_path)

        print(f"✅ Saved: {out_tif}")

    except Exception as e:
        print(f"❌ Error processing {var_name}: {e}")
        continue

print("\n🎉 Done processing 1960 LU rasters!")


📦 Unzipped 1960 LU → /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/HYDEdata/baseline/1960_lu
📁 Found 56 already processed rasters
🧾 Converted: cropland1960AD.asc → temporary GeoTIFF

🧩 Processing: cropland1960AD
✅ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde_clipped/cropland1960AD_angola_1km.tif
🧾 Converted: grazing1960AD.asc → temporary GeoTIFF

🧩 Processing: grazing1960AD
✅ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde_clipped/grazing1960AD_angola_1km.tif
🧾 Converted: pasture1960AD.asc → temporary GeoTIFF

🧩 Processing: pasture1960AD
✅ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde_clipped/pasture1960AD_angola_1km.tif
🧾 Converted: rangeland1960AD.asc → temporary GeoTIFF

🧩 Processing: rangeland1960AD
✅ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde_clipped/rangeland1960AD_angola_1km.tif
🧾 Converted: conv_rangeland1960AD.asc → temporary GeoTIFF

🧩 Processing: conv_rangeland1960AD
✅ Saved: /Volumes/O

In [11]:
## clleaning hyde lulc NAMES
import os
import re

base_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde_clipped"

for f in os.listdir(base_dir):
    if not f.endswith(".tif") or f.startswith("._"):
        continue

    match = re.search(r'(\d{4})AD', f)
    if not match:
        continue
    true_year = match.group(1)

    after_angola = re.search(r'angola_(\d{4})', f)
    if after_angola:
        existing_year = after_angola.group(1)
        if existing_year == true_year:
            print(f"⏩ Skipping (already correct): {f}")
            continue
    else:
        existing_year = None

    if existing_year:
        # ✅ Safe regex replacement using lambda
        new_name = re.sub(
            r'(angola_)\d{4}',
            lambda m: f"{m.group(1)}{true_year}",
            f
        )
    else:
        new_name = f.replace("_angola", f"_angola_{true_year}")

    old_path = os.path.join(base_dir, f)
    new_path = os.path.join(base_dir, new_name)

    if old_path != new_path:
        os.rename(old_path, new_path)
        print(f"✅ Renamed: {f} → {new_name}")

print("\n🎉 All filenames cleaned and synced to correct HYDE year.")



⏩ Skipping (already correct): tmp_reproj_ir_rice1940AD_1960_angola_1940_1km.tif
⏩ Skipping (already correct): cropland1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): grazing1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): pasture1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): rangeland1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): conv_rangeland1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): rf_rice1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): ir_rice1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): rf_norice1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): ir_norice1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): tot_rice1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): tot_rainfed1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): tot_irri1940AD_angola_1940_1km_angola_1km.tif


## Add Final terrain covariates from saga

In [12]:
import os
import glob
import processing
from qgis.core import QgsApplication

# -------------------------------------------------------
# OPTIONAL: initialize QGIS if running standalone (not inside QGIS)
QgsApplication.setPrefixPath("/usr", True)
qgs = QgsApplication([], False)
qgs.initQgis()
# -------------------------------------------------------

# === Paths ===
dem_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_raw/DEM_tiles/"
out_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/terrain_covariates/"
os.makedirs(out_dir, exist_ok=True)

# === DEM tile list ===
dem_tiles = sorted(glob.glob(os.path.join(dem_dir, "*.tif")))
print(f"Found {len(dem_tiles)} DEM tiles")

# === Loop over tiles ===
for tile in dem_tiles:
    basename = os.path.splitext(os.path.basename(tile))[0]
    tile_out = os.path.join(out_dir, basename)
    os.makedirs(tile_out, exist_ok=True)

    print(f"\n🔹 Processing tile: {basename}")

    # --- 1. Slope, Aspect, Curvatures ---
    processing.run("saga:slopeaspectcurvature", {
        'ELEVATION': tile,
        'SLOPE': f'{tile_out}/{basename}_slope.tif',
        'ASPECT': f'{tile_out}/{basename}_aspect.tif',
        'C_GENE': f'{tile_out}/{basename}_curv_general.tif',
        'C_PROF': f'{tile_out}/{basename}_curv_profile.tif',
        'C_PLAN': f'{tile_out}/{basename}_curv_plan.tif',
        'C_TOTA': f'{tile_out}/{basename}_curv_total.tif',
        'C_MAXI': f'{tile_out}/{basename}_curv_max.tif',
        'C_MINI': f'{tile_out}/{basename}_curv_min.tif',
        'C_LONG': f'{tile_out}/{basename}_curv_flowline.tif'
    })

    # --- 2. Hillshade ---
    processing.run("saga:hillshading", {
        'ELEVATION': tile,
        'SHADE': f'{tile_out}/{basename}_hillshade.tif'
    })

    # --- 3. Mid-slope position ---
    processing.run("saga:terrainpositionindex", {
        'DEM': tile,
        'TPI': f'{tile_out}/{basename}_mid_slope_position.tif'
    })

    # --- 4. Normalized height ---
    processing.run("saga:normalizedheights", {
        'DEM': tile,
        'HEIGHT': f'{tile_out}/{basename}_normalized_height.tif'
    })

    # --- 5. Slope height ---
    processing.run("saga:slopeheight", {
        'DEM': tile,
        'SLOPE': f'{tile_out}/{basename}_slope_height.tif'
    })

    # --- 6. Standardized height ---
    processing.run("saga:standardizedheight", {
        'DEM': tile,
        'STDHGT': f'{tile_out}/{basename}_standardized_height.tif'
    })

    # --- 7. Terrain surface convexity ---
    processing.run("saga:terrainsurfaceconvexity", {
        'DEM': tile,
        'CONVEXITY': f'{tile_out}/{basename}_terrain_surface_convexity.tif'
    })

    # --- 8. Terrain surface texture ---
    processing.run("saga:terrainsurfacetexture", {
        'DEM': tile,
        'TEXTURE': f'{tile_out}/{basename}_terrain_surface_texture.tif'
    })

    # --- 9. Valley depth ---
    processing.run("saga:valleydepth", {
        'DEM': tile,
        'VALLEY_DEPTH': f'{tile_out}/{basename}_valley_depth.tif'
    })

    # --- 10. Valley bottom flatness ---
    processing.run("saga:valleybottomflatness", {
        'DEM': tile,
        'FLATNESS': f'{tile_out}/{basename}_valley_flatness.tif'
    })

    print(f"✅ Done with tile: {basename}")

# -------------------------------------------------------
# OPTIONAL: clean up QGIS if running standalone
qgs.exitQgis()
# -------------------------------------------------------


ModuleNotFoundError: No module named 'processing'

In [None]:
import os
import glob
import processing
from qgis.core import QgsApplication

# -------------------------------------------------------
# OPTIONAL: initialize QGIS if running standalone (not inside QGIS)
QgsApplication.setPrefixPath("/usr", True)
qgs = QgsApplication([], False)
qgs.initQgis()
# -------------------------------------------------------

# === Paths ===
dem_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_raw/DEM_tiles/"
out_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/terrain_covariates/"
os.makedirs(out_dir, exist_ok=True)

# === DEM tile list ===
dem_tiles = sorted(glob.glob(os.path.join(dem_dir, "*.tif")))
print(f"Found {len(dem_tiles)} DEM tiles")

# === Loop over tiles ===
for tile in dem_tiles:
    basename = os.path.splitext(os.path.basename(tile))[0]
    tile_out = os.path.join(out_dir, basename)
    os.makedirs(tile_out, exist_ok=True)

    print(f"\n🔹 Processing tile: {basename}")

    # --- 1. Slope, Aspect, Curvatures ---
    processing.run("saga:slopeaspectcurvature", {
        'ELEVATION': tile,
        'SLOPE': f'{tile_out}/{basename}_slope.tif',
        'ASPECT': f'{tile_out}/{basename}_aspect.tif',
        'C_GENE': f'{tile_out}/{basename}_curv_general.tif',
        'C_PROF': f'{tile_out}/{basename}_curv_profile.tif',
        'C_PLAN': f'{tile_out}/{basename}_curv_plan.tif',
        'C_TOTA': f'{tile_out}/{basename}_curv_total.tif',
        'C_MAXI': f'{tile_out}/{basename}_curv_max.tif',
        'C_MINI': f'{tile_out}/{basename}_curv_min.tif',
        'C_LONG': f'{tile_out}/{basename}_curv_flowline.tif'
    })

    # --- 2. Hillshade ---
    processing.run("saga:hillshading", {
        'ELEVATION': tile,
        'SHADE': f'{tile_out}/{basename}_hillshade.tif'
    })

    # --- 3. Mid-slope position ---
    processing.run("saga:terrainpositionindex", {
        'DEM': tile,
        'TPI': f'{tile_out}/{basename}_mid_slope_position.tif'
    })

    # --- 4. Normalized height ---
    processing.run("saga:normalizedheights", {
        'DEM': tile,
        'HEIGHT': f'{tile_out}/{basename}_normalized_height.tif'
    })

    # --- 5. Slope height ---
    processing.run("saga:slopeheight", {
        'DEM': tile,
        'SLOPE': f'{tile_out}/{basename}_slope_height.tif'
    })

    # --- 6. Standardized height ---
    processing.run("saga:standardizedheight", {
        'DEM': tile,
        'STDHGT': f'{tile_out}/{basename}_standardized_height.tif'
    })

    # --- 7. Terrain surface convexity ---
    processing.run("saga:terrainsurfaceconvexity", {
        'DEM': tile,
        'CONVEXITY': f'{tile_out}/{basename}_terrain_surface_convexity.tif'
    })

    # --- 8. Terrain surface texture ---
    processing.run("saga:terrainsurfacetexture", {
        'DEM': tile,
        'TEXTURE': f'{tile_out}/{basename}_terrain_surface_texture.tif'
    })

    # --- 9. Valley depth ---
    processing.run("saga:valleydepth", {
        'DEM': tile,
        'VALLEY_DEPTH': f'{tile_out}/{basename}_valley_depth.tif'
    })

    # --- 10. Valley bottom flatness ---
    processing.run("saga:valleybottomflatness", {
        'DEM': tile,
        'FLATNESS': f'{tile_out}/{basename}_valley_flatness.tif'
    })

    print(f"✅ Done with tile: {basename}")

# -------------------------------------------------------
# OPTIONAL: clean up QGIS if running standalone
qgs.exitQgis()
# -------------------------------------------------------
