## anthromes for estimating Land Use Land Cover from 1940-70 in angola

source: HYDE database

In [8]:
"""
Reproject HYDE Anthrome .ASC files (5 arc-min) to EPSG:32733 (1 km)
and clip to Angola boundary.
"""

import os
import zipfile
import numpy as np
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.mask import mask
import geopandas as gpd

# -------------------------------------------------------------------
# 1. Paths and setup
# -------------------------------------------------------------------
hyde_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/HYDEdata/anthromes/zip"
tmp_extract = os.path.join(hyde_dir, "unzipped")
os.makedirs(tmp_extract, exist_ok=True)

angola_gpkg = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/angola_soil_gpkg_stuff/angola_boundaries_32733.gpkg"
out_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/hyde_angola_anthromes"
os.makedirs(out_dir, exist_ok=True)

years = [1940, 1950, 1960, 1970]

# -------------------------------------------------------------------
# 2. Load Angola boundary
# -------------------------------------------------------------------
angola = gpd.read_file(angola_gpkg)
angola = angola.to_crs("EPSG:32733")
angola_geom = [angola.union_all()]  # for rasterio.mask

# -------------------------------------------------------------------
# 3. Process each HYDE Anthrome file
# -------------------------------------------------------------------
for year in years:
    zip_path = os.path.join(hyde_dir, f"{year}AD_anthromes.zip")
    if not os.path.exists(zip_path):
        print(f"‚ö†Ô∏è Missing ZIP for {year}: {zip_path}")
        continue

    print(f"üìÇ Extracting {year}...")

    # Extract zip
    with zipfile.ZipFile(zip_path, "r") as z:
        z.extractall(tmp_extract)

    # Find the ASC file inside the extracted folder
    asc_path = None
    for root, _, files in os.walk(tmp_extract):
        for f in files:
            if f.endswith(".asc"):
                asc_path = os.path.join(root, f)
                break
        if asc_path:
            break

    if not asc_path:
        print(f"‚ö†Ô∏è No .asc file found in {zip_path}")
        continue

    print(f"üìÑ Found ASC: {os.path.basename(asc_path)}")

    # -------------------------------------------------------------------
    # 3.1 Open ASC and assign CRS = EPSG:4326
    # -------------------------------------------------------------------
    with rasterio.open(asc_path, driver="AAIGrid") as src:
        src_data = src.read(1)
        src_data = np.where(src_data < 0, np.nan, src_data)  # mask negatives
        src_transform = src.transform
        src_crs = "EPSG:4326"

        # -------------------------------------------------------------------
        # 3.2 Define 1 km resolution target grid in UTM33S
        # -------------------------------------------------------------------
        target_crs = "EPSG:32733"
        target_res = 1000  # 1 km pixels

        transform, width, height = calculate_default_transform(
            src_crs, target_crs,
            src.width, src.height,
            *src.bounds,
            resolution=target_res
        )

        # -------------------------------------------------------------------
        # 3.3 Reproject to 1 km UTM33S grid
        # -------------------------------------------------------------------
        reprojected = np.empty((height, width), dtype=np.float32)
        reproject(
            source=src_data,
            destination=reprojected,
            src_transform=src_transform,
            src_crs=src_crs,
            dst_transform=transform,
            dst_crs=target_crs,
            resampling=Resampling.nearest,
        )

        # -------------------------------------------------------------------
        # 3.4 Write temporary file and clip to Angola
        # -------------------------------------------------------------------
        tmp_tif = os.path.join(out_dir, f"tmp_reproj_{year}.tif")
        meta = src.meta.copy()
        meta.update({
            "driver": "GTiff",
            "height": height,
            "width": width,
            "transform": transform,
            "crs": target_crs,
            "dtype": "float32",
            "nodata": -9999,
        })

        with rasterio.open(tmp_tif, "w", **meta) as tmp_dst:
            tmp_dst.write(reprojected, 1)

        with rasterio.open(tmp_tif) as tmp_src:
            out_image, out_transform = mask(
                tmp_src, angola_geom, crop=True, nodata=-9999
            )
            out_meta = tmp_src.meta.copy()
            out_meta.update({
                "height": out_image.shape[1],
                "width": out_image.shape[2],
                "transform": out_transform,
                "nodata": -9999,
                "compress": "lzw",
            })

        out_tif = os.path.join(out_dir, f"anthromes_angola_{year}_1km.tif")
        with rasterio.open(out_tif, "w", **out_meta) as dest:
            dest.write(out_image)

        os.remove(tmp_tif)

        print(f"‚úÖ Saved: {out_tif}")

print("üéâ All done ‚Äî Anthrome rasters now 1 km, EPSG:32733, and clipped to Angola.")


üìÇ Extracting 1940...
üìÑ Found ASC: anthromes1940AD.asc
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/hyde_angola_anthromes/anthromes_angola_1940_1km.tif
üìÇ Extracting 1950...
üìÑ Found ASC: anthromes1940AD.asc
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/hyde_angola_anthromes/anthromes_angola_1950_1km.tif
üìÇ Extracting 1960...
üìÑ Found ASC: anthromes1940AD.asc
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/hyde_angola_anthromes/anthromes_angola_1960_1km.tif
üìÇ Extracting 1970...
üìÑ Found ASC: anthromes1940AD.asc
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/hyde_angola_anthromes/anthromes_angola_1970_1km.tif
üéâ All done ‚Äî Anthrome rasters now 1 km, EPSG:32733, and clipped to Angola.


In [9]:
## look up table for later

import pandas as pd

anthrome_df = pd.DataFrame([
    (11, "Urban"),
    (12, "Dense settlements"),
    (21, "Village, Rice"),
    (22, "Village, Irrigated"),
    (23, "Village, Rainfed"),
    (24, "Village, Pastoral"),
    (31, "Croplands, residential irrigated"),
    (32, "Croplands, residential rainfed"),
    (33, "Croplands, populated"),
    (34, "Croplands, pastoral"),
    (41, "Rangeland, residential"),
    (42, "Rangeland, populated"),
    (43, "Rangeland, remote"),
    (51, "Semi-natural woodlands, residential"),
    (52, "Semi-natural woodlands, populated"),
    (53, "Semi-natural woodlands, remote"),
    (54, "Semi-natural treeless and barren lands"),
    (61, "Wild, remote - woodlands"),
    (62, "Wild, remote - treeless & barren"),
    (63, "Wild, remote - ice"),
    (70, "No definition")
], columns=["anthrome_code", "anthrome_class"])


## LULC 


Features of this script:

- Auto-unzips missing 1940, 1950, 1960 LULC files into  LULC_hyde_clipped folder.

- Checks the folder for .tif files and counts them per year.

- Cleans filenames to match angola_<year> convention safely.

- Skips already-correct files to avoid unnecessary renaming.

In [2]:
import os
import zipfile
import re

# -----------------------------
# Paths
# -----------------------------
zip_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/HYDEdata/baseline/zip"
out_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde"

os.makedirs(out_dir, exist_ok=True)

years = ["1940", "1950", "1960"]
expected_zips = [f"{y}AD_lu.zip" for y in years]

# -----------------------------
# 1. Verify and unzip missing files
# -----------------------------
for zip_name, year in zip(expected_zips, years):
    zip_path = os.path.join(zip_dir, zip_name)

    if not os.path.exists(zip_path):
        print(f"‚ùå Missing zip: {zip_path}")
        continue

    # Check if this year's files are already extracted
    year_tifs = [f for f in os.listdir(out_dir) if f"{year}AD" in f and f.endswith(".tif")]
    if year_tifs:
        print(f"‚úÖ {year} files already extracted: {len(year_tifs)} .tifs found")
        continue

    # Extract zip
    print(f"üì¶ Unzipping {zip_name} ...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(out_dir)
    print(f"‚úÖ Extracted {zip_name} into {out_dir}")

üì¶ Unzipping 1940AD_lu.zip ...
‚úÖ Extracted 1940AD_lu.zip into /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde
üì¶ Unzipping 1950AD_lu.zip ...
‚úÖ Extracted 1950AD_lu.zip into /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde
üì¶ Unzipping 1960AD_lu.zip ...
‚úÖ Extracted 1960AD_lu.zip into /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde


FileNotFoundError: ‚ùå No .tif files found in /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde

In [5]:
import os
import numpy as np
import rasterio
from rasterio.transform import from_origin
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.mask import mask
import geopandas as gpd

# ----------------------------
# Paths
# ----------------------------
asc_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde/"
mask_shp = "/Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/angola_soiltype_rectangle.shp"
out_dir = "/Volumes/One_Touch/angola_soils_thesis/gis_features_updated/LULC_hyde_clipped_tif/"
os.makedirs(out_dir, exist_ok=True)

# ----------------------------
# Load Angola shapefile
# ----------------------------
angola = gpd.read_file(mask_shp)
if angola.crs != "EPSG:4326":
    angola = angola.to_crs("EPSG:4326")
angola_geom = [angola.unary_union]

# ----------------------------
# Process each ASC file
# ----------------------------
for asc_file in os.listdir(asc_dir):
    if not asc_file.lower().endswith(".asc"):
        continue
    if asc_file.startswith("._"):
        continue

    asc_path = os.path.join(asc_dir, asc_file)
    base_name = os.path.splitext(asc_file)[0]
    print(f"\nüìÇ Processing {asc_file} ...")

    # Safety check for valid ASCII file
    with open(asc_path, "rb") as test_f:
        first_bytes = test_f.read(100)
        if b"ncols" not in first_bytes:
            print(f"‚ö†Ô∏è Skipping non-ASCII file: {asc_file}")
            continue

    # 1Ô∏è‚É£ Read header + data
    with open(asc_path, "r", encoding="latin1", errors="ignore") as f:
        header = {}
        for _ in range(6):
            key, value = f.readline().strip().split(None, 1)
            header[key.lower()] = float(value)
        ncols = int(header["ncols"])
        nrows = int(header["nrows"])
        xllcorner = header["xllcorner"]
        yllcorner = header["yllcorner"]
        cellsize = header["cellsize"]
        nodata = header.get("nodata_value", -9999)

        data = np.loadtxt(f, dtype=np.float32)
        data[data == nodata] = np.nan

    # 2Ô∏è‚É£ Define transform (upper-left origin)
    transform = from_origin(xllcorner, yllcorner + nrows * cellsize, cellsize, cellsize)

    # 3Ô∏è‚É£ Save temporary GeoTIFF (WGS84)
    tmp_tif = os.path.join(out_dir, f"{base_name}_wgs84.tif")
    with rasterio.open(
        tmp_tif, "w",
        driver="GTiff",
        height=nrows, width=ncols,
        count=1, dtype="float32",
        crs="EPSG:4326", transform=transform,
        nodata=-9999
    ) as dst:
        dst.write(np.nan_to_num(data, nan=-9999), 1)

    # 4Ô∏è‚É£ Clip by Angola rectangle
    with rasterio.open(tmp_tif) as src:
        out_image, out_transform = mask(src, angola_geom, crop=True, nodata=-9999)
        out_meta = src.meta.copy()
        out_meta.update({
            "height": out_image.shape[1],
            "width": out_image.shape[2],
            "transform": out_transform,
            "crs": "EPSG:4326",
            "nodata": -9999,
            "compress": "lzw"
        })

    clipped_tif = os.path.join(out_dir, f"{base_name}_clipped_wgs84.tif")
    with rasterio.open(clipped_tif, "w", **out_meta) as dest:
        dest.write(out_image)

    # 5Ô∏è‚É£ Reproject + resample to EPSG:32733 @ 1 km
    final_tif = os.path.join(out_dir, f"{base_name}_utm33s_1km.tif")
    dst_crs = "EPSG:32733"
    target_res = (1000, 1000)   # metres per pixel

    with rasterio.open(clipped_tif) as src:
        transform, width, height = calculate_default_transform(
            src.crs, dst_crs,
            src.width, src.height,
            *src.bounds,
            dst_resolution=target_res
        )
        kwargs = src.meta.copy()
        kwargs.update({
            "crs": dst_crs,
            "transform": transform,
            "width": width,
            "height": height,
            "nodata": -9999,
            "compress": "lzw"
        })

        with rasterio.open(final_tif, "w", **kwargs) as dst:
            reproject(
                source=rasterio.band(src, 1),
                destination=rasterio.band(dst, 1),
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=transform,
                dst_crs=dst_crs,
                resampling=Resampling.nearest  # keep classes intact
            )

    # 6Ô∏è‚É£ Clean temporary files
    os.remove(tmp_tif)
    os.remove(clipped_tif)
    print(f"‚úÖ Saved 1 km UTM33S GeoTIFF: {final_tif}")

print("\nüéâ All ASC files converted, clipped, and resampled to 1 km in UTM 33S!")

  angola_geom = [angola.unary_union]



üìÇ Processing cropland1940AD.asc ...
‚úÖ Saved 1 km UTM33S GeoTIFF: /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/LULC_hyde_clipped_tif/cropland1940AD_utm33s_1km.tif

üìÇ Processing grazing1940AD.asc ...
‚úÖ Saved 1 km UTM33S GeoTIFF: /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/LULC_hyde_clipped_tif/grazing1940AD_utm33s_1km.tif

üìÇ Processing pasture1940AD.asc ...
‚úÖ Saved 1 km UTM33S GeoTIFF: /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/LULC_hyde_clipped_tif/pasture1940AD_utm33s_1km.tif

üìÇ Processing rangeland1940AD.asc ...
‚úÖ Saved 1 km UTM33S GeoTIFF: /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/LULC_hyde_clipped_tif/rangeland1940AD_utm33s_1km.tif

üìÇ Processing conv_rangeland1940AD.asc ...
‚úÖ Saved 1 km UTM33S GeoTIFF: /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/LULC_hyde_clipped_tif/conv_rangeland1940AD_utm33s_1km.tif

üìÇ Processing rf_rice1940AD.asc ...
‚úÖ Saved 1 km UTM33S GeoTIFF: /Volu

## resample to 1km (didn't work before) and convert to percent of landuse type covered in pixel

In [1]:
import os
import glob
import numpy as np
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling

# -------------------------------------------------------------
# 1) Paths
# -------------------------------------------------------------
input_dir = "/Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/LULC_hyde_clipped_tif"
resampled_dir = "/Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/LULC_hyde_resampled_1km"
percent_dir   = "/Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/LULC_hyde_percent_1km"

os.makedirs(resampled_dir, exist_ok=True)
os.makedirs(percent_dir, exist_ok=True)

# -------------------------------------------------------------
# HYDE pixel area (km¬≤)
# -------------------------------------------------------------
HYDE_CELL_AREA_KM2 = (9178.868534656302**2) / 1_000_000.0  # ‚âà 84.25 km¬≤

# -------------------------------------------------------------
# Loop through HYDE rasters
# -------------------------------------------------------------
rasters = sorted(glob.glob(os.path.join(input_dir, "*.tif")))

for src_path in rasters:
    fname = os.path.basename(src_path)
    print(f"\n=== Processing {fname} ===")

    with rasterio.open(src_path) as src:
        transform = src.transform
        crs = src.crs
        nodata = src.nodata

        # ---------------------------------------
        # Load HYDE km¬≤ data
        # ---------------------------------------
        arr = src.read(1).astype("float32")

        # Mask nodata
        valid_mask = arr != nodata if nodata is not None else np.ones_like(arr, dtype=bool)

        # ---------------------------------------
        # Convert HYDE km¬≤ -> fractional cover (0‚Äì1)
        # ---------------------------------------
        fraction = np.full_like(arr, nodata if nodata is not None else np.nan, dtype="float32")
        fraction[valid_mask] = arr[valid_mask] / HYDE_CELL_AREA_KM2

        # Clip fractions to 0‚Äì1 just to be safe
        fraction = np.clip(fraction, 0, 1)

        # ---------------------------------------
        # Prepare 1 km resampling grid
        # ---------------------------------------
        new_res = 1000  # 1 km
        dst_transform, width, height = calculate_default_transform(
            crs, crs, src.width, src.height, *src.bounds, resolution=new_res
        )

        profile = src.profile.copy()
        profile.update({
            "transform": dst_transform,
            "height": height,
            "width": width,
            "nodata": nodata,
            "dtype": "float32",
            "compress": "lzw"
        })

        # ---------------------------------------
        # RESAMPLE fractional cover to 1 km
        # ---------------------------------------
        fraction_resampled = np.zeros((height, width), dtype="float32")

        reproject(
            source=fraction,
            destination=fraction_resampled,
            src_transform=transform,
            src_crs=crs,
            dst_transform=dst_transform,
            dst_crs=crs,
            resampling=Resampling.bilinear,
        )

        # Save fractional raster
        resampled_path = os.path.join(resampled_dir, fname.replace(".tif", "_fraction_1km.tif"))
        with rasterio.open(resampled_path, "w", **profile) as dst:
            dst.write(fraction_resampled, 1)

        print(f"  ‚Üí Saved 1 km fractional raster: {resampled_path}")

        # ---------------------------------------
        # Convert to percent cover (0‚Äì100)
        # ---------------------------------------
        percent = (fraction_resampled * 100).astype("float32")

        # Save percent raster
        percent_path = os.path.join(percent_dir, fname.replace(".tif", "_percent_1km.tif"))
        with rasterio.open(percent_path, "w", **profile) as dst:
            dst.write(percent, 1)

        print(f"  ‚Üí Saved 1 km percent raster: {percent_path}")

print("\nüéâ Finished correct resampling + percent conversion of HYDE rasters.")



=== Processing conv_rangeland1940AD_utm33s_1km.tif ===
  ‚Üí Saved 1 km fractional raster: /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/LULC_hyde_resampled_1km/conv_rangeland1940AD_utm33s_1km_fraction_1km.tif
  ‚Üí Saved 1 km percent raster: /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/LULC_hyde_percent_1km/conv_rangeland1940AD_utm33s_1km_percent_1km.tif

=== Processing conv_rangeland1950AD_utm33s_1km.tif ===
  ‚Üí Saved 1 km fractional raster: /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/LULC_hyde_resampled_1km/conv_rangeland1950AD_utm33s_1km_fraction_1km.tif
  ‚Üí Saved 1 km percent raster: /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/LULC_hyde_percent_1km/conv_rangeland1950AD_utm33s_1km_percent_1km.tif

=== Processing conv_rangeland1960AD_utm33s_1km.tif ===
  ‚Üí Saved 1 km fractional raster: /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/

### Average multi-decade percent rasters per land-use type

In [2]:
import os
import glob
import re
import numpy as np
import rasterio

# -------------------------------------------------------------
# 1) Paths
# -------------------------------------------------------------
percent_dir = "/Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/LULC_hyde_percent_1km"
output_dir  = "/Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/LULC_hyde_percent_1km_averaged"

os.makedirs(output_dir, exist_ok=True)

# -------------------------------------------------------------
# 2) Load all percent rasters
# -------------------------------------------------------------
rasters = sorted(glob.glob(os.path.join(percent_dir, "*percent_1km.tif")))

if not rasters:
    raise RuntimeError("No percent rasters found! Check file path or naming pattern.")

# -------------------------------------------------------------
# 3) Group files by land-use type
# e.g., cropland1960AD ‚Üí group "cropland"
# -------------------------------------------------------------
groups = {}

for r in rasters:
    fname = os.path.basename(r)
    
    # extract land-use type (non-digit prefix)
    # e.g., cropland1960AD_utm33s_1km_percent_1km.tif ‚Üí 'cropland'
    match = re.match(r"([a-zA-Z_]+)\d{4}", fname)
    if match:
        lutype = match.group(1)
        groups.setdefault(lutype, []).append(r)

# -------------------------------------------------------------
# 4) Average each group (each land-use type)
# -------------------------------------------------------------
for lutype, file_list in groups.items():
    print(f"\n=== Averaging decades for: {lutype} ===")
    print("\n".join([f"  {f}" for f in file_list]))

    arrays = []
    profile = None
    nodata = None

    # read all rasters of this land-use type
    for f in file_list:
        with rasterio.open(f) as src:
            arr = src.read(1).astype("float32")
            nodata = src.nodata
            profile = src.profile
            
            arrays.append(arr)

    # stack shape: (num_decades, rows, cols)
    stack = np.stack(arrays)

    # create mask: True where ALL decades are nodata
    mask_all_nodata = np.all(stack == nodata, axis=0)

    # compute mean ignoring nodata pixels
    # set nodata to NaN temporarily for averaging
    temp = np.where(stack == nodata, np.nan, stack)
    avg = np.nanmean(temp, axis=0)

    # restore nodata where appropriate
    avg[mask_all_nodata] = nodata if nodata is not None else np.nan

    # ---------------------------------------------------------
    # Save output raster
    # ---------------------------------------------------------
    out_path = os.path.join(output_dir, f"{lutype}_avg1940_1960_percent_1km.tif")

    with rasterio.open(out_path, "w", **profile) as dst:
        dst.write(avg.astype("float32"), 1)

    print(f"  ‚Üí Saved averaged raster: {out_path}")

print("\nüéâ Finished averaging HYDE percent rasters by land-use type!")



=== Averaging decades for: conv_rangeland ===
  /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/LULC_hyde_percent_1km/conv_rangeland1940AD_utm33s_1km_percent_1km.tif
  /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/LULC_hyde_percent_1km/conv_rangeland1950AD_utm33s_1km_percent_1km.tif
  /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/LULC_hyde_percent_1km/conv_rangeland1960AD_utm33s_1km_percent_1km.tif
  ‚Üí Saved averaged raster: /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/LULC_hyde_percent_1km_averaged/conv_rangeland_avg1940_1960_percent_1km.tif

=== Averaging decades for: cropland ===
  /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/LULC_hyde_percent_1km/cropland1940AD_utm33s_1km_percent_1km.tif
  /Volumes/One_Touch/angola_soils_thesis/gis_features_updated/intermediate_data/LULC_hyde_percent_1km/cropland1950AD_utm33s_1km_percent_

## old converstion to landuse percent cover (didn't use in final version)

In [11]:
import os
import re
import numpy as np
import pandas as pd
import rasterio

# ----------------------------
# Paths
# ----------------------------
lu_dir = "/Volumes/One_Touch/angola_soils_thesis/gis_features_updated/processed_data/aligned_1km_rasterio/landuse"
out_csv = os.path.join(lu_dir, "hyde_landuse_percent_cover.csv")

# ----------------------------
# Helper: extract year and class
# ----------------------------
def parse_filename(filename):
    # Match pattern like 'grazing1950.tif' or 'cropland1960AD.tif'
    match = re.match(r"([a-zA-Z_]+)(\d{4})(?:AD)?\.tif$", filename)
    if not match:
        return None, None
    land_class, year = match.groups()
    land_class = land_class.strip("_").lower()
    year = int(year)
    return land_class, year

# ----------------------------
# Loop through raster files
# ----------------------------
records = []
for f in sorted(os.listdir(lu_dir)):
    if not f.endswith(".tif") or f.startswith("._"):
        continue

    land_class, year = parse_filename(f)
    if land_class is None or year is None:
        print(f"‚ö†Ô∏è Skipping unrecognized file name: {f}")
        continue

    path = os.path.join(lu_dir, f)
    print(f"Processing {f} ...")

    with rasterio.open(path) as src:
        data = src.read(1).astype(np.float32)
        nodata = src.nodata
        if nodata is not None:
            data[data == nodata] = np.nan

    # Skip completely empty rasters
    valid = np.isfinite(data)
    if not np.any(valid):
        print(f"‚ö†Ô∏è No valid data in {f}, skipping.")
        continue

    # ----------------------------
    # Auto-detect scale (0‚Äì1 vs 0‚Äì100)
    # ----------------------------
    dmin, dmax = np.nanmin(data), np.nanmax(data)
    mean_val = np.nanmean(data)

    if dmax <= 1.0:
        # fractional cover, convert to percent
        percent_cover = mean_val * 100.0
    elif dmax > 1.0 and dmax <= 100.0:
        # already in percent
        percent_cover = mean_val
    else:
        # unexpected values (e.g., 0‚Äì10000)
        percent_cover = mean_val / 100.0
        print(f"‚ö†Ô∏è Warning: {f} has unusually large values (max={dmax:.2f}), scaled down.")

    records.append({
        "year": year,
        "land_use_class": land_class,
        "percent_cover": round(percent_cover, 4)
    })

# ----------------------------
# Save results
# ----------------------------
if records:
    df = pd.DataFrame(records).sort_values(["year", "land_use_class"])
    df.to_csv(out_csv, index=False)
    print("\n‚úÖ Percent cover summary saved to:")
    print(out_csv)
    print(df.head())
else:
    print("‚ùå No valid raster files processed.")


Processing cropland1950AD.tif ...
Processing cropland1960AD.tif ...
Processing grazing1950AD.tif ...
Processing grazing1960AD.tif ...
Processing pasture1950AD.tif ...
Processing pasture1960AD.tif ...
Processing rangeland1950AD.tif ...
Processing rangeland1960AD.tif ...
Processing total_rainfed1950AD.tif ...
Processing total_rainfed1960AD.tif ...

‚úÖ Percent cover summary saved to:
/Volumes/One_Touch/angola_soils_thesis/gis_features_updated/processed_data/aligned_1km_rasterio/landuse/hyde_landuse_percent_cover.csv
   year land_use_class  percent_cover
0  1950       cropland     -16.399099
2  1950        grazing      13.523300
4  1950        pasture     -12.931000
6  1950      rangeland       8.175000
8  1950  total_rainfed     -16.417400


In [12]:
import os
import re
import numpy as np
import rasterio
from rasterio import shutil as rio_shutil
import pandas as pd

# ----------------------------
# Paths
# ----------------------------
lu_dir = "/Volumes/One_Touch/angola_soils_thesis/gis_features_updated/processed_data/aligned_1km_rasterio/landuse"
out_dir = os.path.join(lu_dir, "hyde_lu_percent")
os.makedirs(out_dir, exist_ok=True)

out_csv = os.path.join(out_dir, "hyde_landuse_percent_summary.csv")

# ----------------------------
# Helper: extract year and class
# ----------------------------
def parse_filename(filename):
    match = re.match(r"([a-zA-Z_]+)(\d{4})(?:AD)?\.tif$", filename)
    if not match:
        return None, None
    land_class, year = match.groups()
    land_class = land_class.strip("_").lower()
    year = int(year)
    return land_class, year

# ----------------------------
# Loop through raster files
# ----------------------------
records = []
for f in sorted(os.listdir(lu_dir)):
    if not f.endswith(".tif") or f.startswith("._"):
        continue

    land_class, year = parse_filename(f)
    if land_class is None or year is None:
        print(f"‚ö†Ô∏è Skipping unrecognized file name: {f}")
        continue

    path = os.path.join(lu_dir, f)
    print(f"Processing {f} ...")

    with rasterio.open(path) as src:
        profile = src.profile
        data = src.read(1).astype(np.float32)
        nodata = src.nodata
        if nodata is not None:
            data[data == nodata] = np.nan

    if not np.any(np.isfinite(data)):
        print(f"‚ö†Ô∏è No valid data in {f}, skipping.")
        continue

    # Detect scale and convert to percent
    dmax = np.nanmax(data)
    if dmax <= 1.0:
        percent_data = data * 100.0
    elif 1.0 < dmax <= 100.0:
        percent_data = data
    else:
        percent_data = data / 100.0
        print(f"‚ö†Ô∏è {f} scaled down (max={dmax:.2f})")

    # Write new raster
    out_path = os.path.join(out_dir, f"{land_class}_{year}_percent.tif")
    new_profile = profile.copy()
    new_profile.update(dtype=rasterio.float32, nodata=np.nan)
    with rasterio.open(out_path, "w", **new_profile) as dst:
        dst.write(percent_data.astype(np.float32), 1)

    # Add record for summary
    records.append({
        "year": year,
        "land_use_class": land_class,
        "mean_percent": float(np.nanmean(percent_data)),
        "min": float(np.nanmin(percent_data)),
        "max": float(np.nanmax(percent_data))
    })

# ----------------------------
# Save summary CSV
# ----------------------------
if records:
    df = pd.DataFrame(records).sort_values(["year", "land_use_class"])
    df.to_csv(out_csv, index=False)
    print("\n‚úÖ Percent cover rasters saved to:")
    print(out_dir)
    print("‚úÖ Summary table saved to:")
    print(out_csv)
    print(df.head())
else:
    print("‚ùå No valid rasters processed.")


Processing cropland1950AD.tif ...
Processing cropland1960AD.tif ...
Processing grazing1950AD.tif ...
Processing grazing1960AD.tif ...
Processing pasture1950AD.tif ...
Processing pasture1960AD.tif ...
Processing rangeland1950AD.tif ...
Processing rangeland1960AD.tif ...
Processing total_rainfed1950AD.tif ...
Processing total_rainfed1960AD.tif ...

‚úÖ Percent cover rasters saved to:
/Volumes/One_Touch/angola_soils_thesis/gis_features_updated/processed_data/aligned_1km_rasterio/landuse/hyde_lu_percent
‚úÖ Summary table saved to:
/Volumes/One_Touch/angola_soils_thesis/gis_features_updated/processed_data/aligned_1km_rasterio/landuse/hyde_lu_percent/hyde_landuse_percent_summary.csv
   year land_use_class  mean_percent          min        max
0  1950       cropland    -16.399130 -9921.839844  21.740070
2  1950        grazing     13.523286 -9921.330078  69.395355
4  1950        pasture    -12.930983 -9921.839844  63.184620
6  1950      rangeland      8.174992 -9921.330078  69.395355
8  1950  

In [13]:
import rasterio
import numpy as np

path = "/Volumes/One_Touch/angola_soils_thesis/gis_features_updated/processed_data/aligned_1km_rasterio/landuse/hyde_lu_percent/grazing_1950_percent.tif"
with rasterio.open(path) as src:
    data = src.read(1).astype(np.float32)
    data[data == src.nodata] = np.nan
    print("Mean:", np.nanmean(data))
    print("Max:", np.nanmax(data))


Mean: 13.523286
Max: 69.395355


In [17]:
import rasterio
import numpy as np

path = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/hyde_lu/grazing1950.tif"
with rasterio.open(path) as src:
    data = src.read(1).astype(np.float32)
    data[data == src.nodata] = np.nan
    print("Mean:", np.nanmean(data))
    print("Max:", np.nanmax(data))


Mean: 31.874432
Max: 69.62984
