## anthromes for estimating Land Use Land Cover from 1940-70 in angola

source: HYDE database

In [8]:
"""
Reproject HYDE Anthrome .ASC files (5 arc-min) to EPSG:32733 (1 km)
and clip to Angola boundary.
"""

import os
import zipfile
import numpy as np
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.mask import mask
import geopandas as gpd

# -------------------------------------------------------------------
# 1. Paths and setup
# -------------------------------------------------------------------
hyde_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/HYDEdata/anthromes/zip"
tmp_extract = os.path.join(hyde_dir, "unzipped")
os.makedirs(tmp_extract, exist_ok=True)

angola_gpkg = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/angola_soil_gpkg_stuff/angola_boundaries_32733.gpkg"
out_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/hyde_angola_anthromes"
os.makedirs(out_dir, exist_ok=True)

years = [1940, 1950, 1960, 1970]

# -------------------------------------------------------------------
# 2. Load Angola boundary
# -------------------------------------------------------------------
angola = gpd.read_file(angola_gpkg)
angola = angola.to_crs("EPSG:32733")
angola_geom = [angola.union_all()]  # for rasterio.mask

# -------------------------------------------------------------------
# 3. Process each HYDE Anthrome file
# -------------------------------------------------------------------
for year in years:
    zip_path = os.path.join(hyde_dir, f"{year}AD_anthromes.zip")
    if not os.path.exists(zip_path):
        print(f"⚠️ Missing ZIP for {year}: {zip_path}")
        continue

    print(f"📂 Extracting {year}...")

    # Extract zip
    with zipfile.ZipFile(zip_path, "r") as z:
        z.extractall(tmp_extract)

    # Find the ASC file inside the extracted folder
    asc_path = None
    for root, _, files in os.walk(tmp_extract):
        for f in files:
            if f.endswith(".asc"):
                asc_path = os.path.join(root, f)
                break
        if asc_path:
            break

    if not asc_path:
        print(f"⚠️ No .asc file found in {zip_path}")
        continue

    print(f"📄 Found ASC: {os.path.basename(asc_path)}")

    # -------------------------------------------------------------------
    # 3.1 Open ASC and assign CRS = EPSG:4326
    # -------------------------------------------------------------------
    with rasterio.open(asc_path, driver="AAIGrid") as src:
        src_data = src.read(1)
        src_data = np.where(src_data < 0, np.nan, src_data)  # mask negatives
        src_transform = src.transform
        src_crs = "EPSG:4326"

        # -------------------------------------------------------------------
        # 3.2 Define 1 km resolution target grid in UTM33S
        # -------------------------------------------------------------------
        target_crs = "EPSG:32733"
        target_res = 1000  # 1 km pixels

        transform, width, height = calculate_default_transform(
            src_crs, target_crs,
            src.width, src.height,
            *src.bounds,
            resolution=target_res
        )

        # -------------------------------------------------------------------
        # 3.3 Reproject to 1 km UTM33S grid
        # -------------------------------------------------------------------
        reprojected = np.empty((height, width), dtype=np.float32)
        reproject(
            source=src_data,
            destination=reprojected,
            src_transform=src_transform,
            src_crs=src_crs,
            dst_transform=transform,
            dst_crs=target_crs,
            resampling=Resampling.nearest,
        )

        # -------------------------------------------------------------------
        # 3.4 Write temporary file and clip to Angola
        # -------------------------------------------------------------------
        tmp_tif = os.path.join(out_dir, f"tmp_reproj_{year}.tif")
        meta = src.meta.copy()
        meta.update({
            "driver": "GTiff",
            "height": height,
            "width": width,
            "transform": transform,
            "crs": target_crs,
            "dtype": "float32",
            "nodata": -9999,
        })

        with rasterio.open(tmp_tif, "w", **meta) as tmp_dst:
            tmp_dst.write(reprojected, 1)

        with rasterio.open(tmp_tif) as tmp_src:
            out_image, out_transform = mask(
                tmp_src, angola_geom, crop=True, nodata=-9999
            )
            out_meta = tmp_src.meta.copy()
            out_meta.update({
                "height": out_image.shape[1],
                "width": out_image.shape[2],
                "transform": out_transform,
                "nodata": -9999,
                "compress": "lzw",
            })

        out_tif = os.path.join(out_dir, f"anthromes_angola_{year}_1km.tif")
        with rasterio.open(out_tif, "w", **out_meta) as dest:
            dest.write(out_image)

        os.remove(tmp_tif)

        print(f"✅ Saved: {out_tif}")

print("🎉 All done — Anthrome rasters now 1 km, EPSG:32733, and clipped to Angola.")


📂 Extracting 1940...
📄 Found ASC: anthromes1940AD.asc
✅ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/hyde_angola_anthromes/anthromes_angola_1940_1km.tif
📂 Extracting 1950...
📄 Found ASC: anthromes1940AD.asc
✅ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/hyde_angola_anthromes/anthromes_angola_1950_1km.tif
📂 Extracting 1960...
📄 Found ASC: anthromes1940AD.asc
✅ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/hyde_angola_anthromes/anthromes_angola_1960_1km.tif
📂 Extracting 1970...
📄 Found ASC: anthromes1940AD.asc
✅ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/hyde_angola_anthromes/anthromes_angola_1970_1km.tif
🎉 All done — Anthrome rasters now 1 km, EPSG:32733, and clipped to Angola.


In [9]:
## look up table for later

import pandas as pd

anthrome_df = pd.DataFrame([
    (11, "Urban"),
    (12, "Dense settlements"),
    (21, "Village, Rice"),
    (22, "Village, Irrigated"),
    (23, "Village, Rainfed"),
    (24, "Village, Pastoral"),
    (31, "Croplands, residential irrigated"),
    (32, "Croplands, residential rainfed"),
    (33, "Croplands, populated"),
    (34, "Croplands, pastoral"),
    (41, "Rangeland, residential"),
    (42, "Rangeland, populated"),
    (43, "Rangeland, remote"),
    (51, "Semi-natural woodlands, residential"),
    (52, "Semi-natural woodlands, populated"),
    (53, "Semi-natural woodlands, remote"),
    (54, "Semi-natural treeless and barren lands"),
    (61, "Wild, remote - woodlands"),
    (62, "Wild, remote - treeless & barren"),
    (63, "Wild, remote - ice"),
    (70, "No definition")
], columns=["anthrome_code", "anthrome_class"])


## LULC 


Features of this script:

- Auto-unzips missing 1940, 1950, 1960 LULC files into  LULC_hyde_clipped folder.

- Checks the folder for .tif files and counts them per year.

- Cleans filenames to match angola_<year> convention safely.

- Skips already-correct files to avoid unnecessary renaming.

In [1]:
import os
import zipfile
import re

# -----------------------------
# Paths
# -----------------------------
zip_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/HYDEdata/baseline/zip"
out_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde_clipped"

os.makedirs(out_dir, exist_ok=True)

years = ["1940", "1950", "1960"]
expected_zips = [f"{y}AD_lu.zip" for y in years]

# -----------------------------
# 1. Verify and unzip missing files
# -----------------------------
for zip_name, year in zip(expected_zips, years):
    zip_path = os.path.join(zip_dir, zip_name)

    if not os.path.exists(zip_path):
        print(f"❌ Missing zip: {zip_path}")
        continue

    # Check if this year's files are already extracted
    year_tifs = [f for f in os.listdir(out_dir) if f"{year}AD" in f and f.endswith(".tif")]
    if year_tifs:
        print(f"✅ {year} files already extracted: {len(year_tifs)} .tifs found")
        continue

    # Extract zip
    print(f"📦 Unzipping {zip_name} ...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(out_dir)
    print(f"✅ Extracted {zip_name} into {out_dir}")

# -----------------------------
# 2. Verify all .tif files exist
# -----------------------------
all_tifs = [f for f in os.listdir(out_dir) if f.lower().endswith((".tif", ".tiff"))]
if not all_tifs:
    raise FileNotFoundError(f"❌ No .tif files found in {out_dir}")
print(f"📁 Total .tif files in folder: {len(all_tifs)}")

for year in years:
    year_files = [f for f in all_tifs if f"{year}AD" in f]
    print(f"{year}: {len(year_files)} files found")

# -----------------------------
# 3. Clean/rename files
# -----------------------------
for f in all_tifs:
    if f.startswith("._"):
        continue

    match = re.search(r'(\d{4})AD', f)
    if not match:
        continue
    true_year = match.group(1)

    after_angola = re.search(r'angola_(\d{4})', f)
    if after_angola:
        existing_year = after_angola.group(1)
        if existing_year == true_year:
            print(f"⏩ Skipping (already correct): {f}")
            continue
    else:
        existing_year = None

    if existing_year:
        new_name = re.sub(
            r'(angola_)\d{4}',
            lambda m: f"{m.group(1)}{true_year}",
            f
        )
    else:
        new_name = f.replace("_angola", f"_angola_{true_year}")

    old_path = os.path.join(out_dir, f)
    new_path = os.path.join(out_dir, new_name)

    if old_path != new_path:
        os.rename(old_path, new_path)
        print(f"✅ Renamed: {f} → {new_name}")

print("\n🎉 HYDE LULC verification and renaming complete.")


✅ 1940 files already extracted: 28 .tifs found
✅ 1950 files already extracted: 24 .tifs found
✅ 1960 files already extracted: 24 .tifs found
📁 Total .tif files in folder: 76
1940: 28 files found
1950: 24 files found
1960: 24 files found
⏩ Skipping (already correct): tmp_reproj_ir_rice1940AD_1960_angola_1940_1km.tif
⏩ Skipping (already correct): cropland1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): grazing1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): pasture1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): rangeland1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): conv_rangeland1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): rf_rice1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): ir_rice1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): rf_norice1940AD_angola_1940_1km_angola_1km.tif
⏩ Skipping (already correct): ir_norice1940AD_angola_1940_1km_angola_1k

In [2]:
import rasterio

path = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/LULC_hyde_clipped/conv_rangeland1960AD_angola_1960_1km.tif"

with rasterio.open(path) as src:
    print("CRS:", src.crs)
    print("Width x Height:", src.width, "x", src.height)
    print("Bounds:", src.bounds)
    print("Number of bands:", src.count)
    print("Data type:", src.dtypes)          # e.g., 'uint8', 'float32'
    print("NoData value:", src.nodata)
    print("Pixel size:", src.res)            # e.g., (1000, 1000) in meters


CRS: EPSG:32733
Width x Height: 1351 x 1522
Bounds: BoundingBox(left=144354.55889907666, bottom=7994929.886041995, right=1495354.5588990767, top=9516929.886041995)
Number of bands: 1
Data type: ('float32',)
NoData value: -9999.0
Pixel size: (1000.0, 1000.0)


In [9]:
import os
import numpy as np
import rasterio
from rasterio.transform import from_origin
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.mask import mask
import geopandas as gpd

# ----------------------------
# Paths
# ----------------------------
asc_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/HYDEdata/baseline/1950_lu"
mask_shp = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_raw/angolaadminboundaries/ago_admbnda_gadm_ine_ocha_20180904/ago_admbnda_adm0_gadm_ine_ocha_20180904.shp"
out_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/hyde_lu"
os.makedirs(out_dir, exist_ok=True)

# -------------------------------
# Load Angola shapefile
# -------------------------------
angola = gpd.read_file(mask_shp)
angola = angola.to_crs("EPSG:4326")  # make sure mask is in WGS84
angola_geom = [angola.unary_union]    # rasterio expects a geometry collection

# -------------------------------
# Process each ASC
# -------------------------------
for asc_file in os.listdir(asc_dir):
    if not asc_file.endswith(".asc"):
        continue

    asc_path = os.path.join(asc_dir, asc_file)
    base_name = os.path.splitext(asc_file)[0]
    print(f"\nProcessing {asc_file} ...")

    # -------------------------------
    # Read ASC header safely
    # -------------------------------
    with open(asc_path, 'r', encoding='latin1', errors='ignore') as f:
        header = {}
        for _ in range(6):
            line = f.readline()
            key, value = line.strip().split(None, 1)
            header[key.lower()] = float(value)

        ncols = int(header['ncols'])
        nrows = int(header['nrows'])
        xllcorner = header['xllcorner']
        yllcorner = header['yllcorner']
        cellsize = header['cellsize']
        nodata = header.get('nodata_value', -9999)

        # read raster data safely
        data = np.loadtxt(f, dtype=np.float32)
        data[data == nodata] = np.nan

    # -------------------------------
    # Define raster transform
    # -------------------------------
    transform = from_origin(xllcorner, yllcorner + nrows * cellsize, cellsize, cellsize)

    # -------------------------------
    # Save temporary raster with EPSG:4326
    # -------------------------------
    tmp_raster = os.path.join(out_dir, f"{base_name}_wgs84.tif")
    with rasterio.open(
        tmp_raster,
        'w',
        driver='GTiff',
        height=nrows,
        width=ncols,
        count=1,
        dtype=data.dtype,
        crs="EPSG:4326",
        transform=transform,
        nodata=np.nan
    ) as dst:
        dst.write(data, 1)

    # -------------------------------
    # Clip raster by Angola mask
    # -------------------------------
    with rasterio.open(tmp_raster) as src:
        out_image, out_transform = mask(
            src, angola_geom, crop=True, nodata=np.nan
        )
        out_meta = src.meta.copy()
        out_meta.update({
            "height": out_image.shape[1],
            "width": out_image.shape[2],
            "transform": out_transform,
            "nodata": np.nan,
            "compress": "lzw"
        })

    # -------------------------------
    # Reproject to UTM 33S
    # -------------------------------
    utm_tif = os.path.join(out_dir, f"{base_name}_utm33s.tif")
    dst_crs = "EPSG:32733"

    with rasterio.open(utm_tif, 'w', **out_meta) as dst:
        transform, width, height = calculate_default_transform(
            out_meta['crs'], dst_crs, out_meta['width'], out_meta['height'], *src.bounds
        )
        reprojected = np.empty((height, width), dtype=np.float32)

        reproject(
            source=out_image[0],
            destination=reprojected,
            src_transform=out_meta['transform'],
            src_crs=out_meta['crs'],
            dst_transform=transform,
            dst_crs=dst_crs,
            resampling=Resampling.nearest
        )

        meta = out_meta.copy()
        meta.update({
            "height": height,
            "width": width,
            "transform": transform,
            "crs": dst_crs
        })

        dst.write(reprojected, 1)

    # -------------------------------
    # Remove temporary raster
    # -------------------------------
    os.remove(tmp_raster)
    print(f"✅ Saved: {utm_tif}")

print("\n🎉 All ASC files clipped and reprojected to UTM33S.")


  angola_geom = [angola.unary_union]



Processing cropland1950AD.asc ...


CRSError: Missing src_crs.

In [18]:
import os
import re
import numpy as np
import pandas as pd
import rasterio

# ----------------------------
# Paths
# ----------------------------
lu_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/hyde_lu"
out_csv = os.path.join(lu_dir, "hyde_landuse_percent_cover.csv")

# ----------------------------
# Helper: extract year and class
# ----------------------------
def parse_filename(filename):
    # Match pattern like 'grazing1950.tif' or 'cropland1960AD.tif'
    match = re.match(r"([a-zA-Z_]+)(\d{4})(?:AD)?\.tif$", filename)
    if not match:
        return None, None
    land_class, year = match.groups()
    land_class = land_class.strip("_").lower()
    year = int(year)
    return land_class, year

# ----------------------------
# Loop through raster files
# ----------------------------
records = []
for f in sorted(os.listdir(lu_dir)):
    if not f.endswith(".tif") or f.startswith("._"):
        continue

    land_class, year = parse_filename(f)
    if land_class is None or year is None:
        print(f"⚠️ Skipping unrecognized file name: {f}")
        continue

    path = os.path.join(lu_dir, f)
    print(f"Processing {f} ...")

    with rasterio.open(path) as src:
        data = src.read(1).astype(np.float32)
        nodata = src.nodata
        if nodata is not None:
            data[data == nodata] = np.nan

    # Skip completely empty rasters
    valid = np.isfinite(data)
    if not np.any(valid):
        print(f"⚠️ No valid data in {f}, skipping.")
        continue

    # ----------------------------
    # Auto-detect scale (0–1 vs 0–100)
    # ----------------------------
    dmin, dmax = np.nanmin(data), np.nanmax(data)
    mean_val = np.nanmean(data)

    if dmax <= 1.0:
        # fractional cover, convert to percent
        percent_cover = mean_val * 100.0
    elif dmax > 1.0 and dmax <= 100.0:
        # already in percent
        percent_cover = mean_val
    else:
        # unexpected values (e.g., 0–10000)
        percent_cover = mean_val / 100.0
        print(f"⚠️ Warning: {f} has unusually large values (max={dmax:.2f}), scaled down.")

    records.append({
        "year": year,
        "land_use_class": land_class,
        "percent_cover": round(percent_cover, 4)
    })

# ----------------------------
# Save results
# ----------------------------
if records:
    df = pd.DataFrame(records).sort_values(["year", "land_use_class"])
    df.to_csv(out_csv, index=False)
    print("\n✅ Percent cover summary saved to:")
    print(out_csv)
    print(df.head())
else:
    print("❌ No valid raster files processed.")


Processing conv_rangeland1950.tif ...
Processing conv_rangeland1960AD.tif ...
Processing cropland1950.tif ...
Processing cropland1960.tif ...
Processing grazing1950.tif ...
Processing grazing1960.tif ...
Processing ir_norice1950.tif ...
Processing ir_norice1960.tif ...
Processing ir_rice1950.tif ...
Processing ir_rice1960.tif ...
Processing pasture1950.tif ...
Processing pasture1960.tif ...
Processing rangeland1950.tif ...
Processing rangeland1960.tif ...
Processing rf_norice1950.tif ...
Processing rf_norice1960.tif ...
Processing rf_rice1950.tif ...
Processing rf_rice1960.tif ...
Processing tot_irri1950.tif ...
Processing tot_irri1960.tif ...
Processing tot_rainfed1950.tif ...
Processing tot_rainfed1960.tif ...
Processing tot_rice1950.tif ...
Processing tot_rice1960.tif ...

✅ Percent cover summary saved to:
/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/hyde_lu/hyde_landuse_percent_cover.csv
   year  land_use_class  percent_cover
0  1950  conv_rangeland       0.1488

In [1]:
import os
import re
import numpy as np
import rasterio
from rasterio import shutil as rio_shutil
import pandas as pd

# ----------------------------
# Paths
# ----------------------------
lu_dir = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/hyde_lu"
out_dir = os.path.join(lu_dir, "hyde_lu_percent")
os.makedirs(out_dir, exist_ok=True)

out_csv = os.path.join(out_dir, "hyde_landuse_percent_summary.csv")

# ----------------------------
# Helper: extract year and class
# ----------------------------
def parse_filename(filename):
    match = re.match(r"([a-zA-Z_]+)(\d{4})(?:AD)?\.tif$", filename)
    if not match:
        return None, None
    land_class, year = match.groups()
    land_class = land_class.strip("_").lower()
    year = int(year)
    return land_class, year

# ----------------------------
# Loop through raster files
# ----------------------------
records = []
for f in sorted(os.listdir(lu_dir)):
    if not f.endswith(".tif") or f.startswith("._"):
        continue

    land_class, year = parse_filename(f)
    if land_class is None or year is None:
        print(f"⚠️ Skipping unrecognized file name: {f}")
        continue

    path = os.path.join(lu_dir, f)
    print(f"Processing {f} ...")

    with rasterio.open(path) as src:
        profile = src.profile
        data = src.read(1).astype(np.float32)
        nodata = src.nodata
        if nodata is not None:
            data[data == nodata] = np.nan

    if not np.any(np.isfinite(data)):
        print(f"⚠️ No valid data in {f}, skipping.")
        continue

    # Detect scale and convert to percent
    dmax = np.nanmax(data)
    if dmax <= 1.0:
        percent_data = data * 100.0
    elif 1.0 < dmax <= 100.0:
        percent_data = data
    else:
        percent_data = data / 100.0
        print(f"⚠️ {f} scaled down (max={dmax:.2f})")

    # Write new raster
    out_path = os.path.join(out_dir, f"{land_class}_{year}_percent.tif")
    new_profile = profile.copy()
    new_profile.update(dtype=rasterio.float32, nodata=np.nan)
    with rasterio.open(out_path, "w", **new_profile) as dst:
        dst.write(percent_data.astype(np.float32), 1)

    # Add record for summary
    records.append({
        "year": year,
        "land_use_class": land_class,
        "mean_percent": float(np.nanmean(percent_data)),
        "min": float(np.nanmin(percent_data)),
        "max": float(np.nanmax(percent_data))
    })

# ----------------------------
# Save summary CSV
# ----------------------------
if records:
    df = pd.DataFrame(records).sort_values(["year", "land_use_class"])
    df.to_csv(out_csv, index=False)
    print("\n✅ Percent cover rasters saved to:")
    print(out_dir)
    print("✅ Summary table saved to:")
    print(out_csv)
    print(df.head())
else:
    print("❌ No valid rasters processed.")


Processing conv_rangeland1950.tif ...
Processing conv_rangeland1960AD.tif ...
Processing cropland1950.tif ...
Processing cropland1960.tif ...
Processing grazing1950.tif ...
Processing grazing1960.tif ...
Processing ir_norice1950.tif ...
Processing ir_norice1960.tif ...
Processing ir_rice1950.tif ...
Processing ir_rice1960.tif ...
Processing pasture1950.tif ...
Processing pasture1960.tif ...
Processing rangeland1950.tif ...
Processing rangeland1960.tif ...
Processing rf_norice1950.tif ...
Processing rf_norice1960.tif ...
Processing rf_rice1950.tif ...
Processing rf_rice1960.tif ...
Processing tot_irri1950.tif ...
Processing tot_irri1960.tif ...
Processing tot_rainfed1950.tif ...
Processing tot_rainfed1960.tif ...
Processing tot_rice1950.tif ...
Processing tot_rice1960.tif ...

✅ Percent cover rasters saved to:
/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/hyde_lu/hyde_lu_percent
✅ Summary table saved to:
/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed

In [17]:
import rasterio
import numpy as np

path = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/hyde_lu/grazing1950.tif"
with rasterio.open(path) as src:
    data = src.read(1).astype(np.float32)
    data[data == src.nodata] = np.nan
    print("Mean:", np.nanmean(data))
    print("Max:", np.nanmax(data))


Mean: 31.874432
Max: 69.62984
