## Technical check

### Checking that my covariate data prep procedures abide by fao norms

In [None]:
# bioclim --check how resampling was done --as i didn't change anything about the data, i don't think this will apply
bioclimraster_folder = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/bioclimatic32733/"


In [3]:
import rasterio
import numpy as np
import os
import glob
import pandas as pd

# --- Folder with your DEM / terrain rasters ---
terrain_folder = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/DEM_characteristics"

# --- List of files and their type (continuous vs categorical) ---
terrain_layers = {
    "MRVBF.tif": "continuous",
    "aspect_cos.tif": "continuous",
    "aspect_sin.tif": "continuous",
    "aspect.tif": "continuous",
    "aspect_classes.tif": "categorical",
    "slope.tif": "continuous",
    "RLD.tif": "continuous",
    "relief.tif": "continuous",
    "roughness.tif": "continuous",
    "DEM_filledfiltered.tif": "continuous",
    "TWI_300.tif": "continuous",
    "flow_accumulation.tif": "continuous",
    "curve_plan.tif": "continuous",
    "curv_profile.tif": "continuous",
    "ridge_levels.tif": "categorical",
    "valley_depth2.tif": "continuous"
}

# --- Function to check gdal/qgis hisotry to confirm resampling method resampling method ---


KeyboardInterrupt: 

In [1]:
import rasterio
import numpy as np
import os
import glob

# --- Input / output paths ---
raster_folder = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/DEM_characteristics"
output_folder = os.path.join(raster_folder, "cleaned")
os.makedirs(output_folder, exist_ok=True)

# --- Define bad values per raster ---
bad_values_dict = {
    "MRVBF": -99999,
    "aspect_cos": -3.40282e+38,
    "aspect_sin": -3.40282e+38,
    "aspect": -9999,
    "slope": -9999,
    "RLD": -3.40282e+38,
    "relief": -9999,
    "roughness": -9999,
    "DEM_filledfiltered": -99999,
    "twi_300m": -3.40282e+38,
    "flow_accumulation": -99999,
    "curv_plan": -99999,
    "curv_profile": -99999,
    "valley_depth2": -99999,
    "ridge_levels": -99999
}

# --- Process each raster ---
raster_files = glob.glob(os.path.join(raster_folder, "*.tif"))

for raster_path in raster_files:
    raster_name = os.path.splitext(os.path.basename(raster_path))[0]

    # Identify if we have a bad value mapping
    bad_value = bad_values_dict.get(raster_name, None)

    with rasterio.open(raster_path) as src:
        meta = src.meta.copy()
        data = src.read(1)

        # Replace bad values with NaN
        if bad_value is not None:
            mask = (data == bad_value) | np.isnan(data)
        else:
            mask = np.isnan(data)  # fallback

        data = np.where(mask, np.nan, data)

        # Determine if categorical (integer) or continuous (float)
        if raster_name in ["aspect_classes", "ridge_levels"]:
            dtype = rasterio.int32
            nodata = -9999  # use a NoData integer placeholder for categorical
            data = np.where(np.isnan(data), nodata, data).astype(dtype)
        else:
            dtype = rasterio.float32
            nodata = np.nan
            data = data.astype(dtype)

        meta.update(dtype=dtype, nodata=nodata)

        # Save cleaned raster
        out_path = os.path.join(output_folder, os.path.basename(raster_path))
        with rasterio.open(out_path, 'w', **meta) as dst:
            dst.write(data, 1)

    print(f"✅ Cleaned {raster_name}")


  data = np.where(np.isnan(data), nodata, data).astype(dtype)


✅ Cleaned aspect_classes
✅ Cleaned aspect
✅ Cleaned MRVBF
✅ Cleaned twi_300m
✅ Cleaned slope
✅ Cleaned dem_filledfiltered
✅ Cleaned flow_accumulation
✅ Cleaned valleydepth2
✅ Cleaned ridge_levels
✅ Cleaned aspect_sin
✅ Cleaned aspect_cos
✅ Cleaned RLD
✅ Cleaned relief
✅ Cleaned roughness


## resolution check 
### checking resolution abides by fao norms

In [1]:
import rasterio
import glob
import os

# --- Input folder (your DEM rasters) ---
terrain_folder = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/DEM_characteristics"

# --- Get list of DEM files ---
terrain_files = sorted(glob.glob(os.path.join(terrain_folder, "*.tif")))

print("📏 DEM raster resolutions:")
for raster_path in terrain_files:
    with rasterio.open(raster_path) as src:
        res_x, res_y = src.res  # pixel size in map units (usually meters)
        print(f"{os.path.basename(raster_path)} → {res_x} x {res_y}")


📏 DEM raster resolutions:
MRVBF.tif → 300.0 x 300.0
RLD.tif → 300.0 x 300.0
aspect.tif → 30.591177525448536 x 30.59117752441045
aspect_classes.tif → 0.0002777776034428655 x 0.00027777786828493456
aspect_cos.tif → 30.591177525448536 x 30.59117752441045
aspect_sin.tif → 30.591177525448536 x 30.59117752441045
dem_filledfiltered.tif → 300.0 x 300.0
flow_accumulation.tif → 300.0 x 300.0
relief.tif → 300.0 x 300.0
ridge_levels.tif → 300.0 x 300.0
roughness.tif → 300.0 x 300.0
slope.tif → 30.591177525448536 x 30.59117752441045
twi_300m.tif → 300.0 x 300.0
valleydepth2.tif → 300.0 x 300.0


In [2]:
# og dem resolutionimport rasterio
import os

# --- Path to your original DEM ---
og_dem = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_intermediary/dem_utm.tif"

print("📏 Original DEM resolution:")
with rasterio.open(og_dem) as src:
    res_x, res_y = src.res  # pixel size in map units (usually meters)
    print(f"{os.path.basename(og_dem)} → {res_x} x {res_y}")


📏 Original DEM resolution:
dem_utm.tif → 30.591177524925087 x 30.591177524925087


In [None]:
## setting resolution to 300m (the resolution that matches a majority of my covariates) 

import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
import glob
import os

# --- Input and output folders ---
input_folder = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/DEM_characteristics/"
output_folder = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/DEM_characteristics_300m/"

os.makedirs(output_folder, exist_ok=True)

# --- Target resolution ---
target_res = (300, 300)  # (xres, yres) in meters

# --- Loop through all .tif files ---
for raster_path in glob.glob(os.path.join(input_folder, "*.tif")):
    with rasterio.open(raster_path) as src:
        # Define target transform and metadata
        transform, width, height = calculate_default_transform(
            src.crs,
            src.crs,   # same CRS, just resampling
            src.width,
            src.height,
            *src.bounds,
            resolution=target_res
        )

        kwargs = src.meta.copy()
        kwargs.update({
            "crs": src.crs,
            "transform": transform,
            "width": width,
            "height": height
        })

        # Output path
        out_path = os.path.join(output_folder, os.path.basename(raster_path))

        # Perform reprojection / resampling
        with rasterio.open(out_path, "w", **kwargs) as dst:
            for i in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=src.crs,
                    resampling=Resampling.average  # use bilinear for continuous, nearest for categorical
                )

        print(f"✅ Resampled {os.path.basename(raster_path)} → 300 m")

print("🎯 All rasters saved in:", output_folder)

In [None]:
## all covariates resolution check

import rasterio
import geopandas as gpd
import glob
import os

# --- Paths ---
bioclimraster_folder = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/bioclimatic32733/"
ecosyst_path = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/labelled_ecosystems32733.tif"
soil_path = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/angola_soil_data2_32733.gpkg"
lithology_path = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/lithology2.gpkg"
landsurface_path = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/landsurfaceforms/landsurfaceforms.tif"

print("📏 Checking raster resolutions...")

# --- Bioclimatic rasters ---
bioclim_files = sorted(glob.glob(os.path.join(bioclimraster_folder, "*.tif")))
for raster_path in bioclim_files:
    with rasterio.open(raster_path) as src:
        res_x, res_y = src.res
        print(f"Bioclim {os.path.basename(raster_path)} → {res_x:.2f} x {res_y:.2f}")

# --- Ecosystem raster ---
with rasterio.open(ecosyst_path) as src:
    res_x, res_y = src.res
    print(f"Ecosystem.tif → {res_x:.2f} x {res_y:.2f}")

# --- Landsurface raster ---
with rasterio.open(landsurface_path) as src:
    res_x, res_y = src.res
    print(f"Landsurfaceforms.tif → {res_x:.2f} x {res_y:.2f}")

print("\n🌍 Checking vector datasets (no true resolution)...")

# --- Soil vector ---
soil = gpd.read_file(soil_path)
print(f"Soil vector: {len(soil)} features, bounds = {soil.total_bounds}")

# --- Lithology vector ---
lith = gpd.read_file(lithology_path)
print(f"Lithology vector: {len(lith)} features, bounds = {lith.total_bounds}")
