<a href="https://colab.research.google.com/github/joekelly211/masfi/blob/main/3_predictors.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports, directories and global functions

In [None]:
# Mount Google Drive and make the project folder importable
import os
import sys
from google.colab import drive

# Base directory of the project on the mounted drive
base_dir = "/gdrive/Shareddrives/masfi"

drive.mount('/gdrive', force_remount=True)

# Add the resolved base directory to the module search path, once only
_path_to_add = os.path.realpath(base_dir)
if sys.path.count(_path_to_add) == 0:
    sys.path.append(_path_to_add)

In [None]:
# Installs
%%capture
!pip install astropy
!pip install earthengine-api
!pip install geopandas

In [None]:
!# Reload imports, replacing those in the cache
%load_ext autoreload
%autoreload 2
# Imports
from astropy.convolution import convolve, Gaussian2DKernel
import csv
import ee
import geopandas as gpd
import glob
from google.colab import runtime, userdata
import ipywidgets as widgets
from math import sqrt, cos, radians
import matplotlib.pyplot as plt
from numba import jit
import numpy as np
from os import makedirs, remove
from os.path import exists, join
from osgeo import gdal, ogr, gdalconst
import pandas as pd
from pathlib import Path
import requests
from scipy import ndimage
from scipy.ndimage import maximum_filter, minimum_filter, uniform_filter, distance_transform_edt
from scipy.ndimage import label, sum as ndi_sum
from shutil import copyfile
from time import sleep
import zipfile

In [None]:
# 1_areas directories
areas_dir = join(base_dir, "1_areas")
polygons_dir, masks_dir = join(areas_dir, "polygons"), join(areas_dir, "masks")
# Note: despite the name, this is the path of the template raster file
template_dir = join(areas_dir, "template.tif")

# 3_predictors directories
predictors_dir = join(base_dir, "3_predictors")
ee_dir = join(predictors_dir, "earth_engine")
user_upload_dir = join(predictors_dir, "user_upload")
glad_lcluc_dir = join(predictors_dir, 'glad_lcluc')
resampled_dir = join(predictors_dir, "resampled")
continuous_final_dir = join(predictors_dir, "continuous_final")
binary_dir = join(predictors_dir, 'binary')
edge_effects_dir = join(predictors_dir, 'binary_edge_effects')
coast_dir = join(predictors_dir, 'coast')
topography_temp_dir = join(predictors_dir, "topo_temp")
topography_final_dir = join(predictors_dir, "topo_final")
topography_corrected_temp_dir = join(predictors_dir, "topo_corrected_temp")
topography_corrected_final_dir = join(predictors_dir, "topo_corrected_final")
predictor_final_dir = join(predictors_dir, 'final')

# 6_scenarios directories
scenarios_dir = join(base_dir, "6_scenarios")

# Create directories (the topo_corrected_* paths are only defined here, not created)
for directory_to_create in (
    ee_dir,
    user_upload_dir,
    glad_lcluc_dir,
    resampled_dir,
    continuous_final_dir,
    binary_dir,
    edge_effects_dir,
    coast_dir,
    topography_temp_dir,
    topography_final_dir,
    predictor_final_dir,
    scenarios_dir,
):
    makedirs(directory_to_create, exist_ok=True)

In [None]:
# export_array_as_tif function
nodatavalue = -1111111
compress = True
def export_array_as_tif(input_array, output_tif, template=template_dir, nodatavalue=nodatavalue, compress=compress):
  """Write a 2D array to a single-band Float32 GeoTIFF georeferenced like a template raster.

  Args:
    input_array: Array written to band 1 of the output.
    output_tif: Path of the GeoTIFF to create.
    template: Path of the raster whose band-1 size, geotransform and projection are copied.
    nodatavalue: Value registered as the band's nodata value.
    compress: Any truthy value writes DEFLATE-compressed output (PREDICTOR=2, ZLEVEL=9);
      any falsy value writes an uncompressed file.

  Raises:
    FileNotFoundError: If GDAL returns None when opening the template.
  """
  template_path = template
  template = gdal.Open(template_path)
  if template is None:
    raise FileNotFoundError(f"Template raster could not be opened: {template_path}")
  template_band = template.GetRasterBand(1)
  template_dimensions, template_projection = template.GetGeoTransform(), template.GetProjection()
  # A single truthiness test guarantees 'driver' is always bound (e.g. compress=None)
  creation_options = ["COMPRESS=DEFLATE","PREDICTOR=2","ZLEVEL=9"] if compress else []
  driver = gdal.GetDriverByName("GTiff").Create(output_tif, template_band.XSize, template_band.YSize, bands=1, eType=gdal.GDT_Float32,
                                                options=creation_options)
  driver.GetRasterBand(1).WriteArray(input_array)
  driver.GetRasterBand(1).SetNoDataValue(nodatavalue)
  driver.SetGeoTransform(template_dimensions)
  driver.SetProjection(template_projection)
  # Flush and release the datasets explicitly so the file is complete on return
  driver.FlushCache()
  driver = template_band = template = None

# Burn a polygon to raster
def burn_polygon_to_raster(raster_path, polygon_path, fixed=True, fixed_value=1, column_name=None, all_touched=True):
    """Rasterize a vector layer into band 1 of an existing raster, in place.

    Args:
        raster_path: Path of the raster to modify (opened in update mode).
        polygon_path: Path of the vector file; its default layer is burned.
        fixed: If True, burn 'fixed_value' everywhere the layer covers. If False,
            burn each feature's attribute value instead.
        fixed_value: Value burned when 'fixed' is True.
        column_name: Attribute field used when 'fixed' is False. Defaults to the
            layer's first field.
        all_touched: If True, pass ALL_TOUCHED=TRUE to the rasterizer.

    Raises:
        FileNotFoundError: If GDAL/OGR returns None when opening either input.
    """
    raster = gdal.Open(raster_path, gdal.GA_Update)
    if raster is None:
        raise FileNotFoundError(f"Raster could not be opened for update: {raster_path}")
    vector = ogr.Open(polygon_path)
    if vector is None:
        raise FileNotFoundError(f"Vector file could not be opened: {polygon_path}")
    layer = vector.GetLayer()
    options = ["ALL_TOUCHED=TRUE"] if all_touched else []
    rasterize_kwargs = {}
    if fixed:
        rasterize_kwargs["burn_values"] = [fixed_value]
    else:
        # Burn values are not used when ATTRIBUTE is set, so none are passed
        options.append(f"ATTRIBUTE={column_name or layer.GetLayerDefn().GetFieldDefn(0).GetName()}")
    gdal.RasterizeLayer(raster, [1], layer, options=options, **rasterize_kwargs)
    raster.FlushCache()
    # Dereference to close the datasets
    layer = None
    raster = vector = None

# Download Earth Engine rasters

In [None]:
# Prerequisites:
# - Enable the Google Earth Engine API at Google Cloud https://console.cloud.google.com/apis/dashboard
#   (walkthrough: https://github.com/googlecolab/colabtools/issues/4228#issuecomment-1859068706)
# - Store the project ID in the Colab 'secrets' tab (left sidebar) under the name 'google_cloud_project'
# Read the Google Cloud project ID from the Colab secret
ee_project = userdata.get('google_cloud_project')

# Authenticate Earth Engine, then initialise the client against that project
ee.Authenticate()
ee.Initialize(project=ee_project)

In [None]:
# Edit this section to change which Earth Engine datasets are downloaded.
# Each entry needs:
#   "ee_dataset_name": prefix used in the downloaded filenames
#   "ee_dataset_type": "ImageCollection" or "Image"
#   "ee_paths": list of Earth Engine asset paths; every band of each asset is downloaded

# Warning: Earth Engine uses 'nearest neighbour' to resample rasters to the desired extent and resolution before exporting.
# This creates artifacts if the data is continuous, such as DEMs (elevation) or other topographic metrics.
# These should be downloaded from the original source, uploaded to '/user_upload' and resampled in the next section, checking the option for 'bilinear'.

# Check datasets in https://code.earthengine.google.com/ with:
# var assetList = ee.data.listAssets("projects/JRC/TMF/v1_2023/");
# print(assetList);

ee_datasets = [

    {
        "ee_dataset_name": "tmf",
        "ee_dataset_type": "ImageCollection",
        "ee_paths": [
            "projects/JRC/TMF/v1_2023/AnnualChanges",
            "projects/JRC/TMF/v1_2023/AnnualDisruptionObs2023",
            "projects/JRC/TMF/v1_2023/TransitionMap_MainClasses",
            "projects/JRC/TMF/v1_2023/TransitionMap_Subtypes",
            "projects/JRC/TMF/v1_2023/Ndisturb_C2_1982_2022",
        ],
    },  # trailing comma so the optional entry below can be uncommented as-is
    # {
    #     "ee_dataset_name": "glad",
    #     "ee_dataset_type": "Image",
    #     "ee_paths": [
    #                 # 'projects/glad/GLCLU2020/Forest_gain',
    #                 'projects/glad/GLCLU2020/Forest_height_2000',
    #                 'projects/glad/GLCLU2020/Forest_height_2005',
    #                 'projects/glad/GLCLU2020/Forest_height_2010',
    #                 'projects/glad/GLCLU2020/Forest_height_2015',
    #                 'projects/glad/GLCLU2020/Forest_height_2020',
    #                 'projects/glad/GLCLU2020/Forest_height_disturbance',
    #                 'projects/glad/GLCLU2020/Forest_height_netgain',
    #                 'projects/glad/GLCLU2020/Forest_height_netloss',
    #                 # 'projects/glad/GLCLU2020/Forest_loss',
    #                 # 'projects/glad/GLCLU2020/Forest_type',
    #                 'projects/glad/GLCLU2020/LCLUC',
    #                 'projects/glad/GLCLU2020/LCLUC_2000',
    #                 'projects/glad/GLCLU2020/LCLUC_2020',
    #                 'projects/glad/GLCLU2020/Vegetation_cover_2000',
    #                 'projects/glad/GLCLU2020/Vegetation_cover_2005',
    #                 'projects/glad/GLCLU2020/Vegetation_cover_2010',
    #                 'projects/glad/GLCLU2020/Vegetation_cover_2015',
    #                 'projects/glad/GLCLU2020/Vegetation_cover_2020',
    #                 'projects/glad/GLCLU2020/Vegetation_cover_gain',
    #                 'projects/glad/GLCLU2020/Vegetation_cover_loss',
    #                 'projects/glad/GLCLU2020/Water_2000',
    #                 'projects/glad/GLCLU2020/Water_2005',
    #                 'projects/glad/GLCLU2020/Water_2010',
    #                 'projects/glad/GLCLU2020/Water_2015',
    #                 'projects/glad/GLCLU2020/Water_2020',
    #                 # 'projects/glad/GLCLU2020/Water_dynamics',
    #                 # 'projects/glad/GLCLU2020/Water_dynamics_classes',
    #     ]
    # }
]

In [None]:
# List the filename of every raster (one per band of each asset) that will be downloaded
ee_raster_list = []
for ee_dataset in ee_datasets:
    ee_dataset_name = ee_dataset['ee_dataset_name']
    ee_dataset_type = ee_dataset['ee_dataset_type']
    ee_paths = ee_dataset['ee_paths']
    for ee_path in ee_paths:
        # Anything that is not an ImageCollection is treated as a single Image
        if ee_dataset_type != 'ImageCollection':
            ee_image = ee.Image(ee_path)
            ee_bands = ee_image.bandNames().getInfo()
        else:
            ee_image = ee.ImageCollection(ee_path)
            # Band names are taken from the first image of the collection
            ee_bands = [b['id'] for b in ee_image.getInfo()['features'][0]['bands']]
        for ee_band in ee_bands:
            ee_tif_filename = f"{ee_dataset_name}_{ee_path.split('/')[-1]}_{ee_band}.tif"
            ee_raster_list.append(ee_tif_filename)

# Show the list in reverse order
ee_raster_list = ee_raster_list[::-1]
ee_raster_list

In [None]:
# Check whether using mydrive as opposed to a shared gdrive
mydrive_mounted = False

# Earth Engine download progress
ee_progress_index = 0
ee_progress_label = widgets.Label(f"Earth Engine download progress: {ee_progress_index}/{len(ee_raster_list)}")
display(ee_progress_label)

# Load template and set Earth Engine geometry (the polygon file is read once)
template_polygon_dir = join(polygons_dir, 'template.gpkg')
template_area = gpd.read_file(template_polygon_dir)["geometry"].iloc[0]
template_coords = list(template_area.exterior.coords)
ee_geometry = ee.Geometry.Polygon(template_coords)

# Download Earth Engine datasets
for ee_dataset in ee_datasets:
    ee_dataset_name = ee_dataset['ee_dataset_name']
    ee_dataset_type = ee_dataset['ee_dataset_type']
    ee_paths = ee_dataset['ee_paths']
    # Loop through Earth Engine paths
    for ee_path in ee_paths:
        # Identify bands. Anything that is not an ImageCollection is treated as
        # an Image, matching the verification cell above.
        if ee_dataset_type == 'ImageCollection':
            ee_image = ee.ImageCollection(ee_path)
            ee_bands = [b['id'] for b in ee_image.getInfo()['features'][0]['bands']]
        else:
            ee_image = ee.Image(ee_path)
            ee_bands = ee_image.bandNames().getInfo()
        # Loop through bands
        for ee_band in reversed(ee_bands):
            # Set filename and directory of downloaded raster
            ee_tif_filename = f"{ee_dataset_name}_{ee_path.split('/')[-1]}_{ee_band}.tif"
            ee_tif_dir = join(ee_dir, ee_tif_filename)
            # Location in the mounted drive where the exported file is expected to appear
            ee_temp_dir = join("/gdrive/MyDrive", ee_tif_filename)
            if mydrive_mounted: ee_temp_dir = join("/content/drive/MyDrive/", ee_tif_filename)
            # A finished export from an earlier (interrupted) run only needs moving
            if exists(ee_temp_dir):
                copyfile(ee_temp_dir, ee_tif_dir)
                remove(ee_temp_dir)
            # If the final raster is still missing, export it from Earth Engine
            if not exists(ee_tif_dir):
                if ee_dataset_type == 'ImageCollection':
                    image_selected = ee_image.qualityMosaic(ee_band).select([ee_band])
                    resolution = ee_image.first().projection().nominalScale().getInfo()
                else:
                    image_selected = ee_image.select([ee_band])
                    resolution = ee_image.select(0).projection().nominalScale().getInfo()
                ee_task = ee.batch.Export.image.toDrive(image=image_selected.toFloat(),
                                                        description=ee_tif_filename[:-4],
                                                        scale=resolution,
                                                        region=ee_geometry,
                                                        maxPixels=10000000000,
                                                        fileNamePrefix=ee_tif_filename[:-4],
                                                        crs='EPSG:4326',
                                                        fileFormat='GeoTIFF')
                ee_task.start()
                # Wait until the exported file appears in the mounted drive.
                # The task status is polled until it reaches a terminal state.
                # After COMPLETED the loop only waits for the file to show up.
                ee_task_finished = False
                ee_task_failed = False
                while not exists(ee_temp_dir):
                    if not ee_task_finished:
                        ee_task_status = ee_task.status()
                        ee_task_state = ee_task_status['state']
                        if ee_task_state == 'COMPLETED':
                            ee_task_finished = True
                        elif ee_task_state == 'FAILED' or ee_task_state == 'CANCELLED':
                            # 'error_message' may be absent (e.g. cancelled tasks)
                            print(f"{ee_tif_filename}:{ee_task_status.get('error_message', ee_task_state)}")
                            ee_task_failed = True
                            break
                    sleep(1)
                # A failed or cancelled export never produces a file. Skip the copy
                # instead of waiting forever for it.
                if not ee_task_failed:
                    copyfile(ee_temp_dir, ee_tif_dir)
                    remove(ee_temp_dir)
            # Update Earth Engine download progress
            ee_progress_index += 1
            ee_progress_label.value = f"Earth Engine download progress: {ee_progress_index}/{len(ee_raster_list)}"

# Check Earth Engine tasks here: https://code.earthengine.google.com/tasks

# Check Earth Engine tasks here: https://code.earthengine.google.com/tasks

# GLAD LCLUC

In [None]:
# GLAD LCLUC packs several land cover / land use types into one raster, each
# occupying its own value range and carrying a continuous metric.
# The ranges below follow the legend:
# https://glad.umd.edu/sites/default/files/legend_0.xlsx
# Each range is written to its own raster for better modelling.
# Should do before resampling.

lcluc_dict = {
    'terra_vegetation_cover_percent': (0, 24),
    'terra_stable_tree_m': (25, 48),
    'wetland_vegetation_cover_percent': (100, 124),
    'wetland_stable_tree_m': (125, 148),
    'open_surface_water_percent_of_year': (200, 207),
    'snow_ice': (241, 241),
    'cropland': (244, 244),
    'built_up': (250, 250),
    'ocean': (254, 254),
}

for lcluc_raster in os.listdir(ee_dir):
  if 'LCLUC' not in lcluc_raster:
    continue
  lcluc_path = join(ee_dir, lcluc_raster)
  luluc_array = gdal.Open(lcluc_path).ReadAsArray()
  for key, (lower, upper) in lcluc_dict.items():
    # Candidate output paths: continuous version and 1-0 binary version
    split_luluc_filename = f"{lcluc_raster[:-4]}_{key}.tif"
    split_luluc_filename_binary = f"{lcluc_raster[:-4]}_{key}_binary.tif"
    split_luluc_dir = join(glad_lcluc_dir, split_luluc_filename)
    split_luluc_dir_binary = join(glad_lcluc_dir, split_luluc_filename_binary)
    # Skip classes that were already exported in either form
    if exists(split_luluc_dir) or exists(split_luluc_dir_binary):
      continue
    # Keep values inside the class range, everything else becomes 0
    split_luluc_mask = (luluc_array >= lower) & (luluc_array <= upper)
    split_luluc_array = np.where(split_luluc_mask, luluc_array, 0)
    # Ignore classes covering less than 0.1% of the raster
    non_zero_percentage = np.count_nonzero(split_luluc_array) / split_luluc_array.size * 100
    if non_zero_percentage < 0.1:
      continue
    # A class with a single non-zero value carries no gradient: store it as a 1-0 binary raster
    unique_non_zero_values = np.unique(split_luluc_array[split_luluc_array > 0])
    if len(unique_non_zero_values) == 1:
      split_luluc_array = np.where(split_luluc_array > 0, 1, 0)
      split_luluc_dir = split_luluc_dir_binary
    export_array_as_tif(split_luluc_array, split_luluc_dir, template=lcluc_path)

# Resample

In [None]:
# Build a {filename: quoted data type} lookup of every file in the Earth Engine,
# user upload and GLAD LCLUC directories. Later directories override earlier
# ones when a filename is repeated.
resample_dict = {}
resample_sources = (
    (ee_dir, "'categorical'"),
    (user_upload_dir, "'categorical'"),
    (glad_lcluc_dir, "'continuous'"),
)
for resample_source_dir, resample_default_type in resample_sources:
    for resample_raster in os.listdir(resample_source_dir):
        resample_dict[f'{resample_raster}'] = resample_default_type
resample_dict = dict(sorted(resample_dict.items()))

# Print a ready-to-paste dictionary: select the rasters to resample in the next
# cell and verify each data type (categorical or continuous)
print("selected_original_rasters = {")
for key, value in resample_dict.items():
    print(f'"{key}": {value},')
print("}")

In [None]:
# Rasters selected for resampling, mapped to their data type.
# The resample cell uses 'categorical' -> nearest neighbour and 'continuous' -> bilinear.
# Paste the dictionary printed by the previous cell here and edit as required.
selected_original_rasters = {
"tmf_AnnualChanges_Dec1990.tif": 'categorical',
"tmf_AnnualChanges_Dec1991.tif": 'categorical',
"tmf_AnnualChanges_Dec1992.tif": 'categorical',
"tmf_AnnualChanges_Dec1993.tif": 'categorical',
"tmf_AnnualChanges_Dec1994.tif": 'categorical',
"tmf_AnnualChanges_Dec1995.tif": 'categorical',
"tmf_AnnualChanges_Dec1996.tif": 'categorical',
"tmf_AnnualChanges_Dec1997.tif": 'categorical',
"tmf_AnnualChanges_Dec1998.tif": 'categorical',
"tmf_AnnualChanges_Dec1999.tif": 'categorical',
"tmf_AnnualChanges_Dec2000.tif": 'categorical',
"tmf_AnnualChanges_Dec2001.tif": 'categorical',
"tmf_AnnualChanges_Dec2002.tif": 'categorical',
"tmf_AnnualChanges_Dec2003.tif": 'categorical',
"tmf_AnnualChanges_Dec2004.tif": 'categorical',
"tmf_AnnualChanges_Dec2005.tif": 'categorical',
"tmf_AnnualChanges_Dec2006.tif": 'categorical',
"tmf_AnnualChanges_Dec2007.tif": 'categorical',
"tmf_AnnualChanges_Dec2008.tif": 'categorical',
"tmf_AnnualChanges_Dec2009.tif": 'categorical',
"tmf_AnnualChanges_Dec2010.tif": 'categorical',
"tmf_AnnualChanges_Dec2011.tif": 'categorical',
"tmf_AnnualChanges_Dec2012.tif": 'categorical',
"tmf_AnnualChanges_Dec2013.tif": 'categorical',
"tmf_AnnualChanges_Dec2014.tif": 'categorical',
"tmf_AnnualChanges_Dec2015.tif": 'categorical',
"tmf_AnnualChanges_Dec2016.tif": 'categorical',
"tmf_AnnualChanges_Dec2017.tif": 'categorical',
"tmf_AnnualChanges_Dec2018.tif": 'categorical',
"tmf_AnnualChanges_Dec2019.tif": 'categorical',
"tmf_AnnualChanges_Dec2020.tif": 'categorical',
"tmf_AnnualChanges_Dec2021.tif": 'categorical',
"tmf_AnnualChanges_Dec2022.tif": 'categorical',
"tmf_AnnualChanges_Dec2023.tif": 'categorical',
"tmf_AnnualDisruptionObs2023_y2023.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1982.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1983.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1984.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1985.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1986.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1987.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1988.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1989.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1990.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1991.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1992.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1993.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1994.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1995.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1996.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1997.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1998.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y1999.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2000.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2001.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2002.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2003.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2004.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2005.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2006.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2007.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2008.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2009.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2010.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2011.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2012.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2013.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2014.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2015.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2016.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2017.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2018.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2019.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2020.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2021.tif": 'categorical',
"tmf_Ndisturb_C2_1982_2022_y2022.tif": 'categorical',
"tmf_TransitionMap_MainClasses_TransitionMap_MainClasses.tif": 'categorical',
"tmf_TransitionMap_Subtypes_TransitionMap_Subtypes.tif": 'categorical',
}

In [None]:
# Set resample algorithms for different raster types
# See https://gdal.org/programs/gdalwarp.html
categorical_alg = 'near'
continuous_alg = 'bilinear'

# Derive the target resolution and bounds from the template raster
template = gdal.Open(template_dir)
template_dimensions = template.GetGeoTransform()
xres, yres = template_dimensions[1], -template_dimensions[5]
xmin = template_dimensions[0]
ymin = template_dimensions[3] - template.RasterYSize * yres
xmax = xmin + template.RasterXSize * xres
ymax = template_dimensions[3]

# Resample progress
resample_progress_index = 0
resample_progress_label = widgets.Label(f"Resample progress: {resample_progress_index}/{len(selected_original_rasters.items())}")
display(resample_progress_label)

# Iterate over selected rasters
for original_raster_name, data_type in selected_original_rasters.items():
  resampled_raster_dir = join(resampled_dir, original_raster_name)
  if not exists(resampled_raster_dir):
    # Look for the source raster in the Earth Engine, user upload and GLAD LCLUC directories, in that order
    original_raster_dir = join(ee_dir, original_raster_name)
    if not exists(original_raster_dir): original_raster_dir = join(user_upload_dir, original_raster_name)
    if not exists(original_raster_dir): original_raster_dir = join(glad_lcluc_dir, original_raster_name)
    if not exists(original_raster_dir):
      raise FileNotFoundError(f"{original_raster_name} was not found in {ee_dir}, {user_upload_dir} or {glad_lcluc_dir}")
    # Set resample type. Unknown types are rejected instead of silently
    # reusing the algorithm chosen for the previous raster.
    if data_type == 'categorical': resample_alg = categorical_alg
    elif data_type == 'continuous': resample_alg = continuous_alg
    else: raise ValueError(f"Unknown data type '{data_type}' for {original_raster_name}: use 'categorical' or 'continuous'")
    # Warp into memory first. The compressed copy below then never overwrites
    # a file that is still open as its own source.
    warped = gdal.Warp(
        '',
        original_raster_dir,
        format='MEM',
        xRes=xres, yRes=yres,
        outputBounds=(xmin, ymin, xmax, ymax),
        resampleAlg=resample_alg,
        outputType=gdalconst.GDT_Float32)
    if warped is None:
      raise RuntimeError(f"gdal.Warp failed for {original_raster_dir}")
    # Compress and close
    driver = gdal.GetDriverByName("GTiff")
    src = driver.CreateCopy(resampled_raster_dir, warped, 0, options=["COMPRESS=DEFLATE","PREDICTOR=2","ZLEVEL=9"])
    src = warped = None
  # Update resample progress
  resample_progress_index += 1
  resample_progress_label.value = f"Resample progress: {resample_progress_index}/{len(selected_original_rasters.items())}"

In [None]:
# Determine continuous predictor precision:
# for every raster flagged 'continuous', find the highest rounding precision
# that keeps the number of unique values at or below 'max_unique_values'.

override_max_unique_values = False
max_unique_values = 5000 # Should be >=10

if override_max_unique_values == False:
  dem_base_path = join(areas_dir, "base_dem.tif")
  dem_base_array = gdal.Open(dem_base_path).ReadAsArray()
  max_unique_values = int(np.ptp(dem_base_array)) # Precision based on elevation range (max - min)
resampled_precision_dict = {}

for resampled_predictor, resample_type in selected_original_rasters.items():
  if resample_type == 'continuous':
    resampled_predictor_path = join(resampled_dir, resampled_predictor)
    print(f"Reading {resampled_predictor}...")
    # Read raster as array
    resampled_predictor_array = gdal.Open(resampled_predictor_path).ReadAsArray()
    # Convert 'nodata' values to nan
    resampled_predictor_array[resampled_predictor_array == nodatavalue] = np.nan
    resampled_predictor_array_masked = np.ma.array(resampled_predictor_array, mask=np.isnan(resampled_predictor_array))
    # Count unique values in raster
    unique_values = len(np.unique(resampled_predictor_array_masked))
    print(f"There are {unique_values} unique values in {resampled_predictor}")
    # Generate histogram from up to 100,000 random points. Sampling without
    # replacement cannot draw more points than the raster has cells.
    sample_size = min(100_000, resampled_predictor_array_masked.size)
    random_selection = np.random.choice(resampled_predictor_array_masked.ravel(), size = sample_size, replace = False)
    _ = plt.hist(random_selection, bins='auto')  # arguments are passed to np.histogram
    plt.title(f"{resampled_predictor}")
    plt.show()
    # Remove 0 values for log10
    resampled_predictor_array_masked[resampled_predictor_array_masked == 0] = np.nan
    resampled_predictor_array_masked = np.ma.array(resampled_predictor_array, mask=np.isnan(resampled_predictor_array))
    # Create log10 array for determining positions for rounding
    array_log10 = np.log10(abs(resampled_predictor_array_masked))
    place_value_decimal = int(abs(np.min(array_log10)))
    place_value_integer = int(0 - np.max(array_log10))
    # Iterate down precision levels to determine optimal number of unique values
    min_starting_precision = len(str(max_unique_values))
    # Initialise before the loop: an empty range must not leave the name unbound,
    # or holding the value found for the previous raster.
    optimal_precision = None
    for precision in reversed(range(place_value_integer, max(min_starting_precision, place_value_decimal +1))):
      rounded_array = np.round(resampled_predictor_array, decimals=precision)
      round_unique_values = len(np.unique(rounded_array))
      if round_unique_values <= max_unique_values:
        optimal_precision = precision
        print(f"The optimal precison for {resampled_predictor} is {optimal_precision}, with {round_unique_values} unique values.")
        resampled_precision_dict.update({f'{resampled_predictor}':f'{optimal_precision}'})
        break
    if optimal_precision is None: print("There's a problem with setting precision.")
    print("___________________\n")

print("Dictionary for optimal rounding values:")
# A bare expression in the middle of a cell is not displayed, so print explicitly
print(resampled_precision_dict)

precision_dict_csv_path = join(resampled_dir, 'rounding_dictionary.csv')
# Save rounding dictionary to CSV (row 1: raster filenames, row 2: precisions)
with open(precision_dict_csv_path, 'w', newline='') as precision_dict_csv:
    writer = csv.writer(precision_dict_csv)
    writer.writerow(resampled_precision_dict.keys())
    writer.writerow(resampled_precision_dict.values())

In [None]:
# Reload the rounding dictionary from disk: the CSV holds exactly two rows,
# filenames first and precisions second
with open(precision_dict_csv_path, 'r') as file:
    csv_rows = list(csv.reader(file))
    keys, values = csv_rows
    topo_precision_dict = dict(zip(keys, values))

# Print a ready-to-paste dictionary so the precision can be verified and corrected if necessary
print("topo_precision_dict = {")
for key in topo_precision_dict:
    value = topo_precision_dict[key]
    print(f'"{key}": {value},')
print("}")

In [None]:
# Continuous rasters to finalise, mapped to the number of decimals they are rounded to.
# Each filename is read from the resampled directory by the next cell.
# Paste the dictionary printed by the previous cell here and edit as required.
topo_precision_dict = {
"agbd_2022.tif": 0,
"agbd_diff_disturbance_1990_2022.tif": 0,
"agbd_diff_disturbance_before_1990.tif": 0,
}

In [None]:
# Set smoothing kernel
kernel = Gaussian2DKernel(x_stddev=1, y_stddev=1)

# Continuous progress
continuous_progress_index = 0
continuous_progress_label = widgets.Label(f"Continuous progress: {continuous_progress_index}/{len(topo_precision_dict.items())}")
display(continuous_progress_label)

# Iterate over selected continuous rasters
for continuous, precision in topo_precision_dict.items():
  # Set output paths and check which ones are missing
  cont_raster_unsmoothed_filename = f"{continuous[:-4]}_unsmooth.tif"
  cont_raster_unsmoothed_path = join(continuous_final_dir, cont_raster_unsmoothed_filename)
  cont_raster_smoothed_filename = f"{continuous[:-4]}_smooth.tif"
  cont_raster_smoothed_path = join(continuous_final_dir, cont_raster_smoothed_filename)
  unsmoothed_missing = not exists(cont_raster_unsmoothed_path)
  smoothed_missing = not exists(cont_raster_smoothed_path)
  # Only read the raster when at least one output still has to be written
  if unsmoothed_missing or smoothed_missing:
    cont_raster_resampled_path = join(resampled_dir, continuous)
    cont_raster_resampled_array = gdal.Open(cont_raster_resampled_path).ReadAsArray()
    # Convert nodata values to 0
    cont_raster_resampled_array[cont_raster_resampled_array == nodatavalue] = 0
    if unsmoothed_missing:
      # Round and export unsmoothed continuous raster
      cont_raster_unsmoothed_rounded = np.round(cont_raster_resampled_array, decimals=int(precision))
      export_array_as_tif(cont_raster_unsmoothed_rounded, cont_raster_unsmoothed_path)
    if smoothed_missing:
      # Smooth using 2D spatial convolution
      cont_raster_smoothed = convolve(cont_raster_resampled_array, kernel, boundary='extend')
      # Round and export smoothed continuous raster
      cont_raster_smoothed_rounded = np.round(cont_raster_smoothed, decimals=int(precision))
      export_array_as_tif(cont_raster_smoothed_rounded, cont_raster_smoothed_path)
  # Update continuous progress
  continuous_progress_index += 1
  continuous_progress_label.value = f"Continuous progress: {continuous_progress_index}/{len(topo_precision_dict.items())}"

# TMF binary predictors

In [None]:
# Check TMF data users guide for classification. https://forobs.jrc.ec.europa.eu/static/tmf/TMF_DataUsersGuide.pdf

# Mean cell area in hectares, used to convert the 0.5 ha threshold to a pixel count
cell_size_x = gdal.Open(join(areas_dir, 'cell_size_x.tif')).ReadAsArray()
cell_size_y = gdal.Open(join(areas_dir, 'cell_size_y.tif')).ReadAsArray()
cell_size_ha = np.mean(cell_size_x) * np.mean(cell_size_y) / 10_000
sieve_size = int(np.ceil(0.5/cell_size_ha)) # Removes all forest patches smaller than 0.5 ha
print(f"Forest binary sieve size (>0.5 ha) is {sieve_size} pixels.")

# Generate list of valid TMF rasters to convert to binary
binary_list = []
for resampled_raster in os.listdir(resampled_dir):
  # Verify these are in the filenames
  if 'DisruptionObs' in resampled_raster or 'AnnualChanges' in resampled_raster or 'Ndisturb' in resampled_raster:
    # The year is expected in the last four characters before '.tif'.
    # Other files (e.g. '.aux.xml' sidecars) are skipped instead of crashing int().
    year = resampled_raster[-8:-4]  # Data prior to 1990 is poor
    if resampled_raster.endswith('.tif') and year.isdigit() and int(year) >= 1990: binary_list.append(resampled_raster)

# Binary progress
binary_progress_index = 0
binary_progress_label = widgets.Label(f"Binary progress: {binary_progress_index}/{len(binary_list)}")
display(binary_progress_label)

for resampled_raster in binary_list:
  year = resampled_raster[-8:-4]
  # Forest binary
  if 'AnnualChanges' in resampled_raster:
    forest_binary_path = join(binary_dir, f"forest_binary_{year}.tif")
    if not exists(forest_binary_path):
      ac_raster_path = join(resampled_dir, resampled_raster)
      ac_array = gdal.Open(ac_raster_path).ReadAsArray()
      # Set classes 1 & 2 as 1, all else as 0
      forest_binary_array = np.where((ac_array == 1) | (ac_array == 2), 1, 0)

      # Sieve to 0.5 ha, using 8-connectedness (3, 3)
      fb_array_labelled, fb_array_features = label(forest_binary_array, structure=np.ones((3, 3)))
      # Determine the size of each patch (index 0 is the background label)
      fb_array_sizes = ndi_sum(forest_binary_array, fb_array_labelled, range(fb_array_features + 1))
      # Create a mask to remove patches smaller than the threshold
      fb_array_mask_sizes = fb_array_sizes >= sieve_size
      fb_array_mask_sizes[0] = 0 # Ensure non-forest (0) is excluded
      fb_array_mask = fb_array_mask_sizes[fb_array_labelled]
      # Apply the mask to the forest binary array and export
      fb_array_sieved = forest_binary_array * fb_array_mask
      export_array_as_tif(fb_array_sieved, forest_binary_path)

  # Disturbance binary
  if 'DisruptionObs' in resampled_raster or 'Ndisturb' in resampled_raster:
    disturbance_binary_path = join(binary_dir, f"disturbance_binary_{year}.tif")
    if not exists(disturbance_binary_path):
      # Match only the AnnualChanges GeoTIFF of the same year, never a sidecar file
      ac_raster_path = sorted(glob.glob(f"{resampled_dir}/*AnnualChanges*{year}.tif"))
      if not ac_raster_path:
        # Without the AnnualChanges classes the disturbance cannot be filtered
        print(f"No AnnualChanges raster found for {year}, skipping {resampled_raster}.")
      else:
        ac_array = gdal.Open(ac_raster_path[0]).ReadAsArray()
        do_raster_path = join(resampled_dir, resampled_raster)
        do_array = gdal.Open(do_raster_path).ReadAsArray()
        # Set all disruption events to '1' if they're not classed as undisturbed forest or water in AnnualChanges
        disturbance_binary_array = np.where((do_array >= 1) & ((ac_array != 1) & (ac_array != 5)), 1, 0)
        export_array_as_tif(disturbance_binary_array, disturbance_binary_path)

  # Update binary progress
  binary_progress_index += 1
  binary_progress_label.value = f"Binary progress: {binary_progress_index}/{len(binary_list)}"

# PA binary predictors

In [None]:
# Select the 'protected area' polygon from the candidates printed below.
# This can be multiple combined PAs / polygons that have no or minimal history of human disturbance

# Polygons that are never protected-area candidates
polygons_to_exclude = ['template.gpkg', 'project_area.gpkg', 'project_area_buffered_bbox.gpkg', 'gedi_area.gpkg']
for polygon in os.listdir(polygons_dir):
  if polygon in polygons_to_exclude:
    continue
  print(f"protected_area_polygon = '{polygon}'")

In [None]:
protected_area_polygon = 'pa_ais.gpkg'

# Set paths (the output name uses the polygon filename without its extension)
pa_polygon_path = join(polygons_dir, protected_area_polygon)
pa_binary_path = join(binary_dir, f"{Path(protected_area_polygon).stem}_binary.tif")
# Start from an all-zero raster with the template's shape and data type
template_tif = gdal.Open(template_dir)
template_mask_array = np.zeros_like(template_tif.ReadAsArray())
export_array_as_tif(template_mask_array, pa_binary_path)
# Burn the value '1' where it overlaps with the protected area polygon
burn_polygon_to_raster(pa_binary_path, pa_polygon_path, fixed=True, fixed_value=1, all_touched=False)
print(f"PA binary raster has been created: {pa_binary_path}")

# Binary predictor edge effects

In [None]:
# Gaussian kernel used to smooth each binary raster
kernel = Gaussian2DKernel(x_stddev=3, y_stddev=3)
# Number of decimal places kept in the exported predictor
precision = 2

# Every raster with 'binary' in its filename, from the binary and resampled directories
binary_list = [raster_name for raster_name in os.listdir(binary_dir) + os.listdir(resampled_dir)
               if "binary" in raster_name]

# Edge effect progress
edge_effect_progress_index = 0
edge_effect_progress_label = widgets.Label(f"Edge effect progress: {edge_effect_progress_index}/{len(binary_list)}")
display(edge_effect_progress_label)

# (min, max, new value) ranges of the summed differentiator + proximity array.
# 0-pixels sum to 0 / 1 / ~1.41 / 2 and 1-pixels to 10 / 11 / ~11.41 / 12, depending on
# how far they are (capped at 2 pixels) from the nearest pixel of the other class.
pixel_prox_reclass_table = [(0, 0, -4), (1, 1, -1), (1.4, 1.5, -2), (2, 2, -3), (10, 10, 3), (11, 11, 0), (11.4, 11.5, 1), (12, 12, 2)]

def _proximity_within(target_mask, max_distance=2):
  """Distance of each True pixel to the nearest False pixel, set to 0 beyond max_distance."""
  distances = distance_transform_edt(target_mask)
  return np.where(distances > max_distance, 0, distances)

for binary_raster in binary_list:
  edge_effects_path = join(edge_effects_dir, binary_raster.replace('binary', 'with_edge_effects'))
  if not exists(edge_effects_path):
    # Prefer the raster in the binary directory, falling back to the resampled directory
    binary_raster_path = join(binary_dir, binary_raster)
    if not exists(binary_raster_path):
      binary_raster_path = join(resampled_dir, binary_raster)
    binary_array = gdal.Open(binary_raster_path).ReadAsArray()
    # Lift the '1' class to 10 so both classes stay distinguishable once proximities are added
    differentiator_array = binary_array.copy()
    differentiator_array[differentiator_array == 1] = 10
    # Proximity of 0-pixels to the nearest non-0 pixel, and of 1-pixels to the nearest non-1 pixel
    positive_proximity_array = _proximity_within(binary_array == 0)
    negative_proximity_array = _proximity_within(binary_array == 1)
    # Sum proximities and differentiator
    pixel_prox_summed = differentiator_array + positive_proximity_array + negative_proximity_array
    # Reclassify for better semantic understanding of pixel proximity. The masks are taken
    # from the untouched summed array, so the order of the table rows does not matter.
    pixel_prox_reclassed = pixel_prox_summed.copy()
    for min_value, max_value, new_value in pixel_prox_reclass_table:
      in_range = (pixel_prox_summed >= min_value) & (pixel_prox_summed <= max_value)
      pixel_prox_reclassed[in_range] = new_value
    # Smooth binary array using 2D convolution
    binary_smoothed = convolve(binary_array, kernel, boundary='extend')
    # Sum pixel proximity and smoothed binary array, then export the predictor
    edge_effects_array = np.round(pixel_prox_reclassed + binary_smoothed, precision)
    export_array_as_tif(edge_effects_array, edge_effects_path)

  # Update edge effect progress
  edge_effect_progress_index += 1
  edge_effect_progress_label.value = f"Edge effect progress: {edge_effect_progress_index}/{len(binary_list)}"

# Distance from coast

In [None]:
# Download global coast data from https://osmdata.openstreetmap.de/data/coastlines.html
coastlines_url = 'https://osmdata.openstreetmap.de/download/coastlines-split-4326.zip'
coastlines_global_file_path = join(coast_dir, 'coastlines-split-4326.zip')
if not exists(coastlines_global_file_path):
  # Stream to a temporary '.part' file and rename it only once complete, so a failed or
  # interrupted download can never be mistaken for a finished archive on later runs.
  coastlines_download_temp_path = coastlines_global_file_path + '.part'
  with requests.get(coastlines_url, allow_redirects=True, stream=True) as response:
    response.raise_for_status() # Fail on HTTP errors instead of saving an error page as the zip
    with open(coastlines_download_temp_path, 'wb') as download_file:
      # Write in 1 MB chunks rather than holding the whole archive in memory
      for chunk in response.iter_content(chunk_size=1024 * 1024):
        download_file.write(chunk)
  os.replace(coastlines_download_temp_path, coastlines_global_file_path)

# Extract the archive next to the zip, unless its directory already exists
coastlines_global_dir = join(coast_dir, 'coastlines-split-4326')
if not exists(coastlines_global_dir):
  with zipfile.ZipFile(coastlines_global_file_path, 'r') as zip_ref:
    zip_ref.extractall(coast_dir)

# Upload and select a polygon with full coastline extent.
# It must be the template.gpkg polygon OR a polygon that entirely contains template.gpkg.
polygons_to_exclude = ['project_area.gpkg', 'project_area_buffered_bbox.gpkg', 'gedi_area.gpkg']
for polygon in os.listdir(polygons_dir):
  if polygon not in polygons_to_exclude:
    print(f"coastline_extent_polygon = '{polygon}'")

In [None]:
coastline_extent_polygon = 'peninsular_malaysia.gpkg'

# Get extent of polygon (total_bounds is ordered min x, min y, max x, max y)
coastline_extent_polygon_path = join(polygons_dir, coastline_extent_polygon)
coastline_extent_bounds = gpd.read_file(coastline_extent_polygon_path).total_bounds
coastline_min_x, coastline_max_x = coastline_extent_bounds[0], coastline_extent_bounds[2]
coastline_min_y, coastline_max_y = coastline_extent_bounds[1], coastline_extent_bounds[3]

# Number of decimal places kept in the final distance (km) raster
precision = 1

# Load template
template = gdal.Open(template_dir)

# Path of the new coast raster (where coastline will be rasterized)
rasterized_coast_path = join(coast_dir, 'rasterized_coast.tif')

# Create a new empty raster based on the coastline extent polygon
if coastline_extent_polygon != 'template.gpkg':
  # Get dimensions of template
  template_band = template.GetRasterBand(1)
  template_dimensions, template_projection = template.GetGeoTransform(), template.GetProjection()
  template_size_x, template_size_y = template_band.XSize, template_band.YSize
  template_res_x, template_res_y = template_dimensions[1], -template_dimensions[5]

  # Top-left corner of the template (minimum x, maximum y)
  template_min_x = template_dimensions[0]
  template_max_y = template_dimensions[3]

  # Calculate the centre x and y of the template
  template_centre_x = template_min_x + ((template_size_x / 2) * template_res_x)
  template_centre_y = template_max_y - ((template_size_y / 2) * template_res_y)

  # Number of whole template-sized pixels needed to reach from the template's top-left
  # corner out to the polygon's western and northern bounds
  coastline_min_diff_x = template_min_x - coastline_min_x
  coastline_max_diff_y = coastline_max_y - template_max_y
  coastline_min_diff_x_size = int(np.ceil(coastline_min_diff_x / template_res_x))
  coastline_max_diff_y_size = int(np.ceil(coastline_max_diff_y / template_res_y))

  # Origin of the coastline raster, snapped to the template grid so pixels stay aligned
  coastline_start_x = template_min_x - (coastline_min_diff_x_size * template_res_x)
  coastline_start_y = template_max_y + (coastline_max_diff_y_size * template_res_y)

  # Calculate the size of the coastline raster
  coastline_size_x = int(np.ceil((coastline_max_x - coastline_start_x)/template_res_x))
  coastline_size_y = int(np.ceil((coastline_start_y - coastline_min_y)/template_res_y))

  if not exists(rasterized_coast_path):
    # Create coast raster dataset
    rasterized_coast_dataset = gdal.GetDriverByName("GTiff").Create(rasterized_coast_path, coastline_size_x, coastline_size_y, bands=1, eType=gdal.GDT_Float32,
                                                                    options=["COMPRESS=DEFLATE","PREDICTOR=2","ZLEVEL=9"])
    rasterized_coast_dataset.SetProjection(template_projection)
    rasterized_coast_dataset.SetGeoTransform((coastline_start_x, template_res_x, 0, coastline_start_y, 0, -template_res_y))

    # Create and write array (all pixels with value 1)
    raster_data = np.ones((coastline_size_y, coastline_size_x), dtype=np.float32)
    rasterized_coast_dataset.GetRasterBand(1).WriteArray(raster_data)

    # Flush and close the coast raster dataset
    rasterized_coast_dataset.FlushCache()
    rasterized_coast_dataset = None
    print("A blank raster at the extent of coastlines polygon has been generated, ready for rasterization.")
  else: print("A rasterization raster already exists.")

else: # If just using the template area, copy the template.
  if not exists(rasterized_coast_path):
    template_array = template.ReadAsArray()
    export_array_as_tif(template_array, rasterized_coast_path)
  else: print("A rasterization raster already exists.")

# Clip coastlines polygon to the extent (speeds up rasterization)
coastlines_clipped_path = join(coast_dir, 'clipped_coastlines.gpkg')
if not exists(coastlines_clipped_path):
  coastlines_shp_path = join(coastlines_global_dir, 'lines.shp')
  coastlines_shp_df = gpd.read_file(coastlines_shp_path)
  coastlines_clipped_df = gpd.clip(coastlines_shp_df, coastline_extent_bounds)
  coastlines_clipped_df.to_file(coastlines_clipped_path, driver='GPKG')
  print(f"Coastlines clipped to the polygon: {coastlines_clipped_path}")
else: print(f"Coastlines have already been clipped to the polygon: {coastlines_clipped_path}")

# Rasterize coastlines (2), if not already
rasterized_coast_array = gdal.Open(rasterized_coast_path).ReadAsArray()
if not np.any(rasterized_coast_array == 2):
  burn_polygon_to_raster(rasterized_coast_path, coastlines_clipped_path, fixed_value=2)
  print(f"Coastlines rasterized: {rasterized_coast_path}")
else: print(f"Coastlines have already been rasterized: {rasterized_coast_path}")

# Calculate proximity in pixels
cost_proximity_pixels_path = join(coast_dir, "coast_proximity_pixels.tif")
if not exists(cost_proximity_pixels_path):
  rasterized_coast_array = gdal.Open(rasterized_coast_path).ReadAsArray()
  # Distance of every non-coast pixel to the nearest coast ('2') pixel. May require high RAM.
  coast_proximity_pixels = distance_transform_edt(rasterized_coast_array != 2)
  # Clip the array to the template size
  if coastline_extent_polygon != 'template.gpkg':
    # The coast raster origin sits exactly *_diff_size pixels west / north of the template
    # origin, so the template's first column / row is at that index. The previous '-1'
    # shifted the window by one pixel and produced a negative start when the offset was 0.
    # Delete any previously exported proximity rasters to regenerate them with this fix.
    clip_start_x = coastline_min_diff_x_size
    clip_start_y = coastline_max_diff_y_size
    clip_size_x = template_size_x
    clip_size_y = template_size_y
    coast_proximity_pixels = coast_proximity_pixels[clip_start_y:clip_start_y + clip_size_y, clip_start_x:clip_start_x + clip_size_x]
  export_array_as_tif(coast_proximity_pixels, cost_proximity_pixels_path)
  print(f"A proximity (pixel number) raster has been generated at: {cost_proximity_pixels_path}")
else: print(f"A proximity (pixel number) raster already exists at: {cost_proximity_pixels_path}")

# Convert proximity to km (distance from coast)
coast_distance_path = join(coast_dir, "coast_proximity_km.tif")
if not exists(coast_distance_path):
  coast_proximity_array = gdal.Open(cost_proximity_pixels_path).ReadAsArray()
  cell_size_x = gdal.Open(join(areas_dir, 'cell_size_x.tif')).ReadAsArray()
  cell_size_y = gdal.Open(join(areas_dir, 'cell_size_y.tif')).ReadAsArray()
  # A single mean cell size is used for the whole raster
  # NOTE(review): the / 1000 presumes the cell size rasters are in metres - confirm
  cell_size_mean_km = ((np.mean(cell_size_x) + np.mean(cell_size_y)) / 2) / 1000
  coast_proximity_km = coast_proximity_array * cell_size_mean_km
  coast_proximity_round = np.round(coast_proximity_km, precision)
  export_array_as_tif(coast_proximity_round, coast_distance_path)
  print(f"A distance from coast (km) raster has been generated at: {coast_distance_path}")
else: print(f"A distance from coast (km) raster already exists at: {coast_distance_path}")

# Topography

## Define base DEM

In [None]:
# If using a GEDI corrected DEM, copy the prediction from scenarios directory
enable_gedi_corrected = False

# List every candidate prediction that could be used as the GEDI corrected DEM
if not enable_gedi_corrected:
  print(f"Using the uncorrected DEM in {areas_dir}.")
else:
  gedi_corrected_dem_exists = False
  for subdir, _, files in os.walk(scenarios_dir):
    corrected_rasters = [raster for raster in files
                         if raster.endswith('.tif') and 'elevation_corrected' in raster]
    for raster in corrected_rasters:
      print(f'gedi_corrected_dem_path = "{subdir}/{raster}"')
      gedi_corrected_dem_exists = True
  if not gedi_corrected_dem_exists:
    print("No GEDI corrected DEM found in scenarios folder. Defaulting to uncorrected DEM.")

In [None]:
gedi_corrected_dem_path = "/gdrive/Shareddrives/masec/6_scenarios/elevation_corrected_230822_125308/taman/scenario_predictions_unmasked/2014__taman_elevation_corrected_230822_125308_unmasked.tif"

dem_base_path = join(areas_dir, "base_dem.tif")
dem_base_corrected_path = join(areas_dir, "base_dem_corrected.tif")
# Post-process the GEDI corrected DEM and save it to the areas directory
if enable_gedi_corrected and gedi_corrected_dem_exists:
  if not exists(dem_base_corrected_path):
    # Sea level is post-processed back to 0 m, as are areas that might have been predicted
    # below this. At ~sea level, the original DEM was likely the true terrain height.

    # Define a low transition zone between original and corrected DEM.
    # This delineates a transition from 0 to 100 % corrected DEM values.
    low_transition_lower_limit = 0
    low_transition_upper_limit = 5

    # Higher elevations are poorly predicted in some study areas due to low sample size.
    # Therefore original values should be used, as regardless there's minimal difference
    # between surface height (vegetation) and terrain height.

    # Define a montane transition zone between original and corrected DEM.
    # This delineates a transition from 0 to 100 % original DEM values.
    montane_transition_lower_limit = 1500
    montane_transition_upper_limit = 1800

    # Read original base DEM
    dem_base_array = gdal.Open(dem_base_path).ReadAsArray()

    # Low ratio: 0 at or below the lower limit, rising linearly to 1 at or above the upper limit.
    # Normalised by (upper - lower) so it stays a 0 - 1 ratio if the lower limit is not 0.
    dem_base_array_low_ratio = np.clip(dem_base_array, low_transition_lower_limit, low_transition_upper_limit)
    dem_base_array_low_ratio = (dem_base_array_low_ratio - low_transition_lower_limit) / (low_transition_upper_limit - low_transition_lower_limit)

    # Montane ratio: 1 at or below the lower limit, falling linearly to 0 at or above the upper limit
    dem_base_array_montane_ratio = np.clip(dem_base_array, montane_transition_lower_limit, montane_transition_upper_limit)
    dem_base_array_montane_ratio = (montane_transition_upper_limit - dem_base_array_montane_ratio) / (montane_transition_upper_limit - montane_transition_lower_limit)

    # Use original DEM values for surface water.
    # The Copernicus DEM rounds all surface water values to 1 or 0 decimal places.
    # This is used to differentiate them from land values, creating a 'land binary':
    # any value with a remainder beyond 1 decimal place is classed as land (1).
    dem_base_array_land_binary = np.floor(dem_base_array * 10) / 10 # Round DOWN 1 decimal place
    dem_base_array_land_binary = dem_base_array - dem_base_array_land_binary
    dem_base_array_land_binary[dem_base_array_land_binary > 0] = 1
    # Invert the binary array to target 0 values for sieving single water pixels (usually erroneous)
    dem_base_array_land_binary_inverted = np.logical_not(dem_base_array_land_binary)
    # Label water patches using 8-connectedness (3, 3)
    lb_array_labelled, lb_array_features = label(dem_base_array_land_binary_inverted, structure=np.ones((3, 3)))
    # Determine the size (pixel count) of each patch
    lb_array_sizes = ndi_sum(dem_base_array_land_binary_inverted, lb_array_labelled, range(lb_array_features + 1))
    # Keep only patches of at least 2 pixels, i.e. remove isolated single water pixels
    lb_array_mask_sizes = lb_array_sizes >= 2
    lb_array_mask_sizes[0] = 0 # Ensure non-target values are excluded
    lb_array_mask = lb_array_mask_sizes[lb_array_labelled]
    # Apply the mask to the inverted binary array
    lb_array_sieved_inverted = dem_base_array_land_binary_inverted * lb_array_mask
    # Invert the array back to original representation
    dem_base_array_land_binary = np.logical_not(lb_array_sieved_inverted)

    # Read the corrected base DEM and create the final modifier
    gedi_corrected_dem_array = gdal.Open(gedi_corrected_dem_path).ReadAsArray()
    dem_base_corrected_array_modifier = gedi_corrected_dem_array.copy()
    # Change all corrected DEM values < sea level to 0 (most are erroneous)
    dem_base_corrected_array_modifier[dem_base_corrected_array_modifier < 0] = 0
    # Subtract corrected DEM from original as the 'corrected' modifier
    dem_base_corrected_array_modifier = dem_base_array - dem_base_corrected_array_modifier
    # Multiply the corrected modifier by low ratio, montane ratio and land binary
    dem_base_corrected_array_modifier = dem_base_corrected_array_modifier * dem_base_array_low_ratio * dem_base_array_montane_ratio * dem_base_array_land_binary

    # Apply the modifier
    dem_base_corrected_array = dem_base_array - dem_base_corrected_array_modifier

    # Export uncompressed for further topographic metrics
    export_array_as_tif(dem_base_corrected_array, dem_base_corrected_path, compress=False)
    print(f"GEDI corrected DEM has been postprocessed and uncompressed to: {dem_base_corrected_path}")

  else: print(f"A corrected base DEM already exists, first remove from {areas_dir} for replacement.")

else: print(f"A GEDI corrected DEM does not exist in the scenarios directory. Proceeding with uncorrected base DEM.")

In [None]:
# Define base DEM and properties.
# The corrected DEM is only used when it is enabled AND the post-processed file exists.
use_corrected_dem = enable_gedi_corrected and exists(dem_base_corrected_path)
if enable_gedi_corrected:
  print("Post-processed GEDI corrected base DEM enabled.")
  if not use_corrected_dem:
    print("A post-processed GEDI corrected based DEM does not exist. Defaulting to the original base DEM.")
else:
  print("Post-processed GEDI corrected based DEM disabled. Using the original base DEM.")

if use_corrected_dem:
  dem_base = gdal.Open(dem_base_corrected_path)
  topography_temp_dir = join(predictors_dir, 'topo_corrected_temp')
  topography_final_dir = join(predictors_dir, 'topo_corrected_final')
else:
  dem_base = gdal.Open(dem_base_path)
  topography_temp_dir = join(predictors_dir, 'topo_temp')
  topography_final_dir = join(predictors_dir, 'topo_final')

# Make sure the output directories exist whichever DEM is used
# (previously they were only created for the corrected DEM)
makedirs(topography_temp_dir, exist_ok=True)
makedirs(topography_final_dir, exist_ok=True)

# Get base DEM attributes
dem_base_array = dem_base.ReadAsArray()
dem_dimensions = dem_base.GetGeoTransform()
# Geotransform index 3 is the top edge y coordinate, index 5 the (negative) pixel height
y_origin, pixel_height, raster_height = dem_dimensions[3], dem_dimensions[5], len(dem_base_array)
dem_central_latitude = y_origin + (raster_height // 2) * pixel_height

## Empty temporary dir

In [None]:
# for raster in Path(topography_temp_dir).glob("**/*"):
#   if raster.is_file(): raster.unlink()

## Whitebox algorithms

In [None]:
%%capture
# Install the whitebox package and create the WhiteboxTools interface (wbt) used by the
# cells below. %%capture suppresses this cell's output.
!pip install whitebox
import whitebox
wbt = whitebox.WhiteboxTools()

In [None]:
%%capture
# https://www.whiteboxgeo.com/manual/wbt_book/preface.html

# Elevation
elevation_path_temp = join(topography_temp_dir, "elevation.tif")
if not exists(elevation_path_temp):
  elevation = dem_base_array
  export_array_as_tif(elevation, elevation_path_temp, compress=False)

# Slope
slope_path_temp = join(topography_temp_dir, "slope.tif")
if not exists(slope_path_temp):
  wbt.slope(elevation_path_temp, slope_path_temp, units = "degrees")

# Aspect
aspect_path_temp = join(topography_temp_dir, "aspect.tif")
if not exists(aspect_path_temp):
  wbt.aspect(elevation_path_temp, aspect_path_temp)

# Profile Curvature
profile_curvature_path_temp = join(topography_temp_dir, "profile_curvature.tif")
if not exists(profile_curvature_path_temp):
  wbt.profile_curvature(elevation_path_temp, profile_curvature_path_temp, log=False)

# Tangential Curvature
tangential_curvature_path_temp = join(topography_temp_dir, "tangential_curvature.tif")
if not exists(tangential_curvature_path_temp):
  wbt.tangential_curvature(elevation_path_temp, tangential_curvature_path_temp, log=False)

# Topographic Ruggedness Index
topographic_ruggedness_index_path_temp = join(topography_temp_dir, "topographic_ruggedness_index.tif")
if not exists(topographic_ruggedness_index_path_temp):
  wbt.ruggedness_index(elevation_path_temp, topographic_ruggedness_index_path_temp)

# Deviation from Mean Elevation
dev_kernel_sizes = [3, 7, 11]
for kernel_size in dev_kernel_sizes:
  deviation_mean_elevation_path_temp = join(topography_temp_dir, f"deviation_mean_elevation_{str(kernel_size).rjust(2, '0')}.tif")
  if not exists(deviation_mean_elevation_path_temp):
    wbt.dev_from_mean_elev(elevation_path_temp, deviation_mean_elevation_path_temp, filterx=kernel_size, filtery=kernel_size)

# Circular Variance of Aspect
cva_kernel_sizes = [3, 7, 11]
for kernel_size in cva_kernel_sizes:
  circular_variance_aspect_path_temp = join(topography_temp_dir, f"circular_variance_aspect_{str(kernel_size).rjust(2, '0')}.tif")
  if not exists(circular_variance_aspect_path_temp):
    wbt.circular_variance_of_aspect(elevation_path_temp, circular_variance_aspect_path_temp, filter=kernel_size)

# Fill Single Cell Pits for Breach Depressions
dem_fill_single_cell_pits_path_temp = join(topography_temp_dir, "dem_fill_single_cell_pits.tif")
if not exists(dem_fill_single_cell_pits_path_temp):
  wbt.fill_single_cell_pits(elevation_path_temp, dem_fill_single_cell_pits_path_temp)
  # Raw output doesn't work, needs to be saved again.
  dem_fill_single_cell_pits = gdal.Open(dem_fill_single_cell_pits_path_temp).ReadAsArray()
  export_array_as_tif(dem_fill_single_cell_pits, dem_fill_single_cell_pits_path_temp, compress=False)

# Breach Depressions for Specific Contributing Area
max_search_dist = 2 # Maximum search distance for breach paths in cells (pixels)
dem_breach_depressions_path_temp = join(topography_temp_dir, "dem_breach_depressions.tif")
if not exists(dem_breach_depressions_path_temp):
  wbt.breach_depressions_least_cost(dem_fill_single_cell_pits_path_temp, dem_breach_depressions_path_temp, dist=max_search_dist)

# Specific Contributing Area (Qin) (for TWI and SPI)
specific_contributing_area_qin_path_temp = join(topography_temp_dir, "specific_contributing_area_qin.tif")
if not exists(specific_contributing_area_qin_path_temp):
  wbt.qin_flow_accumulation(dem_breach_depressions_path_temp, specific_contributing_area_qin_path_temp, out_type="specific contributing area")

# Topographic Wetness Index (TWI)
topographic_wetness_index_path_temp = join(topography_temp_dir, "topographic_wetness_index.tif")
if not exists(topographic_wetness_index_path_temp):
  wbt.wetness_index(specific_contributing_area_qin_path_temp, slope_path_temp, topographic_wetness_index_path_temp)

# Stream Power Index (SPI)
exponent = 1.0
stream_power_index_path_temp = join(topography_temp_dir, "stream_power_index.tif")
if not exists(stream_power_index_path_temp):
  wbt.stream_power_index(specific_contributing_area_qin_path_temp, slope_path_temp, stream_power_index_path_temp, exponent=exponent)

## Further algorithms

In [None]:
# Rasters produced by the whitebox cell that are used as inputs here
elevation_path_temp = join(topography_temp_dir, "elevation.tif")
slope_path_temp = join(topography_temp_dir, "slope.tif")
aspect_path_temp = join(topography_temp_dir, "aspect.tif")
stream_power_index_path_temp = join(topography_temp_dir, "stream_power_index.tif")

# Calculate Aspect Sine and Aspect Cosine (aspect is converted from degrees to radians first)
aspect_sine_path_temp = join(topography_temp_dir,"aspect_sine.tif")
aspect_cosine_path_temp = join(topography_temp_dir,"aspect_cosine.tif")
for trig_path, trig_function in ((aspect_sine_path_temp, np.sin), (aspect_cosine_path_temp, np.cos)):
  if not exists(trig_path):
    aspect_radians = np.radians(gdal.Open(aspect_path_temp).ReadAsArray())
    export_array_as_tif(trig_function(aspect_radians), trig_path, compress=False)

# Calculate Eastness (aspect sine) and Northness (aspect cosine), each weighted by sin(slope)
eastness_path_temp = join(topography_temp_dir,"eastness.tif")
northness_temp = join(topography_temp_dir,"northness.tif")
for weighted_path, aspect_component_path in ((eastness_path_temp, aspect_sine_path_temp), (northness_temp, aspect_cosine_path_temp)):
  if not exists(weighted_path):
    slope_radians = np.radians(gdal.Open(slope_path_temp).ReadAsArray())
    aspect_component = gdal.Open(aspect_component_path).ReadAsArray()
    export_array_as_tif(aspect_component * np.sin(slope_radians), weighted_path, compress=False)

elevation = gdal.Open(elevation_path_temp).ReadAsArray()

# Calculate Roughness: local maximum minus local minimum elevation in a square window
roughness_kernel_sizes = [3, 7, 11]
for kernel_size in roughness_kernel_sizes:
  roughness_path_temp = join(topography_temp_dir, f"roughness_{kernel_size:02d}.tif")
  if not exists(roughness_path_temp):
    local_maximum = maximum_filter(elevation, size=kernel_size)
    local_minimum = minimum_filter(elevation, size=kernel_size)
    export_array_as_tif(local_maximum - local_minimum, roughness_path_temp, compress=False)

# Calculate Topographic Position Index (TPI): elevation minus its local mean in a square window
tpi_kernel_sizes = [3, 7, 11]
for kernel_size in tpi_kernel_sizes:
  tpi_path_temp = join(topography_temp_dir, f"topographic_position_index_{kernel_size:02d}.tif")
  if not exists(tpi_path_temp):
    local_mean = uniform_filter(elevation, size=kernel_size)
    export_array_as_tif(elevation - local_mean, tpi_path_temp, compress=False)

# Calculate Stream Power Index (SPI) log10
spi_log10_path_temp = join(topography_temp_dir,"stream_power_index_log10.tif")
if not exists(spi_log10_path_temp):
  stream_power_index = gdal.Open(stream_power_index_path_temp).ReadAsArray()
  # Convert 0 values to one that's below any other non-zero value, so log10 is defined
  # NOTE(review): negative or nodata SPI values would still give NaN here - confirm none exist
  stream_power_index[stream_power_index == 0] = 1.0e-30
  export_array_as_tif(np.log10(stream_power_index), spi_log10_path_temp, compress=False)

## Surface Area Ratio

In [None]:
# The whitebox surface area ratio tool (call shown below) is currently not working
# correctly, so it is left disabled and the ratio is calculated in Python instead.
# wbt.surface_area_ratio(elevation_path_temp, surface_area_ratio_path_temp)

# The Python implementation is based on the source code of the whitebox SAR tool:
# https://github.com/jblindsay/whitebox-tools/blob/master/whitebox-tools-app/src/tools/terrain_analysis/surface_area_ratio.rs

# Input elevation raster and the path intended for the surface area ratio output
elevation_path_temp = join(topography_temp_dir, "elevation.tif")
surface_area_ratio_path_temp = join(topography_temp_dir, "surface_area_ratio.tif")

# Read the elevation values and the geotransform of the same raster
elevation_raster = gdal.Open(elevation_path_temp)
transform = elevation_raster.GetGeoTransform()
elevation_array = elevation_raster.ReadAsArray()

@jit(nopython=True)
def calculate_surface_area_ratio(dem, transform, nodata):
    """Ratio of 3D surface area to planar area for every interior cell of a DEM.

    Each cell's surface is approximated by eight triangles joining the cell centre to
    the half-way points towards its eight neighbours; triangle areas come from Heron's
    formula on the half-length 3D edges.

    Args:
        dem: 2D elevation array.
        transform: 6-element geotransform; indexes 1 and 5 are read as the x and y
            resolution and index 3 as the y coordinate of the top edge.
            NOTE(review): resolutions are multiplied by 111,111 and cos(latitude),
            so this presumably expects a geographic CRS in degrees - confirm.
        nodata: value marking missing cells in `dem`; also used to fill the output.

    Returns:
        float32 array with the shape of `dem`. The outermost rows and columns, nodata
        cells and cells with no valid triangle are left as `nodata`.
    """
    resx, resy = transform[1], -transform[5]
    output = np.full(dem.shape, nodata, dtype=np.float32)

    # Loop-invariant lookup tables. The 3x3 window is flattened row by row to indexes
    # 0-8 (4 is the centre cell).
    # Edges between window cells: 6 horizontal, 6 vertical, then 4 diagonals to the centre.
    dist_pairs = [(0, 1), (1, 2), (3, 4), (4, 5), (6, 7), (7, 8), (0, 3), (1, 4), (2, 5), (3, 6), (4, 7), (5, 8), (0, 4), (2, 4), (6, 4), (8, 4)]
    # Indexes into the 16 edges above that form each of the eight triangles.
    triangle_sides = [(0, 7, 12), (1, 7, 13), (2, 6, 12), (3, 8, 13), (2, 9, 14), (3, 11, 15), (4, 10, 14), (5, 10, 15)]

    for i in range(1, dem.shape[0]-1):
        # Cell dimensions for this row, scaled by the row's latitude
        mid_lat = transform[3] + i*transform[5]
        resx_adjusted = abs(resx) * 111_111.0 * cos(radians(mid_lat))
        resy_adjusted = abs(resy) * 111_111.0
        res_diag = sqrt(resx_adjusted**2 + resy_adjusted**2)
        cell_area = resx_adjusted * resy_adjusted
        eigth_area = cell_area / 8.0
        # Planar length of each of the 16 edges, in the same order as dist_pairs
        dist_planar = np.array([resx_adjusted]*6 + [resy_adjusted]*6 + [res_diag]*4)
        for j in range(1, dem.shape[1]-1):
            if dem[i, j] == nodata:
                continue

            zvals = dem[i-1:i+2, j-1:j+2].flatten()

            # Half-length 3D edge distances, kept at their original index. An edge touching
            # a nodata cell is flagged invalid instead of being dropped: dropping it would
            # shift every later edge and make triangle_sides pick the wrong edges.
            distances = np.zeros(16)
            valid = np.zeros(16, dtype=np.bool_)
            for k in range(16):
                p, q = dist_pairs[k]
                if zvals[p] != nodata and zvals[q] != nodata:
                    distances[k] = sqrt(dist_planar[k]**2 + (zvals[p] - zvals[q])**2) / 2
                    valid[k] = True

            area = 0.0
            cell_area2 = cell_area
            for a, b, c in triangle_sides:
                if valid[a] and valid[b] and valid[c]:
                    # Heron's formula
                    s = (distances[a] + distances[b] + distances[c]) / 2.0
                    area += sqrt(s * (s - distances[a]) * (s - distances[b]) * (s - distances[c]))
                else:
                    # A triangle with a missing edge is removed from the planar area as well
                    cell_area2 -= eigth_area

            if cell_area2 > 0.0:
                output[i, j] = area / cell_area2

    return output

# Run the jitted function on the entire array. `nodatavalue` is defined outside
# this cell; cells equal to it are skipped and it also fills output cells for
# which no ratio is computed.
surface_area_ratio_array = calculate_surface_area_ratio(elevation_array, transform, nodatavalue)

# Export alongside the other temporary topography rasters, using the elevation
# raster as the template
export_array_as_tif(surface_area_ratio_array, surface_area_ratio_path_temp, template=elevation_path_temp, compress=False)

## Further hydrography (optional)

In [None]:
%%capture
# Optional hydrography chain. Every step reads rasters written by an earlier
# step, so the statements must run in this order. All outputs go to the
# temporary topography directory.

# Streams shorter than this length (in metres) are removed further below
minimum_length_of_stream_m = 100

# Breached-depressions DEM; expected to already exist in the temp directory
dem_breach_depressions_path_temp = join(topography_temp_dir, "dem_breach_depressions.tif")

# D8 pointer (used for basins, short stream removal and stream order)
d8_pointer_path_temp = join(topography_temp_dir, "d8_pointer.tif")
wbt.d8_pointer(dem_breach_depressions_path_temp, d8_pointer_path_temp)

# Basins
basins_path_temp = join(topography_temp_dir, "basins.tif")
wbt.basins(d8_pointer_path_temp, basins_path_temp)

# Specific contributing area (D8), used to extract streams
specific_contributing_area_d8_path_temp = join(topography_temp_dir, "specific_contributing_area_d8.tif")
wbt.d8_flow_accumulation(dem_breach_depressions_path_temp, specific_contributing_area_d8_path_temp, out_type="specific contributing area")

# Streams
flow_accumulation_threshold = 0.01 # Lowest FA (usually 0 - 0.5) to include as stream
streams_path_temp = join(topography_temp_dir, "streams.tif")
wbt.extract_streams(specific_contributing_area_d8_path_temp, streams_path_temp, threshold=flow_accumulation_threshold)

# Streams - short removed
# Metres are divided by 111,111 (metres per degree), presumably because the
# rasters use a geographic CRS with degree map units - TODO confirm
minimum_length_of_stream = minimum_length_of_stream_m / 111_111
streams_short_removed_path_temp = join(topography_temp_dir, "streams_short_removed.tif")
wbt.remove_short_streams(d8_pointer_path_temp, streams_path_temp, streams_short_removed_path_temp, min_length=minimum_length_of_stream)

# Strahler stream order (computed on the streams with short segments removed)
strahler_stream_order_path_temp = join(topography_temp_dir, "strahler_stream_order.tif")
wbt.strahler_stream_order(d8_pointer_path_temp, streams_short_removed_path_temp, strahler_stream_order_path_temp)

# River Centre Lines (not working)
# minimum_river_length = 5
# minimum_pixel_search_radius_connection = 2
# river_centre_lines_path_temp = join(topography_temp_dir, "river_centre_lines.tif")
# wbt.river_centerlines(streams_path_temp, river_centre_lines_path_temp,
#                       min_length=minimum_river_length, radius=minimum_pixel_search_radius_connection)

In [None]:
%%capture
# May need to experiment with order depending on study area

# Base paths
d8_pointer_path_temp = join(topography_temp_dir, "d8_pointer.tif")

# Strahler stream order limit (for sub-basins)
strahler_order = 6
strahler_stream_order_path_temp = join(topography_temp_dir, "strahler_stream_order.tif")
strahler_stream_order_limit_path_temp = join(topography_temp_dir, "strahler_stream_order_limit.tif")
strahler_stream_order = gdal.Open(strahler_stream_order_path_temp).ReadAsArray()
# Streams at or above the chosen order become 1 and everything else becomes NaN.
# The mask is built as a new float array, because NaN cannot be written into an
# integer raster array and the original stream order array should stay unmodified.
strahler_stream_order_limit = np.where(strahler_stream_order >= strahler_order, 1.0, np.nan).astype(np.float32)
export_array_as_tif(strahler_stream_order_limit, strahler_stream_order_limit_path_temp, compress=False)

# Sub-basins, delineated from the D8 pointer and the limited stream network
subbasins_path_temp = join(topography_temp_dir, "subbasins.tif")
wbt.subbasins(d8_pointer_path_temp, strahler_stream_order_limit_path_temp, subbasins_path_temp)

In [None]:
# %%capture
# # To determine pour point (lowest point) of a basin:
# # 1. Download depressionless DEM. Determine range of values in depression (e.g. 50.2 - 50.3)
# # 2. Reclassify so that this range = 1, all else 0, with 0 as nodata.
# # 3. Convert pixels to points, and remove any outside the basin. Now sample raster values of DEM.
# # 4. Sort the points by lowest raster value, check it's within basin. This is the pour point needed.

# # Create watershed by pour points
# pour_points = join(base_dir, "pour_points/pour_points.shp")
# snap_distance_m = 5

# # Breached depressions DEM and d8 pointer paths
# dem_breach_depressions_path_temp = join(topography_temp_dir, "dem_breach_depressions.tif")
# d8_pointer_path_temp = join(topography_temp_dir, "d8_pointer.tif")
# streams_path_temp = join(topography_temp_dir, "streams.tif")

# #  Jenson snap pour points
# snap_distance = snap_distance_m / 111_111
# jenson_snap_pour_points_dir_temp = join(topography_temp_dir, 'jenson_snap_pour_points')
# makedirs(jenson_snap_pour_points_dir_temp, exist_ok=True)
# jenson_snap_pour_points_path_temp = join(jenson_snap_pour_points_dir_temp, "jenson_snap_pour_points.shp")
# wbt.jenson_snap_pour_points(pour_points, streams_path_temp, jenson_snap_pour_points_path_temp, snap_dist=snap_distance)

# # Watershed
# watershed_path_temp = join(topography_temp_dir, "watershed.tif")
# wbt.watershed(d8_pointer_path_temp, jenson_snap_pour_points_path_temp, watershed_path_temp)

## Visualise temporary dir

In [None]:
# Get a list of all GeoTIFF files in the temporary topography directory
raster_files = [os.path.join(topography_temp_dir, file) for file in os.listdir(topography_temp_dir) if file.endswith('.tif')]

for raster_file in raster_files:
    # Open the raster file
    ds = gdal.Open(raster_file)
    if ds is None:
        print('Could not open ' + raster_file)
        continue
    # Read the first band as float so that missing cells can be set to NaN
    band = ds.GetRasterBand(1)
    band_nodata = band.GetNoDataValue()
    raster_data = band.ReadAsArray().astype(np.float32)
    # Release the band and dataset
    band = None
    ds = None

    # Hide the band's declared nodata value so it does not distort the stretch
    if band_nodata is not None:
        raster_data[raster_data == np.float32(band_nodata)] = np.nan
    if np.isnan(raster_data).all():
        print('No valid data in ' + raster_file)
        continue

    # Compute the 2% and 98% percentiles, ignoring NaN cells
    # (np.percentile would return NaN limits if any cell is NaN)
    p2, p98 = np.nanpercentile(raster_data, [2, 98])

    # Plot the raster with a percentile stretch, a colorbar and the filename as title
    fig, ax = plt.subplots()
    image = ax.imshow(raster_data, cmap='viridis', vmin=p2, vmax=p98)
    fig.colorbar(image, ax=ax)
    ax.set_title(os.path.basename(raster_file))
    plt.show()
    # Free the figure, since one is created per raster
    plt.close(fig)

## Automate precision

In [None]:
# Print the rasters in the temporary topography directory as a ready-to-paste
# Python list. Only '.tif' files are listed, so other files kept in the same
# directory (e.g. the rounding dictionary CSV) cannot end up in the list.
topography_list = sorted(
    str(temp_tif) for temp_tif in os.listdir(topography_temp_dir)
    if str(temp_tif).endswith('.tif')
)
print("topography_final_list = [")
for topography in topography_list:
    print(f"'{topography}',")
print("]")

In [None]:
# Rasters (filenames inside the temporary topography directory) to carry
# forward for precision selection. Entries that are commented out are excluded.
topography_final_list = [
# 'aspect.tif',
'aspect_cosine.tif',
'aspect_sine.tif',
# 'basins.tif',
'circular_variance_aspect_03.tif',
'circular_variance_aspect_07.tif',
'circular_variance_aspect_11.tif',
# 'd8_pointer.tif',
# 'dem_breach_depressions.tif',
# 'dem_fill_single_cell_pits.tif',
'deviation_mean_elevation_03.tif',
'deviation_mean_elevation_07.tif',
'deviation_mean_elevation_11.tif',
'eastness.tif',
'elevation.tif',
'northness.tif',
'profile_curvature.tif',
'roughness_03.tif',
'roughness_07.tif',
'roughness_11.tif',
'slope.tif',
# 'specific_contributing_area_d8.tif',
# 'specific_contributing_area_qin.tif',
# 'strahler_stream_order.tif',
# 'strahler_stream_order_limit.tif',
# 'stream_power_index.tif',
'stream_power_index_log10.tif',
# 'streams.tif',
# 'streams_short_removed.tif',
# 'subbasins.tif',
'surface_area_ratio.tif',
'tangential_curvature.tif',
'topographic_position_index_03.tif',
'topographic_position_index_07.tif',
'topographic_position_index_11.tif',
'topographic_ruggedness_index.tif',
'topographic_wetness_index.tif',
]

In [None]:
# For every selected topography raster, find the highest rounding precision
# (number of decimals) that keeps the count of unique values at or below
# max_unique_values, then save the result as a CSV for manual verification.
override_max_unique_values = False
max_unique_values = 5000 # Should be >=10

if not override_max_unique_values:
  max_unique_values = int(np.ptp(dem_base_array)) # Precision based on elevation variance
topo_precision_dict = {}


for topography_final in topography_final_list:
  print(f"Reading {topography_final}...")
  # Read raster as array
  topography_raster_path = join(topography_temp_dir, topography_final)
  topography_raster_array = gdal.Open(topography_raster_path).ReadAsArray()
  # NaN can only be stored in floating point arrays
  if not np.issubdtype(topography_raster_array.dtype, np.floating):
    topography_raster_array = topography_raster_array.astype(np.float64)
  # Convert 'nodata' values to nan and keep only the valid cells
  topography_raster_array[topography_raster_array == nodatavalue] = np.nan
  valid_values = topography_raster_array[~np.isnan(topography_raster_array)]
  if valid_values.size == 0:
    print(f"There is no valid data in {topography_final}.")
    print("___________________\n")
    continue
  # Count unique values in raster
  unique_values = len(np.unique(valid_values))
  print(f"There are {unique_values} unique values in {topography_final}")
  # Generate histogram from up to 100,000 random valid points
  sample_size = min(100_000, valid_values.size)
  random_selection = np.random.choice(valid_values, size=sample_size, replace=False)
  _ = plt.hist(random_selection, bins='auto')  # arguments are passed to np.histogram
  plt.title(f"{topography_final}")
  plt.show()
  # Remove 0 values for log10
  nonzero_values = valid_values[valid_values != 0]
  # Reset for every raster so a missing result is always detected below
  optimal_precision = None
  if nonzero_values.size > 0:
    # Create log10 array for determining positions for rounding
    array_log10 = np.log10(np.abs(nonzero_values))
    place_value_decimal = int(abs(np.min(array_log10)))
    place_value_integer = int(0 - np.max(array_log10))
    # Iterate down precision levels to determine optimal number of unique values
    min_starting_precision = len(str(max_unique_values))
    for precision in reversed(range(place_value_integer, max(min_starting_precision, place_value_decimal +1))):
      # Count unique values among valid cells only, so nodata does not add to the count
      round_unique_values = len(np.unique(np.round(valid_values, decimals=precision)))
      if round_unique_values <= max_unique_values:
        optimal_precision = precision
        print(f"The optimal precison for {topography_final} is {optimal_precision}, with {round_unique_values} unique values.")
        topo_precision_dict.update({f'{topography_final}':f'{optimal_precision}'})
        break
  if optimal_precision is None: print("There's a problem with setting precision.")
  print("___________________\n")

print("Dictionary for optimal rounding values:")
print(topo_precision_dict)

precision_dict_csv_path = join(topography_temp_dir, 'rounding_dictionary.csv')
# Save rounding dictionary to CSV (first row: raster filenames, second row: precisions)
with open(precision_dict_csv_path, 'w', newline='') as precision_dict_csv:
    writer = csv.writer(precision_dict_csv)
    writer.writerow(topo_precision_dict.keys())
    writer.writerow(topo_precision_dict.values())

## Round and smooth

In [None]:
# Reload the rounding dictionary from its two-row CSV
# (row 1: raster filenames, row 2: precision values, read back as strings)
with open(precision_dict_csv_path, 'r') as precision_dict_csv:
    filename_row, precision_row = csv.reader(precision_dict_csv)
topo_precision_dict = dict(zip(filename_row, precision_row))

# Print as a ready-to-paste dictionary so the precisions can be checked and
# corrected by hand if necessary
print("topo_precision_dict = {")
for raster_name, decimals in topo_precision_dict.items():
    print(f'"{raster_name}": {decimals},')
print("}")

In [None]:
# Manually verified rounding precision (number of decimals passed to np.round)
# for each topography raster, keyed by its filename in the temp directory.
topo_precision_dict = {
"aspect_cosine.tif": 2,
"aspect_sine.tif": 2,
"circular_variance_aspect_03.tif": 3,
"circular_variance_aspect_07.tif": 3,
"circular_variance_aspect_11.tif": 3,
"deviation_mean_elevation_03.tif": 2,
"deviation_mean_elevation_07.tif": 2,
"deviation_mean_elevation_11.tif": 2,
"eastness.tif": 2,
"elevation.tif": 0,
"northness.tif": 2,
"profile_curvature.tif": 4,
"roughness_03.tif": 1,
"roughness_07.tif": 0,
"roughness_11.tif": 0,
"slope.tif": 1,
"stream_power_index_log10.tif": 1,
"surface_area_ratio.tif": 3,
"tangential_curvature.tif": 4,
"topographic_position_index_03.tif": 1,
"topographic_position_index_07.tif": 1,
"topographic_position_index_11.tif": 1,
"topographic_ruggedness_index.tif": 1,
"topographic_wetness_index.tif": 1,
}

In [None]:
# Set smoothing kernel
kernel = Gaussian2DKernel(x_stddev=1, y_stddev=1)

# Output filenames are prefixed according to the temporary directory in use
if topography_temp_dir.endswith("corrected_temp"): topo_prefix = "topo_cor"
else: topo_prefix = "topo_uncor"

# Topography progress
topography_total = len(topo_precision_dict)
topography_progress_index = 0
topography_progress_label = widgets.Label(f"Topography progress: {topography_progress_index}/{topography_total}")
display(topography_progress_label)

# Iterate over selected topography rasters
for topography, precision in topo_precision_dict.items():
  # Set output paths and check which of them still need to be created
  topo_raster_unsmoothed_filename = f"{topo_prefix}_unsmooth_{topography}"
  topo_raster_unsmoothed_path = join(topography_final_dir, topo_raster_unsmoothed_filename)
  topo_raster_smoothed_filename = f"{topo_prefix}_smooth_{topography}"
  topo_raster_smoothed_path = join(topography_final_dir, topo_raster_smoothed_filename)
  unsmoothed_missing = not exists(topo_raster_unsmoothed_path)
  smoothed_missing = not exists(topo_raster_smoothed_path)
  # Only read the raster when at least one output is missing, so that
  # re-running the cell after a completed run does no raster I/O
  if unsmoothed_missing or smoothed_missing:
    topo_raster_temp_path = join(topography_temp_dir, topography)
    topo_raster_temp_array = gdal.Open(topo_raster_temp_path).ReadAsArray()
    # Convert nodata values to 0
    topo_raster_temp_array[topo_raster_temp_array == nodatavalue] = 0
    if unsmoothed_missing:
      # Round and export unsmoothed topography raster
      topo_raster_unsmoothed_rounded = np.round(topo_raster_temp_array, decimals=int(precision))
      export_array_as_tif(topo_raster_unsmoothed_rounded, topo_raster_unsmoothed_path)
    if smoothed_missing:
      # Smooth using 2D spatial convolution
      topo_raster_smoothed = convolve(topo_raster_temp_array, kernel, boundary='extend')
      # Round and export smoothed topography raster
      topo_raster_smoothed_rounded = np.round(topo_raster_smoothed, decimals=int(precision))
      export_array_as_tif(topo_raster_smoothed_rounded, topo_raster_smoothed_path)
  # Update topography progress
  topography_progress_index += 1
  topography_progress_label.value = f"Topography progress: {topography_progress_index}/{topography_total}"

# Finalise predictor rasters

In [None]:
# Round latitude and longitude rasters and finalise
precision = 3
coordinates = ['latitude.tif', 'longitude.tif']
for raster in coordinates:
  raster_path = join(areas_dir, raster)
  array = gdal.Open(raster_path).ReadAsArray()
  # np.round returns a new array rather than rounding in place, so the result
  # must be kept; otherwise the unrounded values would be exported
  array = np.round(array, precision)
  export_array_as_tif(array, join(predictor_final_dir, raster))

In [None]:
# Compile the final predictor list: the coast proximity raster plus every file
# found in the predictor source directories
predictor_list = [join(coast_dir,'coast_proximity_km.tif')]
predictor_source_dirs = [continuous_final_dir, edge_effects_dir, topography_final_dir]
# The corrected topography directory is optional
if exists(topography_corrected_final_dir):
  predictor_source_dirs.append(topography_corrected_final_dir)
for predictor_source_dir in predictor_source_dirs:
  predictor_list.extend(join(predictor_source_dir, filename) for filename in os.listdir(predictor_source_dir))
predictor_list.sort()

# Print as a ready-to-paste list for manual verification
print("predictor_list = [")
for predictor in predictor_list:
  print(f"'{predictor}',")
print(']')

In [None]:
# Verified list of predictor rasters (absolute paths) to copy into the final
# predictor directory: edge effect rasters, coast proximity, and the smoothed
# and unsmoothed topography rasters.
predictor_list = [
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_1990.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_1991.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_1992.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_1993.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_1994.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_1995.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_1996.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_1997.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_1998.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_1999.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2000.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2001.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2002.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2003.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2004.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2005.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2006.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2007.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2008.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2009.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2010.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2011.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2012.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2013.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2014.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2015.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2016.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2017.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2018.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2019.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2020.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2021.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2022.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/disturbance_with_edge_effects_2023.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_1990.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_1991.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_1992.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_1993.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_1994.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_1995.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_1996.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_1997.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_1998.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_1999.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2000.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2001.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2002.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2003.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2004.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2005.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2006.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2007.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2008.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2009.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2010.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2011.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2012.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2013.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2014.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2015.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2016.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2017.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2018.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2019.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2020.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2021.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2022.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/forest_with_edge_effects_2023.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/pa_ais_with_edge_effects.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/pa_taman_krau_ais_with_edge_effects.tif',
'/gdrive/Shareddrives/masfi/3_predictors/binary_edge_effects/pa_taman_krau_with_edge_effects.tif',
'/gdrive/Shareddrives/masfi/3_predictors/coast/coast_proximity_km.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_aspect_cosine.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_aspect_sine.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_circular_variance_aspect_03.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_circular_variance_aspect_07.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_circular_variance_aspect_11.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_deviation_mean_elevation_03.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_deviation_mean_elevation_07.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_deviation_mean_elevation_11.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_eastness.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_elevation.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_northness.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_profile_curvature.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_roughness_03.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_roughness_07.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_roughness_11.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_slope.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_stream_power_index_log10.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_surface_area_ratio.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_tangential_curvature.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_topographic_position_index_03.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_topographic_position_index_07.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_topographic_position_index_11.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_topographic_ruggedness_index.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_smooth_topographic_wetness_index.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_aspect_cosine.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_aspect_sine.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_circular_variance_aspect_03.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_circular_variance_aspect_07.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_circular_variance_aspect_11.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_deviation_mean_elevation_03.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_deviation_mean_elevation_07.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_deviation_mean_elevation_11.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_eastness.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_elevation.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_northness.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_profile_curvature.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_roughness_03.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_roughness_07.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_roughness_11.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_slope.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_stream_power_index_log10.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_surface_area_ratio.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_tangential_curvature.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_topographic_position_index_03.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_topographic_position_index_07.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_topographic_position_index_11.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_topographic_ruggedness_index.tif',
'/gdrive/Shareddrives/masfi/3_predictors/topo_final/topo_uncor_unsmooth_topographic_wetness_index.tif',
]

In [None]:
# Copy every listed predictor into the final predictor directory, keeping its
# filename and skipping files that are already there
for predictor in predictor_list:
  predictor_name = predictor.rsplit('/', 1)[-1]
  predictor_destination = join(predictor_final_dir, predictor_name)
  if exists(predictor_destination):
    continue
  copyfile(predictor, predictor_destination)
print("All predictors finalised.")

# Disconnect runtime

In [None]:
# Release the Colab runtime once the notebook has finished.
# Useful for stopping background execution.
runtime.unassign()