<a href="https://colab.research.google.com/github/joekelly211/masfi/blob/main/7_uncertainty.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports and directories

In [None]:
# Define base directory
# Use '/content/drive/MyDrive/' for a personal drive
# Use '/gdrive/Shareddrives/' for a shared drive (must be created first)

base_dir = "/gdrive/Shareddrives/masfi"
# base_dir = '/content/drive/MyDrive/masfi'

# Mount Google Drive
from google.colab import drive
import os
import sys
# Mount whichever drive the base_dir prefix points at
_uses_shared_drive = base_dir.startswith('/gdrive/Shareddrives/')
_uses_personal_drive = base_dir.startswith('/content/drive/MyDrive/')
if _uses_shared_drive:
  drive.mount('/gdrive', force_remount=True)
elif _uses_personal_drive:
  drive.mount('/content/drive', force_remount=True)
  # The base directory is only created automatically on a personal drive
  os.makedirs(base_dir, exist_ok=True)
else:
  print("Create a base_dir beginning with '/gdrive/Shareddrives/' or '/content/drive/MyDrive/'.")

# Make modules stored in the base directory importable, adding the path only once
_path_to_add = os.path.realpath(base_dir)
if not any(entry == _path_to_add for entry in sys.path):
  sys.path.append(_path_to_add)

In [None]:
# Capture outputs
%%capture
# Installs and upgrades
!pip install geopandas
!pip install rasterio
!pip install xgboost

In [None]:
# Imports
try: import cupy # Only works on GPU runtime
except: None
from contextlib import contextmanager
import gc
import geopandas as gpd
from google.colab import runtime
import ipywidgets as widgets
import json
import numpy as np
from numpy import random
from numpy.random import normal
from os import makedirs
from os.path import exists, join
from osgeo import gdal, ogr
gdal.UseExceptions()
import pickle
import pandas as pd
import rasterio
from rasterio.features import rasterize
import scipy.stats as st
import shutil
from shutil import copyfile
import xgboost as xgb

In [None]:
# Define directories
# Every path below is built from base_dir; this cell only creates uncertainty_dir,
# the other directories are referenced but not created here.
areas_dir = join(base_dir, "1_areas")
polygons_dir = join(areas_dir, "polygons")
feature_dir = join(base_dir, "3_features")
feature_final_dir = join(feature_dir, "final")
models_dir = join(base_dir, "5_models")
scenarios_dir = join(base_dir, "6_scenarios")
masks_dir = join(scenarios_dir, "scenario_masks")
uncertainty_dir = join(base_dir, "7_uncertainty")

# Create directories
# exist_ok=True keeps the cell safe to re-run when the directory already exists
makedirs(uncertainty_dir, exist_ok=True)

In [None]:
# Global function: export an array as a .tif
template_tif_path = join(areas_dir, "template.tif")
nodatavalue = -1111111
compress = True
def export_array_as_tif(input_array, output_tif, template=template_tif_path, nodatavalue=nodatavalue, compress=compress, dtype=gdal.GDT_Float32):
    """Write an array to a single-band GeoTIFF on the same grid as a template raster.

    The output raster takes its width and height from band 1 of the template
    and its geotransform and projection from the template dataset.

    Args:
        input_array: Array written to band 1 of the output.
        output_tif: Path of the GeoTIFF to create.
        template: Path of the raster whose size, geotransform and projection are copied.
        nodatavalue: Value registered as nodata on the output band.
        compress: If True, the output is DEFLATE compressed at ZLEVEL 9.
        dtype: GDAL data type of the output band.

    Raises:
        RuntimeError: If the output dataset could not be created.
    """
    template_ds = gdal.Open(template)
    template_band = template_ds.GetRasterBand(1)
    template_dimensions, template_projection = template_ds.GetGeoTransform(), template_ds.GetProjection()
    options = []
    if compress:
        options += ["COMPRESS=DEFLATE", "ZLEVEL=9"]
        # PREDICTOR=3 is the floating point predictor and is rejected for integer
        # rasters, so integer types use horizontal differencing (2) instead.
        # Other types are written without a predictor.
        if dtype in (gdal.GDT_Float32, gdal.GDT_Float64):
            options.append("PREDICTOR=3")
        elif dtype in (gdal.GDT_Byte, gdal.GDT_UInt16, gdal.GDT_Int16, gdal.GDT_UInt32, gdal.GDT_Int32):
            options.append("PREDICTOR=2")
    output_ds = gdal.GetDriverByName("GTiff").Create(output_tif, template_band.XSize, template_band.YSize, 1, dtype, options=options)
    # Create returns None instead of raising when GDAL exceptions are disabled
    if output_ds is None:
        raise RuntimeError(f"Could not create {output_tif}")
    output_band = output_ds.GetRasterBand(1)
    output_band.WriteArray(input_array)
    output_band.SetNoDataValue(nodatavalue)
    output_ds.SetGeoTransform(template_dimensions)
    output_ds.SetProjection(template_projection)
    # Release the bands before their datasets; dropping the output dataset flushes it to disk
    template_band = output_band = None
    template_ds = output_ds = None

# Global function: burn a polygon to raster
def burn_polygon_to_raster(raster_path, polygon_path, fixed=True, fixed_value=1, column_name=None, all_touched=True):
    """Burn the first layer of a vector file into band 1 of an existing raster.

    Args:
        raster_path: Raster opened in update mode and modified in place.
        polygon_path: Vector file whose default layer is rasterized.
        fixed: If True burn `fixed_value`; otherwise burn a per-feature attribute.
        fixed_value: Value burned when `fixed` is True.
        column_name: Attribute burned when `fixed` is False. Falls back to the
            layer's first field when not given.
        all_touched: If True, pass ALL_TOUCHED=TRUE to the rasterizer.

    Raises:
        ValueError: If either input could not be opened.
    """
    target_ds = source_ds = None
    try:
        target_ds = gdal.Open(raster_path, gdal.GA_Update)
        source_ds = ogr.Open(polygon_path)
        if not (target_ds and source_ds):
            raise ValueError("Cannot open input files")
        source_layer = source_ds.GetLayer()
        rasterize_kwargs = {"options": ["ALL_TOUCHED=TRUE"] if all_touched else []}
        if fixed:
            rasterize_kwargs["burn_values"] = [fixed_value]
        else:
            if column_name:
                attr_name = column_name
            else:
                attr_name = source_layer.GetLayerDefn().GetFieldDefn(0).GetName()
            rasterize_kwargs["options"].append(f"ATTRIBUTE={attr_name}")
        gdal.RasterizeLayer(target_ds, [1], source_layer, **rasterize_kwargs)
    finally:
        # Flush whatever was burned, then drop both handles even after an error
        if target_ds:
            target_ds.FlushCache()
        target_ds = source_ds = None

# Select model

In [None]:
# Select a baseline model, tested and trained in advance.
# The target must have an uncertainty metric - otherwise
# skip to the next notebook '8_statistics' and use the outputs
# of the '6_scenarios' notebook.
# Print a ready-to-paste assignment for every directory that holds a model.json
model_prefix = f"{models_dir}/"
model_exists = False
for subdir, _, files in os.walk(models_dir):
  if 'model.json' not in files:
    continue
  # Path of the model directory relative to models_dir
  relative_model_dir = subdir.split(model_prefix, 1)[1]
  print(f'selected_model = "{relative_model_dir}"')
  model_exists = True
if model_exists is False:
  print("No model exists.")

In [None]:
# Name of the model directory (relative to models_dir) used for every step below
selected_model = "agbd_tekai_250625_003858"

# Define model directories
# NOTE: selected_model_descr_dir is the path of a .json file despite the '_dir' suffix
selected_model_dir = join(models_dir,selected_model)
selected_model_json = join(selected_model_dir, "model.json")
selected_model_descr_dir = join(selected_model_dir, "model_description.json")
selected_model_dataset_path = join(selected_model_dir, f"{selected_model}.pkl")
# NOTE(review): unpickling can execute arbitrary code - only load datasets
# written by this project.
selected_model_dataset = pd.read_pickle(selected_model_dataset_path)

# Read description for model dataset attributes
# Each key below is required; a missing key raises KeyError
with open(join(selected_model_dir,"model_dataset_description.json")) as model_dataset_description_json:
  model_dataset_description = json.load(model_dataset_description_json)
model_dataset_name = model_dataset_description["model_dataset_name"]
number_of_columns = model_dataset_description["number_of_columns"]
number_of_rows = model_dataset_description["number_of_rows"]
id_column = model_dataset_description["id_column"]
selected_target = model_dataset_description["selected_target"]
uncertainty = model_dataset_description["uncertainty"]
covariates_renamed = model_dataset_description["covariates_renamed"]
covariates_categorised = model_dataset_description["covariates_categorised"]
# The model's feature columns are the selected features followed by the renamed covariates
selected_features = model_dataset_description["selected_features"] + model_dataset_description["covariates_renamed"]
categorical_columns = model_dataset_description["categorical_columns"]
descriptive_parameters = model_dataset_description["descriptive_parameters"]
filter_parameter = model_dataset_description["filter_parameter"]
filter_values_to_include = model_dataset_description["filter_values_to_include"]
sample_imported_dataset = model_dataset_description["sample_imported_dataset"]
sample_imported_dataset_by_percent = model_dataset_description["sample_imported_dataset_by_percent"]
sample_imported_dataset_value = model_dataset_description["sample_imported_dataset_value"]

# Reload hyperparameters
with open(selected_model_descr_dir) as model_description_json:
  model_description = json.load(model_description_json)
# NOTE(review): eval() executes whatever expression is stored in the JSON file.
# If the stored string is always a plain dict literal, ast.literal_eval would be
# a safer parser - confirm the stored format before switching.
final_hyperparameters = eval(model_description["hyperparameters"])

# Remove early stopping and replace with mean n_estimators
# (the rounded "n_estimators mean" recorded in the model description)
if "early_stopping_rounds" in final_hyperparameters:
  final_hyperparameters = {k:v for k, v in final_hyperparameters.items() if k != "early_stopping_rounds"}
  final_hyperparameters["n_estimators"] = round(model_description["n_estimators mean"])

# Scenario inputs belonging to the selected model
scenarios_model_dir = join(scenarios_dir, selected_model)
features_dir = join(scenarios_model_dir, "features")
tile_templates_dir = join(scenarios_model_dir, "tile_templates")
tile_feature_stacks_dir = join(scenarios_model_dir, "tile_feature_stacks")

# Uncertainty outputs belonging to the selected model
uncertainty_selected_model_dir = join(uncertainty_dir, selected_model)
model_iterations_dir = join(uncertainty_selected_model_dir, "model_iterations")
tile_prediction_cache_dir = join(uncertainty_selected_model_dir, "tile_prediction_cache")
uncertainty_predictions_unmasked_dir = join(uncertainty_selected_model_dir, "uncertainty_predictions_unmasked")
predictions_dir = join(uncertainty_selected_model_dir, "uncertainty_predictions")

# Only the output directories are created; existing ones are left untouched
for _new_dir in (
    uncertainty_selected_model_dir,
    model_iterations_dir,
    tile_prediction_cache_dir,
    uncertainty_predictions_unmasked_dir,
    predictions_dir,
):
  makedirs(_new_dir, exist_ok=True)

# Model iterations

In [None]:
# Verify that the target is equal to the mean
print(f'mean = "{selected_target}"')

# Calculate se from columns flagged 'uncertainty'
if not uncertainty:
  print("There are no flagged uncertainty columns to calculate SE from.")
  print("Manually create the metric from the available columns.")
  for col in selected_model_dataset.columns:
    print(f"{col}")
else:
  # selected_target may be one column name or a collection of names. Normalise it
  # so the target is excluded by exact name: 'col not in "<string>"' would be a
  # substring test and could drop an unrelated uncertainty column.
  target_names = (selected_target,) if isinstance(selected_target, str) else tuple(selected_target)
  for col in selected_model_dataset.columns:
    if col in uncertainty and col not in target_names:
      print(f'se = "{col}"')

In [None]:
# Columns of selected_model_dataset holding the target mean and its standard error
mean = "tar_agbd"
se = "tar_agbd_se"
# GEDI L4A provides standard errors (agbd_se) for each footprint prediction that incorporate
# both model residual error and prediction uncertainty (GEDI L4A ATBD, Eq. 9, Kellner et al. 2021)
# Following Liang et al. (2023), these standard errors are used as standard deviations for Monte Carlo sampling

# Set model iterations
# One model is trained and saved per iteration
model_iterations = 100

# Define model (y axis changes for each iteration based on mean and se arrays)
# The features are copied so the dtype changes below do not touch the loaded dataset
model_dataset_x = selected_model_dataset[selected_features].copy()
for col in categorical_columns:
    # Categorical columns that are not among the selected features are ignored
    if col in model_dataset_x.columns:
        model_dataset_x[col] = model_dataset_x[col].astype('category')
# Per-row centre and spread of the normal distribution sampled for each iteration
mean_array = selected_model_dataset[mean].values
se_array = selected_model_dataset[se].values

# Work out whether the iterations are classifiers or regressors: read it from the
# first saved iteration when there is one, otherwise infer it from the target values
existing_model_path = join(model_iterations_dir, "model_iteration_1.json")
if exists(existing_model_path):
    temp_booster = xgb.Booster()
    temp_booster.load_model(existing_model_path)
    model_config = json.loads(temp_booster.save_config())

    learner_config = model_config['learner']
    objective_name = learner_config['objective']['name']
    num_class = int(learner_config['learner_model_param'].get('num_class', '0'))
    classification = any(obj_type in objective_name for obj_type in ('logistic', 'softprob', 'softmax'))
    multiclass = classification and num_class > 2
else:
    # A target with at most 10 distinct, whole-number values is treated as class labels
    distinct_targets = np.unique(mean_array)
    unique_values = len(distinct_targets)
    classification = unique_values <= 10 and all(val == int(val) for val in distinct_targets)
    multiclass = classification and unique_values > 2
    num_class = unique_values if multiclass else 0

# Build the matching estimator with the tested hyperparameters
if not classification:
    XGBPredictor = xgb.XGBRegressor(**final_hyperparameters)
    print("Model type: Regression")
else:
    XGBPredictor = xgb.XGBClassifier(**final_hyperparameters)
    if multiclass:
        print(f"Model type: Multiclass classification ({num_class} classes)")
    else:
        print("Model type: Binary classification")

# Parameters for xgb.train come from the estimator, plus the training metric
model_params = XGBPredictor.get_params()
model_params['eval_metric'] = model_description['metric_used_for_training']
# Default fix for new XGBoost version: drop the keys that are not passed on to xgb.train
for dropped_key in ('n_estimators', 'enable_categorical', 'missing'):
    model_params.pop(dropped_key, None)

# Progress label, updated after every iteration
model_progress_index = 0
model_progress_label = widgets.Label(f"Model iteration: {model_progress_index}/{model_iterations}")
display(model_progress_label)

for model_iteration in range(1, model_iterations + 1):
  model_iteration_filename = f"model_iteration_{model_iteration}.json"
  model_iteration_path = join(model_iterations_dir, model_iteration_filename)
  # Iterations saved by an earlier run are kept and only counted as progress
  already_trained = exists(model_iteration_path)
  if not already_trained:
    # Seeding with the iteration number makes each sampled target replicable
    np.random.seed(model_iteration)
    # This iteration's y is one draw per row from normal(mean, se)
    model_dataset_y = normal(mean_array, se_array)
    model_dtrain = xgb.DMatrix(data=model_dataset_x, label=model_dataset_y, enable_categorical=True)
    # Train with the tested hyperparameters, then save the iteration
    model = xgb.train(
        params=model_params,
        dtrain=model_dtrain,
        num_boost_round=final_hyperparameters['n_estimators'],
        verbose_eval=True,
    )
    model.save_model(model_iteration_path)
  model_progress_index = model_progress_index + 1
  model_progress_label.value = f"Model iteration: {model_progress_index}/{model_iterations}"
print("All model iterations have been trained and saved.")

# Scenario iterations

In [None]:
# Scenarios must be designed and tested with the '6_scenarios' notebook first.

# Count the template tiles ('template_tile*.tif') that already exist
template_tile_list = [
  entry for entry in os.listdir(tile_templates_dir)
  if entry.startswith('template_tile') and entry.endswith('.tif')
]
n_tiles = len(template_tile_list)
assert n_tiles > 0, "# There are no template tiles. Run the template tiles section, even if only one is created."
if n_tiles == 1:
  print("# There is 1 template tile.\n")
elif n_tiles > 1:
  print(f"# There are {n_tiles} template tiles.\n")

# A scenario is available when its stack directory holds exactly one entry per tile
scenario_stacks_list = [
  scenario_name for scenario_name in os.listdir(tile_feature_stacks_dir)
  if len(os.listdir(join(tile_feature_stacks_dir, scenario_name))) == n_tiles
]

# Print a ready-to-paste list of the scenarios that can be predicted
print("scenarios_to_predict = [")
for scenario in sorted(scenario_stacks_list):
  print(f'  "{scenario}",')
print("]")

In [None]:
# There is 1 template tile.

scenarios_to_predict = [
  "2018",
  "2019",
  "2020",
  "2021",
  "2021_no_degradation_since_1993",
  "2021_no_disturbance_since_1993",
  "2021_oldgrowth_1",
  "2021_oldgrowth_2",
  "2021_oldgrowth_all_land_1",
  "2021_oldgrowth_all_land_2",
  "2022",
  "2023",
  "2024",
  "2024_no_degradation_since_1996",
  "2024_no_disturbance_since_1996",
  "2024_no_disturbance_since_1997",
  "2024_no_disturbance_since_1998",
  "2024_no_disturbance_since_1999",
  "2024_no_disturbance_since_2000",
  "2024_no_disturbance_since_2001",
  "2024_no_disturbance_since_2002",
  "2024_no_disturbance_since_2003",
  "2024_no_disturbance_since_2004",
  "2024_no_disturbance_since_2005",
  "2024_no_disturbance_since_2006",
  "2024_no_disturbance_since_2007",
  "2024_no_disturbance_since_2008",
  "2024_no_disturbance_since_2009",
  "2024_no_disturbance_since_2010",
  "2024_no_disturbance_since_2011",
  "2024_no_disturbance_since_2012",
  "2024_no_disturbance_since_2013",
  "2024_no_disturbance_since_2014",
  "2024_no_disturbance_since_2015",
  "2024_no_disturbance_since_2016",
  "2024_no_disturbance_since_2017",
  "2024_no_disturbance_since_2018",
  "2024_no_disturbance_since_2019",
  "2024_no_disturbance_since_2020",
  "2024_no_disturbance_since_2021",
  "2024_no_disturbance_since_2022",
  "2024_no_disturbance_since_2023",
  "2024_no_disturbance_since_2024",
  "2024_oldgrowth_1",
  "2024_oldgrowth_2",
  "2024_oldgrowth_all_land_1",
  "2024_oldgrowth_all_land_2",
  "2024_road_mat_daling_deforestation_2023_30m_degradation_buffer",
]

# Check the number of model iterations available
# Only files named 'model_iteration_*.json' are counted, so a stray entry in the
# directory cannot inflate the total that scenario_iterations is checked against.
model_iterations_available = sum(
  1 for entry in os.listdir(model_iterations_dir)
  if entry.startswith("model_iteration_") and entry.endswith(".json")
)
print(f"\nThere are {model_iterations_available} model iterations available.")

In [None]:
# Set the number of scenario iterations. It must be <= the number of model iterations available.
scenario_iterations = 100

assert scenario_iterations <= model_iterations_available, f"Reduce the number of scenario iterations to <= {model_iterations_available}."

# GEDI covariates need to be changed to a set value for all predictions.
add_covariates = True # Adds a selected covariate value as the feature
sensitivity_value = 0.99
# Higher sensitivity is indicative of GEDI footprint 'quality'.
# However it may also overestimate vegetation metrics like AGBD.
# If predictions appear to have a positive bias, lower this and run again.
beam_value = 5
# 5 is the first of the full beams, which appears to have the least bias on vegetation metrics.
# Cover beams 1 - 4 underestimate. Full beams 7 - 8 overestimate. 5 - 6 tend to give average values.

# Probabilities instead of classes IF binary classification
predict_probabilities = False

# Classification threshold IF binary classification
classification_threshold = 0.5

# Detect GPU availability and set predictor type
# Allocating a tiny array fails when cupy was not imported (NameError) or when no
# GPU is usable; either way the CPU settings are used. Exception is caught instead
# of a bare except so that KeyboardInterrupt and SystemExit still propagate.
try:
    test_array = cupy.array([1, 2, 3])
    del test_array
    predictor_type = 'gpu_predictor'
    gpu_id, use_gpu = 0, True
    print("GPU detected and accessible - using GPU to load the feature stack and to predict.")
    @contextmanager
    def gpu_memory_context():
        """Run the wrapped code, then synchronise the device and free cached GPU memory."""
        try: yield
        finally:
            cupy.cuda.Device().synchronize()
            gc.collect()
            cupy.get_default_memory_pool().free_all_blocks()
            cupy.get_default_pinned_memory_pool().free_all_blocks()
except Exception:
    predictor_type = 'cpu_predictor'
    gpu_id, use_gpu = -1, False
    print("GPU not accessible - using CPU prediction")
xgb.set_config(verbosity=0, use_rmm=use_gpu)

# Check existing tile parameters
template_tile_list = []
for file in os.listdir(tile_templates_dir):
  if file.endswith('.tif') and file[:13] == 'template_tile':
    template_tile_list.append(file)
n_tiles = len(template_tile_list)

if n_tiles < 1: print("There are currently no template tiles.")
if n_tiles >= 1:
  # The width of the first tile is read here as the initial template_tile_x
  template_tile = gdal.Open(join(tile_templates_dir,'template_tile_1.tif'))
  template_tile_x = template_tile.GetRasterBand(1).XSize
  print(f"There are {n_tiles} template tiles.")

# Tile progress
# The label is always created so it can be updated whenever a scenario is split into
# several stacks, but it is only displayed when there are several template tiles.
tile_progress_index, tile_progress_label = 0, widgets.Label(value=f"Tile progress: 0 / {n_tiles}")
if n_tiles > 1:
  display(tile_progress_label)


def _is_gpu_memory_error(error):
    """Return True when the exception message indicates that GPU memory ran out."""
    message = str(error)
    return "Memory allocation error" in message or "Out of memory" in message


def _load_feature_stack(stack_path, try_gpu):
    """Load a .npy feature stack and append the fixed covariate columns.

    The file is memory mapped first so that the final array (including the two
    covariate columns when add_covariates is True) is allocated only once.

    Args:
        stack_path: Path of the 2D .npy feature stack.
        try_gpu: If True, load to the GPU first and fall back to the CPU when
            GPU memory is insufficient.

    Returns:
        (feature_stack, on_gpu): the array and whether it is held on the GPU.
    """
    temp_stack = np.load(stack_path, mmap_mode='r')
    n_rows, n_cols = temp_stack.shape
    final_n_cols = n_cols + 2 if add_covariates else n_cols

    def _build(xp):
        # xp is either cupy or numpy; both expose the calls used here
        stack = xp.empty((n_rows, final_n_cols), dtype=temp_stack.dtype)
        stack[:, :n_cols] = xp.asarray(temp_stack)
        # Add covariates (BEAM and sensitivity) as the last two columns
        if add_covariates:
            stack[:, -2] = beam_value
            stack[:, -1] = sensitivity_value
        return stack

    if try_gpu:
        try:
            return _build(cupy), True
        except Exception as e:
            if not _is_gpu_memory_error(e): raise
            print("GPU memory insufficient for feature stack, loading with CPU.")
        # Only reached after a GPU memory failure. Cleaning up outside the except
        # block lets the partially built GPU array be released first.
        with gpu_memory_context(): pass
    return _build(np), False


def _predict_feature_stack(predictor, feature_stack, classification, multiclass):
    """Predict one feature stack and return a flat array of predictions.

    Binary classifiers return the class 1 probability when predict_probabilities
    is True, otherwise the probability thresholded at classification_threshold.
    Multiclass classifiers return integer labels; regressors return raw values.
    If the GPU runs out of memory the runtime is terminated and the error re-raised.
    """
    try:
        if classification and not multiclass:
            class_1_probability = predictor.predict_proba(feature_stack)[:, 1]
            if predict_probabilities:
                return class_1_probability
            return (class_1_probability > classification_threshold).astype(int)
        prediction = predictor.predict(feature_stack)
        if classification:
            # Check if prediction is 2D (probabilities) and convert to class labels
            if prediction.ndim > 1 and prediction.shape[1] > 1: prediction = np.argmax(prediction, axis=1)
            # Ensure prediction is integer type for classification
            prediction = prediction.astype(int)
        return prediction
    except Exception as e:
        if _is_gpu_memory_error(e):
            print("GPU memory insufficient for prediction. Terminating runtime to save compute units, restart with TPU.")
            runtime.unassign()
        # Never continue without a prediction
        raise


# Loop through each scenario
for scenario in scenarios_to_predict:
  scenario_feature_stack_dir = join(tile_feature_stacks_dir, scenario)
  # Create scenario iterations directory
  scenario_iterations_dir = join(uncertainty_selected_model_dir, f"{scenario}_iterations")
  makedirs(scenario_iterations_dir, exist_ok=True)
  # Iteration progress
  iteration_progress_index = 0
  iteration_progress_label = widgets.Label(f"{scenario} iteration progress: {iteration_progress_index} / {scenario_iterations}")
  display(iteration_progress_label)

  # Skip the scenario when every iteration raster has already been exported
  scenario_iteration_list = [
    join(scenario_iterations_dir, f"{scenario}__{selected_model}_unmasked_iteration_{iteration}.tif")
    for iteration in range(1, scenario_iterations+1)
  ]
  if all(exists(scenario_iteration) for scenario_iteration in scenario_iteration_list):
    iteration_progress_label.value = f"{scenario} iteration progress: {scenario_iterations} / {scenario_iterations}"
    continue

  # Get number of stacks
  n_stacks = len(os.listdir(scenario_feature_stack_dir))
  if n_stacks < 1:
    print(f"Warning: no feature stacks found in {scenario_feature_stack_dir}. Skipping scenario {scenario}.")
    continue

  # A single stack is loaded once and reused by every model iteration.
  # Multiple stacks are loaded one at a time inside the iteration loop.
  feature_stack, feature_stack_on_gpu = None, False
  if n_stacks == 1:
    # Load template parameters
    template_tile_dir = join(tile_templates_dir, "template_tile_1.tif")
    template_tile = gdal.Open(template_tile_dir)
    template_tile_y = template_tile.GetRasterBand(1).YSize
    template_tile_x = template_tile.GetRasterBand(1).XSize
    stack_path = join(scenario_feature_stack_dir, f"feature_stack_{scenario}_1.npy")
    feature_stack, feature_stack_on_gpu = _load_feature_stack(stack_path, use_gpu)

  # Predict scenario for each model iteration
  for model_iteration in range(1, scenario_iterations+1):
    # Clear GPU memory if feature stack not on GPU
    if use_gpu and not feature_stack_on_gpu:
      with gpu_memory_context(): pass
    # Define the model: each scenario iteration uses its own trained model iteration
    model_dir = join(model_iterations_dir, f"model_iteration_{model_iteration}.json")
    prediction_iteration_filename = f"{scenario}__{selected_model}_unmasked_iteration_{model_iteration}.tif"
    prediction_iteration_path = join(scenario_iterations_dir, prediction_iteration_filename)
    # If scenario iteration does not exist:
    if not exists(prediction_iteration_path):
      # Load the model iteration and detect its type from the saved configuration
      booster = xgb.Booster()
      booster.load_model(model_dir)
      model_config = json.loads(booster.save_config())

      objective_name = model_config['learner']['objective']['name']
      num_class = int(model_config['learner']['learner_model_param'].get('num_class', '0'))
      classification = any(obj_type in objective_name for obj_type in ['logistic', 'softprob', 'softmax'])
      multiclass = classification and num_class > 2
      if classification and multiclass: print(f"Model type: Multiclass classification ({num_class} classes)")
      elif classification: print("Model type: Binary classification")
      else: print("Model type: Regression")

      # Select appropriate predictor type
      XGBPredictor = xgb.XGBClassifier() if classification else xgb.XGBRegressor()
      XGBPredictor.load_model(model_dir)
      XGBPredictor.set_params(predictor=predictor_type)
      if use_gpu: XGBPredictor.set_params(device='cuda:0')

      if n_stacks == 1:
        prediction = _predict_feature_stack(XGBPredictor, feature_stack, classification, multiclass)
        prediction_array = prediction.reshape((template_tile_y, template_tile_x))
        prediction = None # Flush prediction

      # Tiling for when the feature stack is separated into chunks
      else:
        # Create a tile cache directory for the prediction
        tile_cache_iteration_dir = join(tile_prediction_cache_dir, prediction_iteration_filename[:-4])
        makedirs(tile_cache_iteration_dir, exist_ok=True)
        # Create a tile count to match the feature stack chunk
        for stack in range(1, n_stacks+1):
          iteration_tile_filename = f"scenario_tile_{stack}.tif"
          iteration_tile_path = join(tile_cache_iteration_dir, iteration_tile_filename)
          # If scenario prediction tile does not exist:
          if not exists(iteration_tile_path):
            # Clear GPU memory before new stack
            if use_gpu:
              with gpu_memory_context(): pass
            # Load template tile parameters
            template_tile_dir = join(tile_templates_dir, f"template_tile_{stack}.tif")
            template_tile = gdal.Open(template_tile_dir)
            template_tile_y = template_tile.GetRasterBand(1).YSize
            template_tile_x = template_tile.GetRasterBand(1).XSize
            stack_filename = f"feature_stack_{scenario}_{stack}.npy"
            stack_path = join(scenario_feature_stack_dir, stack_filename)
            # Check if the .npy file actually exists
            if not os.path.exists(stack_path):
              print(f"Warning: {stack_filename} not found in {scenario_feature_stack_dir}. Skipping tile {stack}.")
              continue
            feature_stack, _ = _load_feature_stack(stack_path, use_gpu)
            prediction = _predict_feature_stack(XGBPredictor, feature_stack, classification, multiclass)
            feature_stack = None # Flush feature stack
            prediction_tile = prediction.reshape((template_tile_y, template_tile_x))
            prediction = None # Flush prediction
            # Export prediction array as .tif
            export_array_as_tif(prediction_tile, iteration_tile_path, template = template_tile_dir, compress = False)
            prediction_tile = None # Flush prediction tile
          # Update progress
          tile_progress_index += 1
          tile_progress_label.value = f"Tile progress: {tile_progress_index} / {n_stacks}"
        # Merge the cached tiles in tile-number order. os.listdir gives no ordering
        # guarantee, and name order would place tile 10 before tile 2.
        # Assumes tiles are row-wise strips numbered top to bottom - TODO confirm.
        tile_arrays = []
        for stack in range(1, n_stacks+1):
          tile_dir = join(tile_cache_iteration_dir, f"scenario_tile_{stack}.tif")
          if exists(tile_dir):
            tile_arrays.append(gdal.Open(tile_dir).ReadAsArray())
        # Delete scenario tile cache directory and reset index
        shutil.rmtree(tile_cache_iteration_dir)
        tile_progress_index = 0
        tile_progress_label.value = f"Tile progress: 0 / {n_tiles}"
        if not tile_arrays:
          print(f"Warning: no prediction tiles were produced for {prediction_iteration_filename}. Skipping.")
          continue
        prediction_array = np.vstack(tile_arrays)
        tile_arrays = None # Flush tile arrays

      # Define scenario template
      scenario_template = join(features_dir, os.listdir(features_dir)[0])
      export_array_as_tif(prediction_array, prediction_iteration_path, template = scenario_template, compress = True)
      prediction_array = None # Flush prediction array

    iteration_progress_index += 1
    iteration_progress_label.value = f"{scenario} iteration progress: {iteration_progress_index} / {scenario_iterations}"

  # Release the scenario's feature stack before the next scenario is loaded
  feature_stack = None
  if use_gpu:
    with gpu_memory_context(): pass
print("\nScenario iterations complete.")

# Scenario statistics


In [None]:
# Collect the scenarios that have an '<scenario>_iterations' directory,
# leaving out the directory that holds the model iterations
scenarios_iterations_list = [
  entry[:-11]  # strip the trailing '_iterations'
  for entry in os.listdir(uncertainty_selected_model_dir)
  if entry.endswith("_iterations") and "model_iterations" not in entry
]
# Print a ready-to-paste list of scenarios to calculate mean and standard deviation for
print("scenarios_to_calculate = [")
for scenario in sorted(scenarios_iterations_list):
  print(f'  "{scenario}",')
print("]")

In [None]:
# Scenarios to calculate statistics for.
# Each name must have a matching "<scenario>_iterations" folder in uncertainty_selected_model_dir,
# as the cells below read the iteration rasters from that folder.
scenarios_to_calculate = [
  "2018",
  "2019",
  "2020",
  "2021",
  "2021_no_degradation_since_1993",
  "2021_no_disturbance_since_1993",
  "2021_oldgrowth_1",
  "2021_oldgrowth_2",
  "2021_oldgrowth_all_land_1",
  "2021_oldgrowth_all_land_2",
  "2022",
  "2023",
  "2024",
  "2024_no_degradation_since_1996",
  "2024_no_disturbance_since_1996",
  "2024_no_disturbance_since_1997",
  "2024_no_disturbance_since_1998",
  "2024_no_disturbance_since_1999",
  "2024_no_disturbance_since_2000",
  "2024_no_disturbance_since_2001",
  "2024_no_disturbance_since_2002",
  "2024_no_disturbance_since_2003",
  "2024_no_disturbance_since_2004",
  "2024_no_disturbance_since_2005",
  "2024_no_disturbance_since_2006",
  "2024_no_disturbance_since_2007",
  "2024_no_disturbance_since_2008",
  "2024_no_disturbance_since_2009",
  "2024_no_disturbance_since_2010",
  "2024_no_disturbance_since_2011",
  "2024_no_disturbance_since_2012",
  "2024_no_disturbance_since_2013",
  "2024_no_disturbance_since_2014",
  "2024_no_disturbance_since_2015",
  "2024_no_disturbance_since_2016",
  "2024_no_disturbance_since_2017",
  "2024_no_disturbance_since_2018",
  "2024_no_disturbance_since_2019",
  "2024_no_disturbance_since_2020",
  "2024_no_disturbance_since_2021",
  "2024_no_disturbance_since_2022",
  "2024_no_disturbance_since_2023",
  "2024_no_disturbance_since_2024",
  "2024_oldgrowth_1",
  "2024_oldgrowth_2",
  "2024_oldgrowth_all_land_1",
  "2024_oldgrowth_all_land_2",
  "2024_road_mat_daling_deforestation_2023_30m_degradation_buffer",
]

In [None]:
# Check iteration number

# Exact '0' pixels without decimals can be an indicator that an iteration was incorrectly generated.
# These must be deleted and predicted again to avoid incorrect statistics.
check_problems = False

# Exact '0' pixels might have been genuinely predicted (though unlikely in a regression),
# in which case set fix_problems to True and add these iterations to the problem_rasters list.
# Entries can be the full path printed by the check below, or a bare filename as long as it
# exists in exactly one of the iteration directories of scenarios_to_calculate.
# The code below sets the 0 pixels to 0.001 so they won't trigger the problem checker again.
fix_problems = False
problem_rasters = [

]

# Nothing is modified unless fix_problems is explicitly enabled
if problem_rasters and not fix_problems:
  print("problem_rasters is not empty but fix_problems is False, so no rasters were modified.")

if fix_problems:
  for problem_raster in problem_rasters:
    # Resolve the entry to a single existing file, rather than relying on whichever
    # 'scenario' value happens to be left over from a previous loop
    if os.path.isabs(problem_raster):
      problem_raster_matches = [problem_raster] if exists(problem_raster) else []
    else:
      problem_raster_matches = []
      for problem_scenario in scenarios_to_calculate:
        problem_raster_candidate = join(uncertainty_selected_model_dir, f"{problem_scenario}_iterations", problem_raster)
        if exists(problem_raster_candidate): problem_raster_matches.append(problem_raster_candidate)
    if len(problem_raster_matches) != 1:
      print(f"{problem_raster} matched {len(problem_raster_matches)} files, so it was skipped. Provide its full path instead.")
      continue
    problem_raster_path = problem_raster_matches[0]
    # Replace exact 0 values and overwrite the raster, using itself as the template
    problem_raster_array = gdal.Open(problem_raster_path).ReadAsArray()
    problem_raster_array[problem_raster_array == 0] = 0.001
    export_array_as_tif(problem_raster_array, problem_raster_path, template = problem_raster_path, compress=True)
    print(f"Exact 0 values in {problem_raster_path} have been set to 0.001.")

# Check the number of prediction iterations
for scenario in scenarios_to_calculate:
  scenario_iterations_dir = join(uncertainty_selected_model_dir,f"{scenario}_iterations")
  iterations = 0
  for subdir in os.listdir(scenario_iterations_dir):
    if subdir.endswith(".tif"):
      # Check whether the prediction iteration is valid
      if check_problems:
        iteration = join(scenario_iterations_dir,subdir)
        iteration_array = gdal.Open(iteration).ReadAsArray()
        assert np.count_nonzero(iteration_array==0) == 0, f"{subdir} contains 0 values, so the iteration may not have predicted correctly.\n Check the file, delete and repredict if necessary.\n If they are valid 0 values, set fix_problems to True, add this path to problem_rasters and rerun this cell:\n {iteration}."
        iteration_array = None # Flush iteration array
      iterations += 1
  print(f"There are {iterations} prediction iterations for scenario {scenario} statistics.")

In [None]:
# Confidence level used for the t-distribution interval around the mean
confidence_interval = 0.95

# Statistics progress
stats_progress_index = 0
stats_progress_label = widgets.Label(f"Scenario stats progress: {stats_progress_index}/{len(scenarios_to_calculate)}")
display(stats_progress_label)

# For each scenario, calculate per-pixel statistics across its prediction iterations
# and export them as rasters: mean, standard error, confidence interval bounds,
# confidence interval half-width and percentage uncertainty.
for scenario in scenarios_to_calculate:
    stat_base_filename = f"{scenario}__{selected_model}"
    scenario_iterations_dir = join(uncertainty_selected_model_dir,f"{scenario}_iterations")

    # Define statistics raster directories
    stat_mean_filename = f"mean__{stat_base_filename}_unmasked.tif"
    stat_mean_dir = join(uncertainty_predictions_unmasked_dir,stat_mean_filename)
    stat_uncertainty_filename = f"uncertainty__{stat_base_filename}_unmasked.tif"
    stat_uncertainty_dir = join(uncertainty_predictions_unmasked_dir,stat_uncertainty_filename)

    # Check whether statistics rasters already exist
    stat_mean_tif_exists = exists(stat_mean_dir)
    stat_uncertainty_tif_exists = exists(stat_uncertainty_dir)

    if stat_mean_tif_exists and stat_uncertainty_tif_exists:
        print(f"{stat_mean_filename} and {stat_uncertainty_filename} already exist.")
    else:
        # Accumulate the sum and sum of squares one iteration at a time,
        # so only a single iteration raster is held in memory at once
        stat_sum = None
        stat_sum_sq = None
        iteration_n = 0

        for subdir in os.listdir(scenario_iterations_dir):
            if subdir.endswith(".tif"):
                iteration = os.path.join(scenario_iterations_dir, subdir)
                # Create float64 array for precise calculations
                iteration_array = gdal.Open(iteration).ReadAsArray().astype(np.float64)
                if stat_sum is None:
                    stat_sum = np.zeros_like(iteration_array, dtype=np.float64)
                    stat_sum_sq = np.zeros_like(iteration_array, dtype=np.float64)
                # Sum and sum of squares
                stat_sum += iteration_array
                stat_sum_sq += iteration_array ** 2
                iteration_n += 1

        if iteration_n == 0:
            # Without iterations there is nothing to average and no raster to use as a template
            print(f"No .tif iterations were found in {scenario_iterations_dir}, so {scenario} statistics were skipped.")
        else:
            # Calculate mean: sum / count
            stat_mean = stat_sum / iteration_n
            if not stat_mean_tif_exists:
                # The last iteration read is used as the template for all exports
                export_array_as_tif(stat_mean, stat_mean_dir, template = iteration)
                print(f"{stat_mean_filename} has been exported.")
            else: print(f"{stat_mean_filename} already exists.")

            if stat_uncertainty_tif_exists:
                print(f"{stat_uncertainty_filename} already exists.")
            elif iteration_n < 2:
                # The sample variance divides by (n - 1), so a single iteration would export NaN
                # rasters that are then treated as already existing on later runs
                print(f"Only {iteration_n} iteration exists for {scenario}, at least 2 are required for uncertainty statistics. Skipped.")
            else:
                # Sample variance: (sum of squares - sum^2 / n) / (n - 1),
                # clipped at 0 to absorb small negative values from floating point cancellation
                stat_variance = np.maximum(0, (stat_sum_sq - stat_sum ** 2 / iteration_n) / (iteration_n - 1))
                # Standard error: σ / sqrt(n)
                stat_se = np.sqrt(stat_variance) / np.sqrt(iteration_n)
                # Handle zero SE to avoid scipy warnings
                min_se_threshold = 1e-15
                stat_se_clean = np.maximum(stat_se, min_se_threshold)
                # Calculate confidence intervals using t-distribution
                stat_ci_lower, stat_ci_upper = st.t.interval(confidence_interval, iteration_n - 1, loc=stat_mean, scale=stat_se_clean)
                # Reset CI bounds to mean for pixels that originally had zero SE
                zero_se_mask = (stat_se == 0.0)
                stat_ci_lower[zero_se_mask] = stat_mean[zero_se_mask]
                stat_ci_upper[zero_se_mask] = stat_mean[zero_se_mask]
                # CI half-width: (upper - lower) / 2
                stat_ci = (stat_ci_upper - stat_ci_lower) / 2
                # Uncertainty: (CI / mean) * 100%, left at 0 where the mean is effectively 0
                stat_uncertainty = np.zeros_like(stat_mean)
                valid_mask = np.abs(stat_mean) > 1e-15
                stat_uncertainty[valid_mask] = (stat_ci[valid_mask] / stat_mean[valid_mask]) * 100

                # Export statistics arrays as rasters
                export_array_as_tif(stat_se, join(uncertainty_predictions_unmasked_dir,f"se__{stat_base_filename}_unmasked.tif"), template = iteration)
                print(f"se__{stat_base_filename}_unmasked.tif has been exported.")
                export_array_as_tif(stat_ci_lower, join(uncertainty_predictions_unmasked_dir,f"ci_lower__{stat_base_filename}_unmasked.tif"), template = iteration)
                print(f"ci_lower__{stat_base_filename}_unmasked.tif has been exported.")
                export_array_as_tif(stat_ci_upper, join(uncertainty_predictions_unmasked_dir,f"ci_upper__{stat_base_filename}_unmasked.tif"), template = iteration)
                print(f"ci_upper__{stat_base_filename}_unmasked.tif has been exported.")
                export_array_as_tif(stat_ci, join(uncertainty_predictions_unmasked_dir,f"ci__{stat_base_filename}_unmasked.tif"), template = iteration)
                print(f"ci__{stat_base_filename}_unmasked.tif has been exported.")
                # The uncertainty raster is exported last, as its existence marks the scenario as complete
                export_array_as_tif(stat_uncertainty, stat_uncertainty_dir, template = iteration)
                print(f"{stat_uncertainty_filename} has been exported.")

    stats_progress_index += 1
    stats_progress_label.value = (f"Scenario stats progress: {stats_progress_index}/{len(scenarios_to_calculate)}")

print("Statistics calculations and .tif exports complete.")

# Mask scenario statistics

In [None]:
# Polygons are used for masking: only areas inside the selected polygons are kept.
# Print a ready-to-copy list of candidates, leaving out the template, the buffered
# bounding box and any previously generated inverse polygons.
polygons_to_exclude = ['template.gpkg', 'project_area_buffered_bbox.gpkg']
print("mask_polygons = [")
for polygon in os.listdir(polygons_dir):
  if polygon in polygons_to_exclude or 'inverse' in polygon:
    continue
  # Drop the last 5 characters (the '.gpkg' extension)
  print(f"  '{polygon[:-5]}',")
print("]")

In [None]:
# Polygons used to mask the scenario statistics (uncomment to enable)
mask_polygons = [
  # 'project_area',
  'gedi_area',
  # 'peninsular_malaysia',
  # 'pa_taman_krau',
  # 'pa_ais',
]

# Decimal places for the masked mean rasters (GEDI L4A AGBD precision is 0)
mean_precision = 0
# Decimal places for the masked uncertainty rasters (non-negligible percentage uncertainty is sometimes <1)
uncertainty_precision = 1

# Make sure each mask polygon has an inverse (template minus polygon) saved alongside it,
# as the inverse is what gets burned into the rasters when masking
template_polygon_path = join(polygons_dir, "template.gpkg")
for polygon in mask_polygons:
  inverse_polygon_path = join(polygons_dir, f"{polygon}_inverse.gpkg")
  if exists(inverse_polygon_path):
    print(f"An inverse masking polygon for {polygon} already exists.")
    continue
  polygon_path = join(polygons_dir, f"{polygon}.gpkg")
  template_polygon = gpd.read_file(template_polygon_path)
  polygon_read = gpd.read_file(polygon_path)
  # The inverse polygon inherits the EPSG code of the mask polygon
  polygon_crs = polygon_read.crs.to_epsg()
  # Subtract the mask polygon from the template and keep the first resulting geometry
  inverse_geometries = template_polygon['geometry'].difference(polygon_read['geometry'])
  inverse_polygon = inverse_geometries.iloc[0]
  inverse_polygon_gdf = gpd.GeoDataFrame({'geometry': [inverse_polygon]}, crs=f"EPSG:{polygon_crs}")
  inverse_polygon_gdf.to_file(inverse_polygon_path, driver="GPKG")
  print(f"An inverse masking polygon for {polygon} has been created in {polygons_dir}.")


# Old-growth scenarios can be predicted in one or two versions, which are merged here
# into a single unmasked [scenario]_oldgrowth mean and uncertainty raster.

# If only [scenario]_oldgrowth_1 exists, it is copied unchanged to [scenario]_oldgrowth.

# If both oldgrowth_1 and oldgrowth_2 exist,
# oldgrowth_1 uses an area-based proxy for pre-Landsat undisturbed forest
# oldgrowth_2 simply removes all disturbance from all disturbance features
# The merged [scenario]_oldgrowth takes the maximum mean of the two for each pixel,
# and the uncertainty of whichever version supplied that mean.

# Identify all oldgrowth 1 files (both mean and uncertainty)
oldgrowth_oldgrowth_1_files = [
    f for f in os.listdir(uncertainty_predictions_unmasked_dir)
    if '_1__' in f
    and f.startswith(('mean__', 'uncertainty__'))
    and f.endswith('_unmasked.tif')
]

# The mean oldgrowth_1 files drive the merge, all other filenames are derived from them
mean_oldgrowth_1_files = [
    f for f in os.listdir(uncertainty_predictions_unmasked_dir)
    if f.startswith('mean__') and '_1__' in f and f.endswith('_unmasked.tif')
]

for mean_oldgrowth_1_file in mean_oldgrowth_1_files:
    # Derive the uncertainty, oldgrowth_2 and merged filenames
    uncertainty_oldgrowth_1_file = mean_oldgrowth_1_file.replace('mean__', 'uncertainty__')
    mean_oldgrowth_2_file = mean_oldgrowth_1_file.replace('_1__', '_2__')
    uncertainty_oldgrowth_2_file = uncertainty_oldgrowth_1_file.replace('_1__', '_2__')
    mean_merged_file = mean_oldgrowth_1_file.replace('_1__', '__')
    uncertainty_merged_file = uncertainty_oldgrowth_1_file.replace('_1__', '__')

    # Full paths for every input and output
    mean_oldgrowth_1_path = join(uncertainty_predictions_unmasked_dir, mean_oldgrowth_1_file)
    mean_oldgrowth_2_path = join(uncertainty_predictions_unmasked_dir, mean_oldgrowth_2_file)
    uncertainty_oldgrowth_1_path = join(uncertainty_predictions_unmasked_dir, uncertainty_oldgrowth_1_file)
    uncertainty_oldgrowth_2_path = join(uncertainty_predictions_unmasked_dir, uncertainty_oldgrowth_2_file)
    mean_merged_path = join(uncertainty_predictions_unmasked_dir, mean_merged_file)
    uncertainty_merged_path = join(uncertainty_predictions_unmasked_dir, uncertainty_merged_file)

    # Nothing to do when both merged rasters are already present
    if exists(mean_merged_path) and exists(uncertainty_merged_path):
        print(f"Merged files already exist for {mean_oldgrowth_1_file}")
        continue
    print(f"Processing {mean_oldgrowth_1_file}")

    # Without an oldgrowth_2 mean, the oldgrowth_1 rasters are copied as the merged result
    if not exists(mean_oldgrowth_2_path):
        if not exists(mean_merged_path):
            shutil.copy2(mean_oldgrowth_1_path, mean_merged_path)
            print(f"Copied oldgrowth_1 mean to {mean_merged_file}")
        if exists(uncertainty_oldgrowth_1_path) and not exists(uncertainty_merged_path):
            shutil.copy2(uncertainty_oldgrowth_1_path, uncertainty_merged_path)
            print(f"Copied oldgrowth_1 uncertainty to {uncertainty_merged_file}")
        continue

    # Both versions exist: keep the greater mean for each pixel
    mean_oldgrowth_1_array = gdal.Open(mean_oldgrowth_1_path).ReadAsArray()
    mean_oldgrowth_2_array = gdal.Open(mean_oldgrowth_2_path).ReadAsArray()
    # Remember which version supplied the mean, so the uncertainty can follow it
    oldgrowth_1_is_greater = mean_oldgrowth_1_array > mean_oldgrowth_2_array
    merged_mean = np.maximum(mean_oldgrowth_1_array, mean_oldgrowth_2_array)
    export_array_as_tif(merged_mean, mean_merged_path, compress=True)
    print(f"Saved merged mean: {mean_merged_file}")

    # Uncertainty is only merged when the oldgrowth_1 uncertainty exists
    if not exists(uncertainty_oldgrowth_1_path):
        continue
    uncertainty_oldgrowth_1_array = gdal.Open(uncertainty_oldgrowth_1_path).ReadAsArray()
    if exists(uncertainty_oldgrowth_2_path):
        # Use the oldgrowth_2 uncertainty wherever oldgrowth_1 was not the greater mean
        uncertainty_oldgrowth_2_array = gdal.Open(uncertainty_oldgrowth_2_path).ReadAsArray()
        merged_uncertainty = np.where(oldgrowth_1_is_greater, uncertainty_oldgrowth_1_array, uncertainty_oldgrowth_2_array)
    else:
        merged_uncertainty = uncertainty_oldgrowth_1_array
    export_array_as_tif(merged_uncertainty, uncertainty_merged_path, compress=True)
    print(f"Saved merged uncertainty: {uncertainty_merged_file}")


# Collect the unmasked statistics rasters that need masking
unmasked_predictions = []
for scenario_prediction in os.listdir(uncertainty_predictions_unmasked_dir):
  # Only mask mean and uncertainty for visualisation and calculating statistics
  if ('mean__' in scenario_prediction) or ('uncertainty__' in scenario_prediction):
      # Skip oldgrowth version files, as only their merged result is masked.
      # Filenames are '[statistic]__[scenario]__[model]_unmasked.tif', so the scenario is the
      # second '__' separated part and the versions are those ending in '_1' or '_2'.
      if scenario_prediction.split('__')[1].endswith(('_1', '_2')):
        continue
      unmasked_predictions.append(scenario_prediction)

# Determine last feature year for masking future scenarios
final_feature_years = []
for final_feature in os.listdir(feature_final_dir):
  # Feature years are read from filenames ending in '_YYYY.tif'.
  # The length check stops shorter filenames from raising an IndexError.
  if len(final_feature) >= 9 and final_feature.endswith('.tif') and final_feature[-9] == '_':
    # Skip features where the last four characters are not a number
    try: final_feature_years.append(int(final_feature[-8:-4]))
    except ValueError: continue
if not final_feature_years:
  raise ValueError(f"No features ending in '_YYYY.tif' were found in {feature_final_dir}, so the last feature year cannot be determined.")
last_feature_year = max(final_feature_years)

# Masking progress
masking_progress_index = 0
masking_progress_label = widgets.Label(f"Masking progress: {masking_progress_index}/{len(unmasked_predictions)}")
display(masking_progress_label)

# Mask scenario statistics with the relevant mask
# For each unmasked prediction, the first file in masks_dir (in os.listdir order) that
# satisfies the rule for its scenario type is used. The prediction is rounded, set to
# nodatavalue wherever the mask is not 1, exported to predictions_dir, and then has the
# inverse of each polygon in mask_polygons burned in as nodatavalue.
# Predictions that already have a masked raster in predictions_dir are skipped.
for scenario_prediction in unmasked_predictions: # Loop through each unmasked scenario
  # Remove the 13 character "_unmasked.tif" suffix to build the masked filename
  scenario_masked_filename = f"{scenario_prediction[:-13]}.tif"
  scenario_masked_dir = join(predictions_dir, scenario_masked_filename)
  # The scenario is the second '__' separated part, and its first four characters are the year
  scenario_year = int(scenario_prediction.split('__')[1][:4])
  if not exists(scenario_masked_dir):
    mask_exists = False
    for mask in os.listdir(masks_dir):
      # Characters 12 to 16 follow the "mask_forest_" prefix and are read as the mask year.
      # This raises a ValueError if masks_dir holds a file that does not follow that naming.
      mask_year = int(mask[12:16])

      # Match 'no disturbance since' scenarios (use mask from last year of disturbance)
      if 'no_disturbance_since' in scenario_prediction:
        # The last four characters of the scenario are the 'since' year
        disturbance_since_year = int(scenario_prediction.split('__')[1][-4:])
        if disturbance_since_year == mask_year:
          selected_mask_filename = mask
          selected_mask_dir = join(masks_dir, selected_mask_filename)
          mask_exists = True
          break

      # Match 'all land' old-growth scenarios
      elif 'oldgrowth_all_land' in scenario_prediction:
        if 'oldgrowth_all_land' in mask and f'{scenario_year}_oldgrowth_all_land' in mask:
          selected_mask_filename = mask
          selected_mask_dir = join(masks_dir, selected_mask_filename)
          mask_exists = True
          break

      # Match area-based deforestation scenarios
      elif 'deforestation' in scenario_prediction:
        if 'deforestation' in mask:
          mask_middle = mask[12:-4]  # Remove "mask_forest_" and ".tif"
          # The scenario must start with the mask name, so it can carry extra suffixes
          if scenario_prediction.split('__')[1].startswith(mask_middle):
            selected_mask_filename = mask
            selected_mask_dir = join(masks_dir, selected_mask_filename)
            mask_exists = True
            break

      # Match future scenarios with most recent forest mask
      elif scenario_year > last_feature_year:
        if last_feature_year == mask_year and 'oldgrowth_all_land' not in mask:
          selected_mask_filename = mask
          selected_mask_dir = join(masks_dir, selected_mask_filename)
          mask_exists = True
          break

      # Match all other historic scenarios (exclude specialised masks)
      elif scenario_year == mask_year and 'oldgrowth_all_land' not in mask:
        selected_mask_filename = mask
        selected_mask_dir = join(masks_dir, selected_mask_filename)
        mask_exists = True
        break

    if mask_exists == False: print(f"A suitable mask for {scenario_prediction} does not exist.\n")
    else: # Mask the scenario prediction
      print(f"Masking {scenario_prediction} with {selected_mask_filename}...")
      mask_array = gdal.Open(selected_mask_dir).ReadAsArray()
      scenario_prediction_unmasked_dir = join(uncertainty_predictions_unmasked_dir, scenario_prediction)
      scenario_prediction_array = gdal.Open(scenario_prediction_unmasked_dir).ReadAsArray()
      # Round to the precision set for each statistic before masking
      if 'mean__' in scenario_prediction: scenario_prediction_array = np.round(scenario_prediction_array, mean_precision)
      if 'uncertainty__' in scenario_prediction: scenario_prediction_array = np.round(scenario_prediction_array, uncertainty_precision)
      # Mask where the mask array is not 1
      scenario_masked_array = np.where(mask_array != 1, nodatavalue, scenario_prediction_array)
      export_array_as_tif(scenario_masked_array, scenario_masked_dir, compress = True)
      if len(mask_polygons) > 0:
        # Burn nodatavalue into the exported raster for the inverse of each mask polygon
        for polygon_mask in mask_polygons:
          inverse_gedi_area_path = join(polygons_dir, f"{polygon_mask}_inverse.gpkg")
          print(f"Masking {scenario_prediction} with {polygon_mask}...")
          burn_polygon_to_raster(scenario_masked_dir, inverse_gedi_area_path, fixed_value=nodatavalue, all_touched=False)
        # Recompress the prediction after burning the polygon masks
        scenario_masked_array_2 = gdal.Open(scenario_masked_dir).ReadAsArray()
        export_array_as_tif(scenario_masked_array_2, scenario_masked_dir, compress = True)
      print(f"{scenario_masked_filename} exported.")
  # Update masking progress
  masking_progress_index += 1
  masking_progress_label.value = f"Masking progress: {masking_progress_index}/{len(unmasked_predictions)}"

# Disconnect runtime

In [None]:
# Useful for stopping background execution upon completion
# Disconnects the Colab runtime through google.colab.runtime, so run it only once the cells above have finished
runtime.unassign()