<a href="https://colab.research.google.com/github/joekelly211/masfi/blob/dev/8_differences.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports, directories and global functions

In [None]:
# Define base directory
# Use '/content/drive/MyDrive/' for a personal drive
# Use '/gdrive/Shareddrives/' for a shared drive (must be created first)

base_dir = "/gdrive/Shareddrives/masfi"
# base_dir = '/content/drive/MyDrive/masfi'

# Mount Google Drive
from google.colab import drive
import os
import sys

# Work out which kind of drive base_dir points at before mounting anything.
_on_shared_drive = base_dir.startswith('/gdrive/Shareddrives/')
_on_personal_drive = base_dir.startswith('/content/drive/MyDrive/')

if _on_shared_drive:
  # Shared drives are only mounted; the base directory is not created here.
  drive.mount('/gdrive', force_remount=True)
elif _on_personal_drive:
  # Personal drives are mounted and the base directory is created if missing.
  drive.mount('/content/drive', force_remount=True)
  os.makedirs(base_dir, exist_ok=True)
else:
  print("Create a base_dir beginning with '/gdrive/Shareddrives/' or '/content/drive/MyDrive/'.")

# Make modules stored in base_dir importable (added to sys.path at most once).
_path_to_add = os.path.realpath(base_dir)
if _path_to_add not in sys.path:
    sys.path.append(_path_to_add)

In [None]:
# Capture outputs
%%capture
# Installs and upgrades
!pip install geopandas
!pip install rasterio
!apt-get install -y gdal-bin

In [None]:
# Reload imports, replacing those in the cache
%reload_ext autoreload
%autoreload 2
# Imports
import geopandas as gpd
from google.colab import runtime
from os import makedirs
from os.path import join, exists
from osgeo import gdal, ogr
gdal.UseExceptions()
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from rasterio.features import rasterize
from shutil import copyfile

In [None]:
# Define directories (all are sub-directories of base_dir)
areas_dir = join(base_dir, "1_areas")
polygons_dir = join(base_dir, "1_areas", "polygons")
scenarios_dir = join(base_dir, "6_scenarios")
masks_dir = join(base_dir, "6_scenarios", "scenario_masks")
uncertainty_dir = join(base_dir, "7_uncertainty")
differences_dir = join(base_dir, "8_differences")

# Create directories
# Only the output directory of this notebook is created; the others are read.
makedirs(differences_dir, exist_ok=True)

In [None]:
# Global function: export an array as a .tif
template_tif_path = join(areas_dir, "template.tif")
nodatavalue = -1111111
compress = True
def export_array_as_tif(input_array, output_tif, template=template_tif_path, nodatavalue=nodatavalue, compress=compress, dtype=gdal.GDT_Float32):
    """Write a 2D array to a single-band GeoTIFF georeferenced like a template.

    Parameters:
        input_array: array written to band 1 of the output.
        output_tif: path of the GeoTIFF to create.
        template: path of a raster whose size (band 1 XSize / YSize),
            geotransform and projection are copied to the output.
        nodatavalue: value registered as the nodata value of band 1.
        compress: if True, write with DEFLATE compression (ZLEVEL=9).
        dtype: GDAL data type of the output band.

    Returns:
        None. The output file is closed before returning.
    """
    template_ds = gdal.Open(template)
    template_band = output_ds = output_band = None
    try:
        template_band = template_ds.GetRasterBand(1)
        template_dimensions = template_ds.GetGeoTransform()
        template_projection = template_ds.GetProjection()
        if compress:
            options = ["COMPRESS=DEFLATE", "ZLEVEL=9"]
            # PREDICTOR=3 (floating point predictor) is only valid for float
            # data, so choose the predictor that matches the requested dtype.
            # Types outside both groups are written without a predictor.
            if dtype in (gdal.GDT_Float32, gdal.GDT_Float64):
                options.append("PREDICTOR=3")
            elif dtype in (gdal.GDT_Byte, gdal.GDT_UInt16, gdal.GDT_Int16, gdal.GDT_UInt32, gdal.GDT_Int32):
                options.append("PREDICTOR=2")
        else:
            options = []
        output_ds = gdal.GetDriverByName("GTiff").Create(output_tif, template_band.XSize, template_band.YSize, 1, dtype, options=options)
        output_band = output_ds.GetRasterBand(1)
        output_band.WriteArray(input_array)
        output_band.SetNoDataValue(nodatavalue)
        output_ds.SetGeoTransform(template_dimensions)
        output_ds.SetProjection(template_projection)
    finally:
        # Drop band references before their datasets so the files are flushed
        # and closed even if an exception was raised part-way through.
        template_band = output_band = None
        template_ds = output_ds = None

# Global function: burn a polygon to raster
def burn_polygon_to_raster(raster_path, polygon_path, fixed=True, fixed_value=1, column_name=None, all_touched=True):
    """Rasterize the first layer of a vector file into band 1 of an existing raster.

    Parameters:
        raster_path: path of the raster, opened in update mode and modified in place.
        polygon_path: path of the vector file whose default layer is burned.
        fixed: if True, burn `fixed_value` everywhere; otherwise burn the
            value of an attribute of each feature.
        fixed_value: value burned when `fixed` is True.
        column_name: attribute to burn when `fixed` is False; when empty or
            None the layer's first field is used.
        all_touched: if True, pass ALL_TOUCHED=TRUE to the rasterizer.

    Raises:
        ValueError: if either input opens as a falsy object.
    """
    raster = None
    vector = None
    try:
        raster = gdal.Open(raster_path, gdal.GA_Update)
        vector = ogr.Open(polygon_path)
        if not (raster and vector):
            raise ValueError("Cannot open input files")
        layer = vector.GetLayer()
        options = []
        if all_touched:
            options.append("ALL_TOUCHED=TRUE")
        if not fixed:
            # Burn a per-feature attribute, defaulting to the first field.
            attr_name = column_name or layer.GetLayerDefn().GetFieldDefn(0).GetName()
            options.append(f"ATTRIBUTE={attr_name}")
            gdal.RasterizeLayer(raster, [1], layer, options=options)
        else:
            gdal.RasterizeLayer(raster, [1], layer, burn_values=[fixed_value], options=options)
    finally:
        # Flush pending writes, then release both handles.
        if raster:
            raster.FlushCache()
        raster = None
        vector = None

# Select source and model

In [None]:
# Select whether to source predictions from scenarios_dir or uncertainty_dir.
# If available, uncertainty_dir should be selected so that uncertainty can
# be propagated and scenario 'mean' iteration values used.

# source_dir = uncertainty_dir
source_dir = scenarios_dir

# If uncertainty selected, check it exists.
# This runs before the confirmation message so that the message always names
# the directory that is actually used.
if source_dir == uncertainty_dir and not exists(uncertainty_dir):
  print("The uncertainty directory does not yet exist. Defaulting to scenarios directory.")
  source_dir = scenarios_dir

print(f"{source_dir.split('/')[-1]} has been selected as the source directory for predictions")
print("to calculate disturbance and intactness.\n")

# Suffix used in output directory names: text after the last '_' of the path, plus '_dir'.
source_dir_name = f"{source_dir.split('_')[-1]}_dir"

# Select the model
# Prints one ready-to-paste assignment per entry of the source directory,
# skipping the 'scenario_masks' directory.
for subdir in os.listdir(source_dir):
  if 'scenario_masks' not in subdir:
    print(f"selected_model = '{subdir}'")

In [None]:
selected_model = 'agbd_tekai_250625_003858'

# The predictions sub-directory name depends on which source was selected.
selected_model_dir = join(source_dir, selected_model)
if source_dir == scenarios_dir:
  predictions_dir = join(selected_model_dir, 'scenario_predictions')
if source_dir == uncertainty_dir:
  predictions_dir = join(selected_model_dir, 'uncertainty_predictions')

# Check predictions exist to calculate differences
if len(os.listdir(predictions_dir)) >= 2:
  # Output directories for this model / source combination.
  model_differences_dir = join(differences_dir, f"{selected_model}_{source_dir_name}")
  disturbance_dir = join(model_differences_dir, 'disturbance')
  intactness_dir = join(model_differences_dir, 'intactness')
  for _output_dir in (model_differences_dir, disturbance_dir, intactness_dir):
    makedirs(_output_dir, exist_ok=True)
else:
  print(f"At least 2 predictions must exist in {source_dir} to calculate differences.")

# Disturbance

In [None]:
# Disturbance is measured as absolute AGBD loss
# This block builds dictionaries of disturbance options based on available files

# Scenario predictions: the scenario name is the text before the first '__'.
if source_dir == scenarios_dir:
  scenarios = {file.split("__")[0] for file in os.listdir(predictions_dir)}

# OR uncertainty predictions: '<stat>__<scenario>__...', grouped by scenario.
if source_dir == uncertainty_dir:
  prediction_stats = {}
  for file in os.listdir(predictions_dir):
      parts = file.split("__")
      if len(parts) < 2:
          continue
      prediction_stats.setdefault(parts[1], set()).add(parts[0])
  # Only keep scenarios that have both 'uncertainty' and 'mean' prediction stats
  scenarios = {name for name, stats in prediction_stats.items()
               if {'uncertainty', 'mean'} <= stats}

# Categorise years from scenarios
years = set()                     # every year usable as a loop bound below
plain_years = set()               # scenarios that are just a year, e.g. '2024'
oldgrowth_years = set()           # '<year>_oldgrowth...' scenarios
oldgrowth_all_land_years = set()  # '<year>_oldgrowth_all_land...' scenarios

for s in scenarios:
    if s.isdigit():
        plain_years.add(int(s))
        years.add(int(s))
        continue
    # '_oldgrowth_all_land' must be tested before the shorter '_oldgrowth'.
    if "_oldgrowth_all_land" in s:
        prefix = s.split("_oldgrowth_all_land")[0]
        if prefix.isdigit():
            oldgrowth_all_land_years.add(int(prefix))
            years.add(int(prefix))
        continue
    if "_oldgrowth" in s:
        prefix = s.split("_oldgrowth")[0]
        if prefix.isdigit():
            oldgrowth_years.add(int(prefix))
            years.add(int(prefix))
        continue
    if "_no_disturbance_since_" in s or "_no_degradation_since_" in s:
        prefix = s.split("_")[0]
        if prefix.isdigit():
            years.add(int(prefix))
        # Both patterns contain '_since_', so the split always has a second part.
        # The year before the 'since' year is stored because later loops
        # rebuild the 'since' year as year_b + 1.
        since_year = s.split("_since_")[1]
        if since_year.isdigit():
            years.add(int(since_year) - 1)

years_sorted = sorted(years)

# Output dictionaries
# Keys are pairs of input names, values are the output name to create.
disturbance_since = {}
degradation_since_dictionary = {}
deforestation_since_dictionary = {}
before_dictionary = {}

print("disturbance_since_dictionary = {")
print("")

# 1. Process disturbance_since scenarios
# Pair each plain year with its '<year>_no_disturbance_since_<since>' scenario.
for year_a in years_sorted:
    if year_a not in plain_years:
        continue
    a_str = str(year_a)
    for year_b in years_sorted:
        # years_sorted is ascending, so no earlier year remains after this point.
        if year_b >= year_a:
            break
        b_plus1 = str(year_b + 1)
        alternate = f"{a_str}_no_disturbance_since_{b_plus1}"
        if alternate not in scenarios:
            continue
        result_name = f"{a_str}_disturbance_since_{b_plus1}"
        print(f"# Disturbance in {a_str} caused by events since {b_plus1}")
        print(f"  ('{a_str}', '{alternate}'):")
        print(f"    '{result_name}',")
        print("")
        disturbance_since[(a_str, alternate)] = result_name

# Process disturbance_since_oldgrowth scenarios
for year in years_sorted:
    y_str = str(year)
    oldgrowth_scenario = f"{y_str}_oldgrowth_all_land"
    if year in plain_years and year in oldgrowth_all_land_years and oldgrowth_scenario in scenarios:
        # The header names a specific year, so it is printed for every entry
        # rather than once for the whole group.
        print(f"# Disturbance in {y_str} caused by events since an oldgrowth state.")
        print(f"  ('{y_str}', '{oldgrowth_scenario}'):")
        print(f"    '{y_str}_disturbance_since_oldgrowth',")
        print("")
        disturbance_since[(y_str, oldgrowth_scenario)] = f"{y_str}_disturbance_since_oldgrowth"

print("}\n")

# 2. Degradation since dictionary
print("degradation_since_dictionary = {\n")

# Pair each plain year with its '<year>_no_degradation_since_<since>' scenario.
for year_a in years_sorted:
    if year_a not in plain_years:
        continue
    a_str = str(year_a)
    for year_b in years_sorted:
        # years_sorted is ascending, so no earlier year remains after this point.
        if year_b >= year_a:
            break
        b_plus1 = str(year_b + 1)
        alternate = f"{a_str}_no_degradation_since_{b_plus1}"
        if alternate not in scenarios:
            continue
        result_name = f"{a_str}_degradation_since_{b_plus1}"
        print(f"# Degradation in {a_str} caused by events since {b_plus1}")
        print(f"  ('{a_str}', '{alternate}'):")
        print(f"    '{result_name}',")
        print("")
        degradation_since_dictionary[(a_str, alternate)] = result_name

# Process degradation_since_oldgrowth scenarios
for year in years_sorted:
    y_str = str(year)
    oldgrowth_scenario = f"{y_str}_oldgrowth"
    if year in plain_years and year in oldgrowth_years and oldgrowth_scenario in scenarios:
        # The header names a specific year, so it is printed for every entry
        # rather than once for the whole group.
        print(f"# Degradation in {y_str} caused by events since an old-growth state.")
        print(f"  ('{y_str}', '{oldgrowth_scenario}'):")
        print(f"    '{y_str}_degradation_since_oldgrowth',")
        print("")
        degradation_since_dictionary[(y_str, oldgrowth_scenario)] = f"{y_str}_degradation_since_oldgrowth"

print("}\n")

# 3. Deforestation since dictionary
# An entry is created only when both the degradation and the disturbance
# output exist for the same year and the same 'since' year.
print("deforestation_since_dictionary = {\n")

for year_a in years_sorted:
    a_str = str(year_a)
    for year_b in years_sorted:
        # years_sorted is ascending, so no earlier year remains after this point.
        if year_b >= year_a:
            break
        b_plus1 = str(year_b + 1)

        deg_key = (a_str, f"{a_str}_no_degradation_since_{b_plus1}")
        dist_key = (a_str, f"{a_str}_no_disturbance_since_{b_plus1}")
        if deg_key not in degradation_since_dictionary or dist_key not in disturbance_since:
            continue

        deg_result = degradation_since_dictionary[deg_key]
        dist_result = disturbance_since[dist_key]
        defor_result = f"{a_str}_deforestation_since_{b_plus1}"

        print(f"# Deforestation in {a_str} caused by events since {b_plus1}")
        print(f"  ('{deg_result}', '{dist_result}'):")
        print(f"    '{defor_result}',")
        print("")
        deforestation_since_dictionary[(deg_result, dist_result)] = defor_result

# Process deforestation_since_oldgrowth scenarios
for year in years_sorted:
    y_str = str(year)
    deg_key = (y_str, f"{y_str}_oldgrowth")
    dist_key = (y_str, f"{y_str}_oldgrowth_all_land")
    if deg_key not in degradation_since_dictionary or dist_key not in disturbance_since:
        continue

    deg_result = degradation_since_dictionary[deg_key]
    dist_result = disturbance_since[dist_key]
    defor_result = f"{y_str}_deforestation_since_oldgrowth"
    # The header names a specific year, so it is printed for every entry
    # rather than once for the whole group.
    print(f"# Deforestation in {y_str} caused by events since an old-growth state.")
    print(f"  ('{deg_result}', '{dist_result}'):")
    print(f"    '{defor_result}',")
    print("")
    deforestation_since_dictionary[(deg_result, dist_result)] = defor_result

print("}\n")

# 4. Disturbance before the baseline years
# Pairs each '..._since_oldgrowth' output with the matching '..._since_<year>'
# output to name a '..._before_<year>' output.
print("before_dictionary = {\n")

# Names of all deforestation outputs, collected once because the dictionary
# is not modified inside the loops below.
deforestation_outputs = set(deforestation_since_dictionary.values())

for year_a in years_sorted:
    a_str = str(year_a)
    for year_b in years_sorted:
        # years_sorted is ascending, so no earlier year remains after this point.
        if year_b >= year_a:
            break
        b_plus1 = str(year_b + 1)
        header = f"# Disturbances in {a_str} caused by events before {b_plus1}."

        # Disturbance and degradation are looked up by their scenario keys
        checks = [
            (disturbance_since, f"{a_str}_no_disturbance_since_{b_plus1}", f"{a_str}_oldgrowth_all_land", "disturbance"),
            (degradation_since_dictionary, f"{a_str}_no_degradation_since_{b_plus1}", f"{a_str}_oldgrowth", "degradation")
        ]

        # The header is printed at most once per (year_a, year_b) pair.
        printed_header = False
        for dictionary, since_scenario, oldgrowth_scenario, dist_type in checks:
            since_key = (a_str, since_scenario)
            oldgrowth_key = (a_str, oldgrowth_scenario)
            if since_key not in dictionary or oldgrowth_key not in dictionary:
                continue
            if not printed_header:
                print(header)
                printed_header = True
            since_result = dictionary[since_key]
            oldgrowth_result = dictionary[oldgrowth_key]
            before_result = f"{a_str}_{dist_type}_before_{b_plus1}"
            print(f"  ('{oldgrowth_result}', '{since_result}'):")
            print(f"    '{before_result}',\n")
            before_dictionary[(oldgrowth_result, since_result)] = before_result

        # Deforestation is looked up by output name instead of scenario key
        defor_since_name = f"{a_str}_deforestation_since_{b_plus1}"
        defor_oldgrowth_name = f"{a_str}_deforestation_since_oldgrowth"

        if defor_since_name in deforestation_outputs and defor_oldgrowth_name in deforestation_outputs:
            if not printed_header:
                print(header)
                printed_header = True
            before_result = f"{a_str}_deforestation_before_{b_plus1}"
            print(f"  ('{defor_oldgrowth_name}', '{defor_since_name}'):")
            print(f"    '{before_result}',\n")
            before_dictionary[(defor_oldgrowth_name, defor_since_name)] = before_result

        if printed_header:
            print("")

print("}\n")

# 5. Area-based dictionary
print("area_based_dictionary = {")

# Get polygon names from polygons directory ('.gpkg' file names without extension)
polygon_names = set()
if os.path.exists(polygons_dir):
    polygon_names = {file[:-5] for file in os.listdir(polygons_dir) if file.endswith('.gpkg')}

area_based_entries = []

for scenario in scenarios:
    parts = scenario.split('_')
    # Deforestation scenarios end with "Xm_degradation_buffer"
    ends_with_buffer = (len(parts) >= 5 and parts[-1] == 'buffer'
                        and parts[-2] == 'degradation' and parts[-3].endswith('m'))
    # Degradation scenarios end with "degradation_YYYY"
    ends_with_year = (len(parts) >= 3 and parts[-2] == 'degradation'
                      and parts[-1].isdigit() and len(parts[-1]) == 4)

    if ends_with_buffer:
        # Layout: <alt_year>_<polygon name>_<dist_type>_<year_affix>_<X>m_degradation_buffer
        alt_year = parts[0]
        year_affix = parts[-4]
        dist_type = parts[-5]
        polygon_name = '_'.join(parts[1:-5])
        if dist_type == 'deforestation' and polygon_name in polygon_names:
            output_name = f"{alt_year}_deforestation_of_{polygon_name}_{year_affix}"
            area_based_entries.append((scenario, alt_year, output_name))
    elif ends_with_year:
        # Layout: <alt_year>_<polygon name>_degradation_<year_affix>
        alt_year = parts[0]
        year_affix = parts[-1]
        polygon_name = '_'.join(parts[1:-2])
        if polygon_name in polygon_names:
            output_name = f"{alt_year}_degradation_of_{polygon_name}_{year_affix}"
            area_based_entries.append((scenario, alt_year, output_name))

# Entries are printed in sorted order so the output is stable between runs.
if area_based_entries:
    print("\n# Area-based disturbance from alternate scenarios")
    for scenario, alt_year, output_name in sorted(area_based_entries):
        print(f"  ('{scenario}', '{alt_year}'):")
        print(f"    '{output_name}',")

print("}\n")

In [None]:
# The dictionaries in this cell use the same layout as the text printed by the
# previous cell: each key is a pair of input names and each value is the name
# of the output to create.
# NOTE(review): presumably this cell is pasted from that printed output and
# then edited by hand to choose which outputs are calculated - confirm.

# Keys: (year scenario, alternate scenario without the disturbance).
disturbance_since_dictionary = {

# Disturbance in 2024 caused by events since 2022
  ('2024', '2024_no_disturbance_since_2022'):
    '2024_disturbance_since_2022',

# Disturbance in 2024 caused by events since 2023
  ('2024', '2024_no_disturbance_since_2023'):
    '2024_disturbance_since_2023',

# Disturbance in 2024 caused by events since 2024
  ('2024', '2024_no_disturbance_since_2024'):
    '2024_disturbance_since_2024',

# Disturbance in 2021 caused by events since an oldgrowth state.
  ('2021', '2021_oldgrowth_all_land'):
    '2021_disturbance_since_oldgrowth',

# Disturbance in 2024 caused by events since an oldgrowth state.
  ('2024', '2024_oldgrowth_all_land'):
    '2024_disturbance_since_oldgrowth',

}

# Keys: (year scenario, alternate scenario without the degradation).
degradation_since_dictionary = {

# Degradation in 2021 caused by events since 1993
  ('2021', '2021_no_degradation_since_1993'):
    '2021_degradation_since_1993',

# Degradation in 2024 caused by events since 1996
  ('2024', '2024_no_degradation_since_1996'):
    '2024_degradation_since_1996',

# Degradation in 2024 caused by events since 2024
  ('2024', '2024_no_degradation_since_2024'):
    '2024_degradation_since_2024',

# Degradation in 2021 caused by events since an old-growth state.
  ('2021', '2021_oldgrowth'):
    '2021_degradation_since_oldgrowth',

# Degradation in 2024 caused by events since an old-growth state.
  ('2024', '2024_oldgrowth'):
    '2024_degradation_since_oldgrowth',

}

# Keys: (degradation output, disturbance output) produced from the two
# dictionaries above.
deforestation_since_dictionary = {

# Deforestation in 2024 caused by events since 2024
  ('2024_degradation_since_2024', '2024_disturbance_since_2024'):
    '2024_deforestation_since_2024',

# Deforestation in 2021 caused by events since an old-growth state.
  ('2021_degradation_since_oldgrowth', '2021_disturbance_since_oldgrowth'):
    '2021_deforestation_since_oldgrowth',

# Deforestation in 2024 caused by events since an old-growth state.
  ('2024_degradation_since_oldgrowth', '2024_disturbance_since_oldgrowth'):
    '2024_deforestation_since_oldgrowth',

}

# Keys: ('..._since_oldgrowth' output, '..._since_<year>' output).
before_dictionary = {

# Disturbances in 2021 caused by events before 1993.
  ('2021_degradation_since_oldgrowth', '2021_degradation_since_1993'):
    '2021_degradation_before_1993',


# Disturbances in 2024 caused by events before 1996.
  ('2024_degradation_since_oldgrowth', '2024_degradation_since_1996'):
    '2024_degradation_before_1996',


# Disturbances in 2024 caused by events before 2022.
  ('2024_disturbance_since_oldgrowth', '2024_disturbance_since_2022'):
    '2024_disturbance_before_2022',


# Disturbances in 2024 caused by events before 2023.
  ('2024_disturbance_since_oldgrowth', '2024_disturbance_since_2023'):
    '2024_disturbance_before_2023',


# Disturbances in 2024 caused by events before 2024.
  ('2024_disturbance_since_oldgrowth', '2024_disturbance_since_2024'):
    '2024_disturbance_before_2024',

  ('2024_degradation_since_oldgrowth', '2024_degradation_since_2024'):
    '2024_degradation_before_2024',

  ('2024_deforestation_since_oldgrowth', '2024_deforestation_since_2024'):
    '2024_deforestation_before_2024',


}

# Keys: (alternate scenario containing the area-based disturbance, year scenario).
area_based_dictionary = {

# Area-based disturbance from alternate scenarios
  ('2024_road_mat_daling_deforestation_2023_30m_degradation_buffer', '2024'):
    '2024_deforestation_of_road_mat_daling_2023',
}


In [None]:
# Functions for subtract calculations
def subtract_arrays(array1, array2):
    """Return the difference `array1 - array2` (element-wise for arrays)."""
    return array1 - array2

# Relative uncertainty propagation for change estimates
# Based on IPCC and CEOS Land Product Validation Protocol methods
# Follows Liang et al. (2023) Quantifying aboveground biomass dynamics from
# charcoal degradation in Mozambique using GEDI Lidar and Landsat.
def propagate_uncertainty(mean1, uncertainty1, mean2, uncertainty2):
    """Return the uncertainty of `mean1 - mean2` relative to that difference.

    Each uncertainty is multiplied by its mean to give an absolute
    uncertainty; the two are combined as the square root of the sum of
    squares and divided by |mean1 - mean2|.

    Parameters:
        mean1, mean2: arrays of mean values.
        uncertainty1, uncertainty2: arrays of uncertainties expressed
            relative to the corresponding mean.

    Returns:
        Array of combined uncertainty divided by the absolute difference of
        the means. Where the means are equal the divisor is float64 machine
        epsilon instead of zero.
    """
    # Absolute uncertainty of each input, computed in float64.
    abs_unc_first = np.multiply(mean1, uncertainty1, dtype='float64')
    abs_unc_second = np.multiply(mean2, uncertainty2, dtype='float64')
    # Quadrature sum of the two absolute uncertainties.
    combined = np.sqrt(
        np.square(abs_unc_first, dtype='float64') + np.square(abs_unc_second, dtype='float64'),
        dtype='float64',
    )
    # Avoid division by zero where the two means are identical.
    change = np.abs(mean1 - mean2)
    safe_change = np.where(change == 0, np.finfo(np.float64).eps, change)
    return combined / safe_change

# Determine processing mode based on source directory
use_uncertainty = source_dir == uncertainty_dir

# Progress tracking
# All five dictionaries counted here are the user-editable ones defined in the
# dictionary cell, so the total matches what the loops actually process.
total_operations = len(disturbance_since_dictionary) + len(degradation_since_dictionary) + len(deforestation_since_dictionary) + len(before_dictionary) + len(area_based_dictionary)
progress_index = 0
progress_label = widgets.Label(f"Disturbance calculation progress: {progress_index}/{total_operations}")

display(progress_label)

# 1. Process disturbance_since calculations
# Iterates disturbance_since_dictionary (not the builder's internal
# 'disturbance_since') so that entries removed or added by the user are
# respected, consistent with the other dictionaries.
# Each output is scenario1 - scenario2; existing outputs are skipped.
for (scenario1, scenario2), disturbance_name in disturbance_since_dictionary.items():
    if use_uncertainty:
        # Define filenames and paths for disturbance mean and uncertainty
        mean_filename = f"mean__{disturbance_name}__{selected_model}.tif"
        mean_path = join(disturbance_dir, mean_filename)
        uncertainty_filename = f"uncertainty__{disturbance_name}__{selected_model}.tif"
        uncertainty_path = join(disturbance_dir, uncertainty_filename)
        # Skip if both files already exist
        if exists(mean_path) and exists(uncertainty_path):
            progress_index += 1
            progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"
            continue

        scenario1_base = f"{scenario1}__{selected_model}"
        scenario2_base = f"{scenario2}__{selected_model}"
        # Define scenario paths, assert that both exist for both scenarios
        scenario1_mean_path = join(predictions_dir, f"mean__{scenario1_base}.tif")
        scenario1_uncertainty_path = join(predictions_dir, f"uncertainty__{scenario1_base}.tif")
        scenario2_mean_path = join(predictions_dir, f"mean__{scenario2_base}.tif")
        scenario2_uncertainty_path = join(predictions_dir, f"uncertainty__{scenario2_base}.tif")
        assert exists(scenario1_mean_path), f"mean__{scenario1_base}.tif does not exist."
        assert exists(scenario1_uncertainty_path), f"uncertainty__{scenario1_base}.tif does not exist."
        assert exists(scenario2_mean_path), f"mean__{scenario2_base}.tif does not exist."
        assert exists(scenario2_uncertainty_path), f"uncertainty__{scenario2_base}.tif does not exist."
        # Read arrays
        scenario1_mean = gdal.Open(scenario1_mean_path).ReadAsArray()
        scenario1_uncertainty = gdal.Open(scenario1_uncertainty_path).ReadAsArray()
        scenario2_mean = gdal.Open(scenario2_mean_path).ReadAsArray()
        scenario2_uncertainty = gdal.Open(scenario2_uncertainty_path).ReadAsArray()
        # Fill scenario nodata values with 0 if they are not nodatavalues in the other scenario
        # (scenario 2 is compared against the already-filled scenario 1, so
        # pixels that are nodata in both stay nodata in both)
        scenario1_mean = np.where((scenario1_mean == nodatavalue) & (scenario2_mean != nodatavalue), 0, scenario1_mean)
        scenario1_uncertainty = np.where((scenario1_uncertainty == nodatavalue) & (scenario2_uncertainty != nodatavalue), 0, scenario1_uncertainty)
        scenario2_mean = np.where((scenario2_mean == nodatavalue) & (scenario1_mean != nodatavalue), 0, scenario2_mean)
        scenario2_uncertainty = np.where((scenario2_uncertainty == nodatavalue) & (scenario1_uncertainty != nodatavalue), 0, scenario2_uncertainty)
        # Create disturbance arrays where the value is not 'nodatavalue' in both scenarios
        dist_mean_array = np.where(scenario1_mean == nodatavalue, nodatavalue, subtract_arrays(scenario1_mean, scenario2_mean))
        dist_uncertainty_array = np.where(scenario1_mean == nodatavalue, nodatavalue,
                                         propagate_uncertainty(scenario1_mean, scenario1_uncertainty, scenario2_mean, scenario2_uncertainty))
        # Set uncertainty to 0 where mean disturbance is 0
        dist_uncertainty_array = np.where(dist_mean_array == 0, 0, dist_uncertainty_array)
        # Export disturbance rasters
        export_array_as_tif(dist_mean_array, mean_path, template=scenario1_mean_path)
        export_array_as_tif(dist_uncertainty_array, uncertainty_path, template=scenario1_mean_path)
    else:
        # Define filenames and paths for disturbance
        dist_filename = f"{disturbance_name}__{selected_model}.tif"
        dist_path = join(disturbance_dir, dist_filename)
        # Skip if file already exists
        if exists(dist_path):
            progress_index += 1
            progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"
            continue
        # Define scenario paths, assert that both exist for both scenarios
        scenario1_path = join(predictions_dir, f"{scenario1}__{selected_model}.tif")
        assert exists(scenario1_path), f"{scenario1_path} does not exist."
        scenario2_path = join(predictions_dir, f"{scenario2}__{selected_model}.tif")
        assert exists(scenario2_path), f"{scenario2_path} does not exist."
        # Read arrays
        scenario1_array_temp = gdal.Open(scenario1_path).ReadAsArray()
        scenario2_array_temp = gdal.Open(scenario2_path).ReadAsArray()
        # Fill scenario nodata values with 0 if they are not nodatavalues in the other scenario
        scenario1_array = np.where((scenario1_array_temp == nodatavalue) & (scenario2_array_temp != nodatavalue), 0, scenario1_array_temp)
        scenario2_array = np.where((scenario2_array_temp == nodatavalue) & (scenario1_array != nodatavalue), 0, scenario2_array_temp)
        # Create disturbance arrays where the value is not 'nodatavalue' in both scenarios
        dist_array = np.where(scenario1_array==nodatavalue, nodatavalue, subtract_arrays(scenario1_array, scenario2_array))
        # Export disturbance raster
        export_array_as_tif(dist_array, dist_path, template = scenario1_path)

    # Update progress
    progress_index += 1
    progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"

# 2. Process degradation_since calculations
# Each output is scenario1 - scenario2. Outputs that already exist are not
# recalculated, but still count towards the progress total.
for (scenario1, scenario2), disturbance_name in degradation_since_dictionary.items():
    if not use_uncertainty:
        # Single output raster per entry
        dist_filename = f"{disturbance_name}__{selected_model}.tif"
        dist_path = join(disturbance_dir, dist_filename)
        if not exists(dist_path):
            # Both scenario predictions must exist
            scenario1_path = join(predictions_dir, f"{scenario1}__{selected_model}.tif")
            assert exists(scenario1_path), f"{scenario1_path} does not exist."
            scenario2_path = join(predictions_dir, f"{scenario2}__{selected_model}.tif")
            assert exists(scenario2_path), f"{scenario2_path} does not exist."
            # Read arrays
            scenario1_array_temp = gdal.Open(scenario1_path).ReadAsArray()
            scenario2_array_temp = gdal.Open(scenario2_path).ReadAsArray()
            # A pixel that is nodata in only one scenario is treated as 0 there.
            # Scenario 2 is compared against the already-filled scenario 1.
            only_first_missing = (scenario1_array_temp == nodatavalue) & (scenario2_array_temp != nodatavalue)
            scenario1_array = np.where(only_first_missing, 0, scenario1_array_temp)
            only_second_missing = (scenario2_array_temp == nodatavalue) & (scenario1_array != nodatavalue)
            scenario2_array = np.where(only_second_missing, 0, scenario2_array_temp)
            # Pixels still nodata in scenario 1 stay nodata in the output
            difference = subtract_arrays(scenario1_array, scenario2_array)
            dist_array = np.where(scenario1_array == nodatavalue, nodatavalue, difference)
            # Export disturbance raster
            export_array_as_tif(dist_array, dist_path, template=scenario1_path)
    else:
        # Mean and uncertainty output rasters per entry
        mean_filename = f"mean__{disturbance_name}__{selected_model}.tif"
        mean_path = join(disturbance_dir, mean_filename)
        uncertainty_filename = f"uncertainty__{disturbance_name}__{selected_model}.tif"
        uncertainty_path = join(disturbance_dir, uncertainty_filename)
        if not (exists(mean_path) and exists(uncertainty_path)):
            scenario1_base = f"{scenario1}__{selected_model}"
            scenario2_base = f"{scenario2}__{selected_model}"
            # Mean and uncertainty predictions must exist for both scenarios
            scenario1_mean_path = join(predictions_dir, f"mean__{scenario1_base}.tif")
            scenario1_uncertainty_path = join(predictions_dir, f"uncertainty__{scenario1_base}.tif")
            scenario2_mean_path = join(predictions_dir, f"mean__{scenario2_base}.tif")
            scenario2_uncertainty_path = join(predictions_dir, f"uncertainty__{scenario2_base}.tif")
            assert exists(scenario1_mean_path), f"mean__{scenario1_base}.tif does not exist."
            assert exists(scenario1_uncertainty_path), f"uncertainty__{scenario1_base}.tif does not exist."
            assert exists(scenario2_mean_path), f"mean__{scenario2_base}.tif does not exist."
            assert exists(scenario2_uncertainty_path), f"uncertainty__{scenario2_base}.tif does not exist."
            # Read arrays
            scenario1_mean = gdal.Open(scenario1_mean_path).ReadAsArray()
            scenario1_uncertainty = gdal.Open(scenario1_uncertainty_path).ReadAsArray()
            scenario2_mean = gdal.Open(scenario2_mean_path).ReadAsArray()
            scenario2_uncertainty = gdal.Open(scenario2_uncertainty_path).ReadAsArray()
            # A pixel that is nodata in only one scenario is treated as 0 there.
            # Means first (scenario 2 compared against the filled scenario 1) ...
            scenario1_mean = np.where((scenario1_mean == nodatavalue) & (scenario2_mean != nodatavalue), 0, scenario1_mean)
            scenario2_mean = np.where((scenario2_mean == nodatavalue) & (scenario1_mean != nodatavalue), 0, scenario2_mean)
            # ... then uncertainties, filled the same way independently of the means.
            scenario1_uncertainty = np.where((scenario1_uncertainty == nodatavalue) & (scenario2_uncertainty != nodatavalue), 0, scenario1_uncertainty)
            scenario2_uncertainty = np.where((scenario2_uncertainty == nodatavalue) & (scenario1_uncertainty != nodatavalue), 0, scenario2_uncertainty)
            # Pixels still nodata in the scenario 1 mean stay nodata in both outputs
            still_missing = scenario1_mean == nodatavalue
            dist_mean_array = np.where(still_missing, nodatavalue, subtract_arrays(scenario1_mean, scenario2_mean))
            propagated = propagate_uncertainty(scenario1_mean, scenario1_uncertainty, scenario2_mean, scenario2_uncertainty)
            dist_uncertainty_array = np.where(still_missing, nodatavalue, propagated)
            # Set uncertainty to 0 where mean disturbance is 0
            dist_uncertainty_array = np.where(dist_mean_array == 0, 0, dist_uncertainty_array)
            # Export disturbance rasters
            export_array_as_tif(dist_mean_array, mean_path, template=scenario1_mean_path)
            export_array_as_tif(dist_uncertainty_array, uncertainty_path, template=scenario1_mean_path)

    # Update progress (also reached when the outputs already existed)
    progress_index += 1
    progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"

# 3. Process deforestation_since calculations
# Each output is derived from two existing disturbance rasters, with the
# second disturbance passed first to subtract_arrays / propagate_uncertainty.
for (dist1_name, dist2_name), disturbance_name in deforestation_since_dictionary.items():
    if use_uncertainty:
        # Output paths of the mean and uncertainty disturbance .tifs
        mean_path = join(disturbance_dir, f"mean__{disturbance_name}__{selected_model}.tif")
        uncertainty_path = join(disturbance_dir, f"uncertainty__{disturbance_name}__{selected_model}.tif")
        # Only calculate when at least one of the outputs is missing
        if not (exists(mean_path) and exists(uncertainty_path)):
            # Input disturbance paths, all four must exist
            dist1_mean_path = join(disturbance_dir, f"mean__{dist1_name}__{selected_model}.tif")
            dist1_uncertainty_path = join(disturbance_dir, f"uncertainty__{dist1_name}__{selected_model}.tif")
            dist2_mean_path = join(disturbance_dir, f"mean__{dist2_name}__{selected_model}.tif")
            dist2_uncertainty_path = join(disturbance_dir, f"uncertainty__{dist2_name}__{selected_model}.tif")
            input_paths = (dist1_mean_path, dist1_uncertainty_path, dist2_mean_path, dist2_uncertainty_path)
            for input_path in input_paths:
                assert exists(input_path), f"{input_path} does not exist."
            # Read the four arrays in the same order as the paths
            dist1_mean, dist1_uncertainty, dist2_mean, dist2_uncertainty = [
                gdal.Open(input_path).ReadAsArray() for input_path in input_paths
            ]
            # Fill nodata with 0 where the other disturbance has data.
            # The first array is filled before the second is compared against it.
            dist1_mean_gaps = (dist1_mean == nodatavalue) & (dist2_mean != nodatavalue)
            dist1_mean = np.where(dist1_mean_gaps, 0, dist1_mean)
            dist2_mean_gaps = (dist2_mean == nodatavalue) & (dist1_mean != nodatavalue)
            dist2_mean = np.where(dist2_mean_gaps, 0, dist2_mean)
            dist1_uncertainty_gaps = (dist1_uncertainty == nodatavalue) & (dist2_uncertainty != nodatavalue)
            dist1_uncertainty = np.where(dist1_uncertainty_gaps, 0, dist1_uncertainty)
            dist2_uncertainty_gaps = (dist2_uncertainty == nodatavalue) & (dist1_uncertainty != nodatavalue)
            dist2_uncertainty = np.where(dist2_uncertainty_gaps, 0, dist2_uncertainty)
            # Keep nodata wherever the second disturbance mean is still nodata
            remaining_nodata = dist2_mean == nodatavalue
            result_mean = np.where(remaining_nodata, nodatavalue,
                                   subtract_arrays(dist2_mean, dist1_mean))
            result_uncertainty = np.where(remaining_nodata, nodatavalue,
                                          propagate_uncertainty(dist2_mean, dist2_uncertainty, dist1_mean, dist1_uncertainty))
            # Set uncertainty to 0 where mean disturbance is 0
            result_uncertainty = np.where(result_mean == 0, 0, result_uncertainty)
            # Export disturbance rasters, using the second disturbance mean as template
            export_array_as_tif(result_mean, mean_path, template=dist2_mean_path)
            export_array_as_tif(result_uncertainty, uncertainty_path, template=dist2_mean_path)
    else:
        # Output path of the single disturbance .tif
        output_path = join(disturbance_dir, f"{disturbance_name}__{selected_model}.tif")
        # Only calculate when the output is missing
        if not exists(output_path):
            # Input disturbance paths, both must exist
            dist1_path = join(disturbance_dir, f"{dist1_name}__{selected_model}.tif")
            assert exists(dist1_path), f"{dist1_path} does not exist."
            dist2_path = join(disturbance_dir, f"{dist2_name}__{selected_model}.tif")
            assert exists(dist2_path), f"{dist2_path} does not exist."
            # Read arrays
            dist1_array = gdal.Open(dist1_path).ReadAsArray()
            dist2_array = gdal.Open(dist2_path).ReadAsArray()
            # Fill nodata with 0 where the other disturbance has data (first array filled first)
            dist1_array = np.where((dist1_array == nodatavalue) & (dist2_array != nodatavalue), 0, dist1_array)
            dist2_array = np.where((dist2_array == nodatavalue) & (dist1_array != nodatavalue), 0, dist2_array)
            # Keep nodata wherever the second disturbance is still nodata
            result_array = np.where(dist2_array == nodatavalue, nodatavalue,
                                    subtract_arrays(dist2_array, dist1_array))
            # Export disturbance raster
            export_array_as_tif(result_array, output_path, template=dist2_path)

    # Update progress (also counts outputs that already existed)
    progress_index += 1
    progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"

# 4. Process before baseline disturbances
# Each output is derived from two existing disturbance rasters, with the
# first disturbance passed first to subtract_arrays / propagate_uncertainty.
for (dist1_name, dist2_name), disturbance_name in before_dictionary.items():
    if use_uncertainty:
        # Output paths of the mean and uncertainty disturbance .tifs
        mean_path = join(disturbance_dir, f"mean__{disturbance_name}__{selected_model}.tif")
        uncertainty_path = join(disturbance_dir, f"uncertainty__{disturbance_name}__{selected_model}.tif")
        # Only calculate when at least one of the outputs is missing
        if not (exists(mean_path) and exists(uncertainty_path)):
            # Input disturbance paths, all four must exist
            dist1_mean_path = join(disturbance_dir, f"mean__{dist1_name}__{selected_model}.tif")
            dist1_uncertainty_path = join(disturbance_dir, f"uncertainty__{dist1_name}__{selected_model}.tif")
            dist2_mean_path = join(disturbance_dir, f"mean__{dist2_name}__{selected_model}.tif")
            dist2_uncertainty_path = join(disturbance_dir, f"uncertainty__{dist2_name}__{selected_model}.tif")
            input_paths = (dist1_mean_path, dist1_uncertainty_path, dist2_mean_path, dist2_uncertainty_path)
            for input_path in input_paths:
                assert exists(input_path), f"{input_path} does not exist."
            # Read the four arrays in the same order as the paths
            dist1_mean, dist1_uncertainty, dist2_mean, dist2_uncertainty = [
                gdal.Open(input_path).ReadAsArray() for input_path in input_paths
            ]
            # Fill nodata with 0 where the other disturbance has data.
            # The first array is filled before the second is compared against it.
            dist1_mean_gaps = (dist1_mean == nodatavalue) & (dist2_mean != nodatavalue)
            dist1_mean = np.where(dist1_mean_gaps, 0, dist1_mean)
            dist2_mean_gaps = (dist2_mean == nodatavalue) & (dist1_mean != nodatavalue)
            dist2_mean = np.where(dist2_mean_gaps, 0, dist2_mean)
            dist1_uncertainty_gaps = (dist1_uncertainty == nodatavalue) & (dist2_uncertainty != nodatavalue)
            dist1_uncertainty = np.where(dist1_uncertainty_gaps, 0, dist1_uncertainty)
            dist2_uncertainty_gaps = (dist2_uncertainty == nodatavalue) & (dist1_uncertainty != nodatavalue)
            dist2_uncertainty = np.where(dist2_uncertainty_gaps, 0, dist2_uncertainty)
            # Keep nodata wherever the first disturbance mean is still nodata
            remaining_nodata = dist1_mean == nodatavalue
            result_mean = np.where(remaining_nodata, nodatavalue,
                                   subtract_arrays(dist1_mean, dist2_mean))
            result_uncertainty = np.where(remaining_nodata, nodatavalue,
                                          propagate_uncertainty(dist1_mean, dist1_uncertainty, dist2_mean, dist2_uncertainty))
            # Set uncertainty to 0 where mean disturbance is 0
            result_uncertainty = np.where(result_mean == 0, 0, result_uncertainty)
            # Export disturbance rasters, using the first disturbance mean as template
            export_array_as_tif(result_mean, mean_path, template=dist1_mean_path)
            export_array_as_tif(result_uncertainty, uncertainty_path, template=dist1_mean_path)
    else:
        # Output path of the single disturbance .tif
        output_path = join(disturbance_dir, f"{disturbance_name}__{selected_model}.tif")
        # Only calculate when the output is missing
        if not exists(output_path):
            # Input disturbance paths, both must exist
            dist1_path = join(disturbance_dir, f"{dist1_name}__{selected_model}.tif")
            assert exists(dist1_path), f"{dist1_path} does not exist."
            dist2_path = join(disturbance_dir, f"{dist2_name}__{selected_model}.tif")
            assert exists(dist2_path), f"{dist2_path} does not exist."
            # Read arrays
            dist1_array = gdal.Open(dist1_path).ReadAsArray()
            dist2_array = gdal.Open(dist2_path).ReadAsArray()
            # Fill nodata with 0 where the other disturbance has data (first array filled first)
            dist1_array = np.where((dist1_array == nodatavalue) & (dist2_array != nodatavalue), 0, dist1_array)
            dist2_array = np.where((dist2_array == nodatavalue) & (dist1_array != nodatavalue), 0, dist2_array)
            # Keep nodata wherever the first disturbance is still nodata
            result_array = np.where(dist1_array == nodatavalue, nodatavalue,
                                    subtract_arrays(dist1_array, dist2_array))
            # Export disturbance raster
            export_array_as_tif(result_array, output_path, template=dist1_path)

    # Update progress (also counts outputs that already existed)
    progress_index += 1
    progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"

# 5. Process area-based disturbances
# Each output is derived from two scenario prediction rasters, with the
# first scenario passed first to subtract_arrays / propagate_uncertainty.
for (scenario1, scenario2), disturbance_name in area_based_dictionary.items():
    if use_uncertainty:
        # Output paths of the mean and uncertainty disturbance .tifs
        mean_path = join(disturbance_dir, f"mean__{disturbance_name}__{selected_model}.tif")
        uncertainty_path = join(disturbance_dir, f"uncertainty__{disturbance_name}__{selected_model}.tif")
        # Only calculate when at least one of the outputs is missing
        if not (exists(mean_path) and exists(uncertainty_path)):
            # Input scenario predictions, all four must exist
            scenario1_base = f"{scenario1}__{selected_model}"
            scenario2_base = f"{scenario2}__{selected_model}"
            input_filenames = (
                f"mean__{scenario1_base}.tif",
                f"uncertainty__{scenario1_base}.tif",
                f"mean__{scenario2_base}.tif",
                f"uncertainty__{scenario2_base}.tif",
            )
            input_paths = [join(predictions_dir, input_filename) for input_filename in input_filenames]
            for input_filename, input_path in zip(input_filenames, input_paths):
                assert exists(input_path), f"{input_filename} does not exist."
            scenario1_mean_path = input_paths[0]

            # Read the four arrays in the same order as the paths
            scenario1_mean, scenario1_uncertainty, scenario2_mean, scenario2_uncertainty = [
                gdal.Open(input_path).ReadAsArray() for input_path in input_paths
            ]

            # Fill nodata with 0 where the other scenario has data.
            # The first array is filled before the second is compared against it.
            scenario1_mean_gaps = (scenario1_mean == nodatavalue) & (scenario2_mean != nodatavalue)
            scenario1_mean = np.where(scenario1_mean_gaps, 0, scenario1_mean)
            scenario2_mean_gaps = (scenario2_mean == nodatavalue) & (scenario1_mean != nodatavalue)
            scenario2_mean = np.where(scenario2_mean_gaps, 0, scenario2_mean)
            scenario1_uncertainty_gaps = (scenario1_uncertainty == nodatavalue) & (scenario2_uncertainty != nodatavalue)
            scenario1_uncertainty = np.where(scenario1_uncertainty_gaps, 0, scenario1_uncertainty)
            scenario2_uncertainty_gaps = (scenario2_uncertainty == nodatavalue) & (scenario1_uncertainty != nodatavalue)
            scenario2_uncertainty = np.where(scenario2_uncertainty_gaps, 0, scenario2_uncertainty)

            # Keep nodata wherever the first scenario mean is still nodata
            remaining_nodata = scenario1_mean == nodatavalue
            dist_mean_array = np.where(remaining_nodata, nodatavalue,
                                       subtract_arrays(scenario1_mean, scenario2_mean))
            dist_uncertainty_array = np.where(remaining_nodata, nodatavalue,
                                              propagate_uncertainty(scenario1_mean, scenario1_uncertainty, scenario2_mean, scenario2_uncertainty))
            # Set uncertainty to 0 where mean disturbance is 0
            dist_uncertainty_array = np.where(dist_mean_array == 0, 0, dist_uncertainty_array)

            # Export disturbance rasters, using the first scenario mean as template
            export_array_as_tif(dist_mean_array, mean_path, template=scenario1_mean_path)
            export_array_as_tif(dist_uncertainty_array, uncertainty_path, template=scenario1_mean_path)
    else:
        # Output path of the single disturbance .tif
        dist_path = join(disturbance_dir, f"{disturbance_name}__{selected_model}.tif")
        # Only calculate when the output is missing
        if not exists(dist_path):
            # Input scenario predictions, both must exist
            scenario1_path = join(predictions_dir, f"{scenario1}__{selected_model}.tif")
            assert exists(scenario1_path), f"{scenario1_path} does not exist."
            scenario2_path = join(predictions_dir, f"{scenario2}__{selected_model}.tif")
            assert exists(scenario2_path), f"{scenario2_path} does not exist."

            # Read arrays
            scenario1_array = gdal.Open(scenario1_path).ReadAsArray()
            scenario2_array = gdal.Open(scenario2_path).ReadAsArray()

            # Fill nodata with 0 where the other scenario has data (first array filled first)
            scenario1_array = np.where((scenario1_array == nodatavalue) & (scenario2_array != nodatavalue), 0, scenario1_array)
            scenario2_array = np.where((scenario2_array == nodatavalue) & (scenario1_array != nodatavalue), 0, scenario2_array)

            # Keep nodata wherever the first scenario is still nodata
            dist_array = np.where(scenario1_array == nodatavalue, nodatavalue,
                                  subtract_arrays(scenario1_array, scenario2_array))
            export_array_as_tif(dist_array, dist_path, template=scenario1_path)

    # Update progress (also counts outputs that already existed)
    progress_index += 1
    progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"

print("All disturbances calculated.")

# Intactness

In [None]:
# Intactness is measured as relative percentage loss of AGBD within an area of interest

# Print the available baseline and disturbance rasters as ready-to-copy assignments
# for calculating intactness percentage and relative intactness. Ideally the baseline
# is the scenario with the least disturbance, and the disturbance is the difference
# between that and the current reality.

# When the source is the uncertainty directory, only filenames containing 'mean' are listed
from_scenarios = source_dir == scenarios_dir
from_uncertainty = source_dir == uncertainty_dir

for baseline in os.listdir(predictions_dir):
  if from_scenarios:
    print(f"selected_baseline = '{baseline}'")
  if from_uncertainty and 'mean' in baseline:
    print(f"selected_baseline = '{baseline}'")

for dist in os.listdir(disturbance_dir):
  if from_scenarios:
    print(f"selected_dist = '{dist}'")
  if from_uncertainty and 'mean' in dist:
    print(f"selected_dist = '{dist}'")

In [None]:
# Select the baseline prediction and the disturbance raster (filenames printed by the cell above)
# selected_baseline = '2021_oldgrowth_all_land__agbd_tekai_250625_003858.tif'
selected_baseline = '2021_no_degradation_since_1993__agbd_tekai_250625_003858.tif'
# selected_dist = '2021_degradation_deforestation_total__agbd_tekai_250625_003858.tif'
selected_dist = '2021_degradation_since_1993__agbd_tekai_250625_003858.tif'

# Year of the forest mask ('mask_forest_<year>.tif' in the masks directory)
forest_mask_year = '2021'

# Define the baseline name based on source directory.
# The scenario / disturbance name is the first '__' separated part of a scenarios
# filename, and the second part of an uncertainty filename.
if source_dir == scenarios_dir:
  base_dist_name = f"{selected_baseline.split('__')[0]}__{selected_dist.split('__')[0]}"
elif source_dir == uncertainty_dir:
  base_dist_name = f"{selected_baseline.split('__')[1]}__{selected_dist.split('__')[1]}"
else:
  # Fail with a clear message instead of a NameError on the next line
  raise ValueError(f"source_dir must be scenarios_dir or uncertainty_dir, not '{source_dir}'.")

intactness_baseline_dist_dir = join(intactness_dir, base_dist_name)
makedirs(intactness_baseline_dist_dir, exist_ok=True)

percentage_filename = f"percentage_change__{base_dist_name}__{selected_model}.tif"
percentage_path = join(intactness_baseline_dist_dir, percentage_filename)

if not exists(percentage_path):
  # Define filenames and directories
  selected_baseline_path = join(predictions_dir, selected_baseline)
  selected_dist_path = join(disturbance_dir, selected_dist)
  selected_mask_path = join(masks_dir, f"mask_forest_{forest_mask_year}.tif")

  # Convert to arrays
  selected_baseline_array = gdal.Open(selected_baseline_path).ReadAsArray()
  selected_dist_array = gdal.Open(selected_dist_path).ReadAsArray()
  selected_mask_array = gdal.Open(selected_mask_path).ReadAsArray()

  # Pixels that are 'nodatavalue' in any of the inputs
  nodata_pixels = ((selected_mask_array == nodatavalue)
                   | (selected_baseline_array == nodatavalue)
                   | (selected_dist_array == nodatavalue))

  # Disturbance as a percentage of the baseline. The division is evaluated for
  # every pixel (including nodata), so floating point warnings are silenced here
  # and undefined results are replaced below.
  with np.errstate(divide='ignore', invalid='ignore', over='ignore'):
    percentage_change = selected_dist_array / selected_baseline_array * 100

  # A zero baseline gives inf / NaN, which has no meaningful percentage:
  # write these pixels as nodata rather than exporting non-finite values
  percentage_array = np.where(nodata_pixels | ~np.isfinite(percentage_change), nodatavalue, percentage_change)
  export_array_as_tif(percentage_array, percentage_path, template = selected_baseline_path)
  print(f"{percentage_filename} has been exported.")

else: print(f"{percentage_filename} already exists.")

In [None]:
# Use additional polygons for masking relative intactness quantiles
# Polygon files that are never offered as a mask
polygons_to_exclude = ['template.gpkg', 'project_area_buffered_bbox.gpkg']

# Select baseline / disturbance pairs to measure relative intactness
# (printed as a ready-to-copy list, one entry per directory in intactness_dir)
print("baseline_disturbance_pairs = [")
# Named 'pair_dir' to avoid shadowing the builtin dir() for the rest of the session
for pair_dir in os.listdir(intactness_dir):
  print(f"'{pair_dir}',")
print("]\n")

# Select polygons to mask and calculate quantiles
# (excluded polygons and any filename containing 'inverse' are not listed)
print("mask_polygons = [")
for polygon in os.listdir(polygons_dir):
  if polygon not in polygons_to_exclude and 'inverse' not in polygon:
    print(f"'{polygon}',")
# None is offered as the final option of the list
print(None)
print("]")

In [None]:
# Baseline / disturbance pairs for which relative intactness is calculated
baseline_disturbance_pairs = [
    '2021_oldgrowth_all_land__2021_degradation_deforestation_total',
    '2021_no_degradation_since_1993__2021_degradation_since_1993',
]

# Polygons used to mask the percentage raster before calculating quantiles.
# Uncomment the entries to use; None uses the percentage raster without a mask.
mask_polygons = [
    # 'project_area.gpkg',
    # 'peninsular_malaysia.gpkg',
    # 'lu_yong.gpkg',
    # 'lu_yong_lipis.gpkg',
    # 'lu_berkelah_jerantut.gpkg',
    # 'lu_tekai_tembeling.gpkg',
    # 'lu_ais.gpkg',
    # 'lu_pa_taman_negara_krau.gpkg',
    # 'lu_tekam.gpkg',
    # 'lu_berkelah_temerloh.gpkg',
    # 'lu_remen_chereh.gpkg',
    # 'lu_berkelah_kuantan.gpkg',
    'forest_reserves.gpkg',
    # 'gedi_area.gpkg',
    # None
]

# Define number of quantiles for intactness rating (e.g. 10 for 1 - 10)
num_quantiles = 10


# Create an inverse polygon (template extent minus the mask polygon) for each
# selected mask polygon, so that everything outside the polygon can be burned
# to nodata. Existing inverse polygons are reused.
for mask_polygon in mask_polygons:
  if mask_polygon is not None:
    # Create an inverse project area path for masking
    template_polygon_path = join(polygons_dir, "template.gpkg")
    # mask_polygon[:-5] drops the '.gpkg' extension
    inverse_polygon_path = join(polygons_dir, f"{mask_polygon[:-5]}_inverse.gpkg")
    if not exists(inverse_polygon_path):
      polygon_path = join(polygons_dir, mask_polygon)
      template_polygon = gpd.read_file(template_polygon_path)
      polygon_read = gpd.read_file(polygon_path)
      polygon_crs = polygon_read.crs.to_epsg()
      # Merge every feature of the mask polygon file into a single geometry.
      # GeoSeries.difference(GeoSeries) works element-wise on aligned indices, so
      # without this only the first feature would be removed from the template.
      polygon_geometries = polygon_read['geometry']
      if hasattr(polygon_geometries, 'union_all'):
        polygon_union = polygon_geometries.union_all()
      else:
        # Older geopandas versions without union_all()
        polygon_union = polygon_geometries.unary_union
      # Subtract the merged mask from the first (template) geometry
      inverse_polygon = template_polygon['geometry'].iloc[0].difference(polygon_union)
      inverse_polygon_gdf = gpd.GeoDataFrame({'geometry': [inverse_polygon]}, crs=f"EPSG:{polygon_crs}")
      inverse_polygon_gdf.to_file(inverse_polygon_path, driver="GPKG")
      print(f"An inverse masking polygon for {mask_polygon} has been created in {polygons_dir}.")
    else: print(f"An inverse masking polygon for {mask_polygon} already exists.")

# Calculate relative intactness for every baseline / disturbance pair and mask polygon.
# For each combination this exports a quantile-rated raster (.tif), the bounds of
# each rating (.csv) and a histogram of the valid percentage values (.png).
for base_dist_name in baseline_disturbance_pairs:
  intactness_baseline_dist_dir = join(intactness_dir, base_dist_name)
  percentage_filename = f"percentage_change__{base_dist_name}__{selected_model}"
  percentage_path = join(intactness_baseline_dist_dir, f"{percentage_filename}.tif")

  for mask_polygon in mask_polygons:

    if mask_polygon is not None:
      # Copy the percentage raster for potential masking
      percentage_masked_filename = f"{percentage_filename}__masked_{mask_polygon[:-5]}.tif"
      percentage_masked_path = join(intactness_baseline_dist_dir, percentage_masked_filename)
      if not exists(percentage_masked_path):
        print(f"Copying {percentage_filename} for masking...")
        copyfile(percentage_path, percentage_masked_path)
        print(f"Masking {percentage_filename} with {mask_polygon}...")
        # Burn nodata into everything covered by the inverse polygon
        inverse_polygon_path = join(polygons_dir, f"{mask_polygon[:-5]}_inverse.gpkg")
        burn_polygon_to_raster(percentage_masked_path, inverse_polygon_path, fixed_value=nodatavalue, all_touched=False)
        # Recompress the prediction after burning the polygon masks
        percentage_masked_array = gdal.Open(percentage_masked_path).ReadAsArray()
        export_array_as_tif(percentage_masked_array, percentage_masked_path, compress = True)
        print(f"{percentage_filename} masked.")
      else: print(f"{percentage_masked_filename} already exists.")

    # Define paths and arrays
    if mask_polygon is None: relative_intactness_name = f'intactness__{num_quantiles}_quantiles__{base_dist_name}__{selected_model}'
    else: relative_intactness_name = f'intactness__{mask_polygon[:-5]}_{num_quantiles}_quantiles__{base_dist_name}__{selected_model}'
    relative_intactness_path = join(intactness_baseline_dist_dir, f'{relative_intactness_name}.tif')
    if not exists(relative_intactness_path):
      if mask_polygon is None: percentage_array = gdal.Open(percentage_path).ReadAsArray()
      else: percentage_array = gdal.Open(percentage_masked_path).ReadAsArray()
      relative_intactness_array = np.empty_like(percentage_array, dtype=object)

      # Invalid pixels are nodata, plus any NaN / inf left in the percentage raster
      # (e.g. from a zero baseline). Non-finite values would otherwise turn every
      # quantile into NaN and make the histogram fail.
      invalid_elements = (percentage_array == nodatavalue) | ~np.isfinite(percentage_array)

      # Set all values above 0 to 0, assuming negative values are not intact.
      # Invalid pixels are left untouched so a positive nodatavalue is never clamped.
      percentage_array[(percentage_array > 0) & ~invalid_elements] = 0

      # Separate valid elements from invalid ones
      valid_elements = percentage_array[~invalid_elements]

      # Calculate quantiles for valid elements (inner edges only, e.g. 9 edges for 10 quantiles)
      quantiles = np.percentile(valid_elements, np.linspace(0, 100, num_quantiles + 1)[1:-1]) if len(valid_elements) > 0 else []

      # Lower and upper bound of each rating, open-ended for the first and last rating.
      # Computed once and shared by the raster classification and the CSV.
      ranges_data = {'Lower_Bound': [], 'Upper_Bound': []}
      for i in range(1, num_quantiles + 1):
          lower_bound = quantiles[i-2] if i > 1 and len(quantiles) >= i-1 else float('-inf')
          upper_bound = quantiles[i-1] if len(quantiles) >= i else float('inf')
          ranges_data['Lower_Bound'].append(lower_bound)
          ranges_data['Upper_Bound'].append(upper_bound)

      # Rate each pixel by the range its percentage falls in (lower exclusive, upper inclusive)
      rating_bounds = zip(ranges_data['Lower_Bound'], ranges_data['Upper_Bound'])
      for i, (lower_bound, upper_bound) in enumerate(rating_bounds, start=1):
          relative_intactness_array[(percentage_array > lower_bound) & (percentage_array <= upper_bound)] = i
      # Applied once after rating, as both overrides are independent of the rating loop
      relative_intactness_array[invalid_elements] = nodatavalue
      # Set all perfectly intact pixels (0 % change) to max score
      relative_intactness_array[percentage_array == 0] = num_quantiles
      export_array_as_tif(relative_intactness_array, relative_intactness_path)

      # Create DataFrame of the rating bounds and save to CSV
      relative_intactness_df = pd.DataFrame(ranges_data)
      relative_intactness_csv_path = os.path.join(intactness_baseline_dist_dir, f'{relative_intactness_name}.csv')
      relative_intactness_df.to_csv(relative_intactness_csv_path, index=False)

      # Generate and save histogram as .png
      histogram_path = join(intactness_baseline_dist_dir, f'{relative_intactness_name}.png')
      plt.figure()
      plt.hist(valid_elements.flatten(), bins='auto')
      plt.title(f'{relative_intactness_name} Histogram')
      plt.xlabel('Value')
      plt.ylabel('Frequency')
      plt.savefig(histogram_path)
      plt.close()

    else: print(f"{relative_intactness_name} already exists.")

# Disconnect runtime

In [None]:
# Unassign the Colab runtime once all cells above have finished.
# Useful for stopping background execution
# NOTE(review): presumably this releases the runtime and discards its in-memory state,
# so run it only after all outputs have been written to Drive - confirm.
runtime.unassign()