<a href="https://colab.research.google.com/github/joekelly211/masfi/blob/main/8_differences.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports, directories and global functions

In [None]:
# Define base directory
# Use '/content/drive/MyDrive/' for a personal drive
# Use '/gdrive/Shareddrives/' for a shared drive (must be created first)

base_dir = "/gdrive/Shareddrives/masfi"
# base_dir = '/content/drive/MyDrive/masfi'

# Mount Google Drive at the mount point that matches the base_dir prefix
from google.colab import drive
import os
import sys
if base_dir.startswith('/gdrive/Shareddrives/'):
    # Shared drive: base_dir is not created here (it must be created first)
    drive.mount('/gdrive', force_remount=True)
elif base_dir.startswith('/content/drive/MyDrive/'):
    # Personal drive: create the project folder if it is missing
    drive.mount('/content/drive', force_remount=True)
    os.makedirs(base_dir, exist_ok=True)
else:
    print("Create a base_dir beginning with '/gdrive/Shareddrives/' or '/content/drive/MyDrive/'.")

# Add the resolved base_dir to the module search path, once only
_path_to_add = os.path.realpath(base_dir)
if _path_to_add not in sys.path:
    sys.path.append(_path_to_add)

In [None]:
# Capture outputs
%%capture
# Installs and upgrades
!pip install geopandas
!pip install rasterio
!apt-get install -y gdal-bin

In [None]:
# Imports
import geopandas as gpd
from google.colab import runtime
from os import makedirs
from os.path import join, exists
from osgeo import gdal, ogr
gdal.UseExceptions()
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from rasterio.features import rasterize
from shutil import copyfile

In [None]:
# Define directories
# Top-level project directories under base_dir
areas_dir = join(base_dir, "1_areas")
scenarios_dir = join(base_dir, "6_scenarios")
uncertainty_dir = join(base_dir, "7_uncertainty")
differences_dir = join(base_dir, "8_differences")
# Sub-directories of the above
polygons_dir = join(areas_dir, "polygons")
masks_dir = join(scenarios_dir, "scenario_masks")

# Create directories (only the differences directory is created here)
makedirs(differences_dir, exist_ok=True)

In [None]:
# Global function: export an array as a .tif
template_tif_path = join(areas_dir, "template.tif")
nodatavalue = -1111111
compress = True
def export_array_as_tif(input_array, output_tif, template=template_tif_path, nodatavalue=nodatavalue, compress=compress, dtype=gdal.GDT_Float32):
    """Write an array to a single-band GeoTIFF georeferenced like a template raster.

    Parameters:
      input_array: array written to band 1. It is assumed to match the
        template's raster dimensions; this is not checked here.
      output_tif: path of the GeoTIFF to create.
      template: path of the raster that supplies the size, geotransform
        and projection of the output.
      nodatavalue: value registered as nodata on band 1.
      compress: if True, write with DEFLATE compression at ZLEVEL=9.
      dtype: GDAL data type of the output band.

    Returns: None. The output file is written to disk.
    """
    template_ds = template_band = output_ds = output_band = None
    try:
        template_ds = gdal.Open(template)
        template_band = template_ds.GetRasterBand(1)
        template_dimensions, template_projection = template_ds.GetGeoTransform(), template_ds.GetProjection()
        options = []
        if compress:
            # PREDICTOR=3 is the floating-point predictor and is only valid for
            # float data. Integer data uses horizontal differencing (PREDICTOR=2).
            # Any other type is written without a predictor.
            if dtype in (gdal.GDT_Float32, gdal.GDT_Float64):
                options = ["COMPRESS=DEFLATE", "PREDICTOR=3", "ZLEVEL=9"]
            elif dtype in (gdal.GDT_Byte, gdal.GDT_UInt16, gdal.GDT_Int16, gdal.GDT_UInt32, gdal.GDT_Int32):
                options = ["COMPRESS=DEFLATE", "PREDICTOR=2", "ZLEVEL=9"]
            else:
                options = ["COMPRESS=DEFLATE", "ZLEVEL=9"]
        output_ds = gdal.GetDriverByName("GTiff").Create(output_tif, template_band.XSize, template_band.YSize, 1, dtype, options=options)
        output_band = output_ds.GetRasterBand(1)
        output_band.WriteArray(input_array)
        output_band.SetNoDataValue(nodatavalue)
        output_ds.SetGeoTransform(template_dimensions)
        output_ds.SetProjection(template_projection)
    finally:
        # Dropping the references closes the GDAL datasets, even if a step above failed
        template_band = output_band = None
        template_ds = output_ds = None

# Global function: burn a polygon to raster
def burn_polygon_to_raster(raster_path, polygon_path, fixed=True, fixed_value=1, column_name=None, all_touched=True):
    """Burn the features of a vector file into band 1 of an existing raster, in place.

    Parameters:
      raster_path: raster opened in update mode; band 1 is modified.
      polygon_path: vector file whose first layer is rasterized.
      fixed: if True, burn `fixed_value`; otherwise burn a per-feature attribute.
      fixed_value: value burned when `fixed` is True.
      column_name: attribute to burn when `fixed` is False. Defaults to the
        layer's first field.
      all_touched: if True, pass ALL_TOUCHED=TRUE to the rasterizer.

    Raises: ValueError if either input fails to open.
    """
    raster = None
    vector = None
    try:
        raster = gdal.Open(raster_path, gdal.GA_Update)
        vector = ogr.Open(polygon_path)
        if not (raster and vector):
            raise ValueError("Cannot open input files")
        layer = vector.GetLayer()
        options = []
        if all_touched:
            options.append("ALL_TOUCHED=TRUE")
        if not fixed:
            # Burn a per-feature attribute, falling back to the first field of the layer
            attr_name = column_name or layer.GetLayerDefn().GetFieldDefn(0).GetName()
            options.append(f"ATTRIBUTE={attr_name}")
            gdal.RasterizeLayer(raster, [1], layer, options=options)
        else:
            gdal.RasterizeLayer(raster, [1], layer, burn_values=[fixed_value], options=options)
    finally:
        # Flush pending writes, then drop the references to close both datasets
        if raster:
            raster.FlushCache()
        raster = None
        vector = None

# Select source and model

In [None]:
# Select whether to source predictions from scenarios_dir or uncertainty_dir.
# If available, uncertainty_dir should be selected so that uncertainty can
# be propagated and the scenario 'mean' iteration values used.

source_dir = uncertainty_dir
# source_dir = scenarios_dir

print(f"{source_dir.split('/')[-1]} has been selected as the source directory for predictions")
print("to calculate disturbance and intactness.\n")

# Fall back to the scenarios directory if uncertainty was selected but does not exist
if source_dir == uncertainty_dir and not exists(uncertainty_dir):
    print("The uncertainty directory does not yet exist. Defaulting to scenarios directory.")
    source_dir = scenarios_dir

# Text after the last underscore of the path, suffixed with '_dir'
source_dir_name = source_dir.rsplit('_', 1)[-1] + "_dir"

# List the available models as ready-to-paste assignments
for subdir in os.listdir(source_dir):
    if 'scenario_masks' in subdir:
        continue
    print(f"selected_model = '{subdir}'")

In [None]:
selected_model = 'agbd_tekai_250625_003858'

selected_model_dir = join(source_dir, selected_model)
# The predictions sub-directory name depends on the selected source directory
if source_dir == scenarios_dir:
    predictions_dir = join(selected_model_dir, 'scenario_predictions')
if source_dir == uncertainty_dir:
    predictions_dir = join(selected_model_dir, 'uncertainty_predictions')

# Differences need at least two predictions; only then create the output directories
if len(os.listdir(predictions_dir)) >= 2:
    model_differences_dir = join(differences_dir, f"{selected_model}_{source_dir_name}")
    disturbance_dir = join(model_differences_dir, 'disturbance')
    intactness_dir = join(model_differences_dir, 'intactness')
    makedirs(model_differences_dir, exist_ok=True)
    makedirs(disturbance_dir, exist_ok=True)
    makedirs(intactness_dir, exist_ok=True)
else:
    print(f"At least 2 predictions must exist in {source_dir} to calculate differences.")

# Disturbance

## Define type and period

In [None]:
# Disturbance is measured as absolute AGBD loss
# This block builds dictionaries of disturbance options based on available files

# Scenario names are the text before the first "__" of each scenario prediction file
if source_dir == scenarios_dir:
    scenarios = {file.split("__")[0] for file in os.listdir(predictions_dir)}

# OR, for uncertainty predictions, file names are split as "<stat>__<scenario>__..."
if source_dir == uncertainty_dir:
    prediction_stats = {}
    for file in os.listdir(predictions_dir):
        parts = file.split("__")
        if len(parts) < 2:
            continue
        stat, scenario = parts[0], parts[1]
        prediction_stats.setdefault(scenario, set()).add(stat)
    # Only keep scenarios that have both 'uncertainty' and 'mean' prediction stats
    scenarios = {prediction for prediction, stats in prediction_stats.items()
                 if {'uncertainty', 'mean'} <= stats}

# Categorise years from scenarios
years = set()
plain_years = set()
oldgrowth_years = set()
oldgrowth_all_land_years = set()
for s in scenarios:
    if s.isdigit():
        # Plain year scenario, e.g. '2024'
        plain_years.add(int(s))
        years.add(int(s))
    elif "_oldgrowth_all_land" in s:
        year = s.split("_oldgrowth_all_land")[0]
        if year.isdigit():
            oldgrowth_all_land_years.add(int(year))
            years.add(int(year))
    elif "_oldgrowth" in s:
        year = s.split("_oldgrowth")[0]
        if year.isdigit():
            oldgrowth_years.add(int(year))
            years.add(int(year))
    elif "_no_disturbance_since_" in s or "_no_degradation_since_" in s:
        year = s.split("_")[0]
        if year.isdigit():
            years.add(int(year))
        # Both patterns contain "_since_"; the year before the 'since' year is also recorded
        since_year = s.split("_since_")[1]
        if since_year.isdigit():
            years.add(int(since_year) - 1)
years_sorted = sorted(years)

# Output dictionaries
disturbance_since_dictionary = {}
degradation_since_dictionary = {}
deforestation_since_dictionary = {}
print("disturbance_since_dictionary = {")
print("")

# 1. Process disturbance_since scenarios
# Pair each year of interest (year_a) with every earlier year (year_b);
# the 'since' year of the scenario is year_b + 1
for year_a in years_sorted:
    a_str = str(year_a)
    for year_b in years_sorted:
        if year_b >= year_a:
            continue
        b_str, b_plus1 = str(year_b), str(year_b + 1)

        if year_a not in plain_years:
            continue
        if f"{a_str}_no_disturbance_since_{b_plus1}" not in scenarios:
            continue
        print(f"# Disturbance in {a_str} caused by events since {b_plus1}")
        print(f"  ('{a_str}', '{a_str}_no_disturbance_since_{b_plus1}'):")
        print(f"    '{a_str}_disturbance_since_{b_plus1}',")
        print("")
        disturbance_since_dictionary[(a_str, f"{a_str}_no_disturbance_since_{b_plus1}")] = f"{a_str}_disturbance_since_{b_plus1}"
# Process disturbance_since_oldgrowth scenarios
for year in years_sorted:
    y_str = str(year)
    if year not in plain_years or year not in oldgrowth_all_land_years:
        continue
    if f"{y_str}_oldgrowth_all_land" not in scenarios:
        continue
    print(f"# Disturbance in {y_str} caused by events since an oldgrowth state.")
    print(f"  ('{y_str}', '{y_str}_oldgrowth_all_land'):")
    print(f"    '{y_str}_disturbance_since_oldgrowth',")
    print("")
    disturbance_since_dictionary[(y_str, f"{y_str}_oldgrowth_all_land")] = f"{y_str}_disturbance_since_oldgrowth"
print("}\n")

# 2. Degradation since dictionary
print("degradation_since_dictionary = {\n")
# Pair each year of interest (year_a) with every earlier year (year_b);
# the 'since' year of the scenario is year_b + 1
for year_a in years_sorted:
    a_str = str(year_a)
    for year_b in years_sorted:
        if year_b >= year_a:
            continue
        b_str, b_plus1 = str(year_b), str(year_b + 1)
        if year_a not in plain_years:
            continue
        if f"{a_str}_no_degradation_since_{b_plus1}" not in scenarios:
            continue
        print(f"# Degradation in {a_str} caused by events since {b_plus1}")
        print(f"  ('{a_str}', '{a_str}_no_degradation_since_{b_plus1}'):")
        print(f"    '{a_str}_degradation_since_{b_plus1}',")
        print("")
        degradation_since_dictionary[(a_str, f"{a_str}_no_degradation_since_{b_plus1}")] = f"{a_str}_degradation_since_{b_plus1}"
# Process degradation_since_oldgrowth scenarios
for year in years_sorted:
    y_str = str(year)
    if year not in plain_years or year not in oldgrowth_years:
        continue
    if f"{y_str}_oldgrowth" not in scenarios:
        continue
    print(f"# Degradation in {y_str} caused by events since an old-growth state")
    print(f"  ('{y_str}', '{y_str}_oldgrowth'):")
    print(f"    '{y_str}_degradation_since_oldgrowth',")
    print("")
    degradation_since_dictionary[(y_str, f"{y_str}_oldgrowth")] = f"{y_str}_degradation_since_oldgrowth"
print("}\n")

# 3. Deforestation since dictionary
# An entry is only produced when both the degradation and the disturbance
# result exist for the same year of interest and 'since' year
print("deforestation_since_dictionary = {\n")
for year_a in years_sorted:
    a_str = str(year_a)
    for year_b in years_sorted:
        if year_b >= year_a:
            continue
        b_str, b_plus1 = str(year_b), str(year_b + 1)
        deg_key = (a_str, f"{a_str}_no_degradation_since_{b_plus1}")
        dist_key = (a_str, f"{a_str}_no_disturbance_since_{b_plus1}")
        if deg_key not in degradation_since_dictionary:
            continue
        if dist_key not in disturbance_since_dictionary:
            continue
        deg_result = degradation_since_dictionary[deg_key]
        dist_result = disturbance_since_dictionary[dist_key]
        defor_result = f"{a_str}_deforestation_since_{b_plus1}"
        print(f"# Deforestation in {a_str} caused by events since {b_plus1}")
        print(f"  ('{deg_result}', '{dist_result}'):")
        print(f"    '{defor_result}',")
        print("")
        deforestation_since_dictionary[(deg_result, dist_result)] = defor_result
# Process deforestation_since_oldgrowth scenarios
for year in years_sorted:
    y_str = str(year)
    deg_key = (y_str, f"{y_str}_oldgrowth")
    dist_key = (y_str, f"{y_str}_oldgrowth_all_land")

    if deg_key not in degradation_since_dictionary or dist_key not in disturbance_since_dictionary:
        continue
    deg_result = degradation_since_dictionary[deg_key]
    dist_result = disturbance_since_dictionary[dist_key]
    defor_result = f"{y_str}_deforestation_since_oldgrowth"
    print(f"# Deforestation in {y_str} caused by events since an old-growth state")
    print(f"  ('{deg_result}', '{dist_result}'):")
    print(f"    '{defor_result}',")
    print("")
    deforestation_since_dictionary[(deg_result, dist_result)] = defor_result
print("}\n")


# 4. Specific year effects dictionary
print("specific_year_effects_dictionary = {\n")
# Collect all non-oldgrowth 'since' results, organised as
# effects_by_year[year of interest][disturbance type][baseline year] = result name
effects_by_year = {}
for dist_type, since_dictionary in (
    ('degradation', degradation_since_dictionary),
    ('disturbance', disturbance_since_dictionary),
    ('deforestation', deforestation_since_dictionary),
):
    separator = f"_{dist_type}_since_"
    for result_name in since_dictionary.values():
        if separator not in result_name or "_oldgrowth" in result_name:
            continue
        parts = result_name.split(separator)
        year_of_interest = parts[0]
        baseline_year = int(parts[1])
        effects_by_type = effects_by_year.setdefault(year_of_interest, {})
        effects_by_type.setdefault(dist_type, {})[baseline_year] = result_name

# Output dictionary entries grouped by year of interest and disturbance type
specific_year_effects_dictionary = {}
for year_of_interest in sorted(effects_by_year):
    year_effects = effects_by_year[year_of_interest]

    # Build all effects for this year first
    all_type_effects = {}
    for dist_type in ['degradation', 'deforestation', 'disturbance']:
        if dist_type not in year_effects:
            continue
        since_by_baseline = year_effects[dist_type]
        baseline_years = sorted(since_by_baseline)
        type_effects = []
        # The effect of a single year is the pair of 'since' results for consecutive baseline years
        for current_year, next_year in zip(baseline_years, baseline_years[1:]):
            if next_year != current_year + 1:
                continue
            since_current = since_by_baseline[current_year]
            since_next = since_by_baseline[next_year]
            effect_name = f"{year_of_interest}_effect_of_{dist_type}_in_{current_year}"
            type_effects.append((since_current, since_next, effect_name, current_year))
            specific_year_effects_dictionary[(since_current, since_next)] = effect_name
        # Add same-year effect (copy and rename) when the latest baseline is the year of interest
        if baseline_years and baseline_years[-1] == int(year_of_interest):
            last_year = baseline_years[-1]
            since_same_year = since_by_baseline[last_year]
            same_year_effect = f"{year_of_interest}_effect_of_{dist_type}_in_{last_year}"
            type_effects.append((since_same_year, None, same_year_effect, last_year))
            specific_year_effects_dictionary[(since_same_year,)] = same_year_effect

        if type_effects:
            all_type_effects[dist_type] = type_effects

    # Only print if there are effects for this year
    year_has_effects = bool(all_type_effects)
    if not year_has_effects:
        continue
    print(f"# Effects in {year_of_interest}")
    for dist_type in ['degradation', 'deforestation', 'disturbance']:
        if dist_type not in all_type_effects:
            continue
        print(f"  # {dist_type.capitalize()} effects")
        # Sort by effect year chronologically
        sorted_effects = sorted(all_type_effects[dist_type], key=lambda effect: effect[3])
        for since_current, since_next, effect_name, effect_year in sorted_effects:
            if since_next is not None:  # Regular subtraction effect
                print(f"  ('{since_current}', '{since_next}'):")
                print(f"    '{effect_name}',")
            else:  # Same-year effect (copy and rename)
                print(f"  ('{since_current}',):")
                print(f"    '{effect_name}',")
        print("")
print("}\n")

# 5. Area-based dictionary
print("area_based_dictionary = {")
# Polygon names are the .gpkg file names in the polygons directory, without the extension
polygon_names = set()
if os.path.exists(polygons_dir):
    polygon_names = {file[:-5] for file in os.listdir(polygons_dir) if file.endswith('.gpkg')}
area_based_entries = []
for scenario in scenarios:
    parts = scenario.split('_')
    # Deforestation scenarios end with "Xm_degradation_buffer"
    if len(parts) >= 5 and parts[-2:] == ['degradation', 'buffer'] and parts[-3].endswith('m'):
        alt_year, dist_type, year_affix = parts[0], parts[-5], parts[-4]
        polygon_name = '_'.join(parts[1:-5])
        if dist_type == 'deforestation' and polygon_name in polygon_names:
            output_name = f"{alt_year}_deforestation_of_{polygon_name}_{year_affix}"
            area_based_entries.append((scenario, alt_year, output_name))
    # Degradation scenarios end with "degradation_YYYY"
    elif len(parts) >= 3 and parts[-2] == 'degradation' and len(parts[-1]) == 4 and parts[-1].isdigit():
        alt_year, year_affix = parts[0], parts[-1]
        polygon_name = '_'.join(parts[1:-2])
        if polygon_name in polygon_names:
            output_name = f"{alt_year}_degradation_of_{polygon_name}_{year_affix}"
            area_based_entries.append((scenario, alt_year, output_name))
if area_based_entries:
    print("\n# Area-based disturbance from alternate scenarios")
    for scenario, alt_year, output_name in sorted(area_based_entries):
        print(f"  ('{scenario}', '{alt_year}'):")
        print(f"    '{output_name}',")
print("}\n")

In [None]:
# Pasted from the output printed by the dictionary-building cell.
# Each key is (scenario for the year of interest, counterfactual scenario);
# each value is the name given to the resulting disturbance difference.
# NOTE(review): presumably calculated as first scenario minus second — confirm
# against the calculation cell.
disturbance_since_dictionary = {

# Disturbance in 2021 caused by events since 1993
  ('2021', '2021_no_disturbance_since_1993'):
    '2021_disturbance_since_1993',

# Disturbance in 2024 caused by events since 1996
  ('2024', '2024_no_disturbance_since_1996'):
    '2024_disturbance_since_1996',

# Disturbance in 2024 caused by events since 1997
  ('2024', '2024_no_disturbance_since_1997'):
    '2024_disturbance_since_1997',

# Disturbance in 2024 caused by events since 1998
  ('2024', '2024_no_disturbance_since_1998'):
    '2024_disturbance_since_1998',

# Disturbance in 2024 caused by events since 1999
  ('2024', '2024_no_disturbance_since_1999'):
    '2024_disturbance_since_1999',

# Disturbance in 2024 caused by events since 2000
  ('2024', '2024_no_disturbance_since_2000'):
    '2024_disturbance_since_2000',

# Disturbance in 2024 caused by events since 2001
  ('2024', '2024_no_disturbance_since_2001'):
    '2024_disturbance_since_2001',

# Disturbance in 2024 caused by events since 2002
  ('2024', '2024_no_disturbance_since_2002'):
    '2024_disturbance_since_2002',

# Disturbance in 2024 caused by events since 2003
  ('2024', '2024_no_disturbance_since_2003'):
    '2024_disturbance_since_2003',

# Disturbance in 2024 caused by events since 2004
  ('2024', '2024_no_disturbance_since_2004'):
    '2024_disturbance_since_2004',

# Disturbance in 2024 caused by events since 2005
  ('2024', '2024_no_disturbance_since_2005'):
    '2024_disturbance_since_2005',

# Disturbance in 2024 caused by events since 2006
  ('2024', '2024_no_disturbance_since_2006'):
    '2024_disturbance_since_2006',

# Disturbance in 2024 caused by events since 2007
  ('2024', '2024_no_disturbance_since_2007'):
    '2024_disturbance_since_2007',

# Disturbance in 2024 caused by events since 2008
  ('2024', '2024_no_disturbance_since_2008'):
    '2024_disturbance_since_2008',

# Disturbance in 2024 caused by events since 2009
  ('2024', '2024_no_disturbance_since_2009'):
    '2024_disturbance_since_2009',

# Disturbance in 2024 caused by events since 2010
  ('2024', '2024_no_disturbance_since_2010'):
    '2024_disturbance_since_2010',

# Disturbance in 2024 caused by events since 2011
  ('2024', '2024_no_disturbance_since_2011'):
    '2024_disturbance_since_2011',

# Disturbance in 2024 caused by events since 2012
  ('2024', '2024_no_disturbance_since_2012'):
    '2024_disturbance_since_2012',

# Disturbance in 2024 caused by events since 2013
  ('2024', '2024_no_disturbance_since_2013'):
    '2024_disturbance_since_2013',

# Disturbance in 2024 caused by events since 2014
  ('2024', '2024_no_disturbance_since_2014'):
    '2024_disturbance_since_2014',

# Disturbance in 2024 caused by events since 2015
  ('2024', '2024_no_disturbance_since_2015'):
    '2024_disturbance_since_2015',

# Disturbance in 2024 caused by events since 2016
  ('2024', '2024_no_disturbance_since_2016'):
    '2024_disturbance_since_2016',

# Disturbance in 2024 caused by events since 2017
  ('2024', '2024_no_disturbance_since_2017'):
    '2024_disturbance_since_2017',

# Disturbance in 2024 caused by events since 2018
  ('2024', '2024_no_disturbance_since_2018'):
    '2024_disturbance_since_2018',

# Disturbance in 2024 caused by events since 2019
  ('2024', '2024_no_disturbance_since_2019'):
    '2024_disturbance_since_2019',

# Disturbance in 2024 caused by events since 2020
  ('2024', '2024_no_disturbance_since_2020'):
    '2024_disturbance_since_2020',

# Disturbance in 2024 caused by events since 2021
  ('2024', '2024_no_disturbance_since_2021'):
    '2024_disturbance_since_2021',

# Disturbance in 2024 caused by events since 2022
  ('2024', '2024_no_disturbance_since_2022'):
    '2024_disturbance_since_2022',

# Disturbance in 2024 caused by events since 2023
  ('2024', '2024_no_disturbance_since_2023'):
    '2024_disturbance_since_2023',

# Disturbance in 2024 caused by events since 2024
  ('2024', '2024_no_disturbance_since_2024'):
    '2024_disturbance_since_2024',

# Disturbance in 2021 caused by events since an old-growth state
  ('2021', '2021_oldgrowth_all_land'):
    '2021_disturbance_since_oldgrowth',

# Disturbance in 2024 caused by events since an old-growth state
  ('2024', '2024_oldgrowth_all_land'):
    '2024_disturbance_since_oldgrowth',

}

# Pasted from the output printed by the dictionary-building cell.
# Each key is (scenario for the year of interest, counterfactual scenario);
# each value is the name given to the resulting degradation difference.
# NOTE(review): presumably calculated as first scenario minus second — confirm
# against the calculation cell.
degradation_since_dictionary = {

# Degradation in 2021 caused by events since 1993
  ('2021', '2021_no_degradation_since_1993'):
    '2021_degradation_since_1993',

# Degradation in 2024 caused by events since 1996
  ('2024', '2024_no_degradation_since_1996'):
    '2024_degradation_since_1996',

# Degradation in 2021 caused by events since an old-growth state
  ('2021', '2021_oldgrowth'):
    '2021_degradation_since_oldgrowth',

# Degradation in 2024 caused by events since an old-growth state
  ('2024', '2024_oldgrowth'):
    '2024_degradation_since_oldgrowth',

}

# Pasted from the output printed by the dictionary-building cell.
# Each key is (degradation result name, disturbance result name), i.e. values of
# the degradation and disturbance dictionaries; each value is the name given to
# the resulting deforestation output.
# NOTE(review): how the pair is combined is not defined in this cell — confirm
# against the calculation cell.
deforestation_since_dictionary = {

# Deforestation in 2021 caused by events since 1993
  ('2021_degradation_since_1993', '2021_disturbance_since_1993'):
    '2021_deforestation_since_1993',

# Deforestation in 2024 caused by events since 1996
  ('2024_degradation_since_1996', '2024_disturbance_since_1996'):
    '2024_deforestation_since_1996',

# Deforestation in 2021 caused by events since an old-growth state
  ('2021_degradation_since_oldgrowth', '2021_disturbance_since_oldgrowth'):
    '2021_deforestation_since_oldgrowth',

# Deforestation in 2024 caused by events since an old-growth state
  ('2024_degradation_since_oldgrowth', '2024_disturbance_since_oldgrowth'):
    '2024_deforestation_since_oldgrowth',

}

# Pasted from the output printed by the dictionary-building cell.
# Two-element keys pair the 'since' results of consecutive baseline years; the
# value names the effect attributed to the earlier of the two years.
# One-element keys are same-year effects, which the generating cell labels as
# "copy and rename" of the single 'since' result.
specific_year_effects_dictionary = {

# Effects in 2024
  # Disturbance effects
  ('2024_disturbance_since_1996', '2024_disturbance_since_1997'):
    '2024_effect_of_disturbance_in_1996',
  ('2024_disturbance_since_1997', '2024_disturbance_since_1998'):
    '2024_effect_of_disturbance_in_1997',
  ('2024_disturbance_since_1998', '2024_disturbance_since_1999'):
    '2024_effect_of_disturbance_in_1998',
  ('2024_disturbance_since_1999', '2024_disturbance_since_2000'):
    '2024_effect_of_disturbance_in_1999',
  ('2024_disturbance_since_2000', '2024_disturbance_since_2001'):
    '2024_effect_of_disturbance_in_2000',
  ('2024_disturbance_since_2001', '2024_disturbance_since_2002'):
    '2024_effect_of_disturbance_in_2001',
  ('2024_disturbance_since_2002', '2024_disturbance_since_2003'):
    '2024_effect_of_disturbance_in_2002',
  ('2024_disturbance_since_2003', '2024_disturbance_since_2004'):
    '2024_effect_of_disturbance_in_2003',
  ('2024_disturbance_since_2004', '2024_disturbance_since_2005'):
    '2024_effect_of_disturbance_in_2004',
  ('2024_disturbance_since_2005', '2024_disturbance_since_2006'):
    '2024_effect_of_disturbance_in_2005',
  ('2024_disturbance_since_2006', '2024_disturbance_since_2007'):
    '2024_effect_of_disturbance_in_2006',
  ('2024_disturbance_since_2007', '2024_disturbance_since_2008'):
    '2024_effect_of_disturbance_in_2007',
  ('2024_disturbance_since_2008', '2024_disturbance_since_2009'):
    '2024_effect_of_disturbance_in_2008',
  ('2024_disturbance_since_2009', '2024_disturbance_since_2010'):
    '2024_effect_of_disturbance_in_2009',
  ('2024_disturbance_since_2010', '2024_disturbance_since_2011'):
    '2024_effect_of_disturbance_in_2010',
  ('2024_disturbance_since_2011', '2024_disturbance_since_2012'):
    '2024_effect_of_disturbance_in_2011',
  ('2024_disturbance_since_2012', '2024_disturbance_since_2013'):
    '2024_effect_of_disturbance_in_2012',
  ('2024_disturbance_since_2013', '2024_disturbance_since_2014'):
    '2024_effect_of_disturbance_in_2013',
  ('2024_disturbance_since_2014', '2024_disturbance_since_2015'):
    '2024_effect_of_disturbance_in_2014',
  ('2024_disturbance_since_2015', '2024_disturbance_since_2016'):
    '2024_effect_of_disturbance_in_2015',
  ('2024_disturbance_since_2016', '2024_disturbance_since_2017'):
    '2024_effect_of_disturbance_in_2016',
  ('2024_disturbance_since_2017', '2024_disturbance_since_2018'):
    '2024_effect_of_disturbance_in_2017',
  ('2024_disturbance_since_2018', '2024_disturbance_since_2019'):
    '2024_effect_of_disturbance_in_2018',
  ('2024_disturbance_since_2019', '2024_disturbance_since_2020'):
    '2024_effect_of_disturbance_in_2019',
  ('2024_disturbance_since_2020', '2024_disturbance_since_2021'):
    '2024_effect_of_disturbance_in_2020',
  ('2024_disturbance_since_2021', '2024_disturbance_since_2022'):
    '2024_effect_of_disturbance_in_2021',
  ('2024_disturbance_since_2022', '2024_disturbance_since_2023'):
    '2024_effect_of_disturbance_in_2022',
  ('2024_disturbance_since_2023', '2024_disturbance_since_2024'):
    '2024_effect_of_disturbance_in_2023',
  ('2024_disturbance_since_2024',):
    '2024_effect_of_disturbance_in_2024',

}

# Pasted from the output printed by the dictionary-building cell.
# Each key is (alternate area-based scenario, plain year scenario); each value
# is the name given to the resulting area-based disturbance output.
# NOTE(review): presumably calculated as first scenario minus second — confirm
# against the calculation cell.
area_based_dictionary = {

# Area-based disturbance from alternate scenarios
  ('2024_road_mat_daling_deforestation_2023_30m_degradation_buffer', '2024'):
    '2024_deforestation_of_road_mat_daling_2023',
}


## Calculate disturbance

In [None]:
# Floor constraint that prevents degradation from exceeding total disturbance.
# Conceptually, disturbance = degradation + deforestation.
# Degradation can exceed disturbance in rare cases where edge effects from
# non-forest have a positive predicted impact on AGBD, especially at high elevation.
apply_degradation_floor = False
# Caps all positive differences at zero, again mainly from rare edge effect cases.
# Conceptually this is disturbance loss rather than effect of disturbance.
cap_positive_differences = False
# Uncertainty propagation will result in changes to original precision (unlike AGBD mean)
uncertainty_precision = 1

# Calculate AGBD loss between two scenarios (array1 - array2)
# Returns negative values for disturbance losses
# Gains will be negligible artifacts of float precision if scenario dictionary correct
def subtract_arrays(array1, array2):
  diff_array = array1 - array2
  if not cap_positive_differences:
    return diff_array
  # Remove 'positive' artefacts from Monte Carlo averaging by setting them to zero
  return np.where(diff_array > 0, 0, diff_array)

# Propagate uncertainty for forest AGBD loss calculations using standard error propagation
# Measures uncertainty of forest AGBD change from disturbance events only

# Mathematical basis: For difference Z = X - Y with relative uncertainties u_x, u_y:
# Absolute uncertainty: σ_z = √[(X×u_x)² + (Y×u_y)²] (IPCC 2006, Eq. 3.2; 2019, Eq. 3.2A)
# Relative uncertainty: σ_z / |Z| = σ_z / |X - Y|

# Note: Liang et al. (2023) incorrectly used |X + Y| as denominator, violating standard
# uncertainty propagation theory for differences. IPCC guidelines (2006 Section 3.2.3.1,
# 2019 Section 3.2.3.1) specify the denominator must be the absolute value of the
# difference |X - Y| for mathematically correct relative uncertainty calculations.

# Limitation: This approach assumes independence between scenario uncertainties, but
# scenarios using identical models and predictors are highly correlated. This results
# in conservative (overestimated) uncertainty bounds. Liang et al. (2023) has the same
# correlation limitation plus the mathematical error noted above.

# Forest classification from external dataset determines data availability per scenario
# External disturbance classification determines whether forest AGBD change occurred
# Uncertainty quantifies confidence in magnitude of forest AGBD change from disturbance

# References:
# - IPCC (2006) Guidelines Vol.1 Ch.3: Uncertainties, Section 3.2.3.1
# - IPCC (2019) Refinement Vol.1 Ch.3: Uncertainties, Section 3.2.3.1
# - Liang et al. (2023) Remote Sensing of Environment 284:113367

# Parameters:
#   mean1, mean2: Forest AGBD values for two scenarios/timepoints (Mg/ha)
#   uncertainty1, uncertainty2: Relative uncertainties as percentages (0-100)
# Returns: Relative uncertainty of forest AGBD change from disturbance as percentage (0-100)
def propagate_uncertainty(mean1, uncertainty1, mean2, uncertainty2):
  # Relative uncertainty (percent) of the difference mean1 - mean2.
  # Inputs: two mean arrays and their relative uncertainties given as percentages.
  # Output: percentage rounded to the global 'uncertainty_precision' decimal places.
  difference = mean1 - mean2

  # Work with fractional (0-1) uncertainties internally
  fraction1 = uncertainty1 / 100.0
  fraction2 = uncertainty2 / 100.0

  # One-sided pixels: exactly one input is all-zero (mean 0 and uncertainty 0, i.e. a
  # nodata pixel that the caller filled with 0) while the other has a non-zero mean
  # and a non-zero uncertainty. The result there is the uncertainty of the populated side.
  only_second_populated = (fraction1 == 0) & (mean1 == 0) & (fraction2 != 0) & (mean2 != 0)
  only_first_populated = (fraction1 != 0) & (mean1 != 0) & (fraction2 == 0) & (mean2 == 0)

  # Relative uncertainties cannot be combined directly, so scale each by its mean to
  # obtain absolute uncertainties (same units as the means) and add them in quadrature
  sigma1 = mean1 * fraction1
  sigma2 = mean2 * fraction2
  sigma_combined = np.sqrt(np.square(sigma1) + np.square(sigma2))

  # Relative uncertainty of the difference = combined sigma / |difference|.
  # The output buffer starts at 0 and is only written where |difference| is non-zero,
  # which avoids any division by zero.
  abs_difference = np.abs(difference)
  propagated = np.zeros_like(sigma_combined)
  np.divide(sigma_combined, abs_difference, out=propagated, where=(abs_difference != 0))

  # Resolve the cases from lowest to highest priority:
  #   1. positive or zero difference -> 0, otherwise the propagated value
  #   2. only the first input populated -> its own fractional uncertainty
  #   3. only the second input populated -> its own fractional uncertainty
  gain_or_unchanged = (difference > 0) | (abs_difference == 0)
  relative = np.where(gain_or_unchanged, 0, propagated)
  relative = np.where(only_first_populated, fraction1, relative)
  relative = np.where(only_second_populated, fraction2, relative)

  # Back to a percentage
  return np.round(relative * 100.0, uncertainty_precision)

# Uncertainty mode (paired mean__/uncertainty__ rasters) is used only when
# source_dir is the uncertainty directory
use_uncertainty = (source_dir == uncertainty_dir)

# Progress tracking: one operation per entry across all five disturbance dictionaries
total_operations = sum(
    len(dictionary) for dictionary in (
        disturbance_since_dictionary,
        degradation_since_dictionary,
        deforestation_since_dictionary,
        specific_year_effects_dictionary,
        area_based_dictionary,
    )
)
progress_index = 0
progress_label = widgets.Label(
    f"Disturbance calculation progress: {progress_index}/{total_operations}"
)

display(progress_label)

# 1. Process disturbance_since calculations
# Each dictionary key is a (scenario1, scenario2) pair of prediction names and each value is
# the output disturbance name. The exported raster is scenario1 minus scenario2 (see
# subtract_arrays), read from predictions_dir and written to disturbance_dir.
for (scenario1, scenario2), disturbance_name in disturbance_since_dictionary.items():
  if use_uncertainty:
      # Define filenames and paths for disturbance mean and uncertainty
      mean_filename = f"mean__{disturbance_name}__{selected_model}.tif"
      mean_path = join(disturbance_dir, mean_filename)
      uncertainty_filename = f"uncertainty__{disturbance_name}__{selected_model}.tif"
      uncertainty_path = join(disturbance_dir, uncertainty_filename)
      # Skip if both files already exist (the skipped item still counts towards progress)
      if exists(mean_path) and exists(uncertainty_path):
          progress_index += 1
          progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"
          continue

      scenario1_base = f"{scenario1}__{selected_model}"
      scenario2_base = f"{scenario2}__{selected_model}"
      # Define scenario paths, assert that both exist for both scenarios
      scenario1_mean_path = join(predictions_dir, f"mean__{scenario1_base}.tif")
      scenario1_uncertainty_path = join(predictions_dir, f"uncertainty__{scenario1_base}.tif")
      scenario2_mean_path = join(predictions_dir, f"mean__{scenario2_base}.tif")
      scenario2_uncertainty_path = join(predictions_dir, f"uncertainty__{scenario2_base}.tif")
      assert exists(scenario1_mean_path), f"mean__{scenario1_base}.tif does not exist."
      assert exists(scenario1_uncertainty_path), f"uncertainty__{scenario1_base}.tif does not exist."
      assert exists(scenario2_mean_path), f"mean__{scenario2_base}.tif does not exist."
      assert exists(scenario2_uncertainty_path), f"uncertainty__{scenario2_base}.tif does not exist."
      # Read arrays
      scenario1_mean = gdal.Open(scenario1_mean_path).ReadAsArray()
      scenario1_uncertainty = gdal.Open(scenario1_uncertainty_path).ReadAsArray()
      scenario2_mean = gdal.Open(scenario2_mean_path).ReadAsArray()
      scenario2_uncertainty = gdal.Open(scenario2_uncertainty_path).ReadAsArray()
      # Fill scenario nodata values with 0 if they are not nodatavalues in the other scenario
      # Statement order matters: the scenario2 fills read the already-filled scenario1 arrays.
      # Pixels that are nodata in both scenarios are left as nodata.
      # Mean and uncertainty are filled from their own nodata masks, not from a shared mask.
      scenario1_mean = np.where((scenario1_mean == nodatavalue) & (scenario2_mean != nodatavalue), 0, scenario1_mean)
      scenario1_uncertainty = np.where((scenario1_uncertainty == nodatavalue) & (scenario2_uncertainty != nodatavalue), 0, scenario1_uncertainty)
      scenario2_mean = np.where((scenario2_mean == nodatavalue) & (scenario1_mean != nodatavalue), 0, scenario2_mean)
      scenario2_uncertainty = np.where((scenario2_uncertainty == nodatavalue) & (scenario1_uncertainty != nodatavalue), 0, scenario2_uncertainty)
      # Create disturbance arrays where the value is not 'nodatavalue' in both scenarios
      # After the fill, scenario1_mean is still nodata only where both scenarios were nodata,
      # so it serves as the output nodata mask for both the mean and the uncertainty
      dist_mean_array = np.where(scenario1_mean == nodatavalue, nodatavalue, subtract_arrays(scenario1_mean, scenario2_mean))
      dist_uncertainty_array = np.where(scenario1_mean == nodatavalue, nodatavalue,
                                       propagate_uncertainty(scenario1_mean, scenario1_uncertainty, scenario2_mean, scenario2_uncertainty))
      # Round arrays before export
      dist_mean_array_rounded = np.round(dist_mean_array)
      # Set uncertainty to 0 where rounded difference is 0
      dist_uncertainty_array = np.where(dist_mean_array_rounded == 0, 0, dist_uncertainty_array)
      # Export disturbance rasters
      # NOTE(review): the scenario1 mean raster is passed as the template for both outputs;
      # presumably export_array_as_tif copies its georeferencing - confirm in that helper
      export_array_as_tif(dist_mean_array_rounded, mean_path, template=scenario1_mean_path)
      export_array_as_tif(dist_uncertainty_array, uncertainty_path, template=scenario1_mean_path)
  else:
      # Define filenames and paths for disturbance
      dist_filename = f"{disturbance_name}__{selected_model}.tif"
      dist_path = join(disturbance_dir, dist_filename)
      # Skip if file already exists (the skipped item still counts towards progress)
      if exists(dist_path):
          progress_index += 1
          progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"
          continue
      # Define scenario paths, assert that both exist for both scenarios
      scenario1_path = join(predictions_dir, f"{scenario1}__{selected_model}.tif")
      assert exists(scenario1_path), f"{scenario1_path} does not exist."
      scenario2_path = join(predictions_dir, f"{scenario2}__{selected_model}.tif")
      assert exists(scenario2_path), f"{scenario2_path} does not exist."
      # Read arrays
      scenario1_array_temp = gdal.Open(scenario1_path).ReadAsArray()
      scenario2_array_temp = gdal.Open(scenario2_path).ReadAsArray()
      # Fill scenario nodata values with 0 if they are not nodatavalues in the other scenario
      # The scenario2 fill reads the already-filled scenario1_array, not the raw *_temp array
      scenario1_array = np.where((scenario1_array_temp == nodatavalue) & (scenario2_array_temp != nodatavalue), 0, scenario1_array_temp)
      scenario2_array = np.where((scenario2_array_temp == nodatavalue) & (scenario1_array != nodatavalue), 0, scenario2_array_temp)
      # Create disturbance arrays where the value is not 'nodatavalue' in both scenarios
      dist_array = np.where(scenario1_array==nodatavalue, nodatavalue, subtract_arrays(scenario1_array, scenario2_array))
      # Round array before export
      dist_array_rounded = np.round(dist_array)
      # Export disturbance raster
      export_array_as_tif(dist_array_rounded, dist_path, template = scenario1_path)

  # Update progress (reached only when the item was processed; skipped items updated it above)
  progress_index += 1
  progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"

# 2. Process degradation_since calculations
# Same scenario1 minus scenario2 differencing as step 1. When 'apply_degradation_floor' is set,
# an extra floor constraint follows: if a raster with the same name but 'disturbance_since'
# in place of 'degradation_since' already exists in disturbance_dir, degradation values lower
# than that raster are raised to its values.
for (scenario1, scenario2), disturbance_name in degradation_since_dictionary.items():
  if use_uncertainty:
      # Define filenames and paths for disturbance mean and uncertainty
      mean_filename = f"mean__{disturbance_name}__{selected_model}.tif"
      mean_path = join(disturbance_dir, mean_filename)
      uncertainty_filename = f"uncertainty__{disturbance_name}__{selected_model}.tif"
      uncertainty_path = join(disturbance_dir, uncertainty_filename)
      # Skip if both files already exist (the skipped item still counts towards progress)
      if exists(mean_path) and exists(uncertainty_path):
          progress_index += 1
          progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"
          continue

      scenario1_base = f"{scenario1}__{selected_model}"
      scenario2_base = f"{scenario2}__{selected_model}"
      # Define scenario paths, assert that both exist for both scenarios
      scenario1_mean_path = join(predictions_dir, f"mean__{scenario1_base}.tif")
      scenario1_uncertainty_path = join(predictions_dir, f"uncertainty__{scenario1_base}.tif")
      scenario2_mean_path = join(predictions_dir, f"mean__{scenario2_base}.tif")
      scenario2_uncertainty_path = join(predictions_dir, f"uncertainty__{scenario2_base}.tif")
      assert exists(scenario1_mean_path), f"mean__{scenario1_base}.tif does not exist."
      assert exists(scenario1_uncertainty_path), f"uncertainty__{scenario1_base}.tif does not exist."
      assert exists(scenario2_mean_path), f"mean__{scenario2_base}.tif does not exist."
      assert exists(scenario2_uncertainty_path), f"uncertainty__{scenario2_base}.tif does not exist."
      # Read arrays
      scenario1_mean = gdal.Open(scenario1_mean_path).ReadAsArray()
      scenario1_uncertainty = gdal.Open(scenario1_uncertainty_path).ReadAsArray()
      scenario2_mean = gdal.Open(scenario2_mean_path).ReadAsArray()
      scenario2_uncertainty = gdal.Open(scenario2_uncertainty_path).ReadAsArray()
      # Fill scenario nodata values with 0 if they are not nodatavalues in the other scenario
      # Statement order matters: the scenario2 fills read the already-filled scenario1 arrays.
      # Pixels that are nodata in both scenarios are left as nodata.
      scenario1_mean = np.where((scenario1_mean == nodatavalue) & (scenario2_mean != nodatavalue), 0, scenario1_mean)
      scenario1_uncertainty = np.where((scenario1_uncertainty == nodatavalue) & (scenario2_uncertainty != nodatavalue), 0, scenario1_uncertainty)
      scenario2_mean = np.where((scenario2_mean == nodatavalue) & (scenario1_mean != nodatavalue), 0, scenario2_mean)
      scenario2_uncertainty = np.where((scenario2_uncertainty == nodatavalue) & (scenario1_uncertainty != nodatavalue), 0, scenario2_uncertainty)
      # Create disturbance arrays where the value is not 'nodatavalue' in both scenarios
      dist_mean_array = np.where(scenario1_mean == nodatavalue, nodatavalue, subtract_arrays(scenario1_mean, scenario2_mean))
      dist_uncertainty_array = np.where(scenario1_mean == nodatavalue, nodatavalue,
                                       propagate_uncertainty(scenario1_mean, scenario1_uncertainty, scenario2_mean, scenario2_uncertainty))

      # Check for matching disturbance file and apply floor constraint
      if apply_degradation_floor and 'degradation_since' in disturbance_name:
          # construct matching disturbance filename
          equiv_disturbance_name = disturbance_name.replace('degradation_since', 'disturbance_since')
          equiv_dist_mean_filename = f"mean__{equiv_disturbance_name}__{selected_model}.tif"
          equiv_dist_uncertainty_filename = f"uncertainty__{equiv_disturbance_name}__{selected_model}.tif"

          # get list of files in disturbance directory
          disturbance_files = os.listdir(disturbance_dir)

          # check if matching files exist (both mean and uncertainty are required)
          if equiv_dist_mean_filename in disturbance_files and equiv_dist_uncertainty_filename in disturbance_files:
              print(f"Applying floor constraint: {disturbance_name} will be constrained by {equiv_disturbance_name}")

              equiv_dist_mean_path = join(disturbance_dir, equiv_dist_mean_filename)
              equiv_dist_uncertainty_path = join(disturbance_dir, equiv_dist_uncertainty_filename)

              equiv_dist_mean = gdal.Open(equiv_dist_mean_path).ReadAsArray()
              equiv_dist_uncertainty = gdal.Open(equiv_dist_uncertainty_path).ReadAsArray()

              # Apply floor constraint: degradation cannot be more negative than disturbance
              # floor_applied marks pixels that are valid in both rasters and where the
              # degradation value is below the disturbance value
              floor_applied = ((dist_mean_array != nodatavalue) & (equiv_dist_mean != nodatavalue) &
                             (dist_mean_array < equiv_dist_mean))
              dist_mean_array = np.where(floor_applied, equiv_dist_mean, dist_mean_array)
              # Use disturbance uncertainty where floor constraint applied
              dist_uncertainty_array = np.where(floor_applied, equiv_dist_uncertainty, dist_uncertainty_array)
          else:
              # The floor is skipped (not an error) when the matching rasters are missing
              print(f"No floor constraint applied: {equiv_disturbance_name} files not found for {disturbance_name}")

      # Round arrays before export
      dist_mean_array_rounded = np.round(dist_mean_array)
      # Set uncertainty to 0 where rounded difference is 0
      dist_uncertainty_array = np.where(dist_mean_array_rounded == 0, 0, dist_uncertainty_array)
      # Export disturbance rasters
      export_array_as_tif(dist_mean_array_rounded, mean_path, template=scenario1_mean_path)
      export_array_as_tif(dist_uncertainty_array, uncertainty_path, template=scenario1_mean_path)
  else:
      # Define filenames and paths for disturbance
      dist_filename = f"{disturbance_name}__{selected_model}.tif"
      dist_path = join(disturbance_dir, dist_filename)
      # Skip if file already exists (the skipped item still counts towards progress)
      if exists(dist_path):
          progress_index += 1
          progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"
          continue
      # Define scenario paths, assert that both exist for both scenarios
      scenario1_path = join(predictions_dir, f"{scenario1}__{selected_model}.tif")
      assert exists(scenario1_path), f"{scenario1_path} does not exist."
      scenario2_path = join(predictions_dir, f"{scenario2}__{selected_model}.tif")
      assert exists(scenario2_path), f"{scenario2_path} does not exist."
      # Read arrays
      scenario1_array_temp = gdal.Open(scenario1_path).ReadAsArray()
      scenario2_array_temp = gdal.Open(scenario2_path).ReadAsArray()
      # Fill scenario nodata values with 0 if they are not nodatavalues in the other scenario
      # The scenario2 fill reads the already-filled scenario1_array, not the raw *_temp array
      scenario1_array = np.where((scenario1_array_temp == nodatavalue) & (scenario2_array_temp != nodatavalue), 0, scenario1_array_temp)
      scenario2_array = np.where((scenario2_array_temp == nodatavalue) & (scenario1_array != nodatavalue), 0, scenario2_array_temp)
      # Create disturbance arrays where the value is not 'nodatavalue' in both scenarios
      dist_array = np.where(scenario1_array==nodatavalue, nodatavalue, subtract_arrays(scenario1_array, scenario2_array))

      # Check for matching disturbance file and apply floor constraint
      if apply_degradation_floor and 'degradation_since' in disturbance_name:
          # construct matching disturbance filename
          equiv_disturbance_name = disturbance_name.replace('degradation_since', 'disturbance_since')
          equiv_dist_filename = f"{equiv_disturbance_name}__{selected_model}.tif"

          # get list of files in disturbance directory
          disturbance_files = os.listdir(disturbance_dir)

          # check if matching file exists
          if equiv_dist_filename in disturbance_files:
              print(f"Applying floor constraint: {disturbance_name} will be constrained by {equiv_disturbance_name}")

              equiv_dist_path = join(disturbance_dir, equiv_dist_filename)
              equiv_dist_array = gdal.Open(equiv_dist_path).ReadAsArray()

              # Apply floor constraint: degradation cannot be more negative than disturbance
              # Only pixels valid in both rasters with degradation below disturbance are replaced
              dist_array = np.where(
                  (dist_array != nodatavalue) & (equiv_dist_array != nodatavalue) & (dist_array < equiv_dist_array),
                  equiv_dist_array,
                  dist_array
              )
          else:
              # The floor is skipped (not an error) when the matching raster is missing
              print(f"No floor constraint applied: {equiv_disturbance_name} file not found for {disturbance_name}")

      # Round array before export
      dist_array_rounded = np.round(dist_array)
      # Export disturbance raster
      export_array_as_tif(dist_array_rounded, dist_path, template = scenario1_path)

  # Update progress (reached only when the item was processed; skipped items updated it above)
  progress_index += 1
  progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"

# 3. Process deforestation_since calculations
# Each key is a (dist1_name, dist2_name) pair of rasters read from disturbance_dir (the same
# directory the outputs are written to). Note the operand order: the result is dist2 minus
# dist1, and the output nodata mask follows dist2.
for (dist1_name, dist2_name), disturbance_name in deforestation_since_dictionary.items():
  if use_uncertainty:
      # Define filenames and paths of disturbance .tifs
      mean_filename = f"mean__{disturbance_name}__{selected_model}.tif"
      mean_path = join(disturbance_dir, mean_filename)
      uncertainty_filename = f"uncertainty__{disturbance_name}__{selected_model}.tif"
      uncertainty_path = join(disturbance_dir, uncertainty_filename)
      # Skip if both files already exist (the skipped item still counts towards progress)
      if exists(mean_path) and exists(uncertainty_path):
          progress_index += 1
          progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"
          continue
      # Define disturbance paths, assert that both exist
      dist1_mean_path = join(disturbance_dir, f"mean__{dist1_name}__{selected_model}.tif")
      dist1_uncertainty_path = join(disturbance_dir, f"uncertainty__{dist1_name}__{selected_model}.tif")
      dist2_mean_path = join(disturbance_dir, f"mean__{dist2_name}__{selected_model}.tif")
      dist2_uncertainty_path = join(disturbance_dir, f"uncertainty__{dist2_name}__{selected_model}.tif")
      assert exists(dist1_mean_path), f"{dist1_mean_path} does not exist."
      assert exists(dist1_uncertainty_path), f"{dist1_uncertainty_path} does not exist."
      assert exists(dist2_mean_path), f"{dist2_mean_path} does not exist."
      assert exists(dist2_uncertainty_path), f"{dist2_uncertainty_path} does not exist."
      # Read arrays
      dist1_mean = gdal.Open(dist1_mean_path).ReadAsArray()
      dist1_uncertainty = gdal.Open(dist1_uncertainty_path).ReadAsArray()
      dist2_mean = gdal.Open(dist2_mean_path).ReadAsArray()
      dist2_uncertainty = gdal.Open(dist2_uncertainty_path).ReadAsArray()
      # Fill disturbance nodata values with 0 if they are not nodatavalues in the other disturbance
      # Statement order matters: the dist2 fills read the already-filled dist1 arrays.
      # Pixels that are nodata in both inputs are left as nodata.
      dist1_mean = np.where((dist1_mean == nodatavalue) & (dist2_mean != nodatavalue), 0, dist1_mean)
      dist1_uncertainty = np.where((dist1_uncertainty == nodatavalue) & (dist2_uncertainty != nodatavalue), 0, dist1_uncertainty)
      dist2_mean = np.where((dist2_mean == nodatavalue) & (dist1_mean != nodatavalue), 0, dist2_mean)
      dist2_uncertainty = np.where((dist2_uncertainty == nodatavalue) & (dist1_uncertainty != nodatavalue), 0, dist2_uncertainty)
      # Create disturbance arrays where the value is not 'nodatavalue' in disturbance (second array)
      # dist2 is the first operand of both the subtraction and the uncertainty propagation
      result_mean = np.where(dist2_mean == nodatavalue, nodatavalue, subtract_arrays(dist2_mean, dist1_mean))
      result_uncertainty = np.where(dist2_mean == nodatavalue, nodatavalue,
                                   propagate_uncertainty(dist2_mean, dist2_uncertainty, dist1_mean, dist1_uncertainty))
      # Round arrays before export
      result_mean_rounded = np.round(result_mean)
      # Set uncertainty to 0 where rounded difference is 0
      result_uncertainty = np.where(result_mean_rounded == 0, 0, result_uncertainty)
      # Export disturbance rasters (the dist2 mean raster is the template for both outputs)
      export_array_as_tif(result_mean_rounded, mean_path, template=dist2_mean_path)
      export_array_as_tif(result_uncertainty, uncertainty_path, template=dist2_mean_path)
  else:
      # Define filenames and paths of disturbance .tifs
      output_filename = f"{disturbance_name}__{selected_model}.tif"
      output_path = join(disturbance_dir, output_filename)
      # Skip if file already exists (the skipped item still counts towards progress)
      if exists(output_path):
          progress_index += 1
          progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"
          continue
      # Define disturbance paths, assert that both exist
      dist1_path = join(disturbance_dir, f"{dist1_name}__{selected_model}.tif")
      assert exists(dist1_path), f"{dist1_path} does not exist."
      dist2_path = join(disturbance_dir, f"{dist2_name}__{selected_model}.tif")
      assert exists(dist2_path), f"{dist2_path} does not exist."
      # Read arrays
      dist1_array_temp = gdal.Open(dist1_path).ReadAsArray()
      dist2_array_temp = gdal.Open(dist2_path).ReadAsArray()
      # Fill disturbance nodata values with 0 if they are not nodatavalues in the other disturbance
      # The dist2 fill reads the already-filled dist1_array, not the raw *_temp array
      dist1_array = np.where((dist1_array_temp == nodatavalue) & (dist2_array_temp != nodatavalue), 0, dist1_array_temp)
      dist2_array = np.where((dist2_array_temp == nodatavalue) & (dist1_array != nodatavalue), 0, dist2_array_temp)
      # Create disturbance arrays where the value is not 'nodatavalue' in disturbance (second array)
      result_array = np.where(dist2_array==nodatavalue, nodatavalue, subtract_arrays(dist2_array, dist1_array))
      # Round array before export
      result_array_rounded = np.round(result_array)
      # Export disturbance raster
      export_array_as_tif(result_array_rounded, output_path, template = dist2_path)

  # Update progress (reached only when the item was processed; skipped items updated it above)
  progress_index += 1
  progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"

# 4. Process specific year effects
# Keys are tuples of raster names in disturbance_dir. A one-element key re-exports that raster
# (rounded) under the new disturbance name; any other key is unpacked as (dist1_name, dist2_name)
# and exported as dist1 minus dist2.
for dist_key, disturbance_name in specific_year_effects_dictionary.items():
  if len(dist_key) == 1:  # Direct export operation (same-year effect)
      source_name = dist_key[0]
      if use_uncertainty:
          # Define source and target filenames and paths
          source_mean_filename = f"mean__{source_name}__{selected_model}.tif"
          source_mean_path = join(disturbance_dir, source_mean_filename)
          source_uncertainty_filename = f"uncertainty__{source_name}__{selected_model}.tif"
          source_uncertainty_path = join(disturbance_dir, source_uncertainty_filename)
          target_mean_filename = f"mean__{disturbance_name}__{selected_model}.tif"
          target_mean_path = join(disturbance_dir, target_mean_filename)
          target_uncertainty_filename = f"uncertainty__{disturbance_name}__{selected_model}.tif"
          target_uncertainty_path = join(disturbance_dir, target_uncertainty_filename)
          # Skip if both target files already exist (the skipped item still counts towards progress)
          if exists(target_mean_path) and exists(target_uncertainty_path):
              progress_index += 1
              progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"
              continue
          # Assert source files exist
          assert exists(source_mean_path), f"{source_mean_path} does not exist."
          assert exists(source_uncertainty_path), f"{source_uncertainty_path} does not exist."
          # Read arrays
          source_mean = gdal.Open(source_mean_path).ReadAsArray()
          source_uncertainty = gdal.Open(source_uncertainty_path).ReadAsArray()
          # Round arrays before export
          source_mean_rounded = np.round(source_mean)
          # Set uncertainty to 0 where rounded difference is 0
          source_uncertainty = np.where(source_mean_rounded == 0, 0, source_uncertainty)
          # Export arrays directly (no subtraction; the source mean raster is the template)
          export_array_as_tif(source_mean_rounded, target_mean_path, template=source_mean_path)
          export_array_as_tif(source_uncertainty, target_uncertainty_path, template=source_mean_path)
      else:
          # Define source and target filenames and paths
          source_filename = f"{source_name}__{selected_model}.tif"
          source_path = join(disturbance_dir, source_filename)
          target_filename = f"{disturbance_name}__{selected_model}.tif"
          target_path = join(disturbance_dir, target_filename)
          # Skip if target file already exists (the skipped item still counts towards progress)
          if exists(target_path):
              progress_index += 1
              progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"
              continue
          # Assert source file exists
          assert exists(source_path), f"{source_path} does not exist."
          # Read array
          source_array = gdal.Open(source_path).ReadAsArray()
          # Round array before export
          source_array_rounded = np.round(source_array)
          # Export array directly
          export_array_as_tif(source_array_rounded, target_path, template=source_path)
  else:  # Subtraction operation (two-element tuple)
      # Unpacking raises ValueError if the key does not hold exactly two names
      dist1_name, dist2_name = dist_key
      if use_uncertainty:
          # Define filenames and paths of disturbance .tifs
          mean_filename = f"mean__{disturbance_name}__{selected_model}.tif"
          mean_path = join(disturbance_dir, mean_filename)
          uncertainty_filename = f"uncertainty__{disturbance_name}__{selected_model}.tif"
          uncertainty_path = join(disturbance_dir, uncertainty_filename)
          # Skip if both files already exist (the skipped item still counts towards progress)
          if exists(mean_path) and exists(uncertainty_path):
              progress_index += 1
              progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"
              continue
          # Define disturbance paths, assert that both exist
          dist1_mean_path = join(disturbance_dir, f"mean__{dist1_name}__{selected_model}.tif")
          dist1_uncertainty_path = join(disturbance_dir, f"uncertainty__{dist1_name}__{selected_model}.tif")
          dist2_mean_path = join(disturbance_dir, f"mean__{dist2_name}__{selected_model}.tif")
          dist2_uncertainty_path = join(disturbance_dir, f"uncertainty__{dist2_name}__{selected_model}.tif")
          assert exists(dist1_mean_path), f"{dist1_mean_path} does not exist."
          assert exists(dist1_uncertainty_path), f"{dist1_uncertainty_path} does not exist."
          assert exists(dist2_mean_path), f"{dist2_mean_path} does not exist."
          assert exists(dist2_uncertainty_path), f"{dist2_uncertainty_path} does not exist."
          # Read arrays
          dist1_mean = gdal.Open(dist1_mean_path).ReadAsArray()
          dist1_uncertainty = gdal.Open(dist1_uncertainty_path).ReadAsArray()
          dist2_mean = gdal.Open(dist2_mean_path).ReadAsArray()
          dist2_uncertainty = gdal.Open(dist2_uncertainty_path).ReadAsArray()
          # Fill disturbance nodata values with 0 if they are not nodatavalues in the other disturbance
          # Statement order matters: the dist2 fills read the already-filled dist1 arrays.
          # Pixels that are nodata in both inputs are left as nodata.
          dist1_mean = np.where((dist1_mean == nodatavalue) & (dist2_mean != nodatavalue), 0, dist1_mean)
          dist1_uncertainty = np.where((dist1_uncertainty == nodatavalue) & (dist2_uncertainty != nodatavalue), 0, dist1_uncertainty)
          dist2_mean = np.where((dist2_mean == nodatavalue) & (dist1_mean != nodatavalue), 0, dist2_mean)
          dist2_uncertainty = np.where((dist2_uncertainty == nodatavalue) & (dist1_uncertainty != nodatavalue), 0, dist2_uncertainty)
          # Create disturbance arrays where the value is not 'nodatavalue' in both scenarios
          result_mean = np.where(dist1_mean == nodatavalue, nodatavalue, subtract_arrays(dist1_mean, dist2_mean))
          result_uncertainty = np.where(dist1_mean == nodatavalue, nodatavalue,
                                       propagate_uncertainty(dist1_mean, dist1_uncertainty, dist2_mean, dist2_uncertainty))
          # Round arrays before export
          result_mean_rounded = np.round(result_mean)
          # Set uncertainty to 0 where rounded difference is 0
          result_uncertainty = np.where(result_mean_rounded == 0, 0, result_uncertainty)
          # Export disturbance rasters
          export_array_as_tif(result_mean_rounded, mean_path, template=dist1_mean_path)
          export_array_as_tif(result_uncertainty, uncertainty_path, template=dist1_mean_path)
      else:
          # Define filenames and paths of disturbance .tifs
          output_filename = f"{disturbance_name}__{selected_model}.tif"
          output_path = join(disturbance_dir, output_filename)
          # Skip if file already exists (the skipped item still counts towards progress)
          if exists(output_path):
              progress_index += 1
              progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"
              continue
          # Define disturbance paths, assert that both exist
          dist1_path = join(disturbance_dir, f"{dist1_name}__{selected_model}.tif")
          assert exists(dist1_path), f"{dist1_path} does not exist."
          dist2_path = join(disturbance_dir, f"{dist2_name}__{selected_model}.tif")
          assert exists(dist2_path), f"{dist2_path} does not exist."
          # Read arrays
          dist1_array_temp = gdal.Open(dist1_path).ReadAsArray()
          dist2_array_temp = gdal.Open(dist2_path).ReadAsArray()
          # Fill disturbance nodata values with 0 if they are not nodatavalues in the other disturbance
          # The dist2 fill reads the already-filled dist1_array, not the raw *_temp array
          dist1_array = np.where((dist1_array_temp == nodatavalue) & (dist2_array_temp != nodatavalue), 0, dist1_array_temp)
          dist2_array = np.where((dist2_array_temp == nodatavalue) & (dist1_array != nodatavalue), 0, dist2_array_temp)
          # Create disturbance arrays where the value is not 'nodatavalue' in both scenarios
          result_array = np.where(dist1_array==nodatavalue, nodatavalue, subtract_arrays(dist1_array, dist2_array))
          # Round array before export
          result_array_rounded = np.round(result_array)
          # Export disturbance raster
          export_array_as_tif(result_array_rounded, output_path, template = dist1_path)

  # Update progress (reached only when the item was processed; skipped items updated it above)
  progress_index += 1
  progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"

# 5. Process area-based disturbances
# Each key of area_based_dictionary is a (scenario1, scenario2) pair and each
# value is the name of the disturbance raster written to disturbance_dir.
# Pairs whose outputs already exist are skipped, but still count as progress.
for scenario_pair, disturbance_name in area_based_dictionary.items():
  scenario1, scenario2 = scenario_pair

  if use_uncertainty:
    mean_path = join(disturbance_dir, f"mean__{disturbance_name}__{selected_model}.tif")
    uncertainty_path = join(disturbance_dir, f"uncertainty__{disturbance_name}__{selected_model}.tif")

    if not (exists(mean_path) and exists(uncertainty_path)):
      # Locate the mean and uncertainty predictions of both scenarios
      scenario1_base = f"{scenario1}__{selected_model}"
      scenario2_base = f"{scenario2}__{selected_model}"
      s1_mean_path = join(predictions_dir, f"mean__{scenario1_base}.tif")
      s1_unc_path = join(predictions_dir, f"uncertainty__{scenario1_base}.tif")
      s2_mean_path = join(predictions_dir, f"mean__{scenario2_base}.tif")
      s2_unc_path = join(predictions_dir, f"uncertainty__{scenario2_base}.tif")
      assert exists(s1_mean_path), f"mean__{scenario1_base}.tif does not exist."
      assert exists(s1_unc_path), f"uncertainty__{scenario1_base}.tif does not exist."
      assert exists(s2_mean_path), f"mean__{scenario2_base}.tif does not exist."
      assert exists(s2_unc_path), f"uncertainty__{scenario2_base}.tif does not exist."

      # Read arrays
      s1_mean = gdal.Open(s1_mean_path).ReadAsArray()
      s1_unc = gdal.Open(s1_unc_path).ReadAsArray()
      s2_mean = gdal.Open(s2_mean_path).ReadAsArray()
      s2_unc = gdal.Open(s2_unc_path).ReadAsArray()

      # Where only one scenario has data, treat the missing one as 0.
      # The order matters: scenario 2 is filled against the already-filled
      # scenario 1, so pixels that are nodata in both stay nodata in both.
      s1_mean = np.where((s1_mean == nodatavalue) & (s2_mean != nodatavalue), 0, s1_mean)
      s1_unc = np.where((s1_unc == nodatavalue) & (s2_unc != nodatavalue), 0, s1_unc)
      s2_mean = np.where((s2_mean == nodatavalue) & (s1_mean != nodatavalue), 0, s2_mean)
      s2_unc = np.where((s2_unc == nodatavalue) & (s1_unc != nodatavalue), 0, s2_unc)

      # Difference and propagated uncertainty, keeping nodata where scenario 1 has none
      mean_difference = np.where(s1_mean == nodatavalue, nodatavalue,
                                 subtract_arrays(s1_mean, s2_mean))
      uncertainty_difference = np.where(s1_mean == nodatavalue, nodatavalue,
                                        propagate_uncertainty(s1_mean, s1_unc, s2_mean, s2_unc))

      # Round the mean before export; a rounded difference of 0 carries no uncertainty
      mean_difference_rounded = np.round(mean_difference)
      uncertainty_difference = np.where(mean_difference_rounded == 0, 0, uncertainty_difference)

      # Export disturbance rasters
      export_array_as_tif(mean_difference_rounded, mean_path, template=s1_mean_path)
      export_array_as_tif(uncertainty_difference, uncertainty_path, template=s1_mean_path)

  else:
    dist_path = join(disturbance_dir, f"{disturbance_name}__{selected_model}.tif")

    if not exists(dist_path):
      scenario1_path = join(predictions_dir, f"{scenario1}__{selected_model}.tif")
      assert exists(scenario1_path), f"{scenario1_path} does not exist."
      scenario2_path = join(predictions_dir, f"{scenario2}__{selected_model}.tif")
      assert exists(scenario2_path), f"{scenario2_path} does not exist."

      # Read arrays
      s1_raw = gdal.Open(scenario1_path).ReadAsArray()
      s2_raw = gdal.Open(scenario2_path).ReadAsArray()

      # Zero-fill pixels that are nodata in only one of the two scenarios
      s1_filled = np.where((s1_raw == nodatavalue) & (s2_raw != nodatavalue), 0, s1_raw)
      s2_filled = np.where((s2_raw == nodatavalue) & (s1_filled != nodatavalue), 0, s2_raw)
      difference = np.where(s1_filled == nodatavalue, nodatavalue,
                            subtract_arrays(s1_filled, s2_filled))

      # Round array before export, then export the disturbance raster
      export_array_as_tif(np.round(difference), dist_path, template = scenario1_path)

  # Update progress (also reached for pairs that were skipped above)
  progress_index += 1
  progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"

print("All disturbances calculated.")

# Intactness

## Percentage loss

In [None]:
# Intactness is measured as the relative percentage loss of AGBD within an area of interest.

# This cell prints copy-paste ready assignments for every candidate baseline
# (from predictions_dir) and disturbance raster (from disturbance_dir).
# Ideally the baseline is the scenario with the least disturbance, and the
# disturbance raster is the difference between that and the current reality.

# Scenario outputs are all listed; uncertainty outputs are limited to 'mean' rasters
list_everything = source_dir == scenarios_dir
list_means_only = source_dir == uncertainty_dir

for baseline in os.listdir(predictions_dir):
  if list_everything or (list_means_only and 'mean' in baseline):
    print(f"selected_baseline = '{baseline}'")

for dist in os.listdir(disturbance_dir):
  if list_everything or (list_means_only and 'mean' in dist):
    print(f"selected_dist = '{dist}'")

In [None]:
# Select one baseline / disturbance pair (copy it from the list printed by the
# previous cell). Alternative pairs are kept commented out for convenience.
# selected_baseline = 'mean__2021_no_disturbance_since_1993__agbd_tekai_250625_003858.tif'
# selected_dist = 'mean__2021_disturbance_since_1993__agbd_tekai_250625_003858.tif'
# selected_baseline = 'mean__2021_oldgrowth_all_land__agbd_tekai_250625_003858.tif'
# selected_dist = 'mean__2021_disturbance_since_oldgrowth__agbd_tekai_250625_003858.tif'
# selected_baseline = 'mean__2024_no_disturbance_since_1996__agbd_tekai_250625_003858.tif'
# selected_dist = 'mean__2024_disturbance_since_1996__agbd_tekai_250625_003858.tif'
selected_baseline = 'mean__2024_oldgrowth_all_land__agbd_tekai_250625_003858.tif'
selected_dist = 'mean__2024_disturbance_since_oldgrowth__agbd_tekai_250625_003858.tif'

# Define the baseline name based on source directory.
# The name is the first '__' separated field of a scenario filename and the
# second field of an uncertainty filename (which starts with e.g. 'mean__').
# Failing loudly here avoids silently reusing a base_dist_name left over from
# an earlier run of this cell.
if source_dir == scenarios_dir:
  name_field = 0
elif source_dir == uncertainty_dir:
  name_field = 1
else:
  raise ValueError(f"source_dir must be scenarios_dir or uncertainty_dir, not '{source_dir}'.")
base_dist_name = f"{selected_baseline.split('__')[name_field]}__{selected_dist.split('__')[name_field]}"

# The leading '_' separated token of the name selects the forest mask
# (presumably the scenario year, e.g. '2024' - verify against the mask filenames)
forest_mask_year = base_dist_name.split('_')[0]

intactness_baseline_dist_dir = join(intactness_dir, base_dist_name)
makedirs(intactness_baseline_dist_dir, exist_ok=True)

percentage_filename = f"percentage_change__{base_dist_name}__{selected_model}.tif"
percentage_path = join(intactness_baseline_dist_dir, percentage_filename)

if not exists(percentage_path):
  # Define filenames and directories
  selected_baseline_path = join(predictions_dir, selected_baseline)
  selected_dist_path = join(disturbance_dir, selected_dist)
  selected_mask_path = join(masks_dir, f"mask_forest_{forest_mask_year}.tif")

  # Convert to arrays
  selected_baseline_array = gdal.Open(selected_baseline_path).ReadAsArray()
  selected_dist_array = gdal.Open(selected_dist_path).ReadAsArray()
  selected_mask_array = gdal.Open(selected_mask_path).ReadAsArray()

  # A percentage is only defined where all three inputs hold data and the
  # baseline is non-zero. Dividing by a zero baseline would write inf / NaN
  # into the raster, which later breaks the quantile calculation.
  has_data = ((selected_mask_array != nodatavalue)
              & (selected_baseline_array != nodatavalue)
              & (selected_dist_array != nodatavalue))
  valid_mask = has_data & (selected_baseline_array != 0)
  zero_baseline_count = int(np.count_nonzero(has_data & ~valid_mask))
  if zero_baseline_count > 0:
    print(f"{zero_baseline_count:,} pixels have a baseline of 0 and are set to nodata (percentage change undefined).")

  # np.where evaluates the division for every pixel, so silence the warnings
  # raised by the pixels that are discarded by valid_mask anyway
  with np.errstate(divide='ignore', invalid='ignore'):
    percentage_values = selected_dist_array / selected_baseline_array * 100
  percentage_array = np.where(valid_mask, percentage_values, nodatavalue)

  export_array_as_tif(percentage_array, percentage_path, template = selected_baseline_path)
  print(f"{percentage_filename} has been exported.")

else: print(f"{percentage_filename} already exists.")

## Quantiles (relative intactness)

In [None]:
# Print copy-paste ready option lists for the relative intactness cell below.

# Polygons that should never be offered for masking relative intactness quantiles
polygons_to_exclude = ['template.gpkg', 'project_area_buffered_bbox.gpkg']

# Select baseline / disturbance pairs to measure relative intactness.
# Only sub-directories are pairs; stray files in intactness_dir are ignored.
# The loop variable avoids the name 'dir', which would shadow the builtin
# for the rest of the notebook session.
print("baseline_disturbance_pairs = [")
for pair_name in os.listdir(intactness_dir):
  if os.path.isdir(join(intactness_dir, pair_name)):
    print(f"'{pair_name}',")
print("]\n")

# Select polygons to mask and calculate quantiles.
# Generated '*_inverse' polygons are helpers, not options, and only .gpkg
# files are listed because the masking code strips that extension.
print("mask_polygons = [")
for polygon in os.listdir(polygons_dir):
  if polygon.endswith('.gpkg') and polygon not in polygons_to_exclude and 'inverse' not in polygon:
    print(f"'{polygon}',")
# None is offered as an option: it scores the raster without a polygon mask
print(None)
print("]")

In [None]:
# Baseline / disturbance pairs to score. Each entry is the name of a
# sub-directory of intactness_dir that holds the pair's percentage change raster.
baseline_disturbance_pairs = [
'2021_no_disturbance_since_1993__2021_disturbance_since_1993',
'2021_oldgrowth_all_land__2021_disturbance_since_oldgrowth',
'2024_no_disturbance_since_1996__2024_disturbance_since_1996',
'2024_oldgrowth_all_land__2024_disturbance_since_oldgrowth',
]

# Polygon files (in polygons_dir) used to mask the percentage change raster
# before quantiles are calculated. Uncomment the ones to use; uncomment None
# to also score the raster without any polygon mask.
mask_polygons = [
# 'project_area.gpkg',
# 'peninsular_malaysia.gpkg',
# 'lu_yong.gpkg',
# 'lu_yong_lipis.gpkg',
# 'lu_berkelah_jerantut.gpkg',
# 'lu_tekai_tembeling.gpkg',
# 'lu_ais.gpkg',
# 'lu_pa_taman_negara_krau.gpkg',
# 'lu_tekam.gpkg',
# 'lu_berkelah_temerloh.gpkg',
# 'lu_remen_chereh.gpkg',
# 'lu_berkelah_kuantan.gpkg',
'forest_reserves.gpkg',
# 'gedi_area.gpkg',
# None
]

# When True, pixels that are nodata in the percentage change raster and lie
# inside the polygon mask are given a score of 0 (representing non-forest areas).
# NOTE(review): when False those pixels are not reset by the scoring code -
# confirm that the exported value is the intended nodatavalue.
convert_non_forest_nodatavalue_to_0 = True

# Define top score for intactness rating (e.g. 10 for 1 - 10 scale)
top_score = 10

# Margin for small variation in estimated change values due to uncertainty.
# Percentage change values greater than this margin are set to 0 and receive
# the top score, i.e. they are considered 'intact'.
top_score_margin = -0.01

# Number of quantile classes used for the remaining (non-zero) values;
# they receive the scores 1 to top_score - 1
num_quantiles = top_score - 1

print(f"Calculating {num_quantiles} quantiles for negative percentage change (scores 1-{num_quantiles}), with score {top_score} reserved for >= {top_score_margin} % change.\n")

# Create one boolean mask per selected polygon on the template grid
# (True = pixel inside the polygon, False = pixel outside it).
# Masks are kept per polygon so that, when several polygons are selected,
# each output is masked by its own polygon rather than by their intersection.
template_polygon_path = join(polygons_dir, "template.gpkg")
polygon_mask_arrays = {}

for mask_polygon in mask_polygons:
  if mask_polygon is None:
    continue
  polygon_stem = os.path.splitext(mask_polygon)[0]

  # Create an inverse polygon (template extent minus the polygon) for masking
  inverse_polygon_path = join(polygons_dir, f"{polygon_stem}_inverse.gpkg")
  if not exists(inverse_polygon_path):
    template_polygon = gpd.read_file(template_polygon_path)
    polygon_read = gpd.read_file(join(polygons_dir, mask_polygon))
    # Merge all features first: an element-wise GeoSeries.difference would
    # only subtract the first feature of a multi-feature polygon file.
    polygon_geometries = polygon_read.geometry
    if hasattr(polygon_geometries, "union_all"):
      polygon_union = polygon_geometries.union_all()
    else:
      polygon_union = polygon_geometries.unary_union  # older geopandas releases
    inverse_polygon = template_polygon.geometry.iloc[0].difference(polygon_union)
    # NOTE(review): assumes the template and the polygon share one CRS - confirm
    inverse_polygon_gdf = gpd.GeoDataFrame({'geometry': [inverse_polygon]}, crs=polygon_read.crs)
    inverse_polygon_gdf.to_file(inverse_polygon_path, driver="GPKG")
    print(f"An inverse masking polygon for {mask_polygon} has been created in {polygons_dir}.")
  else: print(f"An inverse masking polygon for {mask_polygon} already exists.")

  # Burn the inverse polygon into a temporary copy of the template; every
  # pixel that ends up as nodatavalue lies outside the polygon.
  print("Creating a polygon masking array.")
  temp_mask_path = join(intactness_dir, f"temp_mask_{polygon_stem}.tif")
  copyfile(template_tif_path, temp_mask_path)
  try:
    burn_polygon_to_raster(temp_mask_path, inverse_polygon_path, fixed_value=nodatavalue, all_touched=False)
    temp_mask_array = gdal.Open(temp_mask_path).ReadAsArray()
  finally:
    # Never leave the temporary raster behind, even if burning fails
    if exists(temp_mask_path):
      os.remove(temp_mask_path)
  polygon_mask_arrays[mask_polygon] = temp_mask_array != nodatavalue

# Percentile cut points between the quantile classes (num_quantiles - 1 values)
quantile_cut_points = np.linspace(0, 100, num_quantiles + 1)[1:-1]

for base_dist_name in baseline_disturbance_pairs:
  intactness_baseline_dist_dir = join(intactness_dir, base_dist_name)
  percentage_filename = f"percentage_change__{base_dist_name}__{selected_model}"
  percentage_path = join(intactness_baseline_dist_dir, f"{percentage_filename}.tif")
  assert exists(percentage_path), f"{percentage_path} does not exist."

  for mask_polygon in mask_polygons:

    if mask_polygon is None:
      polygon_mask_array = None
      relative_intactness_name = f'intactness__{top_score}_quantiles__{base_dist_name}__{selected_model}'
    else:
      polygon_stem = os.path.splitext(mask_polygon)[0]
      polygon_mask_array = polygon_mask_arrays[mask_polygon]
      relative_intactness_name = f'intactness__{polygon_stem}_{top_score}_quantiles__{base_dist_name}__{selected_model}'

      # Export a polygon-masked copy of the percentage raster. The mask array
      # is applied in memory instead of copying, burning and then overwriting
      # the raster on disk.
      percentage_masked_filename = f"{percentage_filename}__masked_{polygon_stem}.tif"
      percentage_masked_path = join(intactness_baseline_dist_dir, percentage_masked_filename)
      if not exists(percentage_masked_path):
        print(f"Masking {percentage_filename} with {mask_polygon}...")
        percentage_masked_array = gdal.Open(percentage_path).ReadAsArray()
        percentage_masked_array[~polygon_mask_array] = nodatavalue
        export_array_as_tif(percentage_masked_array, percentage_masked_path, compress = True)
        print(f"{percentage_filename} masked.")
      else: print(f"{percentage_masked_filename} already exists.")

    # Skip pairs that have already been scored
    relative_intactness_path = join(intactness_baseline_dist_dir, f'{relative_intactness_name}.tif')
    if exists(relative_intactness_path):
      print(f"{relative_intactness_name} already exists.")
      continue

    # Always track originally nodata pixels from the original percentage raster
    original_percentage_array = gdal.Open(percentage_path).ReadAsArray()
    originally_nodata_mask = original_percentage_array == nodatavalue

    # Apply the polygon mask to a working copy of the percentage array
    percentage_array = original_percentage_array.copy()
    if polygon_mask_array is not None:
      percentage_array[~polygon_mask_array] = nodatavalue

    # Record which pixels hold data before any value is changed, so that the
    # clamping below can never touch (or be confused by) nodata pixels
    valid_mask = percentage_array != nodatavalue

    # Capture original data for histogram before conversions
    original_valid_elements = percentage_array[valid_mask]

    # Set all values above 0 to 0, assuming only negative values are not intact
    percentage_array[valid_mask & (percentage_array > 0)] = 0

    # Set all values above the 'top score margin' to 0 to account for uncertainty
    percentage_array[valid_mask & (percentage_array > top_score_margin)] = 0

    # Separate zero (intact) and non-zero (disturbed) valid pixels
    zero_elements = valid_mask & (percentage_array == 0)
    quantile_mask = valid_mask & ~zero_elements
    non_zero_valid_elements = percentage_array[quantile_mask]

    # Calculate quantile boundaries for non-zero valid elements only
    if non_zero_valid_elements.size > 0:
      quantiles = np.percentile(non_zero_valid_elements, quantile_cut_points)
    else:
      quantiles = np.array([])

    # Start from nodatavalue everywhere so that every pixel which is not
    # explicitly scored below (nodata, outside the polygon) is exported as nodata.
    # NOTE(review): a float array replaces the original object array - assumes
    # export_array_as_tif accepts any numeric array; confirm.
    relative_intactness_array = np.full(percentage_array.shape, nodatavalue, dtype=np.float64)

    # Assign scores 1 to num_quantiles: a value v receives score i when
    # quantiles[i-2] < v <= quantiles[i-1] (open-ended at both extremes)
    relative_intactness_array[quantile_mask] = np.searchsorted(quantiles, non_zero_valid_elements, side='left') + 1

    # Set all zero values to top score
    relative_intactness_array[zero_elements] = top_score

    # Convert non-forest areas inside polygon to 0
    if convert_non_forest_nodatavalue_to_0:
      if polygon_mask_array is None:
        non_forest_inside_polygon = originally_nodata_mask
      else:
        non_forest_inside_polygon = originally_nodata_mask & polygon_mask_array
      relative_intactness_array[non_forest_inside_polygon] = 0

    export_array_as_tif(relative_intactness_array, relative_intactness_path)

    # Prepare data for CSV: Collect lower and upper bounds for each category
    ranges_data = {'Score': [], 'Lower_Bound': [], 'Upper_Bound': []}

    # Positive values are clamped to 0 before the margin is applied, so the
    # effective threshold for the top score can never be above 0
    intact_threshold = min(top_score_margin, 0)

    # Add ranges for scores 1 to num_quantiles (non-zero values)
    for score in range(1, num_quantiles + 1):
      if 2 <= score <= len(quantiles) + 1:
        lower_bound = quantiles[score - 2]
      else:
        lower_bound = float('-inf')
      if score == num_quantiles:
        # The last class ends just below the top score threshold
        upper_bound = intact_threshold - 0.000000001
      elif score <= len(quantiles):
        upper_bound = quantiles[score - 1]
      else:
        upper_bound = float('inf')
      ranges_data['Score'].append(score)
      ranges_data['Lower_Bound'].append(lower_bound)
      ranges_data['Upper_Bound'].append(upper_bound)

    # Add entry for top score (values from margin to 0)
    ranges_data['Score'].append(top_score)
    ranges_data['Lower_Bound'].append(intact_threshold)
    ranges_data['Upper_Bound'].append(0)

    # Create DataFrame and save to CSV
    relative_intactness_df = pd.DataFrame(ranges_data)
    relative_intactness_csv_path = join(intactness_baseline_dist_dir, f'{relative_intactness_name}.csv')
    relative_intactness_df.to_csv(relative_intactness_csv_path, index=False)

    # Generate and save histogram of the original (unclamped) values as .png
    histogram_path = join(intactness_baseline_dist_dir, f'{relative_intactness_name}.png')
    counts, bins = np.histogram(original_valid_elements, bins='auto')

    # Count how many values became 0 after conversions
    zero_count_after_conversion = int(np.count_nonzero(zero_elements))

    # Find the bin containing 0 and blank it: it would otherwise dwarf all
    # other bins. Its size is reported as a text label instead.
    zero_idx = next((i for i, (left, right) in enumerate(zip(bins[:-1], bins[1:])) if left <= 0 <= right), None)
    if zero_idx is not None:
      counts[zero_idx] = 0

    plt.figure()
    plt.bar(bins[:-1], counts, width=np.diff(bins), align='edge')
    if zero_idx is not None:
      x_center = (bins.min() + bins.max()) / 2
      y_max = max(counts)
      plt.text(x_center, y_max * 0.9,
               f'0 value frequency = {zero_count_after_conversion:,}',
               ha='center', va='center', fontweight='bold',
               bbox=dict(boxstyle='round,pad=0.5', facecolor='white', alpha=0.9))
    plt.title(f'{relative_intactness_name} Histogram')
    plt.xlabel('Value')
    plt.ylabel('Frequency')
    plt.gca().yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: format(int(x), ',')))
    plt.tight_layout()
    plt.savefig(histogram_path)
    plt.close()

# Disconnect runtime

In [None]:
# Unassign (disconnect) the Colab runtime once every cell above has finished.
# Useful for stopping background execution
runtime.unassign()