<a href="https://colab.research.google.com/github/joekelly211/masfi/blob/main/8_differences.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports, directories and global functions

In [None]:
# Base directory holding all project data.
# Personal drive: start with '/content/drive/MyDrive/'
# Shared drive:   start with '/gdrive/Shareddrives/' (the shared drive must be created first)

base_dir = "/gdrive/Shareddrives/masfi"
# base_dir = '/content/drive/MyDrive/masfi'

# Mount Google Drive at the mount point that matches the chosen base_dir
from google.colab import drive
import os
import sys
if not base_dir.startswith(('/gdrive/Shareddrives/', '/content/drive/MyDrive/')):
  # Unsupported location: nothing is mounted, only a hint is shown
  print("Create a base_dir beginning with '/gdrive/Shareddrives/' or '/content/drive/MyDrive/'.")
elif base_dir.startswith('/gdrive/Shareddrives/'):
  drive.mount('/gdrive', force_remount=True)
else:
  # Personal drive: the project folder is created on first use
  drive.mount('/content/drive', force_remount=True)
  os.makedirs(base_dir, exist_ok=True)

# Make modules stored in base_dir importable (added once only)
_path_to_add = os.path.realpath(base_dir)
if _path_to_add not in sys.path:
    sys.path.append(_path_to_add)

In [None]:
# Capture outputs
# NOTE(review): IPython cell magics are normally only recognised on the first
# line of a cell - confirm this cell still runs with the comment line above it.
%%capture
# Installs and upgrades
# geopandas and rasterio are installed with pip; the GDAL command line tools with apt-get.
!pip install geopandas
!pip install rasterio
!apt-get install -y gdal-bin

In [None]:
# Imports
import geopandas as gpd
from google.colab import runtime
from os import makedirs
from os.path import join, exists
from osgeo import gdal, ogr
gdal.UseExceptions()
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from rasterio.features import rasterize
from shutil import copyfile

In [None]:
# Directories read by this notebook
areas_dir = join(base_dir, "1_areas")
scenarios_dir = join(base_dir, "6_scenarios")
uncertainty_dir = join(base_dir, "7_uncertainty")
polygons_dir = join(base_dir, "1_areas", "polygons")
masks_dir = join(base_dir, "6_scenarios", "scenario_masks")

# Directory written by this notebook (created if missing)
differences_dir = join(base_dir, "8_differences")
makedirs(differences_dir, exist_ok=True)

In [None]:
# Global function: write an array to a single-band GeoTIFF on the template grid
template_tif_path = join(areas_dir, "template.tif")
nodatavalue = -11111
compress = True
def export_array_as_tif(input_array, output_tif, template=template_tif_path, nodatavalue=nodatavalue, compress=compress, dtype=gdal.GDT_Float32):
    """Write input_array to output_tif using the template raster's grid.

    The output takes its size, geotransform and projection from band 1 of
    `template`. `nodatavalue` is set as the band's nodata value. When
    `compress` is true the file is written with ZSTD level 1 compression.
    `dtype` is overridden with Int16 when the array itself is int16.
    """
    template_ds = gdal.Open(template)
    template_band = template_ds.GetRasterBand(1)
    geotransform = template_ds.GetGeoTransform()
    projection = template_ds.GetProjection()
    # ZSTD level 1: good speed / size ratio
    creation_options = ['COMPRESS=ZSTD', 'ZSTD_LEVEL=1'] if compress else []
    if input_array.dtype == 'int16':
        dtype = gdal.GDT_Int16
    output_ds = gdal.GetDriverByName("GTiff").Create(
        output_tif, template_band.XSize, template_band.YSize, 1, dtype,
        options=creation_options)
    output_ds.GetRasterBand(1).WriteArray(input_array)
    output_ds.GetRasterBand(1).SetNoDataValue(nodatavalue)
    output_ds.SetGeoTransform(geotransform)
    output_ds.SetProjection(projection)
    # Dropping the references closes the datasets and flushes the output to disk
    template_ds = output_ds = None

# Global function: burn polygon values into band 1 of an existing raster (in place)
def burn_polygon_to_raster(raster_path, polygon_path, fixed=True, fixed_value=1, column_name=None, all_touched=True):
    """Rasterize the first layer of polygon_path onto band 1 of raster_path.

    fixed=True burns `fixed_value` everywhere the polygons fall.
    fixed=False burns a per-feature attribute instead: `column_name` if
    given, otherwise the layer's first field.
    all_touched=True adds the ALL_TOUCHED=TRUE rasterize option.
    Raises ValueError when either input cannot be opened.
    """
    raster = None
    vector = None
    try:
        raster = gdal.Open(raster_path, gdal.GA_Update)
        vector = ogr.Open(polygon_path)
        if not (raster and vector):
            raise ValueError("Cannot open input files")
        layer = vector.GetLayer()
        burn_options = []
        if all_touched:
            burn_options.append("ALL_TOUCHED=TRUE")
        if not fixed:
            # Attribute burn: fall back to the first field when no column is named
            attr_name = column_name or layer.GetLayerDefn().GetFieldDefn(0).GetName()
            burn_options.append(f"ATTRIBUTE={attr_name}")
            gdal.RasterizeLayer(raster, [1], layer, options=burn_options)
        else:
            gdal.RasterizeLayer(raster, [1], layer, burn_values=[fixed_value], options=burn_options)
    finally:
        # Always flush what was written and release both handles
        if raster: raster.FlushCache()
        raster = vector = None

# Select source and model

In [None]:
# Select whether to source predictions from scenarios_dir or uncertainty_dir.
# If available, uncertainty_dir should be selected so that uncertainty can
# be propagated and scenario 'mean' iteration values used.

# source_dir = uncertainty_dir
source_dir = scenarios_dir

# If uncertainty was selected but its directory does not exist, fall back to
# scenarios first, so the message below names the directory actually used.
if source_dir == uncertainty_dir and not exists(uncertainty_dir):
  print("The uncertainty directory does not yet exist. Defaulting to scenarios directory.")
  source_dir = scenarios_dir

print(f"{source_dir.split('/')[-1]} has been selected as the source directory for predictions")
print("to calculate disturbance and intactness.\n")

# Short label ('scenarios_dir' / 'uncertainty_dir') taken from the text after the last underscore
source_dir_name = f"{source_dir.split('_')[-1]}_dir"

# Select the model: print one ready-to-paste assignment per model directory.
# Sorted because os.listdir returns entries in arbitrary order.
for subdir in sorted(os.listdir(source_dir)):
  if 'scenario_masks' not in subdir:
    print(f"selected_model = '{subdir}'")

In [None]:
selected_model = 'agbd_251203_161707'

# Locate the prediction rasters of the selected model inside the source directory
selected_model_dir = join(source_dir, selected_model)
if source_dir == scenarios_dir:
  predictions_dir = join(selected_model_dir, 'scenario_predictions')
if source_dir == uncertainty_dir:
  predictions_dir = join(selected_model_dir, 'uncertainty_predictions')
  predictions_unmasked_dir = join(selected_model_dir, 'uncertainty_predictions_unmasked')

# A difference needs at least two predictions; output directories are only
# created when that is the case.
if len(os.listdir(predictions_dir)) >= 2:
  model_differences_dir = join(differences_dir, f"{selected_model}_{source_dir_name}")
  disturbance_dir = join(model_differences_dir, 'disturbance')
  intactness_dir = join(model_differences_dir, 'intactness')
  makedirs(model_differences_dir, exist_ok=True)
  makedirs(disturbance_dir, exist_ok=True)
  makedirs(intactness_dir, exist_ok=True)
else:
  print(f"At least 2 predictions must exist in {source_dir} to calculate differences.")

# Disturbance

## Define type and period

In [None]:
# Disturbance is measured as absolute AGBD loss.
# This block discovers the available scenarios from the prediction filenames
# and derives the set of years they refer to.

# Scenario predictions: the scenario is the filename part before the first "__"
if source_dir == scenarios_dir:
  scenarios = {file.split("__")[0] for file in os.listdir(predictions_dir)}

# OR uncertainty predictions: filenames are "<stat>__<scenario>__..."
if source_dir == uncertainty_dir:
  prediction_stats = {}
  for file in os.listdir(predictions_dir):
      parts = file.split("__")
      if len(parts) < 2:
          continue
      prediction_stats.setdefault(parts[1], set()).add(parts[0])
  # Only keep scenarios that have both 'uncertainty' and 'mean' prediction stats
  scenarios = {scenario for scenario, stats in prediction_stats.items()
               if {'uncertainty', 'mean'} <= stats}

# Categorise years from scenarios
years = set()
plain_years = set()
oldgrowth_years = set()
oldgrowth_all_land_years = set()
for s in scenarios:
    # Plain year scenario, e.g. "2024"
    if s.isdigit():
        plain_years.add(int(s))
        years.add(int(s))
        continue
    # "_oldgrowth_all_land" must be tested before its prefix "_oldgrowth"
    if "_oldgrowth_all_land" in s:
        year = s.split("_oldgrowth_all_land")[0]
        if year.isdigit():
            oldgrowth_all_land_years.add(int(year))
            years.add(int(year))
        continue
    if "_oldgrowth" in s:
        year = s.split("_oldgrowth")[0]
        if year.isdigit():
            oldgrowth_years.add(int(year))
            years.add(int(year))
        continue
    if "_no_disturbance_since_" in s or "_no_degradation_since_" in s:
        year = s.split("_")[0]
        if year.isdigit():
            years.add(int(year))
        # Both patterns contain "_since_"; the year before the since-year is
        # registered so the pairing loops can reach it as (year_b + 1)
        since_year = s.split("_since_")[1]
        if since_year.isdigit():
            years.add(int(since_year) - 1)
years_sorted = sorted(years)

# Output dictionaries: (scenario A, scenario B) -> name of the difference raster
disturbance_since_dictionary = {}
degradation_since_dictionary = {}
deforestation_since_dictionary = {}
print("disturbance_since_dictionary = {\n")

# 1. Disturbance since a given year: pair each plain year with its matching
#    "<year>_no_disturbance_since_<since>" scenario
for year_a in years_sorted:
    a_str = str(year_a)
    for year_b in years_sorted:
        if year_b >= year_a:
            continue
        b_str, b_plus1 = str(year_b), str(year_b + 1)
        no_disturbance = f"{a_str}_no_disturbance_since_{b_plus1}"
        if year_a not in plain_years or no_disturbance not in scenarios:
            continue
        result_name = f"{a_str}_disturbance_since_{b_plus1}"
        print(f"# Disturbance in {a_str} caused by events since {b_plus1}")
        print(f"  ('{a_str}', '{no_disturbance}'):")
        print(f"    '{result_name}',")
        print("")
        disturbance_since_dictionary[(a_str, no_disturbance)] = result_name

# Disturbance since an oldgrowth state: plain year vs "<year>_oldgrowth_all_land"
for year in years_sorted:
    y_str = str(year)
    oldgrowth_all_land = f"{y_str}_oldgrowth_all_land"
    if not (year in plain_years and year in oldgrowth_all_land_years and oldgrowth_all_land in scenarios):
        continue
    result_name = f"{y_str}_disturbance_since_oldgrowth"
    print(f"# Disturbance in {y_str} caused by events since an oldgrowth state.")
    print(f"  ('{y_str}', '{oldgrowth_all_land}'):")
    print(f"    '{result_name}',")
    print("")
    disturbance_since_dictionary[(y_str, oldgrowth_all_land)] = result_name
print("}\n")

# 2. Degradation since dictionary: pair each plain year with its matching
#    "<year>_no_degradation_since_<since>" scenario
print("degradation_since_dictionary = {\n")
for year_a in years_sorted:
    a_str = str(year_a)
    for year_b in years_sorted:
        if year_b >= year_a:
            continue
        b_str, b_plus1 = str(year_b), str(year_b + 1)
        no_degradation = f"{a_str}_no_degradation_since_{b_plus1}"
        if year_a not in plain_years or no_degradation not in scenarios:
            continue
        result_name = f"{a_str}_degradation_since_{b_plus1}"
        print(f"# Degradation in {a_str} caused by events since {b_plus1}")
        print(f"  ('{a_str}', '{no_degradation}'):")
        print(f"    '{result_name}',")
        print("")
        degradation_since_dictionary[(a_str, no_degradation)] = result_name

# Degradation since an old-growth state: plain year vs "<year>_oldgrowth"
for year in years_sorted:
    y_str = str(year)
    oldgrowth = f"{y_str}_oldgrowth"
    if not (year in plain_years and year in oldgrowth_years and oldgrowth in scenarios):
        continue
    result_name = f"{y_str}_degradation_since_oldgrowth"
    print(f"# Degradation in {y_str} caused by events since an old-growth state")
    print(f"  ('{y_str}', '{oldgrowth}'):")
    print(f"    '{result_name}',")
    print("")
    degradation_since_dictionary[(y_str, oldgrowth)] = result_name
print("}\n")

# 3. Deforestation since dictionary: an entry is produced only where both the
#    degradation and the disturbance result exist for the same year / since-year
print("deforestation_since_dictionary = {\n")
for year_a in years_sorted:
    a_str = str(year_a)
    for year_b in years_sorted:
        if year_b >= year_a:
            continue
        b_str, b_plus1 = str(year_b), str(year_b + 1)
        deg_key = (a_str, f"{a_str}_no_degradation_since_{b_plus1}")
        dist_key = (a_str, f"{a_str}_no_disturbance_since_{b_plus1}")
        if deg_key not in degradation_since_dictionary or dist_key not in disturbance_since_dictionary:
            continue
        deg_result = degradation_since_dictionary[deg_key]
        dist_result = disturbance_since_dictionary[dist_key]
        defor_result = f"{a_str}_deforestation_since_{b_plus1}"
        print(f"# Deforestation in {a_str} caused by events since {b_plus1}")
        print(f"  ('{deg_result}', '{dist_result}'):")
        print(f"    '{defor_result}',")
        print("")
        deforestation_since_dictionary[(deg_result, dist_result)] = defor_result

# Deforestation since an old-growth state, from the two oldgrowth results
for year in years_sorted:
    y_str = str(year)
    deg_key = (y_str, f"{y_str}_oldgrowth")
    dist_key = (y_str, f"{y_str}_oldgrowth_all_land")
    if deg_key not in degradation_since_dictionary or dist_key not in disturbance_since_dictionary:
        continue
    deg_result = degradation_since_dictionary[deg_key]
    dist_result = disturbance_since_dictionary[dist_key]
    defor_result = f"{y_str}_deforestation_since_oldgrowth"
    print(f"# Deforestation in {y_str} caused by events since an old-growth state")
    print(f"  ('{deg_result}', '{dist_result}'):")
    print(f"    '{defor_result}',")
    print("")
    deforestation_since_dictionary[(deg_result, dist_result)] = defor_result
print("}\n")


# 4. Specific year effects dictionary
print("specific_year_effects_dictionary = {\n")

# Group every "<year>_<type>_since_<baseline>" result as
# effects_by_year[year of interest][type][baseline year] = result name.
# Old-growth results have no baseline year and are left out.
effects_by_year = {}
for dist_type, since_dictionary in (
        ('degradation', degradation_since_dictionary),
        ('disturbance', disturbance_since_dictionary),
        ('deforestation', deforestation_since_dictionary)):
    marker = f"_{dist_type}_since_"
    for result_name in since_dictionary.values():
        if marker not in result_name or "_oldgrowth" in result_name:
            continue
        parts = result_name.split(marker)
        year_of_interest = parts[0]
        baseline_year = int(parts[1])
        effects_by_year.setdefault(year_of_interest, {}).setdefault(dist_type, {})[baseline_year] = result_name

# Build and print the entries, grouped by year of interest and disturbance type
specific_year_effects_dictionary = {}
for year_of_interest in sorted(effects_by_year):
    year_effects = effects_by_year[year_of_interest]

    # Build all effects for this year first, so the heading is only printed when needed
    all_type_effects = {}
    for dist_type in ['degradation', 'deforestation', 'disturbance']:
        if dist_type not in year_effects:
            continue
        since_by_baseline = year_effects[dist_type]
        baseline_years = sorted(since_by_baseline)
        type_effects = []
        # The effect of one year is "since year" minus "since year + 1",
        # so only consecutive baseline years form a pair
        for current_year, next_year in zip(baseline_years, baseline_years[1:]):
            if next_year != current_year + 1:
                continue
            since_current = since_by_baseline[current_year]
            since_next = since_by_baseline[next_year]
            effect_name = f"{year_of_interest}_effect_of_{dist_type}_in_{current_year}"
            type_effects.append((since_current, since_next, effect_name, current_year))
            specific_year_effects_dictionary[(since_current, since_next)] = effect_name
        # Same-year effect (copy and rename): one-element key, nothing to subtract
        if baseline_years and baseline_years[-1] == int(year_of_interest):
            last_year = baseline_years[-1]
            since_same_year = since_by_baseline[last_year]
            same_year_effect = f"{year_of_interest}_effect_of_{dist_type}_in_{last_year}"
            type_effects.append((since_same_year, None, same_year_effect, last_year))
            specific_year_effects_dictionary[(since_same_year,)] = same_year_effect
        if type_effects:
            all_type_effects[dist_type] = type_effects

    # Only print if there are effects for this year
    if not all_type_effects:
        continue
    print(f"# Effects in {year_of_interest}")
    for dist_type in ['degradation', 'deforestation', 'disturbance']:
        if dist_type not in all_type_effects:
            continue
        print(f"  # {dist_type.capitalize()} effects")
        # Sort by effect year chronologically
        sorted_effects = sorted(all_type_effects[dist_type], key=lambda effect: effect[3])
        for since_current, since_next, effect_name, effect_year in sorted_effects:
            if since_next is None:
                key_text = f"('{since_current}',)"
            else:
                key_text = f"('{since_current}', '{since_next}')"
            print(f"  {key_text}:")
            print(f"    '{effect_name}',")
        print("")
print("}\n")

# 5. Area-based dictionary
# Alternate scenarios named after a polygon in polygons_dir are paired with the
# plain year scenario they start with:
#   "<year>_<polygon>_deforestation_<YYYY>_<X>m_degradation_buffer"
#   "<year>_<polygon>_degradation_<YYYY>"
print("area_based_dictionary = {")
# Get polygon names from polygons directory ('.gpkg' extension removed)
polygon_names = set()
if os.path.exists(polygons_dir):
    for file in os.listdir(polygons_dir):
        if file.endswith('.gpkg'):
            polygon_names.add(file[:-5])
area_based_entries = []
for scenario in scenarios:
    parts = scenario.split('_')
    # Check for deforestation (ends with "Xm_degradation_buffer")
    if len(parts) >= 5 and parts[-1] == 'buffer' and parts[-2] == 'degradation' and parts[-3].endswith('m'):
        alt_year, year_affix, dist_type = parts[0], parts[-4], parts[-5]
        polygon_name = '_'.join(parts[1:-5])
        if polygon_name in polygon_names and dist_type == 'deforestation':
            output_name = f"{alt_year}_deforestation_of_{polygon_name}_{year_affix}"
            area_based_entries.append((scenario, alt_year, output_name))
    # Check for degradation (ends with "degradation_YYYY")
    elif len(parts) >= 3 and parts[-2] == 'degradation' and parts[-1].isdigit() and len(parts[-1]) == 4:
        alt_year, year_affix = parts[0], parts[-1]
        polygon_name = '_'.join(parts[1:-2])
        if polygon_name in polygon_names:
            output_name = f"{alt_year}_degradation_of_{polygon_name}_{year_affix}"
            area_based_entries.append((scenario, alt_year, output_name))
# Build the dictionary as well as printing it, like the other four sections,
# so the variable exists even if the printed output is never pasted into a cell.
area_based_dictionary = {}
if area_based_entries:
    print("\n# Area-based disturbance from alternate scenarios")
    for scenario, alt_year, output_name in sorted(area_based_entries):
        print(f"  ('{scenario}', '{alt_year}'):")
        print(f"    '{output_name}',")
        area_based_dictionary[(scenario, alt_year)] = output_name
print("}\n")

In [None]:
# Dictionaries of differences to calculate. The literals below have the same
# layout as the text printed by the previous cell, so they can be pasted from
# its output and edited by hand.
# Keys are tuples of input names; values are the name of the resulting output.
disturbance_since_dictionary = {

# Disturbance in 2024 caused by events since 1996
  ('2024', '2024_no_disturbance_since_1996'):
    '2024_disturbance_since_1996',

# Disturbance in 2024 caused by events since an oldgrowth state.
  ('2024', '2024_oldgrowth_all_land'):
    '2024_disturbance_since_oldgrowth',

}

degradation_since_dictionary = {

# Degradation in 2024 caused by events since 1996
  ('2024', '2024_no_degradation_since_1996'):
    '2024_degradation_since_1996',

# Degradation in 2024 caused by events since an old-growth state
  ('2024', '2024_oldgrowth'):
    '2024_degradation_since_oldgrowth',

}

# Keys here are the outputs of the two dictionaries above (degradation, disturbance)
deforestation_since_dictionary = {

# Deforestation in 2024 caused by events since 1996
  ('2024_degradation_since_1996', '2024_disturbance_since_1996'):
    '2024_deforestation_since_1996',

# Deforestation in 2024 caused by events since an old-growth state
  ('2024_degradation_since_oldgrowth', '2024_disturbance_since_oldgrowth'):
    '2024_deforestation_since_oldgrowth',

}

# Left empty: no specific year effects are requested in this configuration
specific_year_effects_dictionary = {

}

area_based_dictionary = {

# Area-based disturbance from alternate scenarios
  ('2024_road_mat_daling_deforestation_2023_30m_degradation_buffer', '2024'):
    '2024_deforestation_of_road_mat_daling_2023',
}


In [None]:
# disturbance_since_dictionary = {

# # Disturbance in 2021 caused by events since 1993
#   ('2021', '2021_no_disturbance_since_1993'):
#     '2021_disturbance_since_1993',

# # Disturbance in 2024 caused by events since 1996
#   ('2024', '2024_no_disturbance_since_1996'):
#     '2024_disturbance_since_1996',

# # Disturbance in 2024 caused by events since 1997
#   ('2024', '2024_no_disturbance_since_1997'):
#     '2024_disturbance_since_1997',

# # Disturbance in 2024 caused by events since 1998
#   ('2024', '2024_no_disturbance_since_1998'):
#     '2024_disturbance_since_1998',

# # Disturbance in 2024 caused by events since 1999
#   ('2024', '2024_no_disturbance_since_1999'):
#     '2024_disturbance_since_1999',

# # Disturbance in 2024 caused by events since 2000
#   ('2024', '2024_no_disturbance_since_2000'):
#     '2024_disturbance_since_2000',

# # Disturbance in 2024 caused by events since 2001
#   ('2024', '2024_no_disturbance_since_2001'):
#     '2024_disturbance_since_2001',

# # Disturbance in 2024 caused by events since 2002
#   ('2024', '2024_no_disturbance_since_2002'):
#     '2024_disturbance_since_2002',

# # Disturbance in 2024 caused by events since 2003
#   ('2024', '2024_no_disturbance_since_2003'):
#     '2024_disturbance_since_2003',

# # Disturbance in 2024 caused by events since 2004
#   ('2024', '2024_no_disturbance_since_2004'):
#     '2024_disturbance_since_2004',

# # Disturbance in 2024 caused by events since 2005
#   ('2024', '2024_no_disturbance_since_2005'):
#     '2024_disturbance_since_2005',

# # Disturbance in 2024 caused by events since 2006
#   ('2024', '2024_no_disturbance_since_2006'):
#     '2024_disturbance_since_2006',

# # Disturbance in 2024 caused by events since 2007
#   ('2024', '2024_no_disturbance_since_2007'):
#     '2024_disturbance_since_2007',

# # Disturbance in 2024 caused by events since 2008
#   ('2024', '2024_no_disturbance_since_2008'):
#     '2024_disturbance_since_2008',

# # Disturbance in 2024 caused by events since 2009
#   ('2024', '2024_no_disturbance_since_2009'):
#     '2024_disturbance_since_2009',

# # Disturbance in 2024 caused by events since 2010
#   ('2024', '2024_no_disturbance_since_2010'):
#     '2024_disturbance_since_2010',

# # Disturbance in 2024 caused by events since 2011
#   ('2024', '2024_no_disturbance_since_2011'):
#     '2024_disturbance_since_2011',

# # Disturbance in 2024 caused by events since 2012
#   ('2024', '2024_no_disturbance_since_2012'):
#     '2024_disturbance_since_2012',

# # Disturbance in 2024 caused by events since 2013
#   ('2024', '2024_no_disturbance_since_2013'):
#     '2024_disturbance_since_2013',

# # Disturbance in 2024 caused by events since 2014
#   ('2024', '2024_no_disturbance_since_2014'):
#     '2024_disturbance_since_2014',

# # Disturbance in 2024 caused by events since 2015
#   ('2024', '2024_no_disturbance_since_2015'):
#     '2024_disturbance_since_2015',

# # Disturbance in 2024 caused by events since 2016
#   ('2024', '2024_no_disturbance_since_2016'):
#     '2024_disturbance_since_2016',

# # Disturbance in 2024 caused by events since 2017
#   ('2024', '2024_no_disturbance_since_2017'):
#     '2024_disturbance_since_2017',

# # Disturbance in 2024 caused by events since 2018
#   ('2024', '2024_no_disturbance_since_2018'):
#     '2024_disturbance_since_2018',

# # Disturbance in 2024 caused by events since 2019
#   ('2024', '2024_no_disturbance_since_2019'):
#     '2024_disturbance_since_2019',

# # Disturbance in 2024 caused by events since 2020
#   ('2024', '2024_no_disturbance_since_2020'):
#     '2024_disturbance_since_2020',

# # Disturbance in 2024 caused by events since 2021
#   ('2024', '2024_no_disturbance_since_2021'):
#     '2024_disturbance_since_2021',

# # Disturbance in 2024 caused by events since 2022
#   ('2024', '2024_no_disturbance_since_2022'):
#     '2024_disturbance_since_2022',

# # Disturbance in 2024 caused by events since 2023
#   ('2024', '2024_no_disturbance_since_2023'):
#     '2024_disturbance_since_2023',

# # Disturbance in 2024 caused by events since 2024
#   ('2024', '2024_no_disturbance_since_2024'):
#     '2024_disturbance_since_2024',

# # Disturbance in 2021 caused by events since an oldgrowth state.
#   ('2021', '2021_oldgrowth_all_land'):
#     '2021_disturbance_since_oldgrowth',

# # Disturbance in 2024 caused by events since an oldgrowth state.
#   ('2024', '2024_oldgrowth_all_land'):
#     '2024_disturbance_since_oldgrowth',

# }

# degradation_since_dictionary = {

# # Degradation in 2021 caused by events since 1993
#   ('2021', '2021_no_degradation_since_1993'):
#     '2021_degradation_since_1993',

# # Degradation in 2024 caused by events since 1996
#   ('2024', '2024_no_degradation_since_1996'):
#     '2024_degradation_since_1996',

# # Degradation in 2021 caused by events since an old-growth state
#   ('2021', '2021_oldgrowth'):
#     '2021_degradation_since_oldgrowth',

# # Degradation in 2024 caused by events since an old-growth state
#   ('2024', '2024_oldgrowth'):
#     '2024_degradation_since_oldgrowth',

# }

# deforestation_since_dictionary = {

# # Deforestation in 2021 caused by events since 1993
#   ('2021_degradation_since_1993', '2021_disturbance_since_1993'):
#     '2021_deforestation_since_1993',

# # Deforestation in 2024 caused by events since 1996
#   ('2024_degradation_since_1996', '2024_disturbance_since_1996'):
#     '2024_deforestation_since_1996',

# # Deforestation in 2021 caused by events since an old-growth state
#   ('2021_degradation_since_oldgrowth', '2021_disturbance_since_oldgrowth'):
#     '2021_deforestation_since_oldgrowth',

# # Deforestation in 2024 caused by events since an old-growth state
#   ('2024_degradation_since_oldgrowth', '2024_disturbance_since_oldgrowth'):
#     '2024_deforestation_since_oldgrowth',

# }

# specific_year_effects_dictionary = {

# # Effects in 2024
#   # Disturbance effects
#   ('2024_disturbance_since_1996', '2024_disturbance_since_1997'):
#     '2024_effect_of_disturbance_in_1996',
#   ('2024_disturbance_since_1997', '2024_disturbance_since_1998'):
#     '2024_effect_of_disturbance_in_1997',
#   ('2024_disturbance_since_1998', '2024_disturbance_since_1999'):
#     '2024_effect_of_disturbance_in_1998',
#   ('2024_disturbance_since_1999', '2024_disturbance_since_2000'):
#     '2024_effect_of_disturbance_in_1999',
#   ('2024_disturbance_since_2000', '2024_disturbance_since_2001'):
#     '2024_effect_of_disturbance_in_2000',
#   ('2024_disturbance_since_2001', '2024_disturbance_since_2002'):
#     '2024_effect_of_disturbance_in_2001',
#   ('2024_disturbance_since_2002', '2024_disturbance_since_2003'):
#     '2024_effect_of_disturbance_in_2002',
#   ('2024_disturbance_since_2003', '2024_disturbance_since_2004'):
#     '2024_effect_of_disturbance_in_2003',
#   ('2024_disturbance_since_2004', '2024_disturbance_since_2005'):
#     '2024_effect_of_disturbance_in_2004',
#   ('2024_disturbance_since_2005', '2024_disturbance_since_2006'):
#     '2024_effect_of_disturbance_in_2005',
#   ('2024_disturbance_since_2006', '2024_disturbance_since_2007'):
#     '2024_effect_of_disturbance_in_2006',
#   ('2024_disturbance_since_2007', '2024_disturbance_since_2008'):
#     '2024_effect_of_disturbance_in_2007',
#   ('2024_disturbance_since_2008', '2024_disturbance_since_2009'):
#     '2024_effect_of_disturbance_in_2008',
#   ('2024_disturbance_since_2009', '2024_disturbance_since_2010'):
#     '2024_effect_of_disturbance_in_2009',
#   ('2024_disturbance_since_2010', '2024_disturbance_since_2011'):
#     '2024_effect_of_disturbance_in_2010',
#   ('2024_disturbance_since_2011', '2024_disturbance_since_2012'):
#     '2024_effect_of_disturbance_in_2011',
#   ('2024_disturbance_since_2012', '2024_disturbance_since_2013'):
#     '2024_effect_of_disturbance_in_2012',
#   ('2024_disturbance_since_2013', '2024_disturbance_since_2014'):
#     '2024_effect_of_disturbance_in_2013',
#   ('2024_disturbance_since_2014', '2024_disturbance_since_2015'):
#     '2024_effect_of_disturbance_in_2014',
#   ('2024_disturbance_since_2015', '2024_disturbance_since_2016'):
#     '2024_effect_of_disturbance_in_2015',
#   ('2024_disturbance_since_2016', '2024_disturbance_since_2017'):
#     '2024_effect_of_disturbance_in_2016',
#   ('2024_disturbance_since_2017', '2024_disturbance_since_2018'):
#     '2024_effect_of_disturbance_in_2017',
#   ('2024_disturbance_since_2018', '2024_disturbance_since_2019'):
#     '2024_effect_of_disturbance_in_2018',
#   ('2024_disturbance_since_2019', '2024_disturbance_since_2020'):
#     '2024_effect_of_disturbance_in_2019',
#   ('2024_disturbance_since_2020', '2024_disturbance_since_2021'):
#     '2024_effect_of_disturbance_in_2020',
#   ('2024_disturbance_since_2021', '2024_disturbance_since_2022'):
#     '2024_effect_of_disturbance_in_2021',
#   ('2024_disturbance_since_2022', '2024_disturbance_since_2023'):
#     '2024_effect_of_disturbance_in_2022',
#   ('2024_disturbance_since_2023', '2024_disturbance_since_2024'):
#     '2024_effect_of_disturbance_in_2023',
#   ('2024_disturbance_since_2024',):
#     '2024_effect_of_disturbance_in_2024',

# }

# area_based_dictionary = {

# # Area-based disturbance from alternate scenarios
#   ('2024_road_mat_daling_deforestation_2023_30m_degradation_buffer', '2024'):
#     '2024_deforestation_of_road_mat_daling_2023',
# }


## Calculate disturbance

In [None]:
# Floor constraint: prevent degradation from exceeding total disturbance.
# Conceptually, disturbance = degradation + deforestation.
# Degradation can exceed disturbance in rare cases where edge effects from
# non-forest have a positive predicted impact on AGBD, especially at high elevation.
apply_degradation_floor = False

# Cap all positive differences at zero. These also come mainly from rare edge effect cases.
# Conceptually, the capped result is disturbance loss rather than the effect of disturbance.
cap_positive_differences = False

# Precision settings for output rasters
mean_precision = 2
ci_precision = 2
uncertainty_precision = 2

# Confidence interval level for CI filename pattern
confidence_level = 95

# AGBD difference between two scenarios (array1 - array2).
# Disturbance losses come out negative. With a correct scenario dictionary any
# gains should be negligible float precision artifacts.
def subtract_arrays(array1, array2, cap_positive=False):
    """Return array1 - array2, with positive values set to 0 when cap_positive is true."""
    difference = array1 - array2
    if not cap_positive:
        return difference
    return np.where(difference > 0, 0, difference)

# Propagate uncertainty for forest AGBD loss calculations using confidence intervals.
# Measures uncertainty of forest AGBD change from disturbance events only.

# Mathematical basis: For difference Z = X - Y with confidence intervals CI_x, CI_y:
# Combined CI: CI_z = √[CI_x² + CI_y²] (IPCC 2006, Eq. 3.2; 2019, Eq. 3.2A)
# Relative uncertainty: CI_z / |Z| = CI_z / |X - Y|

# Note: Liang et al. (2023) incorrectly used |X + Y| as denominator, violating standard
# uncertainty propagation theory for differences. IPCC guidelines (2006 Section 3.2.3.1,
# 2019 Section 3.2.3.1) specify the denominator must be the absolute value of the
# difference |X - Y| for mathematically correct relative uncertainty calculations.

# Limitation: This approach assumes independence between scenario uncertainties, but
# scenarios using identical models and predictors are highly correlated. This results
# in conservative (overestimated) uncertainty bounds. Liang et al. (2023) has the same
# correlation limitation plus the mathematical error noted above.

# Forest classification from external dataset determines data availability per scenario.
# External disturbance classification determines whether forest AGBD change occurred.
# Uncertainty quantifies confidence in magnitude of forest AGBD change from disturbance.

# References:
# - IPCC (2006) Guidelines Vol.1 Ch.3: Uncertainties, Section 3.2.3.1
# - IPCC (2019) Refinement Vol.1 Ch.3: Uncertainties, Section 3.2.3.1
# - Liang et al. (2023) Remote Sensing of Environment 284:113367

# Propagate uncertainty function
# mean1, mean2: Forest AGBD values for two scenarios/timepoints (Mg/ha)
# ci1, ci2: Confidence interval half-widths (Mg/ha)
# relative_uncertainty: Percentage (not capped in this function, so it can exceed 100 where the CI is wider than the change)
# ci_combined: Absolute CI for downstream calculations
def propagate_uncertainty(mean1, ci1, mean2, ci2):
    """Propagate confidence intervals through the difference ``mean1 - mean2``.

    Args:
        mean1, mean2: Values being differenced.
        ci1, ci2: Confidence interval half-widths paired with each mean.

    Returns:
        Tuple ``(relative_uncertainty, ci_combined)``. ``ci_combined`` is
        ``sqrt(ci1**2 + ci2**2)``. ``relative_uncertainty`` is a percentage:
        ``ci_combined / |mean1 - mean2|`` in the general case, 0 where the
        difference is positive or zero, and the non-empty side's own
        ``ci / |mean|`` where the other side is exactly (mean 0, CI 0).
    """
    def _ratio(numerator, divisor, valid):
        # Element-wise numerator / divisor as float64; 0.0 wherever `valid` is False.
        return np.divide(numerator, divisor,
                         out=np.zeros_like(numerator, dtype=np.float64),
                         where=valid)

    # Root-sum-of-squares of the two half-widths (used for all pixels).
    ci_combined = np.sqrt(np.square(ci1) + np.square(ci2))
    signed_diff = mean1 - mean2
    abs_diff = np.abs(signed_diff)

    # Transition pixels: one side is exactly zero in both mean and CI (nodata
    # filled with 0 by the caller) while the other side is non-zero in both.
    side1_empty = (ci1 == 0) & (mean1 == 0)
    side2_empty = (ci2 == 0) & (mean2 == 0)
    side1_present = (ci1 != 0) & (mean1 != 0)
    side2_present = (ci2 != 0) & (mean2 != 0)
    deforestation_case = side1_empty & side2_present
    reforestation_case = side1_present & side2_empty

    # Candidate relative uncertainties (fractions, not yet percentages).
    general_fraction = _ratio(ci_combined, abs_diff, abs_diff != 0)
    side2_fraction = _ratio(ci2, np.abs(mean2), mean2 != 0)
    side1_fraction = _ratio(ci1, np.abs(mean1), mean1 != 0)

    # Start from the general case, zeroed where the difference is positive or
    # zero, then let the transition cases override it. Deforestation is applied
    # last so that it has the highest priority.
    no_loss = (signed_diff > 0) | (abs_diff == 0)
    fraction = np.where(no_loss, 0, general_fraction)
    fraction = np.where(reforestation_case, side1_fraction, fraction)
    fraction = np.where(deforestation_case, side2_fraction, fraction)
    return fraction * 100.0, ci_combined

# Load raster as numpy array.
def load_raster(path):
    """Open the raster at ``path`` with GDAL and return its contents as an array."""
    dataset = gdal.Open(path)
    pixels = dataset.ReadAsArray()
    # Drop the dataset reference before returning so GDAL can release the file.
    del dataset
    return pixels

# Fill nodata with zero where partner array has valid data.
def fill_nodata_pair(arr1, arr2, nodata):
    """Replace one-sided nodata with 0 in a pair of arrays.

    An element equal to ``nodata`` in one array becomes 0 only when the other
    array's element at the same position is not ``nodata``. Positions where
    both arrays hold ``nodata`` are left unchanged.

    Returns:
        Tuple ``(filled_arr1, filled_arr2)``.
    """
    only_first_missing = (arr1 == nodata) & (arr2 != nodata)
    only_second_missing = (arr2 == nodata) & (arr1 != nodata)
    return (np.where(only_first_missing, 0, arr1),
            np.where(only_second_missing, 0, arr2))

# Round array to precision; convert to int16 if precision is 0.
def round_array(arr, precision):
    """Round ``arr`` to ``precision`` decimals; return int16 when precision is 0.

    Args:
        arr: Array (or scalar) to round.
        precision: Number of decimals passed to ``np.round``.

    Returns:
        The rounded values. For ``precision == 0`` the result is cast to
        ``np.int16``; any other precision keeps the rounded dtype.

    Values outside the int16 range are clamped to its limits before the cast.
    An unguarded float-to-int16 cast of such values wraps around or is
    undefined, which would silently write meaningless numbers (for example
    for very large relative-uncertainty percentages).
    """
    rounded = np.round(arr, precision)
    if precision != 0:
        return rounded
    limits = np.iinfo(np.int16)
    # Float bounds keep the clamp valid for any input dtype, including narrow integers.
    return np.clip(rounded, float(limits.min), float(limits.max)).astype(np.int16)

# Determine processing mode based on source directory
use_uncertainty = source_dir == uncertainty_dir

# Progress tracking: one operation per entry across the five calculation dictionaries
total_operations = sum(len(dictionary) for dictionary in (
    disturbance_since_dictionary, degradation_since_dictionary,
    deforestation_since_dictionary, specific_year_effects_dictionary,
    area_based_dictionary))
progress_index = 0
progress_label = widgets.Label(f"Disturbance calculation progress: {progress_index}/{total_operations}")
display(progress_label)


# 1. Process disturbance_since calculations
for (scenario1, scenario2), disturbance_name in disturbance_since_dictionary.items():
    # Resolve the output path(s) for the active mode and check whether they already exist
    if use_uncertainty:
        mean_output_path = join(disturbance_dir, f"mean__{disturbance_name}__{selected_model}.tif")
        ci_output_path = join(disturbance_dir, f"ci_{confidence_level}__{disturbance_name}__{selected_model}.tif")
        unc_output_path = join(disturbance_dir, f"uncertainty__{disturbance_name}__{selected_model}.tif")
        outputs_exist = all(exists(path) for path in (mean_output_path, ci_output_path, unc_output_path))
    else:
        output_path = join(disturbance_dir, f"{disturbance_name}__{selected_model}.tif")
        outputs_exist = exists(output_path)

    if not outputs_exist and use_uncertainty:
        # Uncertainty mode: mean and CI rasters are required for both scenarios
        scenario1_mean_path = join(predictions_dir, f"mean__{scenario1}__{selected_model}.tif")
        scenario1_ci_path = join(predictions_dir, f"ci_{confidence_level}__{scenario1}__{selected_model}.tif")
        scenario2_mean_path = join(predictions_dir, f"mean__{scenario2}__{selected_model}.tif")
        scenario2_ci_path = join(predictions_dir, f"ci_{confidence_level}__{scenario2}__{selected_model}.tif")
        assert exists(scenario1_mean_path), f"Missing: mean__{scenario1}__{selected_model}.tif"
        assert exists(scenario1_ci_path), f"Missing: ci_{confidence_level}__{scenario1}__{selected_model}.tif"
        assert exists(scenario2_mean_path), f"Missing: mean__{scenario2}__{selected_model}.tif"
        assert exists(scenario2_ci_path), f"Missing: ci_{confidence_level}__{scenario2}__{selected_model}.tif"
        template_path = scenario1_mean_path
        # Load each pair and zero-fill nodata that is present on one side only
        scenario1_mean, scenario2_mean = fill_nodata_pair(
            load_raster(scenario1_mean_path), load_raster(scenario2_mean_path), nodatavalue)
        scenario1_ci, scenario2_ci = fill_nodata_pair(
            load_raster(scenario1_ci_path), load_raster(scenario2_ci_path), nodatavalue)
        # Pixels still nodata in scenario 1 after filling stay nodata in every output
        nodata_mask = (scenario1_mean == nodatavalue)
        disturbance_mean = subtract_arrays(scenario1_mean, scenario2_mean, cap_positive_differences)
        disturbance_mean = np.where(nodata_mask, nodatavalue, disturbance_mean)
        disturbance_unc, disturbance_ci = propagate_uncertainty(
            scenario1_mean, scenario1_ci, scenario2_mean, scenario2_ci)
        disturbance_ci = np.where(nodata_mask, nodatavalue, disturbance_ci)
        disturbance_unc = np.where(nodata_mask, nodatavalue, disturbance_unc)
        # Zero uncertainty where mean rounds to zero
        disturbance_mean_rounded = round_array(disturbance_mean, mean_precision)
        zero_mean_mask = (disturbance_mean_rounded == 0)
        disturbance_ci = np.where(zero_mean_mask, 0, disturbance_ci)
        disturbance_unc = np.where(zero_mean_mask, 0, disturbance_unc)
        export_array_as_tif(disturbance_mean_rounded, mean_output_path, template=template_path)
        export_array_as_tif(round_array(disturbance_ci, ci_precision), ci_output_path, template=template_path)
        export_array_as_tif(round_array(disturbance_unc, uncertainty_precision), unc_output_path, template=template_path)
    elif not outputs_exist:
        # Single-raster mode: one prediction raster per scenario
        scenario1_path = join(predictions_dir, f"{scenario1}__{selected_model}.tif")
        scenario2_path = join(predictions_dir, f"{scenario2}__{selected_model}.tif")
        assert exists(scenario1_path), f"Missing: {scenario1_path}"
        assert exists(scenario2_path), f"Missing: {scenario2_path}"
        template_path = scenario1_path
        scenario1_arr, scenario2_arr = fill_nodata_pair(
            load_raster(scenario1_path), load_raster(scenario2_path), nodatavalue)
        disturbance_arr = subtract_arrays(scenario1_arr, scenario2_arr, cap_positive_differences)
        disturbance_arr = np.where(scenario1_arr == nodatavalue, nodatavalue, disturbance_arr)
        export_array_as_tif(round_array(disturbance_arr, mean_precision), output_path, template=template_path)

    # Count this entry whether it was computed or skipped
    progress_index += 1
    progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"


# 2. Process degradation_since calculations
for (scenario1, scenario2), disturbance_name in degradation_since_dictionary.items():
    # Resolve the output path(s) for the active mode and check whether they already exist
    if use_uncertainty:
        mean_output_path = join(disturbance_dir, f"mean__{disturbance_name}__{selected_model}.tif")
        ci_output_path = join(disturbance_dir, f"ci_{confidence_level}__{disturbance_name}__{selected_model}.tif")
        unc_output_path = join(disturbance_dir, f"uncertainty__{disturbance_name}__{selected_model}.tif")
        outputs_exist = all(exists(path) for path in (mean_output_path, ci_output_path, unc_output_path))
    else:
        output_path = join(disturbance_dir, f"{disturbance_name}__{selected_model}.tif")
        outputs_exist = exists(output_path)

    if not outputs_exist and use_uncertainty:
        # Uncertainty mode: mean and CI rasters are required for both scenarios
        scenario1_mean_path = join(predictions_dir, f"mean__{scenario1}__{selected_model}.tif")
        scenario1_ci_path = join(predictions_dir, f"ci_{confidence_level}__{scenario1}__{selected_model}.tif")
        scenario2_mean_path = join(predictions_dir, f"mean__{scenario2}__{selected_model}.tif")
        scenario2_ci_path = join(predictions_dir, f"ci_{confidence_level}__{scenario2}__{selected_model}.tif")
        assert exists(scenario1_mean_path), f"Missing: mean__{scenario1}__{selected_model}.tif"
        assert exists(scenario1_ci_path), f"Missing: ci_{confidence_level}__{scenario1}__{selected_model}.tif"
        assert exists(scenario2_mean_path), f"Missing: mean__{scenario2}__{selected_model}.tif"
        assert exists(scenario2_ci_path), f"Missing: ci_{confidence_level}__{scenario2}__{selected_model}.tif"
        template_path = scenario1_mean_path
        # Load each pair and zero-fill nodata that is present on one side only
        scenario1_mean, scenario2_mean = fill_nodata_pair(
            load_raster(scenario1_mean_path), load_raster(scenario2_mean_path), nodatavalue)
        scenario1_ci, scenario2_ci = fill_nodata_pair(
            load_raster(scenario1_ci_path), load_raster(scenario2_ci_path), nodatavalue)
        # Pixels still nodata in scenario 1 after filling stay nodata in every output
        nodata_mask = (scenario1_mean == nodatavalue)
        disturbance_mean = subtract_arrays(scenario1_mean, scenario2_mean, cap_positive_differences)
        disturbance_mean = np.where(nodata_mask, nodatavalue, disturbance_mean)
        disturbance_unc, disturbance_ci = propagate_uncertainty(
            scenario1_mean, scenario1_ci, scenario2_mean, scenario2_ci)
        disturbance_ci = np.where(nodata_mask, nodatavalue, disturbance_ci)
        disturbance_unc = np.where(nodata_mask, nodatavalue, disturbance_unc)
        # Apply floor constraint: degradation cannot exceed total disturbance
        if apply_degradation_floor:
            equiv_name = disturbance_name.replace('degradation_since', 'disturbance_since')
            equiv_mean_path = join(disturbance_dir, f"mean__{equiv_name}__{selected_model}.tif")
            equiv_ci_path = join(disturbance_dir, f"ci_{confidence_level}__{equiv_name}__{selected_model}.tif")
            equiv_unc_path = join(disturbance_dir, f"uncertainty__{equiv_name}__{selected_model}.tif")
            if all(exists(path) for path in (equiv_mean_path, equiv_ci_path, equiv_unc_path)):
                print(f"Applying floor constraint: {disturbance_name} constrained by {equiv_name}")
                equiv_mean = load_raster(equiv_mean_path)
                equiv_ci = load_raster(equiv_ci_path)
                equiv_unc = load_raster(equiv_unc_path)
                # Where both are valid and degradation is below the equivalent disturbance,
                # take the equivalent disturbance mean, CI and uncertainty instead
                floor_mask = (disturbance_mean != nodatavalue) & (equiv_mean != nodatavalue) & (disturbance_mean < equiv_mean)
                disturbance_mean = np.where(floor_mask, equiv_mean, disturbance_mean)
                disturbance_ci = np.where(floor_mask, equiv_ci, disturbance_ci)
                disturbance_unc = np.where(floor_mask, equiv_unc, disturbance_unc)
            else:
                print(f"No floor constraint applied: {equiv_name} files not found")
        # Zero uncertainty where mean rounds to zero
        disturbance_mean_rounded = round_array(disturbance_mean, mean_precision)
        zero_mean_mask = (disturbance_mean_rounded == 0)
        disturbance_ci = np.where(zero_mean_mask, 0, disturbance_ci)
        disturbance_unc = np.where(zero_mean_mask, 0, disturbance_unc)
        export_array_as_tif(disturbance_mean_rounded, mean_output_path, template=template_path)
        export_array_as_tif(round_array(disturbance_ci, ci_precision), ci_output_path, template=template_path)
        export_array_as_tif(round_array(disturbance_unc, uncertainty_precision), unc_output_path, template=template_path)
    elif not outputs_exist:
        # Single-raster mode: one prediction raster per scenario
        scenario1_path = join(predictions_dir, f"{scenario1}__{selected_model}.tif")
        scenario2_path = join(predictions_dir, f"{scenario2}__{selected_model}.tif")
        assert exists(scenario1_path), f"Missing: {scenario1_path}"
        assert exists(scenario2_path), f"Missing: {scenario2_path}"
        template_path = scenario1_path
        scenario1_arr, scenario2_arr = fill_nodata_pair(
            load_raster(scenario1_path), load_raster(scenario2_path), nodatavalue)
        disturbance_arr = subtract_arrays(scenario1_arr, scenario2_arr, cap_positive_differences)
        disturbance_arr = np.where(scenario1_arr == nodatavalue, nodatavalue, disturbance_arr)
        # Apply floor constraint
        if apply_degradation_floor:
            equiv_name = disturbance_name.replace('degradation_since', 'disturbance_since')
            equiv_path = join(disturbance_dir, f"{equiv_name}__{selected_model}.tif")
            if exists(equiv_path):
                print(f"Applying floor constraint: {disturbance_name} constrained by {equiv_name}")
                equiv_arr = load_raster(equiv_path)
                floor_mask = (disturbance_arr != nodatavalue) & (equiv_arr != nodatavalue) & (disturbance_arr < equiv_arr)
                disturbance_arr = np.where(floor_mask, equiv_arr, disturbance_arr)
            else:
                print(f"No floor constraint applied: {equiv_name} file not found")
        export_array_as_tif(round_array(disturbance_arr, mean_precision), output_path, template=template_path)

    # Count this entry whether it was computed or skipped
    progress_index += 1
    progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"


# 3. Process deforestation_since calculations (disturbance - degradation)
for (dist1_name, dist2_name), disturbance_name in deforestation_since_dictionary.items():
    # Resolve the output path(s) for the active mode and check whether they already exist
    if use_uncertainty:
        mean_output_path = join(disturbance_dir, f"mean__{disturbance_name}__{selected_model}.tif")
        ci_output_path = join(disturbance_dir, f"ci_{confidence_level}__{disturbance_name}__{selected_model}.tif")
        unc_output_path = join(disturbance_dir, f"uncertainty__{disturbance_name}__{selected_model}.tif")
        outputs_exist = all(exists(path) for path in (mean_output_path, ci_output_path, unc_output_path))
    else:
        output_path = join(disturbance_dir, f"{disturbance_name}__{selected_model}.tif")
        outputs_exist = exists(output_path)

    if not outputs_exist and use_uncertainty:
        # Inputs are previously calculated disturbance outputs (mean and CI per input)
        dist1_mean_path = join(disturbance_dir, f"mean__{dist1_name}__{selected_model}.tif")
        dist1_ci_path = join(disturbance_dir, f"ci_{confidence_level}__{dist1_name}__{selected_model}.tif")
        dist2_mean_path = join(disturbance_dir, f"mean__{dist2_name}__{selected_model}.tif")
        dist2_ci_path = join(disturbance_dir, f"ci_{confidence_level}__{dist2_name}__{selected_model}.tif")
        assert exists(dist1_mean_path), f"Missing: mean__{dist1_name}__{selected_model}.tif"
        assert exists(dist1_ci_path), f"Missing: ci_{confidence_level}__{dist1_name}__{selected_model}.tif"
        assert exists(dist2_mean_path), f"Missing: mean__{dist2_name}__{selected_model}.tif"
        assert exists(dist2_ci_path), f"Missing: ci_{confidence_level}__{dist2_name}__{selected_model}.tif"
        template_path = dist2_mean_path
        # Load each pair and zero-fill nodata that is present on one side only
        dist1_mean, dist2_mean = fill_nodata_pair(
            load_raster(dist1_mean_path), load_raster(dist2_mean_path), nodatavalue)
        dist1_ci, dist2_ci = fill_nodata_pair(
            load_raster(dist1_ci_path), load_raster(dist2_ci_path), nodatavalue)
        # Calculate deforestation (dist2 - dist1); pixels still nodata in dist2 stay nodata
        nodata_mask = (dist2_mean == nodatavalue)
        result_mean = subtract_arrays(dist2_mean, dist1_mean, cap_positive_differences)
        result_mean = np.where(nodata_mask, nodatavalue, result_mean)
        result_unc, result_ci = propagate_uncertainty(dist2_mean, dist2_ci, dist1_mean, dist1_ci)
        result_ci = np.where(nodata_mask, nodatavalue, result_ci)
        result_unc = np.where(nodata_mask, nodatavalue, result_unc)
        # Zero uncertainty where mean rounds to zero
        result_mean_rounded = round_array(result_mean, mean_precision)
        zero_mean_mask = (result_mean_rounded == 0)
        result_ci = np.where(zero_mean_mask, 0, result_ci)
        result_unc = np.where(zero_mean_mask, 0, result_unc)
        export_array_as_tif(result_mean_rounded, mean_output_path, template=template_path)
        export_array_as_tif(round_array(result_ci, ci_precision), ci_output_path, template=template_path)
        export_array_as_tif(round_array(result_unc, uncertainty_precision), unc_output_path, template=template_path)
    elif not outputs_exist:
        # Inputs are previously calculated disturbance outputs (one raster per input)
        dist1_path = join(disturbance_dir, f"{dist1_name}__{selected_model}.tif")
        dist2_path = join(disturbance_dir, f"{dist2_name}__{selected_model}.tif")
        assert exists(dist1_path), f"Missing: {dist1_path}"
        assert exists(dist2_path), f"Missing: {dist2_path}"
        template_path = dist2_path
        dist1_arr, dist2_arr = fill_nodata_pair(
            load_raster(dist1_path), load_raster(dist2_path), nodatavalue)
        # Calculate deforestation (dist2 - dist1)
        result_arr = subtract_arrays(dist2_arr, dist1_arr, cap_positive_differences)
        result_arr = np.where(dist2_arr == nodatavalue, nodatavalue, result_arr)
        export_array_as_tif(round_array(result_arr, mean_precision), output_path, template=template_path)

    # Count this entry whether it was computed or skipped
    progress_index += 1
    progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"


# 4. Process specific_year_effects calculations
for dist_key, disturbance_name in specific_year_effects_dictionary.items():
    if len(dist_key) == 1:
        # Single-element key: direct copy (same-year effect)
        source_name = dist_key[0]
        if use_uncertainty:
            src_mean_path = join(disturbance_dir, f"mean__{source_name}__{selected_model}.tif")
            src_ci_path = join(disturbance_dir, f"ci_{confidence_level}__{source_name}__{selected_model}.tif")
            src_unc_path = join(disturbance_dir, f"uncertainty__{source_name}__{selected_model}.tif")
            tgt_mean_path = join(disturbance_dir, f"mean__{disturbance_name}__{selected_model}.tif")
            tgt_ci_path = join(disturbance_dir, f"ci_{confidence_level}__{disturbance_name}__{selected_model}.tif")
            tgt_unc_path = join(disturbance_dir, f"uncertainty__{disturbance_name}__{selected_model}.tif")
            outputs_exist = all(exists(path) for path in (tgt_mean_path, tgt_ci_path, tgt_unc_path))
        else:
            src_path = join(disturbance_dir, f"{source_name}__{selected_model}.tif")
            tgt_path = join(disturbance_dir, f"{disturbance_name}__{selected_model}.tif")
            outputs_exist = exists(tgt_path)

        # Copy source to target (source files already rounded)
        if not outputs_exist and use_uncertainty:
            assert exists(src_mean_path), f"Missing: mean__{source_name}__{selected_model}.tif"
            assert exists(src_ci_path), f"Missing: ci_{confidence_level}__{source_name}__{selected_model}.tif"
            assert exists(src_unc_path), f"Missing: uncertainty__{source_name}__{selected_model}.tif"
            src_mean = load_raster(src_mean_path)
            src_ci = load_raster(src_ci_path)
            src_unc = load_raster(src_unc_path)
            # Zero uncertainty where mean rounds to zero
            src_mean_rounded = round_array(src_mean, mean_precision)
            zero_mean_mask = (src_mean_rounded == 0)
            src_ci = np.where(zero_mean_mask, 0, src_ci)
            src_unc = np.where(zero_mean_mask, 0, src_unc)
            export_array_as_tif(src_mean_rounded, tgt_mean_path, template=src_mean_path)
            export_array_as_tif(src_ci, tgt_ci_path, template=src_mean_path)
            export_array_as_tif(src_unc, tgt_unc_path, template=src_mean_path)
        elif not outputs_exist:
            assert exists(src_path), f"Missing: {src_path}"
            export_array_as_tif(round_array(load_raster(src_path), mean_precision), tgt_path, template=src_path)
    else:
        # Two-element key: subtraction (dist1 - dist2)
        dist1_name, dist2_name = dist_key
        if use_uncertainty:
            mean_output_path = join(disturbance_dir, f"mean__{disturbance_name}__{selected_model}.tif")
            ci_output_path = join(disturbance_dir, f"ci_{confidence_level}__{disturbance_name}__{selected_model}.tif")
            unc_output_path = join(disturbance_dir, f"uncertainty__{disturbance_name}__{selected_model}.tif")
            outputs_exist = all(exists(path) for path in (mean_output_path, ci_output_path, unc_output_path))
        else:
            output_path = join(disturbance_dir, f"{disturbance_name}__{selected_model}.tif")
            outputs_exist = exists(output_path)

        if not outputs_exist and use_uncertainty:
            # Inputs are previously calculated disturbance outputs (mean and CI per input)
            dist1_mean_path = join(disturbance_dir, f"mean__{dist1_name}__{selected_model}.tif")
            dist1_ci_path = join(disturbance_dir, f"ci_{confidence_level}__{dist1_name}__{selected_model}.tif")
            dist2_mean_path = join(disturbance_dir, f"mean__{dist2_name}__{selected_model}.tif")
            dist2_ci_path = join(disturbance_dir, f"ci_{confidence_level}__{dist2_name}__{selected_model}.tif")
            assert exists(dist1_mean_path), f"Missing: mean__{dist1_name}__{selected_model}.tif"
            assert exists(dist1_ci_path), f"Missing: ci_{confidence_level}__{dist1_name}__{selected_model}.tif"
            assert exists(dist2_mean_path), f"Missing: mean__{dist2_name}__{selected_model}.tif"
            assert exists(dist2_ci_path), f"Missing: ci_{confidence_level}__{dist2_name}__{selected_model}.tif"
            template_path = dist1_mean_path
            # Load each pair and zero-fill nodata that is present on one side only
            dist1_mean, dist2_mean = fill_nodata_pair(
                load_raster(dist1_mean_path), load_raster(dist2_mean_path), nodatavalue)
            dist1_ci, dist2_ci = fill_nodata_pair(
                load_raster(dist1_ci_path), load_raster(dist2_ci_path), nodatavalue)
            # Calculate difference (dist1 - dist2); pixels still nodata in dist1 stay nodata
            nodata_mask = (dist1_mean == nodatavalue)
            result_mean = subtract_arrays(dist1_mean, dist2_mean, cap_positive_differences)
            result_mean = np.where(nodata_mask, nodatavalue, result_mean)
            result_unc, result_ci = propagate_uncertainty(dist1_mean, dist1_ci, dist2_mean, dist2_ci)
            result_ci = np.where(nodata_mask, nodatavalue, result_ci)
            result_unc = np.where(nodata_mask, nodatavalue, result_unc)
            # Zero uncertainty where mean rounds to zero
            result_mean_rounded = round_array(result_mean, mean_precision)
            zero_mean_mask = (result_mean_rounded == 0)
            result_ci = np.where(zero_mean_mask, 0, result_ci)
            result_unc = np.where(zero_mean_mask, 0, result_unc)
            export_array_as_tif(result_mean_rounded, mean_output_path, template=template_path)
            export_array_as_tif(round_array(result_ci, ci_precision), ci_output_path, template=template_path)
            export_array_as_tif(round_array(result_unc, uncertainty_precision), unc_output_path, template=template_path)
        elif not outputs_exist:
            # Inputs are previously calculated disturbance outputs (one raster per input)
            dist1_path = join(disturbance_dir, f"{dist1_name}__{selected_model}.tif")
            dist2_path = join(disturbance_dir, f"{dist2_name}__{selected_model}.tif")
            assert exists(dist1_path), f"Missing: {dist1_path}"
            assert exists(dist2_path), f"Missing: {dist2_path}"
            template_path = dist1_path
            dist1_arr, dist2_arr = fill_nodata_pair(
                load_raster(dist1_path), load_raster(dist2_path), nodatavalue)
            # Calculate difference (dist1 - dist2)
            result_arr = subtract_arrays(dist1_arr, dist2_arr, cap_positive_differences)
            result_arr = np.where(dist1_arr == nodatavalue, nodatavalue, result_arr)
            export_array_as_tif(round_array(result_arr, mean_precision), output_path, template=template_path)

    # Count this entry whether it was computed or skipped
    progress_index += 1
    progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"


# 5. Process area_based calculations
for (scenario1, scenario2), disturbance_name in area_based_dictionary.items():
    # Resolve the output path(s) for the active mode and check whether they already exist
    if use_uncertainty:
        mean_output_path = join(disturbance_dir, f"mean__{disturbance_name}__{selected_model}.tif")
        ci_output_path = join(disturbance_dir, f"ci_{confidence_level}__{disturbance_name}__{selected_model}.tif")
        unc_output_path = join(disturbance_dir, f"uncertainty__{disturbance_name}__{selected_model}.tif")
        outputs_exist = all(exists(path) for path in (mean_output_path, ci_output_path, unc_output_path))
    else:
        output_path = join(disturbance_dir, f"{disturbance_name}__{selected_model}.tif")
        outputs_exist = exists(output_path)

    if not outputs_exist and use_uncertainty:
        # Uncertainty mode: mean and CI rasters are required for both scenarios
        scenario1_mean_path = join(predictions_dir, f"mean__{scenario1}__{selected_model}.tif")
        scenario1_ci_path = join(predictions_dir, f"ci_{confidence_level}__{scenario1}__{selected_model}.tif")
        scenario2_mean_path = join(predictions_dir, f"mean__{scenario2}__{selected_model}.tif")
        scenario2_ci_path = join(predictions_dir, f"ci_{confidence_level}__{scenario2}__{selected_model}.tif")
        assert exists(scenario1_mean_path), f"Missing: mean__{scenario1}__{selected_model}.tif"
        assert exists(scenario1_ci_path), f"Missing: ci_{confidence_level}__{scenario1}__{selected_model}.tif"
        assert exists(scenario2_mean_path), f"Missing: mean__{scenario2}__{selected_model}.tif"
        assert exists(scenario2_ci_path), f"Missing: ci_{confidence_level}__{scenario2}__{selected_model}.tif"
        template_path = scenario1_mean_path
        # Load each pair and zero-fill nodata that is present on one side only
        scenario1_mean, scenario2_mean = fill_nodata_pair(
            load_raster(scenario1_mean_path), load_raster(scenario2_mean_path), nodatavalue)
        scenario1_ci, scenario2_ci = fill_nodata_pair(
            load_raster(scenario1_ci_path), load_raster(scenario2_ci_path), nodatavalue)
        # Pixels still nodata in scenario 1 after filling stay nodata in every output
        nodata_mask = (scenario1_mean == nodatavalue)
        disturbance_mean = subtract_arrays(scenario1_mean, scenario2_mean, cap_positive_differences)
        disturbance_mean = np.where(nodata_mask, nodatavalue, disturbance_mean)
        disturbance_unc, disturbance_ci = propagate_uncertainty(
            scenario1_mean, scenario1_ci, scenario2_mean, scenario2_ci)
        disturbance_ci = np.where(nodata_mask, nodatavalue, disturbance_ci)
        disturbance_unc = np.where(nodata_mask, nodatavalue, disturbance_unc)
        # Zero uncertainty where mean rounds to zero
        disturbance_mean_rounded = round_array(disturbance_mean, mean_precision)
        zero_mean_mask = (disturbance_mean_rounded == 0)
        disturbance_ci = np.where(zero_mean_mask, 0, disturbance_ci)
        disturbance_unc = np.where(zero_mean_mask, 0, disturbance_unc)
        export_array_as_tif(disturbance_mean_rounded, mean_output_path, template=template_path)
        export_array_as_tif(round_array(disturbance_ci, ci_precision), ci_output_path, template=template_path)
        export_array_as_tif(round_array(disturbance_unc, uncertainty_precision), unc_output_path, template=template_path)
    elif not outputs_exist:
        # Single-raster mode: one prediction raster per scenario
        scenario1_path = join(predictions_dir, f"{scenario1}__{selected_model}.tif")
        scenario2_path = join(predictions_dir, f"{scenario2}__{selected_model}.tif")
        assert exists(scenario1_path), f"Missing: {scenario1_path}"
        assert exists(scenario2_path), f"Missing: {scenario2_path}"
        template_path = scenario1_path
        scenario1_arr, scenario2_arr = fill_nodata_pair(
            load_raster(scenario1_path), load_raster(scenario2_path), nodatavalue)
        disturbance_arr = subtract_arrays(scenario1_arr, scenario2_arr, cap_positive_differences)
        disturbance_arr = np.where(scenario1_arr == nodatavalue, nodatavalue, disturbance_arr)
        export_array_as_tif(round_array(disturbance_arr, mean_precision), output_path, template=template_path)

    # Count this entry whether it was computed or skipped
    progress_index += 1
    progress_label.value = f"Disturbance calculation progress: {progress_index}/{total_operations}"


print("All disturbances calculated.")

# Intactness

## Percentage loss

In [None]:
# Intactness is measured as relative percentage loss of AGBD within an area of interest

# Select which baseline and disturbance raster to use for calculating intactness
# percentage and relative intactness. Ideally this is the scenario with the least disturbance
# and the difference between that and the current reality.

# Print every available name as a ready-to-paste assignment. The final
# '__'-separated segment of each filename is always dropped. When source_dir is
# the uncertainty directory, only filenames containing 'mean' are listed and
# their first '__'-separated segment is dropped as well.
from_scenarios = source_dir == scenarios_dir
from_uncertainty = source_dir == uncertainty_dir
for baseline in os.listdir(predictions_dir):
  if from_scenarios:
    print(f"selected_baseline = '{'__'.join(baseline.split('__')[0:-1])}'")
  if from_uncertainty and 'mean' in baseline:
    print(f"selected_baseline = '{'__'.join(baseline.split('__')[1:-1])}'")
for dist in os.listdir(disturbance_dir):
  if from_scenarios:
    print(f"selected_dist = '{'__'.join(dist.split('__')[0:-1])}'")
  if from_uncertainty and 'mean' in dist:
    print(f"selected_dist = '{'__'.join(dist.split('__')[1:-1])}'")

In [None]:
# selected_baseline = '2021_no_disturbance_since_1993'
# selected_dist = '2021_disturbance_since_1993'
# selected_baseline = '2021_oldgrowth_all_land'
# selected_dist = '2021_disturbance_since_oldgrowth'
# selected_baseline = '2024_no_disturbance_since_1996'
# selected_dist = '2024_disturbance_since_1996'
selected_baseline = '2024_oldgrowth_all_land'
selected_dist = '2024_disturbance_since_oldgrowth'

percentage_loss_precision = 0

# Define the baseline name; it is built the same way for either recognised
# source directory and is left undefined for any other source_dir
if source_dir in (scenarios_dir, uncertainty_dir):
  base_dist_name = f"{selected_baseline.split('__')[0]}__{selected_dist.split('__')[0]}"
# The text before the first underscore selects the forest mask year
forest_mask_year = base_dist_name.split('_', 1)[0]

# Outputs for this baseline/disturbance pair go in their own subdirectory
intactness_baseline_dist_dir = join(intactness_dir, base_dist_name)
makedirs(intactness_baseline_dist_dir, exist_ok=True)

percentage_filename = f"percentage_change__{base_dist_name}__{selected_model}.tif"
percentage_path = join(intactness_baseline_dist_dir, percentage_filename)

if not exists(percentage_path):
  # Define filenames and directories
  if source_dir == scenarios_dir:
    selected_baseline_path = join(predictions_dir, f"{selected_baseline}__{selected_model}.tif")
    selected_dist_path = join(disturbance_dir, f"{selected_dist}__{selected_model}.tif")
  if source_dir == uncertainty_dir:
    selected_baseline_path = join(predictions_dir, f"mean__{selected_baseline}__{selected_model}.tif")
    selected_dist_path = join(disturbance_dir, f"mean__{selected_dist}__{selected_model}.tif")
  selected_mask_path = join(masks_dir, f"mask_forest_{forest_mask_year}.tif")

  # Convert to arrays
  selected_baseline = gdal.Open(selected_baseline_path)
  selected_baseline_array = selected_baseline.ReadAsArray()
  selected_baseline = None
  selected_dist = gdal.Open(selected_dist_path)
  selected_dist_array = selected_dist.ReadAsArray()
  selected_dist = None
  selected_mask = gdal.Open(selected_mask_path)
  selected_mask_array = selected_mask.ReadAsArray()
  selected_mask = None

  # Create percentage array where the value is not 'nodatavalue' in any of the inputs
  percentage_array = np.full_like(selected_baseline_array, nodatavalue, dtype=np.float64)
  valid_mask = (selected_mask_array!=nodatavalue) & (selected_baseline_array!=nodatavalue) & (selected_dist_array!=nodatavalue)
  zero_baseline = valid_mask & (selected_baseline_array==0)
  nonzero_baseline = valid_mask & (selected_baseline_array!=0)
  percentage_array[zero_baseline] = 0
  percentage_array[nonzero_baseline] = (selected_dist_array[nonzero_baseline] / selected_baseline_array[nonzero_baseline]) * 100
  if percentage_loss_precision == 0: percentage_array = np.round(percentage_array, percentage_loss_precision).astype(np.int16)
  else: percentage_array = np.round(percentage_array, percentage_loss_precision)
  export_array_as_tif(percentage_array, percentage_path, template = selected_baseline_path)
  print(f"{percentage_filename} has been exported.")

else: print(f"{percentage_filename} already exists.")

## Quantiles (relative intactness)

In [None]:
# Use additional polygons for masking relative intactness quantiles.
# The polygon files listed here are never offered as masks.
polygons_to_exclude = ['template.gpkg', 'project_area_buffered_bbox.gpkg']

# Select baseline / disturbance pairs to measure relative intactness.
# Every pair is a sub-directory of intactness_dir, so plain files (e.g. a
# leftover temporary mask raster) are skipped. The loop variable is not called
# 'dir' because in a notebook that would replace the builtin for all later cells.
print("baseline_disturbance_pairs = [")
for pair_name in os.listdir(intactness_dir):
  if os.path.isdir(join(intactness_dir, pair_name)):
    print(f"'{pair_name}',")
print("]\n")

# Select polygons to mask and calculate quantiles.
# Generated '*inverse*' polygons and the excluded files are not offered.
print("mask_polygons = [")
for polygon in os.listdir(polygons_dir):
  if polygon not in polygons_to_exclude and 'inverse' not in polygon:
    print(f"'{polygon}',")
# None is printed as the final option; the next cell treats it as "no polygon mask"
print(None)
print("]")

In [None]:
# Baseline / disturbance pairs to rate (names as printed by the previous cell)
baseline_disturbance_pairs = [
  '2024_oldgrowth_all_land__2024_disturbance_since_oldgrowth',
]

# Polygons used to mask the rating; uncomment an entry to enable it.
# The None entry is handled by the code below as "no polygon mask".
mask_polygons = [
  # 'project_area.gpkg',
  # 'peninsular_malaysia.gpkg',
  # 'lu_yong.gpkg',
  # 'lu_yong_lipis.gpkg',
  # 'lu_berkelah_jerantut.gpkg',
  # 'lu_tekai_tembeling.gpkg',
  # 'lu_ais.gpkg',
  # 'lu_pa_taman_negara_krau.gpkg',
  # 'lu_tekam.gpkg',
  # 'lu_berkelah_temerloh.gpkg',
  # 'lu_remen_chereh.gpkg',
  # 'lu_berkelah_kuantan.gpkg',
  'forest_reserves.gpkg',
  'gedi_area.gpkg',
  # None
]

# When True, nodata pixels inside the mask receive a score of 0 (non-forest).
# When False, non-forest and masked-out areas both remain nodatavalue.
convert_non_forest_nodatavalue_to_0 = True

# Highest intactness rating (e.g. 10 gives a 1 - 10 scale)
top_score = 10

# The top score is reserved for 0% change, so one fewer quantile is calculated
num_quantiles = top_score - 1

quantile_summary = f"Calculating {num_quantiles} quantiles for negative percentage change (scores 1-{num_quantiles}), with score {top_score} reserved for 0% change.\n"
print(quantile_summary)

# Create polygon mask array using template tif
template = gdal.Open(template_tif_path)
template_array = template.ReadAsArray()
template = None
# NOTE(review): polygon_mask_array is not referenced by the visible code of this
# cell; it is kept in case another cell relies on it.
polygon_mask_array = np.ones_like(template_array, dtype=bool)

# The template polygon is the same for every mask polygon
template_polygon_path = join(polygons_dir, "template.gpkg")

# One boolean array per mask polygon: True inside the polygon, False outside
polygon_masks = {}
for mask_polygon in mask_polygons:
  # None means "no polygon mask", so there is nothing to build
  if mask_polygon is None: continue

  # Strip the '.gpkg' extension (the same slice is used when naming outputs later in this cell)
  polygon_stem = mask_polygon[:-5]

  # Create an inverse polygon (template area minus the mask polygon) for masking.
  # An existing inverse polygon is reused as-is; delete the file to rebuild it.
  inverse_polygon_path = join(polygons_dir, f"{polygon_stem}_inverse.gpkg")
  if not exists(inverse_polygon_path):
    polygon_path = join(polygons_dir, mask_polygon)
    template_polygon = gpd.read_file(template_polygon_path)
    polygon_read = gpd.read_file(polygon_path)
    polygon_crs = polygon_read.crs.to_epsg()
    # GeoSeries.difference against another GeoSeries works row by row, which
    # would subtract only the first feature of a multi-feature polygon file.
    # Merge all features first so the whole polygon layer is removed.
    polygon_geometries = polygon_read['geometry']
    if hasattr(polygon_geometries, "union_all"): polygon_union = polygon_geometries.union_all()
    else: polygon_union = polygon_geometries.unary_union # older geopandas versions
    inverse_polygon = template_polygon['geometry'].difference(polygon_union).iloc[0]
    inverse_polygon_gdf = gpd.GeoDataFrame({'geometry': [inverse_polygon]}, crs=f"EPSG:{polygon_crs}")
    inverse_polygon_gdf.to_file(inverse_polygon_path, driver="GPKG")
    print(f"An inverse masking polygon for {mask_polygon} has been created in {polygons_dir}.")
  else: print(f"An inverse masking polygon for {mask_polygon} already exists.")

  # Create and store individual mask for this polygon: burn nodatavalue into a
  # temporary copy of the template wherever the inverse polygon covers it
  print(f"Creating polygon mask for {mask_polygon}.")
  temp_mask_path = join(intactness_dir, f"temp_mask_{polygon_stem}.tif")
  copyfile(template_tif_path, temp_mask_path)
  burn_polygon_to_raster(temp_mask_path, inverse_polygon_path, fixed_value=nodatavalue, all_touched=False)
  # Release the dataset handle explicitly before the temporary file is deleted
  temp_mask = gdal.Open(temp_mask_path)
  temp_mask_array = temp_mask.ReadAsArray()
  temp_mask = None
  individual_mask = np.ones_like(template_array, dtype=bool)
  individual_mask[temp_mask_array == nodatavalue] = False
  polygon_masks[mask_polygon] = individual_mask
  os.remove(temp_mask_path)

# For every baseline / disturbance pair and every mask polygon (None = no mask),
# convert the percentage change raster into an intactness rating and export the
# rating raster, a .csv with the percentage range of each score and a .png histogram.
# Scores: 1 to num_quantiles for negative change (most negative = 1), top_score
# for 0% change, and optionally 0 for pixels that were nodata in the percentage raster.
for base_dist_name in baseline_disturbance_pairs:
  intactness_baseline_dist_dir = join(intactness_dir, base_dist_name)
  percentage_filename = f"percentage_change__{base_dist_name}__{selected_model}"
  percentage_path = join(intactness_baseline_dist_dir, f"{percentage_filename}.tif")

  for mask_polygon in mask_polygons:

    if mask_polygon is not None:
      # Export a copy of the percentage raster masked to this polygon
      percentage_masked_filename = f"{percentage_filename}__masked_{mask_polygon[:-5]}.tif"
      percentage_masked_path = join(intactness_baseline_dist_dir, percentage_masked_filename)
      if not exists(percentage_masked_path):
        print(f"Copying {percentage_filename} for masking...")
        copyfile(percentage_path, percentage_masked_path)
        print(f"Masking {percentage_filename} with {mask_polygon}...")
        inverse_polygon_path = join(polygons_dir, f"{mask_polygon[:-5]}_inverse.gpkg")
        burn_polygon_to_raster(percentage_masked_path, inverse_polygon_path, fixed_value=nodatavalue, all_touched=False)
        # Recompress the prediction after burning the polygon masks
        percentage_masked = gdal.Open(percentage_masked_path)
        percentage_masked_array = percentage_masked.ReadAsArray()
        percentage_masked = None
        export_array_as_tif(percentage_masked_array, percentage_masked_path, compress = True)
        print(f"{percentage_filename} masked.")
      else: print(f"{percentage_masked_filename} already exists.")

    # Define paths and arrays
    if mask_polygon is None: relative_intactness_name = f'intactness__{top_score}_quantiles__{base_dist_name}__{selected_model}'
    else: relative_intactness_name = f'intactness__{mask_polygon[:-5]}_{top_score}_quantiles__{base_dist_name}__{selected_model}'
    relative_intactness_path = join(intactness_baseline_dist_dir, f'{relative_intactness_name}.tif')
    if not exists(relative_intactness_path):
      # Always track originally nodata pixels from the original percentage raster
      original_percentage = gdal.Open(percentage_path)
      original_percentage_array = original_percentage.ReadAsArray()
      original_percentage = None
      originally_nodata_mask = original_percentage_array == nodatavalue

      # Apply polygon masking to a copy of the percentage array using the pre-created mask
      percentage_array = original_percentage_array.copy()
      if mask_polygon is not None:
        percentage_array[~polygon_masks[mask_polygon]] = nodatavalue

      # Capture original data for histogram before conversions
      original_valid_elements = percentage_array[percentage_array != nodatavalue]

      relative_intactness_array = np.full_like(percentage_array, nodatavalue, dtype=np.int16)

      # Set all values above 0 to 0, so any gain is rated the same as no change
      # NOTE(review): assumes nodatavalue is not positive, otherwise nodata pixels are clamped too - confirm
      percentage_array[percentage_array > 0] = 0

      # Valid (not nodatavalue) elements after the conversion, used for the zero count below
      valid_elements = percentage_array[percentage_array != nodatavalue]

      # Separate zero and non-zero values, excluding originally nodata pixels from quantile calculation
      zero_elements = percentage_array == 0
      quantile_mask = (percentage_array != nodatavalue) & (~originally_nodata_mask) & (percentage_array != 0)
      non_zero_valid_elements = percentage_array[quantile_mask]

      # Calculate the interior quantile cut points for non-zero valid elements only
      if len(non_zero_valid_elements) > 0:
        quantiles = np.percentile(non_zero_valid_elements, np.linspace(0, 100, num_quantiles + 1)[1:-1])
      else: quantiles = []

      # Work out the (lower, upper] range of every score once; the same ranges
      # drive both the raster scoring and the .csv below. The lowest score is
      # open below and the highest open above.
      score_bounds = []
      for i in range(1, num_quantiles + 1):
        lower_bound = quantiles[i-2] if i > 1 and len(quantiles) >= i-1 else float('-inf')
        upper_bound = quantiles[i-1] if len(quantiles) >= i else float('inf')
        score_bounds.append((i, lower_bound, upper_bound))

      # Assign scores 1 to num_quantiles for non-zero values
      for score, lower_bound, upper_bound in score_bounds:
        relative_intactness_array[
            (percentage_array > lower_bound) & (percentage_array <= upper_bound) &
            (percentage_array != 0) & (percentage_array != nodatavalue)] = score

      # Set all zero values to top score
      relative_intactness_array[zero_elements] = top_score

      # Set areas outside polygon to nodatavalue using pre-created mask
      if mask_polygon is not None:
        relative_intactness_array[~polygon_masks[mask_polygon]] = nodatavalue

      # Convert non-forest areas (nodata in the percentage raster) inside polygon to 0
      if convert_non_forest_nodatavalue_to_0:
        if mask_polygon is None:
          non_forest_inside_polygon = originally_nodata_mask
        else:
          non_forest_inside_polygon = originally_nodata_mask & polygon_masks[mask_polygon]
        relative_intactness_array[non_forest_inside_polygon] = 0

      export_array_as_tif(relative_intactness_array, relative_intactness_path)

      # Prepare data for CSV: Collect lower and upper bounds for each category
      ranges_data = {'Score': [], 'Lower_Bound': [], 'Upper_Bound': []}

      # Add ranges for scores 1 to num_quantiles (non-zero values)
      for score, lower_bound, upper_bound in score_bounds:
        # The highest quantile score ends just below 0, because 0 belongs to the top score
        if score == num_quantiles: upper_bound = -0.000000001
        ranges_data['Score'].append(score)
        ranges_data['Lower_Bound'].append(lower_bound)
        ranges_data['Upper_Bound'].append(upper_bound)

      # Add entry for top score (value of 0)
      ranges_data['Score'].append(top_score)
      ranges_data['Lower_Bound'].append(0)
      ranges_data['Upper_Bound'].append(0)

      # Create DataFrame and save to CSV
      relative_intactness_df = pd.DataFrame(ranges_data)
      relative_intactness_csv_path = join(intactness_baseline_dist_dir, f'{relative_intactness_name}.csv')
      relative_intactness_df.to_csv(relative_intactness_csv_path, index=False)

      # Generate and save histogram of the data before conversion as .png
      histogram_path = join(intactness_baseline_dist_dir, f'{relative_intactness_name}.png')
      plt.figure()
      counts, bins, patches = plt.hist(original_valid_elements.flatten(), bins='auto')

      # Count how many values became 0 after conversions
      zero_count_after_conversion = np.sum(valid_elements == 0)

      # Find the bin that holds the value 0. Histogram bins are half-open
      # [left, right) except the last one, so a 0 lying exactly on an interior
      # edge is counted in the bin to its right, not the one to its left.
      if bins[0] <= 0 <= bins[-1]:
        zero_idx = min(int(np.searchsorted(bins, 0, side='right')) - 1, len(counts) - 1)
      else: zero_idx = None

      # Blank the zero bin and report its frequency as text instead, so the
      # 0 values do not dominate the plot
      if zero_idx is not None:
        counts[zero_idx] = 0
        plt.clf()
        plt.bar(bins[:-1], counts, width=np.diff(bins), align='edge')
        x_center = (bins.min() + bins.max()) / 2
        y_max = max(counts)
        plt.text(x_center, y_max * 0.9,
                f'0 value frequency = {zero_count_after_conversion:,}',
                ha='center', va='center', fontweight='bold',
                bbox=dict(boxstyle='round,pad=0.5', facecolor='white', alpha=0.9))
      plt.title(f'{relative_intactness_name} Histogram')
      plt.xlabel('Value')
      plt.ylabel('Frequency')
      plt.gca().yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: format(int(x), ',')))
      plt.tight_layout()
      plt.savefig(histogram_path)
      plt.close()

    else: print(f"{relative_intactness_name} already exists.")

# Disconnect runtime

In [None]:
# Unassign the Colab runtime when this cell is reached.
# Useful for stopping background execution
# NOTE(review): presumably this ends the session and discards in-memory state - confirm against the google.colab docs
runtime.unassign()