<a href="https://colab.research.google.com/github/joekelly211/masfi/blob/main/8_differences.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports, directories and global functions

In [None]:
# Base directory for every input and output of this notebook.
# - Personal drive: path must start with '/content/drive/MyDrive/'
# - Shared drive: path must start with '/gdrive/Shareddrives/' (must be created first)

base_dir = "/gdrive/Shareddrives/masfi"
# base_dir = '/content/drive/MyDrive/masfi'

# Mount Google Drive at the mount point implied by base_dir
from google.colab import drive
import os
import sys

_on_shared_drive = base_dir.startswith('/gdrive/Shareddrives/')
_on_personal_drive = base_dir.startswith('/content/drive/MyDrive/')
if _on_shared_drive:
    drive.mount('/gdrive', force_remount=True)
elif _on_personal_drive:
    drive.mount('/content/drive', force_remount=True)
    # Only the personal-drive branch creates base_dir
    os.makedirs(base_dir, exist_ok=True)
else:
    print("Create a base_dir beginning with '/gdrive/Shareddrives/' or '/content/drive/MyDrive/'.")

# Add the resolved base_dir to the module search path (once)
_path_to_add = os.path.realpath(base_dir)
if _path_to_add not in sys.path:
    sys.path.append(_path_to_add)

In [None]:
# Capture outputs
%%capture
# Installs and upgrades
!pip install geopandas
!pip install rasterio
!apt-get install -y gdal-bin

In [None]:
# Imports
import geopandas as gpd
from google.colab import runtime
from os import makedirs
from os.path import join, exists
from osgeo import gdal, ogr
gdal.UseExceptions()
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from rasterio.features import rasterize
import re
from shutil import copyfile
import warnings

In [None]:
# Directories referenced by this notebook, all located under base_dir
areas_dir = join(base_dir, "1_areas")
polygons_dir = join(areas_dir, "polygons")
scenarios_dir = join(base_dir, "6_scenarios")
uncertainty_dir = join(base_dir, "7_uncertainty")

# Output directory of this notebook; the only one created here
differences_dir = join(base_dir, "8_differences")
makedirs(differences_dir, exist_ok=True)

In [None]:
# Global function: export an array as a .tif
template_tif_path = join(areas_dir, "template.tif")
nodatavalue = -11111
compress = True
def export_array_as_tif(input_array, output_tif, template=template_tif_path, nodatavalue=nodatavalue, compress=compress, dtype=gdal.GDT_Float32):
    """Write input_array to output_tif as a single-band GeoTIFF laid out like template.

    Width, height, geotransform and projection are copied from the template
    raster (size is read from its band 1). An int16 input array is always
    written as GDT_Int16, overriding the dtype argument. When compress is
    truthy the file is created with ZSTD level 1 compression.
    """
    template_ds = gdal.Open(template)
    template_band = template_ds.GetRasterBand(1)
    geotransform = template_ds.GetGeoTransform()
    projection = template_ds.GetProjection()
    # ZSTD level 1: good speed / size ratio
    creation_options = ['COMPRESS=ZSTD', 'ZSTD_LEVEL=1'] if compress else []
    if input_array.dtype == 'int16':
        dtype = gdal.GDT_Int16
    output_ds = gdal.GetDriverByName("GTiff").Create(
        output_tif, template_band.XSize, template_band.YSize, 1, dtype,
        options=creation_options)
    output_band = output_ds.GetRasterBand(1)
    output_band.WriteArray(input_array)
    output_band.SetNoDataValue(nodatavalue)
    output_ds.SetGeoTransform(geotransform)
    output_ds.SetProjection(projection)
    # Drop the band references first, then the datasets, so GDAL closes the files
    output_band = template_band = None
    output_ds = template_ds = None

# Global function: burn a polygon to raster
def burn_polygon_to_raster(raster_path, polygon_path, fixed=True, fixed_value=1, column_name=None, all_touched=True):
    """Burn the first layer of polygon_path into band 1 of raster_path, in place.

    With fixed=True every feature is burned with fixed_value. Otherwise the
    burn value is read per feature from the attribute column_name, or from the
    layer's first field when column_name is falsy. all_touched adds the
    ALL_TOUCHED=TRUE rasterize option.

    Raises ValueError when either open call returns a falsy handle.
    """
    raster = vector = None
    try:
        raster = gdal.Open(raster_path, gdal.GA_Update)
        vector = ogr.Open(polygon_path)
        if not (raster and vector):
            raise ValueError("Cannot open input files")
        layer = vector.GetLayer()
        rasterize_options = []
        if all_touched:
            rasterize_options.append("ALL_TOUCHED=TRUE")
        if not fixed:
            # Attribute-driven burn: fall back to the first field of the layer
            attribute = column_name if column_name else layer.GetLayerDefn().GetFieldDefn(0).GetName()
            rasterize_options.append(f"ATTRIBUTE={attribute}")
            gdal.RasterizeLayer(raster, [1], layer, options=rasterize_options)
        else:
            gdal.RasterizeLayer(raster, [1], layer, burn_values=[fixed_value], options=rasterize_options)
    finally:
        # Flush pending writes (if the raster was opened) and release both handles
        if raster:
            raster.FlushCache()
        raster = vector = None

# Global function: read raster as array
def read_raster_as_array(path):
    """Open the raster at path with GDAL and return the result of ReadAsArray()."""
    dataset = gdal.Open(path)
    data = dataset.ReadAsArray()
    # Release the dataset handle before returning
    del dataset
    return data

# Global function: round array
def round_array(arr, precision):
    """Round arr to precision decimals; with precision == 0 the result is cast to int16."""
    rounded = np.round(arr, decimals=precision)
    if precision == 0:
        return rounded.astype(np.int16)
    return rounded

# Select source and model

In [None]:
# Select whether to source predictions from scenarios_dir or uncertainty_dir.
# If available, uncertainty_dir should be selected so that uncertainty can
# be propagated and scenario 'mean' iteration values used.

source_dir = uncertainty_dir
# source_dir = scenarios_dir

# If uncertainty is selected but its directory does not exist, fall back to the
# scenarios directory BEFORE announcing the selection, so the message below
# always names the directory that is actually used.
if source_dir == uncertainty_dir and not exists(uncertainty_dir):
    print("The uncertainty directory does not yet exist. Defaulting to scenarios directory.")
    source_dir = scenarios_dir

print(f"{source_dir.split('/')[-1]} has been selected as the source directory for predictions")
print("to calculate disturbance, intactness and restoration.\n")

# Label built from the text after the last underscore of the path,
# e.g. 'uncertainty_dir' for a path ending in '7_uncertainty'
source_dir_name = f"{source_dir.split('_')[-1]}_dir"

# Select the model: print every candidate as a ready-to-paste assignment,
# sorted so the listing is stable between runs
for subdir in sorted(os.listdir(source_dir)):
    if 'scenario_masks' not in subdir:
        print(f"selected_model = '{subdir}'")

In [None]:
# Name of the model subdirectory (inside source_dir) to process
selected_model = 'agbd_251203_161707'

selected_model_dir = join(source_dir, selected_model)
# The predictions folder name depends on which source directory is in use
if source_dir == scenarios_dir:
    predictions_dir = join(selected_model_dir, 'scenario_predictions')
elif source_dir == uncertainty_dir:
    predictions_dir = join(selected_model_dir, 'uncertainty_predictions')
    scenario_iterations_dir = join(selected_model_dir, 'scenario_iterations')

# Scenario masks are always looked up under scenarios_dir, whichever source is used
scenario_masks_dir = join(scenarios_dir, selected_model, "scenario_masks")

# Check predictions exist to calculate differences
if len(os.listdir(predictions_dir)) < 2:
    print(f"At least 2 predictions must exist in {source_dir} to calculate differences.")
else:
    model_differences_dir = join(differences_dir, f"{selected_model}_{source_dir_name}")
    disturbance_dir = join(model_differences_dir, 'disturbance')
    intactness_dir = join(model_differences_dir, 'intactness')
    restoration_dir = join(model_differences_dir, 'restoration')
    # Create every output directory, including restoration_dir, which was
    # previously defined but never created. exist_ok makes this idempotent.
    for _dir_to_create in (model_differences_dir, disturbance_dir, intactness_dir, restoration_dir):
        makedirs(_dir_to_create, exist_ok=True)

# Disturbance and restoration

## Define type and period

In [None]:
# Forest disturbance and restoration measured as absolute AGBD change.
# This block builds dictionaries of options based on available files.
# Disturbance dictionaries output a tuple (deforestation, degradation) or degradation-only strings.
# Restoration dictionary outputs a tuple (reforestation, recovery) or recovery-only strings.
# Disturbance: both deforestation and degradation.
# - Deforestation: pixel changes from forest (data) to non-forest (nodata)
# - Degradation: pixel remains forest but has potential AGBD loss
# Restoration: both reforestation and recovery.
# - Reforestation: pixel changes from non-forest (nodata) to forest (data)
# - Recovery: pixel was already forest but can have potential AGBD gains

# Dictionaries:
# 1. disturbance_since_dictionary: actual scenario vs no_disturbance_since alternate scenarios
# 2. degradation_interval_dictionary: multi-year intervals between alternate scenarios
# 3. degradation_single_year_dictionary: consecutive alternate scenarios for single-year effects
# 4. disturbance_area_dictionary: polygon-based alternate scenarios
# 5. restoration_potential_dictionary:
# - Restoration and reforestation potential uses no_disturbance_since alternate scenarios
# - Recovery potential with edge effects uses recovery alternate scenarios,
# accounting for actual scenario forest edge effects.

# Scenarios directory: the scenario name is the text before the first "__" of each file name
if source_dir == scenarios_dir:
    scenarios = {file.split("__")[0] for file in os.listdir(predictions_dir)}
# Uncertainty directory: file names are read as "<stat>__<scenario>__..."; a scenario
# is kept only when both its 'uncertainty' and 'mean' statistics are present
if source_dir == uncertainty_dir:
    prediction_stats = {}
    for file in os.listdir(predictions_dir):
        parts = file.split("__")
        if len(parts) < 2:
            continue
        prediction_stats.setdefault(parts[1], set()).add(parts[0])
    scenarios = {scenario for scenario, stats in prediction_stats.items()
                 if {'uncertainty', 'mean'} <= stats}
# Categorise years from scenarios:
# - plain_years: scenarios whose whole name is a number (e.g. '2021')
# - years: plain years, plus the leading year of every no_disturbance_since
#   scenario and the year before each numeric "since" year
years, plain_years = set(), set()
for name in scenarios:
    if name.isdigit():
        plain_years.add(int(name))
        years.add(int(name))
        continue
    if "_no_disturbance_since_" not in name:
        continue
    leading = name.split("_")[0]
    if leading.isdigit():
        years.add(int(leading))
    since = name.split("_since_")[1]
    if since.isdigit():
        years.add(int(since) - 1)
years_sorted = sorted(years)


# 1. Disturbance since dictionary
# Compares actual AGBD against a no_disturbance alternate scenario.
# Keys: (actual_year, alternate_scenario)
# Values: (deforestation_name, degradation_name) for oldgrowth, degradation_name otherwise
# Every entry is also printed in a form that can be pasted back as a dict literal.
print("disturbance_since_dictionary = {\n")
disturbance_since_dictionary = {}
for actual_year in years_sorted:
    if actual_year not in plain_years:
        continue
    actual = str(actual_year)
    # Oldgrowth entry first
    oldgrowth_scenario = f"{actual}_no_disturbance_since_oldgrowth"
    if oldgrowth_scenario in scenarios:
        deforestation_name = f"{actual}_deforestation_since_oldgrowth"
        degradation_name = f"{actual}_degradation_since_oldgrowth"
        print(f"  # Disturbance in {actual} caused by events since oldgrowth")
        print(f"  ('{actual}', '{oldgrowth_scenario}'):")
        print(f"    ('{deforestation_name}',")
        print(f"     '{degradation_name}'),")
        print("")
        disturbance_since_dictionary[(actual, oldgrowth_scenario)] = (deforestation_name, degradation_name)
    # Year-based entries in chronological order: only years before the actual year
    for earlier_year in years_sorted:
        if earlier_year >= actual_year:
            continue
        since = str(earlier_year + 1)
        since_scenario = f"{actual}_no_disturbance_since_{since}"
        if since_scenario not in scenarios:
            continue
        degradation_name = f"{actual}_degradation_since_{since}"
        print(f"  # Disturbance in {actual} caused by events since {since}")
        print(f"  ('{actual}', '{since_scenario}'):")
        print(f"    '{degradation_name}',")
        print("")
        disturbance_since_dictionary[(actual, since_scenario)] = degradation_name
print("}\n")


# 2. Degradation interval dictionary
# Calculates degradation over a multi-year interval from an alternate scenario baseline
# (oldgrowth or earliest available year) to an actual year.
# Single-year intervals are handled by degradation_single_year_dictionary.
# Keys: (alternate_scenario_recent, alternate_scenario_baseline) alternate scenario pairs
# Values: degradation_name
print("degradation_interval_dictionary = {\n")
degradation_interval_dictionary = {}
for target_year in years_sorted:
    if target_year not in plain_years:
        continue
    target = str(target_year)
    # Available no_disturbance alternate scenarios, keyed by their numeric "since" year
    since_scenarios = {}
    for previous_year in years_sorted:
        if previous_year < target_year:
            candidate = f"{target}_no_disturbance_since_{previous_year + 1}"
            if candidate in scenarios:
                since_scenarios[previous_year + 1] = candidate
    since_years = sorted(since_scenarios)
    oldgrowth_scenario = f"{target}_no_disturbance_since_oldgrowth"
    # Collect (comment line, recent scenario, baseline scenario, output name) in print order
    interval_entries = []
    # Oldgrowth alternate scenario pairs first
    if oldgrowth_scenario in scenarios:
        for since_year in since_years:
            last_year = since_year - 1
            interval_entries.append((
                f"  # Degradation in {target} from events since oldgrowth to {last_year}",
                since_scenarios[since_year],
                oldgrowth_scenario,
                f"{target}_degradation_from_oldgrowth_to_{last_year}"))
    # Earliest year alternate scenario pairs (excluding single-year differences)
    if len(since_years) > 1:
        earliest = since_years[0]
        for since_year in since_years[1:]:
            if since_year - earliest > 1:
                last_year = since_year - 1
                interval_entries.append((
                    f"  # Degradation in {target} from events in {earliest} to {last_year}",
                    since_scenarios[since_year],
                    since_scenarios[earliest],
                    f"{target}_degradation_from_{earliest}_to_{last_year}"))
    for comment_line, recent_scenario, baseline_scenario, output_name in interval_entries:
        print(comment_line)
        print(f"  ('{recent_scenario}', '{baseline_scenario}'):")
        print(f"    '{output_name}',")
        print("")
        degradation_interval_dictionary[(recent_scenario, baseline_scenario)] = output_name
print("}\n")


# 3. Degradation single year dictionary
# Calculates the degradation effect of a single year using consecutive alternate scenarios.
# Keys: (alternate_scenario_next, alternate_scenario_current) or (actual, alternate_scenario_current) for same-year
# Values: degradation_name
print("degradation_single_year_dictionary = {\n")
degradation_single_year_dictionary = {}
for target_year in years_sorted:
    if target_year not in plain_years:
        continue
    target = str(target_year)
    # Available no_disturbance alternate scenarios, keyed by their numeric "since" year
    since_scenarios = {}
    for previous_year in years_sorted:
        if previous_year < target_year:
            candidate = f"{target}_no_disturbance_since_{previous_year + 1}"
            if candidate in scenarios:
                since_scenarios[previous_year + 1] = candidate
    if not since_scenarios:
        continue
    since_years = sorted(since_scenarios)
    # Consecutive year pairs: the effect is attributed to the earlier "since" year
    for current_since, next_since in zip(since_years, since_years[1:]):
        if next_since - current_since != 1:
            continue
        output_name = f"{target}_effect_of_degradation_in_{current_since}"
        print(f"  # Degradation in {target} from events in {current_since}")
        print(f"  ('{since_scenarios[next_since]}', '{since_scenarios[current_since]}'):")
        print(f"    '{output_name}',")
        print("")
        degradation_single_year_dictionary[(since_scenarios[next_since], since_scenarios[current_since])] = output_name
    # Same-year case (actual vs no_disturbance_since_year)
    latest_since = since_years[-1]
    if latest_since == target_year:
        output_name = f"{target}_effect_of_degradation_in_{target_year}"
        print(f"  # Degradation in {target} from events in {target_year}")
        print(f"  ('{target}', '{since_scenarios[latest_since]}'):")
        print(f"    '{output_name}',")
        print("")
        degradation_single_year_dictionary[(target, since_scenarios[latest_since])] = output_name
print("}\n")


# 4. Disturbance area dictionary
# Calculates disturbance from polygon-based alternate scenarios.
# Unlike historical alternate scenarios, these represent alternate scenarios of potential
# loss, so AGBD loss from deforestation is calculated.
# Keys: (alternate_scenario, actual_year)
# Values: (deforestation_name, degradation_name)
print("disturbance_area_dictionary = {\n")
disturbance_area_dictionary = {}
# Polygon names are the .gpkg file names in polygons_dir without the extension
polygon_names = set()
if os.path.exists(polygons_dir):
    polygon_names.update(
        file[:-5] for file in os.listdir(polygons_dir) if file.endswith('.gpkg'))
area_based_entries = []
for scenario in scenarios:
    tokens = scenario.split('_')
    # Deforestation scenarios end with "Xm_degradation_buffer"
    has_buffer_suffix = (len(tokens) >= 5
                         and tokens[-2:] == ['degradation', 'buffer']
                         and tokens[-3].endswith('m'))
    # Degradation scenarios end with "degradation_YYYY"
    has_degradation_year_suffix = (len(tokens) >= 3
                                   and tokens[-2] == 'degradation'
                                   and tokens[-1].isdigit()
                                   and len(tokens[-1]) == 4)
    if has_buffer_suffix:
        alt_year = tokens[0]
        polygon_name = '_'.join(tokens[1:-5])
        if tokens[-5] == 'deforestation' and polygon_name in polygon_names:
            area_based_entries.append(
                (scenario, alt_year, f"{alt_year}_deforestation_of_{polygon_name}_{tokens[-4]}"))
    elif has_degradation_year_suffix:
        alt_year = tokens[0]
        polygon_name = '_'.join(tokens[1:-2])
        if polygon_name in polygon_names:
            area_based_entries.append(
                (scenario, alt_year, f"{alt_year}_degradation_of_{polygon_name}_{tokens[-1]}"))
# Sorted so the printed listing and the dictionary order are deterministic
for scenario, alt_year, output_base in sorted(area_based_entries):
    deforestation_name = f"{output_base}_deforestation"
    degradation_name = f"{output_base}_degradation"
    print(f"  # Area-based disturbance: {output_base}")
    print(f"  ('{scenario}', '{alt_year}'):")
    print(f"    ('{deforestation_name}',")
    print(f"     '{degradation_name}'),")
    print("")
    disturbance_area_dictionary[(scenario, alt_year)] = (deforestation_name, degradation_name)

print("}\n")


# 5. Restoration potential dictionary
# Calculates potential AGBD gain from restoration to oldgrowth state.
# Unlike disturbance dictionaries, output is positive (gain not loss).
# Recovery with edge effects: gain in existing forest (oldgrowth_recovery scenario).
# Restoration potential: total gain including cleared areas (no_disturbance_since_oldgrowth).
# Reforestation potential: gain from non-forest to forest transitions only.
# Keys: (restoration_scenario, actual_year)
# Values: (restoration_name, reforestation_name) for no_disturbance_since, recovery_name otherwise
print("restoration_potential_dictionary = {\n")
restoration_potential_dictionary = {}
for actual_year in years_sorted:
    if actual_year not in plain_years:
        continue
    actual = str(actual_year)
    # Recovery potential with edge effects (existing forest only)
    recovery_scenario = f"{actual}_oldgrowth_recovery"
    if recovery_scenario in scenarios:
        recovery_name = f"{actual}_recovery_potential_with_edge_effects"
        print(f"  # Recovery potential with edge effects in {actual}")
        print(f"  ('{recovery_scenario}', '{actual}'):")
        print(f"    '{recovery_name}',")
        print("")
        restoration_potential_dictionary[(recovery_scenario, actual)] = recovery_name
    # Restoration and reforestation potential (including cleared areas)
    oldgrowth_scenario = f"{actual}_no_disturbance_since_oldgrowth"
    if oldgrowth_scenario in scenarios:
        restoration_name = f"{actual}_restoration_potential"
        reforestation_name = f"{actual}_reforestation_potential"
        print(f"  # Restoration and reforestation potential in {actual}")
        print(f"  ('{oldgrowth_scenario}', '{actual}'):")
        print(f"    ('{restoration_name}',")
        print(f"     '{reforestation_name}'),")
        print("")
        restoration_potential_dictionary[(oldgrowth_scenario, actual)] = (restoration_name, reforestation_name)
print("}\n")

In [None]:
# Manually curated selection of "disturbance since" comparisons.
# The layout is the same text that the dictionary-building cell prints, so
# entries can be pasted from that output. Assigning the name here replaces the
# dictionary built programmatically in that cell.
# Keys: (actual scenario, no_disturbance_since alternate scenario)
# Values: (deforestation_name, degradation_name) for oldgrowth entries,
#         a single degradation_name string otherwise
# Entries whose lines are prefixed with '# ' are disabled; uncomment to enable.
disturbance_since_dictionary = {

  # Disturbance in 2021 caused by events since oldgrowth
  ('2021', '2021_no_disturbance_since_oldgrowth'):
    ('2021_deforestation_since_oldgrowth',
     '2021_degradation_since_oldgrowth'),

  # Disturbance in 2021 caused by events since 1993
  ('2021', '2021_no_disturbance_since_1993'):
    '2021_degradation_since_1993',

  # Disturbance in 2024 caused by events since oldgrowth
  ('2024', '2024_no_disturbance_since_oldgrowth'):
    ('2024_deforestation_since_oldgrowth',
     '2024_degradation_since_oldgrowth'),

  # Disturbance in 2024 caused by events since 1996
  ('2024', '2024_no_disturbance_since_1996'):
    '2024_degradation_since_1996',

  # # Disturbance in 2024 caused by events since 1997
  # ('2024', '2024_no_disturbance_since_1997'):
  #   '2024_degradation_since_1997',

  # # Disturbance in 2024 caused by events since 1998
  # ('2024', '2024_no_disturbance_since_1998'):
  #   '2024_degradation_since_1998',

  # # Disturbance in 2024 caused by events since 1999
  # ('2024', '2024_no_disturbance_since_1999'):
  #   '2024_degradation_since_1999',

  # # Disturbance in 2024 caused by events since 2000
  # ('2024', '2024_no_disturbance_since_2000'):
  #   '2024_degradation_since_2000',

  # # Disturbance in 2024 caused by events since 2001
  # ('2024', '2024_no_disturbance_since_2001'):
  #   '2024_degradation_since_2001',

  # # Disturbance in 2024 caused by events since 2002
  # ('2024', '2024_no_disturbance_since_2002'):
  #   '2024_degradation_since_2002',

  # # Disturbance in 2024 caused by events since 2003
  # ('2024', '2024_no_disturbance_since_2003'):
  #   '2024_degradation_since_2003',

  # # Disturbance in 2024 caused by events since 2004
  # ('2024', '2024_no_disturbance_since_2004'):
  #   '2024_degradation_since_2004',

  # # Disturbance in 2024 caused by events since 2005
  # ('2024', '2024_no_disturbance_since_2005'):
  #   '2024_degradation_since_2005',

  # # Disturbance in 2024 caused by events since 2006
  # ('2024', '2024_no_disturbance_since_2006'):
  #   '2024_degradation_since_2006',

  # # Disturbance in 2024 caused by events since 2007
  # ('2024', '2024_no_disturbance_since_2007'):
  #   '2024_degradation_since_2007',

  # # Disturbance in 2024 caused by events since 2008
  # ('2024', '2024_no_disturbance_since_2008'):
  #   '2024_degradation_since_2008',

  # # Disturbance in 2024 caused by events since 2009
  # ('2024', '2024_no_disturbance_since_2009'):
  #   '2024_degradation_since_2009',

  # # Disturbance in 2024 caused by events since 2010
  # ('2024', '2024_no_disturbance_since_2010'):
  #   '2024_degradation_since_2010',

  # # Disturbance in 2024 caused by events since 2011
  # ('2024', '2024_no_disturbance_since_2011'):
  #   '2024_degradation_since_2011',

  # # Disturbance in 2024 caused by events since 2012
  # ('2024', '2024_no_disturbance_since_2012'):
  #   '2024_degradation_since_2012',

  # # Disturbance in 2024 caused by events since 2013
  # ('2024', '2024_no_disturbance_since_2013'):
  #   '2024_degradation_since_2013',

  # # Disturbance in 2024 caused by events since 2014
  # ('2024', '2024_no_disturbance_since_2014'):
  #   '2024_degradation_since_2014',

  # # Disturbance in 2024 caused by events since 2015
  # ('2024', '2024_no_disturbance_since_2015'):
  #   '2024_degradation_since_2015',

  # # Disturbance in 2024 caused by events since 2016
  # ('2024', '2024_no_disturbance_since_2016'):
  #   '2024_degradation_since_2016',

  # # Disturbance in 2024 caused by events since 2017
  # ('2024', '2024_no_disturbance_since_2017'):
  #   '2024_degradation_since_2017',

  # # Disturbance in 2024 caused by events since 2018
  # ('2024', '2024_no_disturbance_since_2018'):
  #   '2024_degradation_since_2018',

  # # Disturbance in 2024 caused by events since 2019
  # ('2024', '2024_no_disturbance_since_2019'):
  #   '2024_degradation_since_2019',

  # # Disturbance in 2024 caused by events since 2020
  # ('2024', '2024_no_disturbance_since_2020'):
  #   '2024_degradation_since_2020',

  # # Disturbance in 2024 caused by events since 2021
  # ('2024', '2024_no_disturbance_since_2021'):
  #   '2024_degradation_since_2021',

  # # Disturbance in 2024 caused by events since 2022
  # ('2024', '2024_no_disturbance_since_2022'):
  #   '2024_degradation_since_2022',

  # # Disturbance in 2024 caused by events since 2023
  # ('2024', '2024_no_disturbance_since_2023'):
  #   '2024_degradation_since_2023',

  # # Disturbance in 2024 caused by events since 2024
  # ('2024', '2024_no_disturbance_since_2024'):
  #   '2024_degradation_since_2024',

}

degradation_interval_dictionary = {

  # Degradation in 2021 from events since oldgrowth to 1992
  ('2021_no_disturbance_since_1993', '2021_no_disturbance_since_oldgrowth'):
    '2021_degradation_from_oldgrowth_to_1992',

  # Degradation in 2024 from events since oldgrowth to 1995
  ('2024_no_disturbance_since_1996', '2024_no_disturbance_since_oldgrowth'):
    '2024_degradation_from_oldgrowth_to_1995',

  # # Degradation in 2024 from events since oldgrowth to 1996
  # ('2024_no_disturbance_since_1997', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_1996',

  # # Degradation in 2024 from events since oldgrowth to 1997
  # ('2024_no_disturbance_since_1998', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_1997',

  # # Degradation in 2024 from events since oldgrowth to 1998
  # ('2024_no_disturbance_since_1999', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_1998',

  # # Degradation in 2024 from events since oldgrowth to 1999
  # ('2024_no_disturbance_since_2000', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_1999',

  # # Degradation in 2024 from events since oldgrowth to 2000
  # ('2024_no_disturbance_since_2001', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2000',

  # # Degradation in 2024 from events since oldgrowth to 2001
  # ('2024_no_disturbance_since_2002', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2001',

  # # Degradation in 2024 from events since oldgrowth to 2002
  # ('2024_no_disturbance_since_2003', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2002',

  # # Degradation in 2024 from events since oldgrowth to 2003
  # ('2024_no_disturbance_since_2004', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2003',

  # # Degradation in 2024 from events since oldgrowth to 2004
  # ('2024_no_disturbance_since_2005', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2004',

  # # Degradation in 2024 from events since oldgrowth to 2005
  # ('2024_no_disturbance_since_2006', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2005',

  # # Degradation in 2024 from events since oldgrowth to 2006
  # ('2024_no_disturbance_since_2007', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2006',

  # # Degradation in 2024 from events since oldgrowth to 2007
  # ('2024_no_disturbance_since_2008', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2007',

  # # Degradation in 2024 from events since oldgrowth to 2008
  # ('2024_no_disturbance_since_2009', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2008',

  # # Degradation in 2024 from events since oldgrowth to 2009
  # ('2024_no_disturbance_since_2010', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2009',

  # # Degradation in 2024 from events since oldgrowth to 2010
  # ('2024_no_disturbance_since_2011', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2010',

  # # Degradation in 2024 from events since oldgrowth to 2011
  # ('2024_no_disturbance_since_2012', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2011',

  # # Degradation in 2024 from events since oldgrowth to 2012
  # ('2024_no_disturbance_since_2013', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2012',

  # # Degradation in 2024 from events since oldgrowth to 2013
  # ('2024_no_disturbance_since_2014', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2013',

  # # Degradation in 2024 from events since oldgrowth to 2014
  # ('2024_no_disturbance_since_2015', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2014',

  # # Degradation in 2024 from events since oldgrowth to 2015
  # ('2024_no_disturbance_since_2016', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2015',

  # # Degradation in 2024 from events since oldgrowth to 2016
  # ('2024_no_disturbance_since_2017', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2016',

  # # Degradation in 2024 from events since oldgrowth to 2017
  # ('2024_no_disturbance_since_2018', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2017',

  # # Degradation in 2024 from events since oldgrowth to 2018
  # ('2024_no_disturbance_since_2019', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2018',

  # # Degradation in 2024 from events since oldgrowth to 2019
  # ('2024_no_disturbance_since_2020', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2019',

  # # Degradation in 2024 from events since oldgrowth to 2020
  # ('2024_no_disturbance_since_2021', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2020',

  # # Degradation in 2024 from events since oldgrowth to 2021
  # ('2024_no_disturbance_since_2022', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2021',

  # # Degradation in 2024 from events since oldgrowth to 2022
  # ('2024_no_disturbance_since_2023', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2022',

  # # Degradation in 2024 from events since oldgrowth to 2023
  # ('2024_no_disturbance_since_2024', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2023',

  # # Degradation in 2024 from events in 1996 to 1997
  # ('2024_no_disturbance_since_1998', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_1997',

  # # Degradation in 2024 from events in 1996 to 1998
  # ('2024_no_disturbance_since_1999', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_1998',

  # # Degradation in 2024 from events in 1996 to 1999
  # ('2024_no_disturbance_since_2000', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_1999',

  # # Degradation in 2024 from events in 1996 to 2000
  # ('2024_no_disturbance_since_2001', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2000',

  # # Degradation in 2024 from events in 1996 to 2001
  # ('2024_no_disturbance_since_2002', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2001',

  # # Degradation in 2024 from events in 1996 to 2002
  # ('2024_no_disturbance_since_2003', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2002',

  # # Degradation in 2024 from events in 1996 to 2003
  # ('2024_no_disturbance_since_2004', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2003',

  # # Degradation in 2024 from events in 1996 to 2004
  # ('2024_no_disturbance_since_2005', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2004',

  # # Degradation in 2024 from events in 1996 to 2005
  # ('2024_no_disturbance_since_2006', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2005',

  # # Degradation in 2024 from events in 1996 to 2006
  # ('2024_no_disturbance_since_2007', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2006',

  # # Degradation in 2024 from events in 1996 to 2007
  # ('2024_no_disturbance_since_2008', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2007',

  # # Degradation in 2024 from events in 1996 to 2008
  # ('2024_no_disturbance_since_2009', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2008',

  # # Degradation in 2024 from events in 1996 to 2009
  # ('2024_no_disturbance_since_2010', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2009',

  # # Degradation in 2024 from events in 1996 to 2010
  # ('2024_no_disturbance_since_2011', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2010',

  # # Degradation in 2024 from events in 1996 to 2011
  # ('2024_no_disturbance_since_2012', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2011',

  # # Degradation in 2024 from events in 1996 to 2012
  # ('2024_no_disturbance_since_2013', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2012',

  # # Degradation in 2024 from events in 1996 to 2013
  # ('2024_no_disturbance_since_2014', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2013',

  # # Degradation in 2024 from events in 1996 to 2014
  # ('2024_no_disturbance_since_2015', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2014',

  # # Degradation in 2024 from events in 1996 to 2015
  # ('2024_no_disturbance_since_2016', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2015',

  # # Degradation in 2024 from events in 1996 to 2016
  # ('2024_no_disturbance_since_2017', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2016',

  # # Degradation in 2024 from events in 1996 to 2017
  # ('2024_no_disturbance_since_2018', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2017',

  # # Degradation in 2024 from events in 1996 to 2018
  # ('2024_no_disturbance_since_2019', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2018',

  # # Degradation in 2024 from events in 1996 to 2019
  # ('2024_no_disturbance_since_2020', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2019',

  # # Degradation in 2024 from events in 1996 to 2020
  # ('2024_no_disturbance_since_2021', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2020',

  # # Degradation in 2024 from events in 1996 to 2021
  # ('2024_no_disturbance_since_2022', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2021',

  # # Degradation in 2024 from events in 1996 to 2022
  # ('2024_no_disturbance_since_2023', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2022',

  # # Degradation in 2024 from events in 1996 to 2023
  # ('2024_no_disturbance_since_2024', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2023',

}

# Single-year degradation pairs for the 2024 prediction year.
# For each event year from 1996 to 2023 the key pairs the scenario named
# '2024_no_disturbance_since_<event year + 1>' (recent) with the scenario named
# '2024_no_disturbance_since_<event year>' (baseline), and the value is the
# output name '2024_effect_of_degradation_in_<event year>'.
# Narrow the range below to process fewer event years.
degradation_single_year_dictionary = {
    (f'2024_no_disturbance_since_{event_year + 1}',
     f'2024_no_disturbance_since_{event_year}'):
        f'2024_effect_of_degradation_in_{event_year}'
    for event_year in range(1996, 2024)
}

# Degradation in 2024 from events in 2024: the recent side is the plain '2024'
# scenario, so this pair does not follow the pattern above.
degradation_single_year_dictionary[('2024', '2024_no_disturbance_since_2024')] = (
    '2024_effect_of_degradation_in_2024')

# Area-based disturbance pairs. Each key is a (recent scenario, baseline scenario)
# pair; a tuple value names the deforestation output first and the degradation
# output second.
disturbance_area_dictionary = {}

# Area-based disturbance: 2024_deforestation_of_road_mat_daling_2023
disturbance_area_dictionary[
    ('2024_road_mat_daling_deforestation_2023_30m_degradation_buffer', '2024')
] = (
    '2024_deforestation_of_road_mat_daling_2023_deforestation',
    '2024_deforestation_of_road_mat_daling_2023_degradation',
)

# Restoration pairs, built per prediction year. Each key is
# (alternate scenario, actual scenario). For every year there are two entries:
# - '<year>_oldgrowth_recovery' -> recovery potential with edge effects (single output)
# - '<year>_no_disturbance_since_oldgrowth' -> restoration potential and
#   reforestation potential (tuple of two outputs, restoration first)
restoration_potential_dictionary = {
    scenario_pair: output_names
    for year in ('2021', '2024')
    for scenario_pair, output_names in (
        ((f'{year}_oldgrowth_recovery', year),
         f'{year}_recovery_potential_with_edge_effects'),
        ((f'{year}_no_disturbance_since_oldgrowth', year),
         (f'{year}_restoration_potential',
          f'{year}_reforestation_potential')),
    )
}

## Calculate change

In [None]:
# Precision settings for output rasters
# Each value is passed as the second argument to round_array before export
# (presumably a number of decimal places - confirm against round_array).
mean_precision = 2  # applied to the mean / difference rasters
ci_precision = 2  # applied to the 95% CI rasters
uncertainty_precision = 2  # applied to the percentage uncertainty rasters

# Disturbance and restoration calculations quantify AGBD change as absolute differences
# between a baseline alternate scenario and an actual or more recent alternate scenario.
# Positive differences indicate restoration (recovery and reforestation) and negative
# indicate disturbance (degradation and deforestation).

# Disturbance
# - Deforestation:
# -- Forest to non-forest transition (baseline has data, actual / recent alternate has nodata).
# -- The actual / recent nodata is replaced with 0 where baseline has data to calculate deforestation magnitude.
# - Degradation:
# -- AGBD loss without changing from forest (both scenarios in the pair have data).
def calculate_disturbance(recent_mean, baseline_mean, nodata, calculate_deforestation,
                          recent_ci=None, baseline_ci=None):
    """Split the AGBD difference (recent - baseline) into deforestation and degradation.

    Cells equal to `nodata` are treated as missing. Where the recent array is
    missing but the baseline has data, the recent value (and CI) is replaced
    with 0 so the full baseline magnitude counts as deforestation.

    Returns, without CIs: `degradation`, or `(deforestation, degradation)` when
    `calculate_deforestation` is true. With `recent_ci` supplied each of those
    becomes a `(mean, ci_halfwidth, percentage_uncertainty)` tuple.
    """
    no_recent = recent_mean == nodata
    no_baseline = baseline_mean == nodata
    # Forest-to-nonforest cells: baseline has data, recent does not
    forest_lost = no_recent & ~no_baseline

    def mask_degradation(values):
        # Degradation is only defined where both scenarios have data
        return np.where(no_recent | no_baseline, nodata, values)

    def mask_deforestation(values):
        # Nodata without a baseline, the value where recent is missing, else 0
        return np.where(no_baseline, nodata, np.where(no_recent, values, 0))

    recent_mean_filled = np.where(forest_lost, 0, recent_mean)
    change = recent_mean_filled - baseline_mean
    degradation = mask_degradation(change)
    deforestation = mask_deforestation(change) if calculate_deforestation else None

    # Mean-only mode
    if recent_ci is None:
        return (deforestation, degradation) if calculate_deforestation else degradation

    # Propagate uncertainty using the filled recent mean and CI
    relative_uncertainty, ci_halfwidth = propagate_uncertainty(
        recent_mean_filled, np.where(forest_lost, 0, recent_ci),
        baseline_mean, baseline_ci)
    degradation_stats = (degradation,
                         mask_degradation(ci_halfwidth),
                         mask_degradation(relative_uncertainty))
    if not calculate_deforestation:
        return degradation_stats
    deforestation_stats = (deforestation,
                           mask_deforestation(ci_halfwidth),
                           mask_deforestation(relative_uncertainty))
    return deforestation_stats, degradation_stats

# Restoration
# - Reforestation:
# -- Non-forest to forest transition (actual has nodata, alternate scenario has data).
# -- The actual nodata is replaced with 0 where the alternate has data to calculate reforestation magnitude.
# - Recovery:
# -- AGBD gain within existing forest (both scenarios in the pair have data).
# - The restoration output is the total gain wherever the alternate scenario has data,
# -- i.e. recovery and reforestation combined.
def calculate_restoration(scenario_mean, actual_mean, nodata, calculate_reforestation,
                          scenario_ci=None, actual_ci=None):
    """Compute the AGBD gain (scenario - actual) and, optionally, its reforestation part.

    Cells equal to `nodata` are treated as missing. Where the actual array is
    missing but the scenario has data, the actual value (and CI) is replaced
    with 0 so the full scenario magnitude counts as gain.

    Returns, without CIs: `restoration`, or `(restoration, reforestation)` when
    `calculate_reforestation` is true. With `scenario_ci` supplied each of those
    becomes a `(mean, ci_halfwidth, percentage_uncertainty)` tuple.
    """
    no_scenario = scenario_mean == nodata
    no_actual = actual_mean == nodata
    # Nonforest-to-forest cells: scenario has data, actual does not
    forest_gained = no_actual & ~no_scenario

    def mask_restoration(values):
        # Total gain is defined wherever the alternate scenario has data
        return np.where(no_scenario, nodata, values)

    def mask_reforestation(values):
        # Nodata without a scenario, the value where actual is missing, else 0
        return np.where(no_scenario, nodata, np.where(no_actual, values, 0))

    actual_mean_filled = np.where(forest_gained, 0, actual_mean)
    gain = scenario_mean - actual_mean_filled
    restoration = mask_restoration(gain)
    reforestation = mask_reforestation(gain) if calculate_reforestation else None

    # Mean-only mode
    if scenario_ci is None:
        return (restoration, reforestation) if calculate_reforestation else restoration

    # Propagate uncertainty using the filled actual mean and CI
    relative_uncertainty, ci_halfwidth = propagate_uncertainty(
        scenario_mean, scenario_ci,
        actual_mean_filled, np.where(forest_gained, 0, actual_ci),
        is_restoration=True)
    restoration_stats = (restoration,
                         mask_restoration(ci_halfwidth),
                         mask_restoration(relative_uncertainty))
    if not calculate_reforestation:
        return restoration_stats
    reforestation_stats = (reforestation,
                           mask_reforestation(ci_halfwidth),
                           mask_reforestation(relative_uncertainty))
    return restoration_stats, reforestation_stats

# Uncertainty (if used as source_dir) is propagated for AGBD disturbance / restoration
# magnitude, not forest state transitions. The Landsat-derived classification products
# (e.g. TMF) used for masking non-forest have their own measures of uncertainty.
# For absolute differences (Z = X - Y) IPCC Approach 1 (Equation 3.2) applies:
# CI_z = sqrt(CI_x**2 + CI_y**2)
# This holds because variances add for differences of normals, and CIs scale linearly
# with standard deviation under normality, preserving the CI interval symmetry.
# Output CI values represent half-widths of symmetric 95% confidence intervals.
# The limitation is that this assumes independence between scenario uncertainties,
# when scenarios predicted with identical models and features have some covariance.
# The results are therefore conservative (overestimated) uncertainty bounds.
# Values of 0 or the wrong sign (+ for disturbance, - for restoration) do not have
# uncertainty propagated. 0 indicates neither disturbance nor restoration took place,
# i.e. there is no magnitude for uncertainty, while the wrong sign is a rare indication
# of model instability given the constraints of the scenario pairs.

# Percentage uncertainty is calculated as CI_z / |X - Y|. The denominator differs
# from that published in Liang et al. (2023), who report |X + Y| (which may be a typo).

# References:
# IPCC (2006) Guidelines Vol.1 Ch.3: Uncertainties, Section 3.2.3.1
# IPCC (2019) Refinement Vol.1 Ch.3: Uncertainties, Section 3.2.3.1
# Liang et al. (2023) Remote Sensing of Environment 284:113367

def propagate_uncertainty(mean1, ci1, mean2, ci2, is_restoration=False):
    """Propagate CI half-widths through the difference mean1 - mean2.

    Returns `(percentage_uncertainty, ci_halfwidth)` where
    `ci_halfwidth = sqrt(ci1**2 + ci2**2)` and the percentage is the relative
    uncertainty multiplied by 100. The relative uncertainty is chosen per cell:
    - first input is (0 mean, 0 CI) and second is non-zero: ci2 / |mean2|
    - second input is (0 mean, 0 CI) and first is non-zero: ci1 / |mean1|
    - difference is 0 or has the unexpected sign (negative when
      `is_restoration`, positive otherwise): 0
    - otherwise: ci_halfwidth / |mean1 - mean2|
    """
    def safe_ratio(numerator, magnitude):
        # Divide only where the magnitude is non-zero; other cells stay 0
        return np.divide(
            numerator, magnitude,
            out=np.zeros_like(numerator, dtype=np.float64),
            where=(magnitude != 0))

    difference = mean1 - mean2
    magnitude = np.abs(difference)
    # Combine absolute uncertainties: CI_z = sqrt(CI_x**2 + CI_y**2)
    ci_halfwidth = np.sqrt(np.square(ci1) + np.square(ci2))

    # Forest transitions: one side was nodata and arrives here as 0 mean, 0 CI
    first_empty = (ci1 == 0) & (mean1 == 0)
    first_present = (ci1 != 0) & (mean1 != 0)
    second_empty = (ci2 == 0) & (mean2 == 0)
    second_present = (ci2 != 0) & (mean2 != 0)
    deforestation_case = first_empty & second_present
    reforestation_case = first_present & second_empty

    # No uncertainty is reported for a zero difference or an unexpected sign
    unexpected_sign = (difference < 0) if is_restoration else (difference > 0)
    not_propagated = unexpected_sign | (magnitude == 0)

    # The first matching condition wins, so transitions take priority
    relative_uncertainty = np.select(
        [deforestation_case, reforestation_case, not_propagated],
        [safe_ratio(ci2, np.abs(mean2)), safe_ratio(ci1, np.abs(mean1)), 0.0],
        default=safe_ratio(ci_halfwidth, magnitude))
    return relative_uncertainty * 100.0, ci_halfwidth

# Uncertainty mode is active when the source directory is the uncertainty directory
use_uncertainty = (source_dir == uncertainty_dir)

# Merge every scenario-pair dictionary into one lookup.
# On a duplicate key the entry from the later dictionary replaces the earlier one.
all_disturbance_dictionaries = {
    **disturbance_since_dictionary,
    **degradation_interval_dictionary,
    **degradation_single_year_dictionary,
    **disturbance_area_dictionary,
    **restoration_potential_dictionary,
}

# Progress tracking: one operation per scenario pair
progress_index = 0
total_operations = len(all_disturbance_dictionaries)
progress_label = widgets.Label(
    f"Forest disturbance calculation progress: {progress_index}/{total_operations}")
display(progress_label)

# Process all forest disturbance calculations
# Each dictionary key is a (recent_key, baseline_key) scenario pair and each value
# is either one output name or a tuple of two output names. For every pair the loop:
# 1. works out the calculation type and output names,
# 2. skips the pair if every output file already exists,
# 3. checks that the input rasters exist,
# 4. calculates the difference rasters (with CI and percentage uncertainty in
#    uncertainty mode), rounds them and exports them as .tif files.
for (recent_key, baseline_key), value in all_disturbance_dictionaries.items():
    # Determine calculation type
    # The type is decided by which source dictionary contains the pair
    is_restoration = (recent_key, baseline_key) in restoration_potential_dictionary
    is_interval = (recent_key, baseline_key) in degradation_interval_dictionary
    output_dir = restoration_dir if is_restoration else disturbance_dir
    # Determine output type from dictionary value structure
    # Tuples indicate forest state transitions (deforestation/reforestation) are calculated
    # Tuple order is (restoration, reforestation) or (deforestation, degradation)
    if isinstance(value, tuple):
        if is_restoration:
            restoration_name, reforestation_name = value
            has_reforestation = True
            has_deforestation = False
        else:
            deforestation_name, degradation_name = value
            has_deforestation = True
            has_reforestation = False
    else:
        has_deforestation = False
        has_reforestation = False
        if is_restoration:
            restoration_name = value
        else:
            degradation_name = value
    # Define output paths and check existence
    # Uncertainty mode writes three rasters per output (mean, ci_95, uncertainty);
    # otherwise a single raster per output is written
    if use_uncertainty:
        if is_restoration:
            output_paths = {
                'restoration_mean': join(output_dir, f"mean__{restoration_name}__{selected_model}.tif"),
                'restoration_ci': join(output_dir, f"ci_95__{restoration_name}__{selected_model}.tif"),
                'restoration_uncertainty': join(output_dir, f"uncertainty__{restoration_name}__{selected_model}.tif")}
            if has_reforestation:
                output_paths.update({
                    'reforestation_mean': join(output_dir, f"mean__{reforestation_name}__{selected_model}.tif"),
                    'reforestation_ci': join(output_dir, f"ci_95__{reforestation_name}__{selected_model}.tif"),
                    'reforestation_uncertainty': join(output_dir, f"uncertainty__{reforestation_name}__{selected_model}.tif")})
        else:
            output_paths = {
                'degradation_mean': join(output_dir, f"mean__{degradation_name}__{selected_model}.tif"),
                'degradation_ci': join(output_dir, f"ci_95__{degradation_name}__{selected_model}.tif"),
                'degradation_uncertainty': join(output_dir, f"uncertainty__{degradation_name}__{selected_model}.tif")}
            if has_deforestation:
                output_paths.update({
                    'deforestation_mean': join(output_dir, f"mean__{deforestation_name}__{selected_model}.tif"),
                    'deforestation_ci': join(output_dir, f"ci_95__{deforestation_name}__{selected_model}.tif"),
                    'deforestation_uncertainty': join(output_dir, f"uncertainty__{deforestation_name}__{selected_model}.tif")})
    else:
        if is_restoration:
            output_paths = {'restoration': join(output_dir, f"{restoration_name}__{selected_model}.tif")}
            if has_reforestation:
                output_paths['reforestation'] = join(output_dir, f"{reforestation_name}__{selected_model}.tif")
        else:
            output_paths = {'degradation': join(output_dir, f"{degradation_name}__{selected_model}.tif")}
            if has_deforestation:
                output_paths['deforestation'] = join(output_dir, f"{deforestation_name}__{selected_model}.tif")
    # Skip the pair only when every expected output exists; a partially
    # exported pair is recalculated in full
    if all(exists(p) for p in output_paths.values()):
        progress_index += 1
        progress_label.value = f"Forest disturbance calculation progress: {progress_index}/{total_operations}"
        continue
    # Validate input paths
    if use_uncertainty:
        recent_mean_path = join(predictions_dir, f"mean__{recent_key}__{selected_model}.tif")
        recent_ci_path = join(predictions_dir, f"ci_95__{recent_key}__{selected_model}.tif")
        baseline_mean_path = join(predictions_dir, f"mean__{baseline_key}__{selected_model}.tif")
        baseline_ci_path = join(predictions_dir, f"ci_95__{baseline_key}__{selected_model}.tif")
        assert exists(recent_mean_path), f"Missing: {recent_mean_path}"
        assert exists(recent_ci_path), f"Missing: {recent_ci_path}"
        assert exists(baseline_mean_path), f"Missing: {baseline_mean_path}"
        assert exists(baseline_ci_path), f"Missing: {baseline_ci_path}"
    else:
        recent_path = join(predictions_dir, f"{recent_key}__{selected_model}.tif")
        baseline_path = join(predictions_dir, f"{baseline_key}__{selected_model}.tif")
        assert exists(recent_path), f"Missing: {recent_path}"
        assert exists(baseline_path), f"Missing: {baseline_path}"
    # Interval pairs: apply actual year mask to degradation
    # The mask raster is named after the year prefix of the recent scenario key
    if is_interval:
        year = recent_key.split('_')[0]
        interval_mask_arr = read_raster_as_array(join(scenario_masks_dir, f"{year}.tif"))
    # Load inputs and calculate components
    if use_uncertainty:
        recent_mean = read_raster_as_array(recent_mean_path)
        recent_ci = read_raster_as_array(recent_ci_path)
        baseline_mean = read_raster_as_array(baseline_mean_path)
        baseline_ci = read_raster_as_array(baseline_ci_path)
        # Export template: the recent raster for restoration, otherwise the baseline raster
        template_path = recent_mean_path if is_restoration else baseline_mean_path
        if is_restoration:
            # For restoration pairs the recent key is the alternate scenario
            # and the baseline key is the actual scenario
            result = calculate_restoration(
                recent_mean, baseline_mean, nodatavalue, has_reforestation, recent_ci, baseline_ci)
            if has_reforestation:
                (restoration, restoration_ci, restoration_uncertainty), \
                (reforestation, reforestation_ci, reforestation_uncertainty) = result
            else:
                restoration, restoration_ci, restoration_uncertainty = result
        else:
            result = calculate_disturbance(
                recent_mean, baseline_mean, nodatavalue, has_deforestation, recent_ci, baseline_ci)
            if has_deforestation:
                (deforestation, deforestation_ci, deforestation_uncertainty), \
                (degradation, degradation_ci, degradation_uncertainty) = result
            else:
                degradation, degradation_ci, degradation_uncertainty = result
        # Interval pairs: apply actual year mask to degradation
        # Cells that are nodata in the mask become nodata in all three degradation arrays
        if is_interval:
            degradation = np.where(interval_mask_arr == nodatavalue, nodatavalue, degradation)
            degradation_ci = np.where(interval_mask_arr == nodatavalue, nodatavalue, degradation_ci)
            degradation_uncertainty = np.where(interval_mask_arr == nodatavalue, nodatavalue, degradation_uncertainty)
        # Round restoration mean, zero uncertainty where mean rounds to zero
        if is_restoration:
            restoration_rounded = round_array(restoration, mean_precision)
            restoration_ci = np.where(restoration_rounded == 0, 0, restoration_ci)
            restoration_uncertainty = np.where(restoration_rounded == 0, 0, restoration_uncertainty)
            # Export restoration
            export_array_as_tif(restoration_rounded, output_paths['restoration_mean'], template=template_path)
            export_array_as_tif(round_array(restoration_ci, ci_precision), output_paths['restoration_ci'], template=template_path)
            export_array_as_tif(round_array(restoration_uncertainty, uncertainty_precision), output_paths['restoration_uncertainty'], template=template_path)
            # Round reforestation mean, zero uncertainty where mean rounds to zero
            if has_reforestation:
                reforestation_rounded = round_array(reforestation, mean_precision)
                reforestation_ci = np.where(reforestation_rounded == 0, 0, reforestation_ci)
                reforestation_uncertainty = np.where(reforestation_rounded == 0, 0, reforestation_uncertainty)
                # Export reforestation
                export_array_as_tif(reforestation_rounded, output_paths['reforestation_mean'], template=template_path)
                export_array_as_tif(round_array(reforestation_ci, ci_precision), output_paths['reforestation_ci'], template=template_path)
                export_array_as_tif(round_array(reforestation_uncertainty, uncertainty_precision), output_paths['reforestation_uncertainty'], template=template_path)
        # Round degradation mean, zero uncertainty where mean rounds to zero
        else:
            degradation_rounded = round_array(degradation, mean_precision)
            degradation_ci = np.where(degradation_rounded == 0, 0, degradation_ci)
            degradation_uncertainty = np.where(degradation_rounded == 0, 0, degradation_uncertainty)
            # Export degradation
            export_array_as_tif(degradation_rounded, output_paths['degradation_mean'], template=template_path)
            export_array_as_tif(round_array(degradation_ci, ci_precision), output_paths['degradation_ci'], template=template_path)
            export_array_as_tif(round_array(degradation_uncertainty, uncertainty_precision), output_paths['degradation_uncertainty'], template=template_path)
            # Round deforestation mean, zero uncertainty where mean rounds to zero
            if has_deforestation:
                deforestation_rounded = round_array(deforestation, mean_precision)
                deforestation_ci = np.where(deforestation_rounded == 0, 0, deforestation_ci)
                deforestation_uncertainty = np.where(deforestation_rounded == 0, 0, deforestation_uncertainty)
                # Export deforestation
                export_array_as_tif(deforestation_rounded, output_paths['deforestation_mean'], template=template_path)
                export_array_as_tif(round_array(deforestation_ci, ci_precision), output_paths['deforestation_ci'], template=template_path)
                export_array_as_tif(round_array(deforestation_uncertainty, uncertainty_precision), output_paths['deforestation_uncertainty'], template=template_path)
    else:
        # Mean-only mode: one input raster per scenario and one output raster per component
        recent_arr = read_raster_as_array(recent_path)
        baseline_arr = read_raster_as_array(baseline_path)
        # Export template: the recent raster for restoration, otherwise the baseline raster
        template_path = recent_path if is_restoration else baseline_path
        if is_restoration:
            result = calculate_restoration(recent_arr, baseline_arr, nodatavalue, has_reforestation)
            if has_reforestation:
                restoration, reforestation = result
            else:
                restoration = result
        else:
            result = calculate_disturbance(recent_arr, baseline_arr, nodatavalue, has_deforestation)
            if has_deforestation:
                deforestation, degradation = result
            else:
                degradation = result
        # Interval pairs: apply actual year mask to degradation
        if is_interval:
            degradation = np.where(interval_mask_arr == nodatavalue, nodatavalue, degradation)
        # Export
        if is_restoration:
            export_array_as_tif(round_array(restoration, mean_precision), output_paths['restoration'], template=template_path)
            if has_reforestation:
                export_array_as_tif(round_array(reforestation, mean_precision), output_paths['reforestation'], template=template_path)
        else:
            export_array_as_tif(round_array(degradation, mean_precision), output_paths['degradation'], template=template_path)
            if has_deforestation:
                export_array_as_tif(round_array(deforestation, mean_precision), output_paths['deforestation'], template=template_path)

    # Pair complete: advance the progress label
    progress_index += 1
    progress_label.value = f"Forest disturbance calculation progress: {progress_index}/{total_operations}"
print("All forest disturbance calculations complete.\n")

# Intactness

## Percentage loss

In [None]:
# Intactness is measured as relative percentage loss of AGBD within an area of interest

# Collect the alternate scenario baselines usable for the intactness calculation.
# A baseline qualifies when a prediction for its actual scenario (the year prefix
# of the baseline name) exists in predictions_dir for the selected model.
use_uncertainty = (source_dir == uncertainty_dir)

intactness_alternate_baselines = set()

for file in os.listdir(predictions_dir):
    is_mean = 'mean__' in file
    # Uncertainty mode only considers the mean rasters; in either mode only
    # 'no disturbance since' scenarios are baselines
    if (use_uncertainty and not is_mean) or '_no_disturbance_since_' not in file:
        continue
    # The scenario name is the second '__' field for mean rasters, else the first
    name_parts = file.split('__')
    baseline_scenario = name_parts[1] if is_mean else name_parts[0]
    year = baseline_scenario.split('_')[0]
    actual_prefix = 'mean__' if is_mean else ''
    actual_file = f"{actual_prefix}{year}__{selected_model}.tif"
    if not exists(join(predictions_dir, actual_file)):
        continue
    intactness_alternate_baselines.add(baseline_scenario)

# Print the baselines as a Python list literal, sorted by name
print("intactness_alternate_baselines = [")
for baseline in sorted(intactness_alternate_baselines):
    print(f"    '{baseline}',")
print("]")

In [None]:
# Baselines to process for intactness. The layout matches the list literal
# printed by the baseline discovery cell; remove entries to restrict processing.
# The year prefix of each name is used as the actual scenario in the loop below.
intactness_alternate_baselines = [
    '2021_no_disturbance_since_1993',
    '2021_no_disturbance_since_oldgrowth',
    '2024_no_disturbance_since_1996',
    '2024_no_disturbance_since_oldgrowth',
]

In [None]:
# Precision settings for the percentage loss outputs
# NOTE(review): the code that uses these is outside this view - presumably
# rounding precision for the percentage loss raster and its CI rasters; confirm.
percentage_loss_precision = 0
percentage_ci_precision = 2

# Percentage loss quantifies AGBD change relative to baseline: (actual / baseline) * 100 - 100.
# Values >0 indicate AGBD gain, which (within scenario constraints) are artefacts of
# model instability. These are clipped to 0.

# Uncertainty (if used as source_dir) of percentage loss is propagated for the magnitude
# of percentage loss from AGBD disturbance, not forest state transitions.
# The Landsat-derived classification products (e.g. TMF) used for masking non-forest
# have their own measures of uncertainty. Therefore values of -100 % (deforestation)
# are set to a CI value of 0. Magnitude of deforestation is not measured in this case,
# only degradation (>-100 <0). Similarly, percentage loss of 0 is also set to a
# percentage loss CI of 0.

# For absolute differences (Z = X - Y) used for disturbance and restoration,
# IPCC Approach 1 (Equation 3.2) applies: CI_z = sqrt(CI_x**2 + CI_y**2).
# This holds because variances add for differences of normals, and CIs scale linearly
# with standard deviation under normality, preserving the CI interval symmetry.

# For ratios like percentage loss (Z = X / Y), there is no equivalent formula for CI_z.
# IPCC Approach 1 is not applicable to divisions like percentage loss.
# The distribution of the quotient is not normal even when X and Y are both normal.
# The resulting distribution has heavy tails and is inherently asymmetric.
# Monte Carlo simulation (IPCC Approach 2, IPCC 2006 and 2019) is therefore used instead,
# as in the initial uncertainty calculations of scenarios. Percentage loss for each
# prediction iteration pair is calculated, then confidence intervals from percentiles
# are derived from the empirical (actual rather than theoretical) distribution.
# Percentiles directly characterise the distribution without assuming normality,
# are robust to outliers, and handle bounded distributions (percentage loss <= 0).
# CI95 upper and lower are stored alongside the half-width for inspection of asymmetry.

# References:
# IPCC (2006) Guidelines Vol.1 Ch.3: Uncertainties, Section 3.2.3.2
# IPCC (2019) Refinement Vol.1 Ch.3: Uncertainties, Section 3.2.3.2

# Sort key for iteration files (1..100)
def iteration_sort_key(path):
    """Return the integer that follows 'iteration_' in path, or 0 if there is none."""
    found = re.search(r'iteration_(\d+)', path)
    if found is None:
        return 0
    return int(found.group(1))

# Get sorted iteration paths for a scenario
def get_iteration_paths(scenario):
    """Return the .tif paths in '<scenario>_iterations', ordered by iteration number.

    The directory is looked up under scenario_iterations_dir and the ordering
    uses iteration_sort_key.
    """
    iterations_dir = join(scenario_iterations_dir, f"{scenario}_iterations")
    tif_paths = []
    for filename in os.listdir(iterations_dir):
        if filename.endswith(".tif"):
            tif_paths.append(join(iterations_dir, filename))
    tif_paths.sort(key=iteration_sort_key)
    return tif_paths

# Calculate percentage loss from actual and baseline arrays
def calculate_percentage(actual, baseline, nodata):
    """Return (actual / baseline) * 100 - 100 as float64, element-wise.

    Elements where actual equals nodata, or where baseline equals nodata or 0,
    come out as NaN. RuntimeWarnings raised by the arithmetic are suppressed.
    """
    actual_missing = actual == nodata
    baseline_unusable = (baseline == nodata) | (baseline == 0)
    numerator = np.where(actual_missing, np.nan, actual).astype(np.float64)
    denominator = np.where(baseline_unusable, np.nan, baseline).astype(np.float64)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        ratio = numerator / denominator
        return ratio * 100 - 100

# Progress labels
n_baselines = len(intactness_alternate_baselines)
baseline_progress_label = widgets.Label(value=f"Baseline progress: 0 / {n_baselines}")
iter_progress_label = widgets.Label(value="Iteration progress: -")
display(baseline_progress_label, iter_progress_label)

# For every alternate baseline scenario, compare the actual scenario of the same
# year against it and export a percentage loss raster. When use_uncertainty is
# set, the loss is the mean over paired prediction iterations and 95% CI rasters
# (half-width, upper, lower) are exported as well.
for baseline_index, baseline_scenario in enumerate(intactness_alternate_baselines):
    # Parse actual scenario from baseline scenario name
    year = baseline_scenario.split('_')[0]
    actual_scenario = year
    intactness_subdir = join(intactness_dir, baseline_scenario)
    makedirs(intactness_subdir, exist_ok=True)

    # Define output paths
    percentage_filename = f"percentage_loss__{baseline_scenario}__{selected_model}.tif"
    percentage_path = join(intactness_subdir, percentage_filename)
    ci_halfwidth_path = None
    ci_upper_path = None
    ci_lower_path = None
    if use_uncertainty:
        ci_halfwidth_path = join(intactness_subdir, f"ci_95_halfwidth__percentage_loss__{baseline_scenario}__{selected_model}.tif")
        ci_upper_path = join(intactness_subdir, f"ci_95_upper__percentage_loss__{baseline_scenario}__{selected_model}.tif")
        ci_lower_path = join(intactness_subdir, f"ci_95_lower__percentage_loss__{baseline_scenario}__{selected_model}.tif")
    # Skip the baseline only when every raster expected for this mode is already on disk
    outputs_exist = exists(percentage_path) and (not use_uncertainty or (exists(ci_halfwidth_path) and exists(ci_upper_path) and exists(ci_lower_path)))
    if outputs_exist:
        iter_progress_label.value = "Iteration progress: skipped (exists)"
        baseline_progress_label.value = f"Baseline progress: {baseline_index + 1} / {n_baselines}"
        continue

    if use_uncertainty:
        # Collect iteration paths for both scenarios
        actual_paths = get_iteration_paths(actual_scenario)
        baseline_paths = get_iteration_paths(baseline_scenario)
        n_iterations = len(actual_paths)
        # Iterations are paired by sorted position. zip() would silently truncate a
        # mismatch, so the statistics would be computed on an incomplete set of pairs.
        if n_iterations == 0 or len(baseline_paths) != n_iterations:
            raise ValueError(
                f"Iteration rasters for '{actual_scenario}' ({n_iterations}) and "
                f"'{baseline_scenario}' ({len(baseline_paths)}) must be non-empty and equal in number.")
        iter_progress_label.value = f"Iteration progress: 0 / {n_iterations}"

        # Load first pair to define masks (consistent across all iterations)
        first_actual = read_raster_as_array(actual_paths[0])
        first_baseline = read_raster_as_array(baseline_paths[0])
        actual_nodata_mask = first_actual == nodatavalue
        baseline_nodata_mask = first_baseline == nodatavalue
        both_valid = ~baseline_nodata_mask & ~actual_nodata_mask
        deforestation_mask = ~baseline_nodata_mask & actual_nodata_mask
        zero_baseline_mask = both_valid & (first_baseline == 0)
        template_path = actual_paths[0]

        # Calculate percentage loss per iteration pair, accumulate for percentile calculation
        percentage_arrays = []
        for iter_index, (actual_path, baseline_path) in enumerate(zip(actual_paths, baseline_paths)):
            actual_iter = read_raster_as_array(actual_path)
            baseline_iter = read_raster_as_array(baseline_path)
            percentage_iter = calculate_percentage(actual_iter, baseline_iter, nodatavalue)
            percentage_arrays.append(percentage_iter)
            iter_progress_label.value = f"Iteration progress: {iter_index + 1} / {n_iterations}"

        # Stack and calculate mean and 95% CI from distribution
        percentage_stack = np.stack(percentage_arrays, axis=0)
        percentage_arrays = None  # release the per-iteration list before sorting

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            percentage_arr = np.nanmean(percentage_stack, axis=0)

            # Sort once, index at percentile positions. Faster than np.nanpercentile.
            # Exact for pixels that are valid in every iteration (or NaN in every one).
            sorted_stack = np.sort(percentage_stack, axis=0)
            percentage_stack = None
            n = sorted_stack.shape[0]
            lower_idx = 0.025 * (n - 1)
            upper_idx = 0.975 * (n - 1)
            lower_floor = int(np.floor(lower_idx))
            upper_floor = int(np.floor(upper_idx))
            lower_frac = lower_idx - lower_floor
            upper_frac = upper_idx - upper_floor
            ci_lower_arr = (sorted_stack[lower_floor] * (1 - lower_frac) +
                            sorted_stack[min(lower_floor + 1, n - 1)] * lower_frac)
            ci_upper_arr = (sorted_stack[upper_floor] * (1 - upper_frac) +
                            sorted_stack[min(upper_floor + 1, n - 1)] * upper_frac)

            # calculate_percentage also yields NaN where an individual iteration's
            # baseline is 0, so NaNs can differ between iterations. np.sort places NaN
            # last, which would bias (or turn to NaN) the fixed-index percentiles above.
            # Such pixels have a NaN in the last slice but not in the first; recompute
            # only those with np.nanpercentile (same linear interpolation, NaNs ignored).
            partial_nan_mask = np.isnan(sorted_stack[-1]) & ~np.isnan(sorted_stack[0])
            if partial_nan_mask.any():
                partial_ci = np.nanpercentile(sorted_stack[:, partial_nan_mask], [2.5, 97.5], axis=0)
                ci_lower_arr[partial_nan_mask] = partial_ci[0]
                ci_upper_arr[partial_nan_mask] = partial_ci[1]
            sorted_stack = None

        # CI halfwidth (symmetric approximation)
        ci_halfwidth_arr = (ci_upper_arr - ci_lower_arr) / 2

        # Set nodata pixels
        percentage_arr = np.where(baseline_nodata_mask, nodatavalue, percentage_arr)
        ci_halfwidth_arr = np.where(baseline_nodata_mask, nodatavalue, ci_halfwidth_arr)
        ci_upper_arr = np.where(baseline_nodata_mask, nodatavalue, ci_upper_arr)
        ci_lower_arr = np.where(baseline_nodata_mask, nodatavalue, ci_lower_arr)

    else:
        iter_progress_label.value = "Iteration progress: n/a (no uncertainty)"

        # Load single prediction rasters from scenarios dir
        baseline_path_in = join(predictions_dir, f"{baseline_scenario}__{selected_model}.tif")
        actual_path_in = join(predictions_dir, f"{actual_scenario}__{selected_model}.tif")
        baseline_arr = read_raster_as_array(baseline_path_in)
        actual_arr = read_raster_as_array(actual_path_in)
        template_path = baseline_path_in

        # Define masks
        actual_nodata_mask = actual_arr == nodatavalue
        baseline_nodata_mask = baseline_arr == nodatavalue
        both_valid = ~baseline_nodata_mask & ~actual_nodata_mask
        deforestation_mask = ~baseline_nodata_mask & actual_nodata_mask
        zero_baseline_mask = both_valid & (baseline_arr == 0)

        # Calculate percentage loss
        percentage_arr = calculate_percentage(actual_arr, baseline_arr, nodatavalue)
        percentage_arr = np.where(baseline_nodata_mask, nodatavalue, percentage_arr)

    # Deforestation: -100% loss, zero CI
    percentage_arr[deforestation_mask] = -100
    if use_uncertainty:
        ci_halfwidth_arr[deforestation_mask] = 0
        ci_upper_arr[deforestation_mask] = -100
        ci_lower_arr[deforestation_mask] = -100

    # Zero baseline: 0% loss, zero CI
    percentage_arr[zero_baseline_mask] = 0
    if use_uncertainty:
        ci_halfwidth_arr[zero_baseline_mask] = 0
        ci_upper_arr[zero_baseline_mask] = 0
        ci_lower_arr[zero_baseline_mask] = 0

    # Clip percentage to [-100, 0]
    percentage_arr[both_valid] = np.clip(percentage_arr[both_valid], -100, 0)

    # Round outputs. Zero CI where percentage rounds to zero.
    percentage_arr = round_array(percentage_arr, percentage_loss_precision)
    if use_uncertainty:
        ci_halfwidth_arr = np.where(percentage_arr == 0, 0, ci_halfwidth_arr)
        ci_upper_arr = np.where(percentage_arr == 0, 0, ci_upper_arr)
        ci_lower_arr = np.where(percentage_arr == 0, 0, ci_lower_arr)
        ci_halfwidth_arr = round_array(ci_halfwidth_arr, percentage_ci_precision)
        ci_upper_arr = round_array(ci_upper_arr, percentage_ci_precision)
        ci_lower_arr = round_array(ci_lower_arr, percentage_ci_precision)

    # Export outputs
    export_array_as_tif(percentage_arr, percentage_path, template=template_path)
    if use_uncertainty:
        export_array_as_tif(ci_halfwidth_arr, ci_halfwidth_path, template=template_path)
        export_array_as_tif(ci_upper_arr, ci_upper_path, template=template_path)
        export_array_as_tif(ci_lower_arr, ci_lower_path, template=template_path)

    baseline_progress_label.value = f"Baseline progress: {baseline_index + 1} / {n_baselines}"

print("Percentage loss calculations complete.")

## Quantiles (relative intactness)

In [None]:
# Use additional polygons for masking relative intactness quantiles
polygons_to_exclude = ['template.gpkg', 'project_area_buffered_bbox.gpkg', 'prediction_area.gpkg']

# Select alternate scenario / degradation pairs to measure relative intactness
# Printed as a paste-ready list. Sorted so the output is stable between runs
# (os.listdir returns entries in arbitrary order).
print("baseline_percentage_loss_list = [")
for baseline_dirname in sorted(os.listdir(intactness_dir)):
  print(f"'{baseline_dirname}',")
print("]\n")

# Select polygons to mask and calculate quantiles
# Excluded polygons and generated '*inverse*' files are not offered as options.
print("mask_polygons = [")
for polygon in sorted(os.listdir(polygons_dir)):
  if polygon not in polygons_to_exclude and 'inverse' not in polygon:
    print(f"'{polygon}',")
print("]")

In [None]:
# Baseline scenarios to rate for relative intactness. Each entry is joined onto
# intactness_dir to locate its percentage loss raster, and its first four
# characters are used as the year of the scenario mask.
baseline_percentage_loss_list = [
'2021_no_disturbance_since_1993',
'2021_no_disturbance_since_oldgrowth',
'2024_no_disturbance_since_1996',
'2024_no_disturbance_since_oldgrowth',
]

# Polygon files (in polygons_dir) used to mask the quantile calculation.
# Uncomment an entry to include it. 'prediction_area.gpkg' is always added
# by the quantile cell, so it does not need to be listed here.
mask_polygons = [
# 'project_area.gpkg',
# 'peninsular_malaysia.gpkg',
# 'lu_yong.gpkg',
# 'lu_yong_lipis.gpkg',
# 'lu_berkelah_jerantut.gpkg',
# 'lu_tekai_tembeling.gpkg',
# 'lu_ais.gpkg',
# 'lu_tekam.gpkg',
# 'lu_berkelah_temerloh.gpkg',
# 'lu_remen_chereh.gpkg',
# 'lu_berkelah_kuantan.gpkg',
'forest_reserves.gpkg',
# 'lu_old-growth_protected_areas.gpkg',
# 'road_mat_daling.gpkg',
# 'road_mat_daling_buffered_30.gpkg',
# 'asartr_phase_2.gpkg',
# 'tekai_tembeling.gpkg',
# 'non_forest_biome.gpkg',
]

In [None]:
# Highest value of the intactness rating (e.g. 10 for a 1 - 10 scale)
top_score = 10

# The top score is reserved for 0% loss, leaving one fewer quantile for non-zero values
num_quantiles = top_score - 1

print(f"Calculating {num_quantiles} quantiles for negative percentage loss (scores 1-{num_quantiles}), with score {top_score} reserved for 0% difference.\n")

# prediction_area.gpkg is always processed, ahead of any selected mask_polygons
required_polygon = 'prediction_area.gpkg'
mask_polygons_full = [polygon for polygon in mask_polygons if polygon is not None]
if required_polygon not in mask_polygons:
    mask_polygons_full.insert(0, required_polygon)

# Create polygon mask array using template tif
template_array = read_raster_as_array(template_tif_path)

# One boolean mask per polygon file, keyed by filename. A pixel is False wherever
# the template raster equals nodatavalue after the inverse polygon has been burned in.
polygon_masks = {}
for mask_polygon in mask_polygons_full:
    # Create an inverse project area path for masking
    # ([:-5] strips the '.gpkg' extension from the filename)
    template_polygon_path = join(polygons_dir, "template.gpkg")
    inverse_polygon_path = join(polygons_dir, f"{mask_polygon[:-5]}_inverse.gpkg")
    if not exists(inverse_polygon_path):
        polygon_path = join(polygons_dir, mask_polygon)
        template_polygon = gpd.read_file(template_polygon_path)
        polygon_read = gpd.read_file(polygon_path)
        # Merge all features before differencing: GeoSeries.difference pairs rows by
        # index, so a multi-feature polygon file would otherwise only have its first
        # feature subtracted from the template.
        merged_polygon = polygon_read.dissolve().geometry.iloc[0]
        inverse_polygon = template_polygon.geometry.iloc[0].difference(merged_polygon)
        # Pass the CRS object itself; to_epsg() returns None when the CRS has no
        # matching EPSG code, which would produce an invalid "EPSG:None" string.
        inverse_polygon_gdf = gpd.GeoDataFrame({'geometry': [inverse_polygon]}, crs=polygon_read.crs)
        inverse_polygon_gdf.to_file(inverse_polygon_path, driver="GPKG")
        print(f"An inverse masking polygon for {mask_polygon} has been created in {polygons_dir}.")
    else:
        print(f"An inverse masking polygon for {mask_polygon} already exists.")

    # Create and store individual mask for this polygon
    print(f"Creating polygon mask for {mask_polygon}.")
    temp_mask_path = join(intactness_dir, f"temp_mask_{mask_polygon[:-5]}.tif")
    # Burn into a throwaway copy so the template raster itself is never modified
    copyfile(template_tif_path, temp_mask_path)
    burn_polygon_to_raster(temp_mask_path, inverse_polygon_path, fixed_value=nodatavalue, all_touched=False)
    temp_mask_array = read_raster_as_array(temp_mask_path)
    individual_mask = np.ones_like(template_array, dtype=bool)
    individual_mask[temp_mask_array == nodatavalue] = False
    polygon_masks[mask_polygon] = individual_mask
    os.remove(temp_mask_path)

# For each selected baseline and each mask polygon, write into the polygon's subdirectory:
#   - a polygon-masked copy of the percentage loss raster,
#   - a relative intactness score raster (.tif),
#   - the score ranges (.csv),
#   - a histogram of the percentage loss values (.png).
# Scores: 0 = deforestation, 1..num_quantiles = quantile of negative percentage
# loss, top_score = 0% loss.
for baseline_name in baseline_percentage_loss_list:
    intactness_baseline_dir = join(intactness_dir, baseline_name)
    percentage_filename = f"percentage_loss__{baseline_name}__{selected_model}"
    percentage_path = join(intactness_baseline_dir, f"{percentage_filename}.tif")

    # Load scenario mask for this year (first four characters of the baseline name)
    year = baseline_name[:4]
    scenario_mask_path = join(scenario_masks_dir, f"{year}.tif")
    scenario_mask_array = read_raster_as_array(scenario_mask_path)
    # Pixels equal to 1 in the scenario mask are treated as valid
    scenario_valid_mask = scenario_mask_array == 1

    for mask_polygon in mask_polygons_full:
        # Create subdirectory for this polygon within baseline directory
        # ([:-5] strips the '.gpkg' extension)
        polygon_subdir = join(intactness_baseline_dir, mask_polygon[:-5])
        makedirs(polygon_subdir, exist_ok=True)

        # Copy the percentage raster for potential masking
        percentage_masked_filename = f"{percentage_filename}__masked_{mask_polygon[:-5]}.tif"
        percentage_masked_path = join(polygon_subdir, percentage_masked_filename)
        if not exists(percentage_masked_path):
            print(f"Copying {percentage_filename} for masking...")
            copyfile(percentage_path, percentage_masked_path)
            print(f"Masking {percentage_filename} with {mask_polygon}...")
            inverse_polygon_path = join(polygons_dir, f"{mask_polygon[:-5]}_inverse.gpkg")
            burn_polygon_to_raster(percentage_masked_path, inverse_polygon_path, fixed_value=nodatavalue, all_touched=False)
            # Recompress the prediction after burning the polygon masks
            percentage_masked_array = read_raster_as_array(percentage_masked_path)
            export_array_as_tif(percentage_masked_array, percentage_masked_path)
            print(f"{percentage_filename} masked.")
        else: print(f"{percentage_masked_filename} already exists.")

        # Define output paths; skip this polygon if the score raster already exists
        relative_intactness_name = f'intactness__{mask_polygon[:-5]}_{top_score}_quantiles__{baseline_name}__{selected_model}'
        relative_intactness_path = join(polygon_subdir, f'{relative_intactness_name}.tif')
        if exists(relative_intactness_path):
            print(f"{relative_intactness_name} already exists.")
            continue

        # Load original percentage raster and track nodata pixels
        original_percentage_array = read_raster_as_array(percentage_path)
        originally_nodata_mask = original_percentage_array == nodatavalue

        # Apply polygon masking to percentage array using pre-created mask
        # (works on a copy; the masked file written above is not re-read here)
        percentage_array = original_percentage_array.copy()
        percentage_array[~polygon_masks[mask_polygon]] = nodatavalue

        # Identify deforestation: valid percentage data outside scenario mask
        deforestation_mask = (percentage_array != nodatavalue) & (~scenario_valid_mask)

        # Capture degradation data for histogram (within scenario mask only).
        # Taken before values above 0 are set to 0 below.
        original_valid_elements = percentage_array[(percentage_array != nodatavalue) & scenario_valid_mask]

        # Score raster starts as all nodatavalue (int16)
        relative_intactness_array = np.full_like(percentage_array, nodatavalue, dtype=np.int16)

        # Set all values above 0 to 0
        percentage_array[percentage_array > 0] = 0

        # Separate zero and non-zero values within scenario mask only
        zero_elements = (percentage_array == 0) & scenario_valid_mask
        # Exclude nodata and deforestation from quantile calculation
        quantile_mask = (percentage_array != nodatavalue) & (~originally_nodata_mask) & (percentage_array != 0) & scenario_valid_mask
        non_zero_valid_elements = percentage_array[quantile_mask]

        # Calculate quantiles for non-zero valid elements only.
        # Only the interior cut points are kept (num_quantiles - 1 values); the
        # outermost bins are left open-ended below.
        quantiles = np.percentile(non_zero_valid_elements, np.linspace(0, 100, num_quantiles + 1)[1:-1]) if len(non_zero_valid_elements) > 0 else []

        # Assign scores 1 to num_quantiles for non-zero values.
        # Score i covers the half-open interval (lower_bound, upper_bound].
        for i in range(1, num_quantiles + 1):
            lower_bound = quantiles[i-2] if i > 1 and len(quantiles) >= i-1 else float('-inf')
            upper_bound = quantiles[i-1] if len(quantiles) >= i else float('inf')
            relative_intactness_array[
                (percentage_array > lower_bound) & (percentage_array <= upper_bound) &
                (percentage_array != 0) & (percentage_array != nodatavalue) & scenario_valid_mask] = i

        # Set zero percentage loss to top score
        relative_intactness_array[zero_elements] = top_score

        # Set deforestation areas to score 0
        relative_intactness_array[deforestation_mask] = 0

        # Set areas outside polygon to nodatavalue using pre-created mask
        relative_intactness_array[~polygon_masks[mask_polygon]] = nodatavalue

        export_array_as_tif(relative_intactness_array, relative_intactness_path)

        # Prepare data for CSV: Collect lower and upper bounds for each category
        ranges_data = {'Score': [], 'Lower_Bound': [], 'Upper_Bound': []}

        # Add ranges for scores 1 to num_quantiles (non-zero values).
        # Same bounds as used for scoring, except that the highest quantile's upper
        # bound is written as a value just below 0, since 0 itself is the top score.
        for i in range(1, num_quantiles + 1):
            lower_bound = quantiles[i-2] if i > 1 and len(quantiles) >= i-1 else float('-inf')
            if i == num_quantiles: upper_bound = -0.000000001
            else: upper_bound = quantiles[i-1] if len(quantiles) >= i else float('inf')
            ranges_data['Score'].append(i)
            ranges_data['Lower_Bound'].append(lower_bound)
            ranges_data['Upper_Bound'].append(upper_bound)

        # Add entry for top score (value of 0)
        ranges_data['Score'].append(top_score)
        ranges_data['Lower_Bound'].append(0)
        ranges_data['Upper_Bound'].append(0)

        # Create DataFrame and save to CSV
        relative_intactness_df = pd.DataFrame(ranges_data)
        relative_intactness_csv_path = join(polygon_subdir, f'{relative_intactness_name}.csv')
        relative_intactness_df.to_csv(relative_intactness_csv_path, index=False)

        # Generate and save histogram for degradation data as .png
        histogram_path = join(polygon_subdir, f'{relative_intactness_name}.png')
        plt.figure()
        counts, bins, patches = plt.hist(original_valid_elements.flatten(), bins=100)

        # Count deforested pixels (score 0)
        deforestation_count = np.sum(deforestation_mask)

        # Find the zero bin and set its frequency to 0 for display.
        # The figure is then cleared and redrawn as a bar chart from the edited
        # counts, with the deforested pixel count added as a text annotation.
        zero_idx = next((i for i, (l, r) in enumerate(zip(bins[:-1], bins[1:])) if l <= 0 <= r), None)
        if zero_idx is not None:
            counts[zero_idx] = 0
            plt.clf()
            plt.bar(bins[:-1], counts, width=np.diff(bins), align='edge')
            x_center = (bins.min() + bins.max()) / 2
            y_max = max(counts)
            plt.text(x_center, y_max * 0.9,
                    f'Deforested pixels (score 0) = {deforestation_count:,}',
                    ha='center', va='center', fontweight='bold',
                    bbox=dict(boxstyle='round,pad=0.5', facecolor='white', alpha=0.9))
        plt.title(f'{relative_intactness_name} Histogram')
        plt.xlabel('Value')
        plt.ylabel('Frequency')
        # Show y-axis tick labels as integers with thousands separators
        plt.gca().yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: format(int(x), ',')))
        plt.tight_layout()
        plt.savefig(histogram_path)
        plt.close()

# Disconnect runtime

In [None]:
# Useful for stopping background execution
# NOTE(review): runtime comes from google.colab; unassign() presumably releases the
# Colab VM, so in-memory state would be lost - confirm before running.
runtime.unassign()