<a href="https://colab.research.google.com/github/joekelly211/masfi/blob/main/8_differences.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports, directories and global functions

In [None]:
# Base directory that holds every input and output of this notebook.
#   '/content/drive/MyDrive/...' -> personal drive
#   '/gdrive/Shareddrives/...'   -> shared drive (must be created first)

base_dir = "/gdrive/Shareddrives/masfi"
# base_dir = '/content/drive/MyDrive/masfi'

# Mount Google Drive at the mount point that matches the base_dir prefix
from google.colab import drive
import os
import sys

_on_shared_drive = base_dir.startswith('/gdrive/Shareddrives/')
_on_personal_drive = base_dir.startswith('/content/drive/MyDrive/')
if _on_shared_drive:
    drive.mount('/gdrive', force_remount=True)
elif _on_personal_drive:
    drive.mount('/content/drive', force_remount=True)
    # Only the personal drive folder is created here; a shared drive is not.
    os.makedirs(base_dir, exist_ok=True)
else:
    print("Create a base_dir beginning with '/gdrive/Shareddrives/' or '/content/drive/MyDrive/'.")

# Append the resolved base directory to the module search path (once)
_path_to_add = os.path.realpath(base_dir)
if _path_to_add not in sys.path:
    sys.path.append(_path_to_add)

In [None]:
# Capture outputs
%%capture
# Installs and upgrades
!pip install geopandas
!pip install rasterio
!apt-get install -y gdal-bin

In [None]:
# Imports
import geopandas as gpd
from google.colab import runtime
from os import makedirs
from os.path import join, exists
from osgeo import gdal, ogr
gdal.UseExceptions()
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from rasterio.features import rasterize
from shutil import copyfile

In [None]:
# Define directories: the numbered sub-folders of base_dir used by this notebook
areas_dir, scenarios_dir, uncertainty_dir, differences_dir = (
    join(base_dir, folder)
    for folder in ("1_areas", "6_scenarios", "7_uncertainty", "8_differences")
)
polygons_dir = join(areas_dir, "polygons")

# Create directories: only the differences folder is created in this cell
makedirs(differences_dir, exist_ok=True)

In [None]:
# Global function: export an array as a .tif
template_tif_path = join(areas_dir, "template.tif")
nodatavalue = -11111
compress = True
def export_array_as_tif(input_array, output_tif, template=template_tif_path, nodatavalue=nodatavalue, compress=compress, dtype=gdal.GDT_Float32):
    """Write an array to a single-band GeoTIFF georeferenced like a template raster.

    The output takes its raster size (from band 1), geotransform and
    projection from `template`. The array is assumed to match the template's
    dimensions -- this is not checked here.

    Args:
        input_array: Array written to band 1 of the output.
        output_tif: Path of the GeoTIFF to create.
        template: Path of the raster supplying size and georeferencing.
        nodatavalue: Value registered as nodata on the output band.
        compress: If truthy, create the file with ZSTD compression (level 1).
        dtype: GDAL data type of the output band; replaced by GDT_Int16 when
            the array's dtype is int16.
    """
    template_ds = gdal.Open(template)
    template_band = template_ds.GetRasterBand(1)
    geotransform = template_ds.GetGeoTransform()
    projection = template_ds.GetProjection()
    # ZSTD level 1: good speed / size ratio
    creation_options = ['COMPRESS=ZSTD', 'ZSTD_LEVEL=1'] if compress else []
    # An int16 array overrides the requested output type
    if input_array.dtype == 'int16':
        dtype = gdal.GDT_Int16
    output_ds = gdal.GetDriverByName("GTiff").Create(
        output_tif, template_band.XSize, template_band.YSize, 1, dtype,
        options=creation_options)
    output_band = output_ds.GetRasterBand(1)
    output_band.WriteArray(input_array)
    output_band.SetNoDataValue(nodatavalue)
    output_ds.SetGeoTransform(geotransform)
    output_ds.SetProjection(projection)
    # Drop the references so GDAL closes the datasets and writes the file
    output_band = None
    template_ds = output_ds = None

# Global function: burn a polygon to raster
def burn_polygon_to_raster(raster_path, polygon_path, fixed=True, fixed_value=1, column_name=None, all_touched=True):
    """Burn the features of a vector file into band 1 of an existing raster.

    The raster is opened in update mode and modified in place.

    Args:
        raster_path: Path of the raster to update.
        polygon_path: Path of the vector file whose first layer is burned.
        fixed: If True, burn `fixed_value` for every feature; otherwise burn
            the value of an attribute field.
        fixed_value: Value burned when `fixed` is True.
        column_name: Attribute field burned when `fixed` is False. Falls back
            to the layer's first field when empty or None.
        all_touched: If True, pass ALL_TOUCHED=TRUE to the rasterizer.

    Raises:
        ValueError: If either input cannot be opened, the vector file has no
            layer, or an attribute burn is requested on a layer without fields.
    """
    raster = vector = None
    try:
        raster = gdal.Open(raster_path, gdal.GA_Update)
        vector = ogr.Open(polygon_path)
        # Test against None explicitly: the truthiness of a GDAL/OGR handle is
        # not a reliable "opened" signal (a sized handle is falsy when empty).
        if raster is None or vector is None:
            raise ValueError("Cannot open input files")
        layer = vector.GetLayer()
        if layer is None:
            raise ValueError(f"No layer found in {polygon_path}")
        options = ["ALL_TOUCHED=TRUE"] if all_touched else []
        if fixed:
            gdal.RasterizeLayer(raster, [1], layer, burn_values=[fixed_value], options=options)
        else:
            attr_name = column_name
            if not attr_name:
                layer_defn = layer.GetLayerDefn()
                if layer_defn.GetFieldCount() == 0:
                    raise ValueError(f"No attribute fields to burn in {polygon_path}")
                attr_name = layer_defn.GetFieldDefn(0).GetName()
            options.append(f"ATTRIBUTE={attr_name}")
            gdal.RasterizeLayer(raster, [1], layer, options=options)
    finally:
        # Flush pending writes, then release both handles
        if raster is not None:
            raster.FlushCache()
        raster = vector = None

# Select source and model

In [None]:
# Select whether to source predictions from scenarios_dir or uncertainty_dir.
# If available, uncertainty_dir should be selected so that uncertainty can
# be propagated and scenario 'mean' iteration values used.

source_dir = uncertainty_dir
# source_dir = scenarios_dir

_source_folder = source_dir.rsplit('/', 1)[-1]
print(f"{_source_folder} has been selected as the source directory for predictions")
print("to calculate disturbance and intactness.\n")

# Fall back to the scenarios directory when uncertainty was selected but is missing
if source_dir == uncertainty_dir and not exists(uncertainty_dir):
    print("The uncertainty directory does not yet exist. Defaulting to scenarios directory.")
    source_dir = scenarios_dir

# Text after the last underscore of the path, e.g. '7_uncertainty' -> 'uncertainty_dir'
_source_suffix = source_dir.rsplit('_', 1)[-1]
source_dir_name = f"{_source_suffix}_dir"

# Select the model: list every entry of the source directory except scenario
# masks, printed as an assignment line for 'selected_model'
for subdir in os.listdir(source_dir):
    if 'scenario_masks' in subdir:
        continue
    print(f"selected_model = '{subdir}'")

In [None]:
selected_model = 'agbd_251203_161707'

# Locate the prediction folders of the selected model inside the source directory
selected_model_dir = join(source_dir, selected_model)
if source_dir == scenarios_dir:
    predictions_dir = join(selected_model_dir, 'scenario_predictions')
if source_dir == uncertainty_dir:
    predictions_dir = join(selected_model_dir, 'uncertainty_predictions')
    predictions_unmasked_dir = join(selected_model_dir, 'uncertainty_predictions_unmasked')

# Scenario masks are always read from the scenarios directory
scenario_masks_dir = join(scenarios_dir, selected_model, "scenario_masks")

# Differences need at least 2 predictions; output folders are only set up then
if len(os.listdir(predictions_dir)) >= 2:
    model_differences_dir = join(differences_dir, f"{selected_model}_{source_dir_name}")
    disturbance_dir, intactness_dir, restoration_dir = (
        join(model_differences_dir, kind)
        for kind in ('disturbance', 'intactness', 'restoration')
    )
    # NOTE(review): restoration_dir is defined but not created in this cell.
    for _output_dir in (model_differences_dir, disturbance_dir, intactness_dir):
        makedirs(_output_dir, exist_ok=True)
else:
    print(f"At least 2 predictions must exist in {source_dir} to calculate differences.")

# Scenario differences

## Define type and period

In [None]:
# Forest change is measured as absolute AGBD loss.
# This cell builds dictionaries of options based on the available files.
# The dictionaries output 2-tuples (deforestation, degradation) or
# degradation-only strings.
# Decomposition uses nodata patterns:
# - Deforestation: pixel transitions from forest (data) to non-forest (nodata)
# - Degradation: pixel remains forest but loses AGBD
# Deforestation AGBD loss is only calculated for oldgrowth and area-based baselines.
# Deforestation is the cumulative process from intact forest to complete removal.

# Scenarios predictions directory: the scenario is the text before the first "__"
if source_dir == scenarios_dir:
    scenarios = {filename.split("__")[0] for filename in os.listdir(predictions_dir)}
# Uncertainty predictions directory: file names start "<stat>__<scenario>";
# keep only scenarios that have both a 'mean' and an 'uncertainty' file
if source_dir == uncertainty_dir:
    prediction_stats = {}
    for filename in os.listdir(predictions_dir):
        name_parts = filename.split("__")
        if len(name_parts) < 2:
            continue
        prediction_stats.setdefault(name_parts[1], set()).add(name_parts[0])
    scenarios = {name for name, stats in prediction_stats.items()
                 if {'uncertainty', 'mean'} <= stats}

# Categorise years from scenarios
# plain_years: scenarios that are just a year
# years: plain years, plus the leading year of each "no_disturbance_since"
#        scenario and the year before its numeric "since" year
plain_years = {int(s) for s in scenarios if s.isdigit()}
years = set(plain_years)
for s in scenarios:
    if s.isdigit() or "_no_disturbance_since_" not in s:
        continue
    leading = s.split("_")[0]
    if leading.isdigit():
        years.add(int(leading))
    since_part = s.split("_since_")[1]
    if since_part.isdigit():
        years.add(int(since_part) - 1)
years_sorted = sorted(years)


# 1. Disturbance since dictionary
# Pairs each actual year with its no_disturbance counterfactuals.
# Keys: (actual_year, counterfactual_scenario)
# Values: (deforestation_name, degradation_name) for oldgrowth, degradation_name otherwise
# Every entry is also printed as dictionary source text.
print("disturbance_since_dictionary = {\n")
disturbance_since_dictionary = {}
for year_a in years_sorted:
    a_str = str(year_a)
    if year_a not in plain_years:
        continue
    # Oldgrowth entry first
    oldgrowth_scenario = f"{a_str}_no_disturbance_since_oldgrowth"
    if oldgrowth_scenario in scenarios:
        deforestation_name = f"{a_str}_deforestation_since_oldgrowth"
        degradation_name = f"{a_str}_degradation_since_oldgrowth"
        print(f"  # Disturbance in {a_str} caused by events since oldgrowth")
        print(f"  ('{a_str}', '{oldgrowth_scenario}'):")
        print(f"    ('{deforestation_name}',")
        print(f"     '{degradation_name}'),")
        print("")
        disturbance_since_dictionary[(a_str, oldgrowth_scenario)] = (
            deforestation_name, degradation_name)
    # Year-based entries in chronological order: one candidate "since" year
    # (earlier year + 1) for every known year before year_a
    for since_year in (str(year_b + 1) for year_b in years_sorted if year_b < year_a):
        counterfactual_scenario = f"{a_str}_no_disturbance_since_{since_year}"
        if counterfactual_scenario not in scenarios:
            continue
        degradation_name = f"{a_str}_degradation_since_{since_year}"
        print(f"  # Disturbance in {a_str} caused by events since {since_year}")
        print(f"  ('{a_str}', '{counterfactual_scenario}'):")
        print(f"    '{degradation_name}',")
        print("")
        disturbance_since_dictionary[(a_str, counterfactual_scenario)] = degradation_name
print("}\n")


# 2. Degradation interval dictionary
# Pairs counterfactuals of the same year to cover a multi-year interval that
# starts at a baseline (oldgrowth or the earliest available "since" year).
# Single-year intervals are handled by degradation_single_year_dictionary.
# Keys: (counterfactual_recent, counterfactual_baseline)
# Values: degradation_name
print("degradation_interval_dictionary = {\n")
degradation_interval_dictionary = {}
for y in years_sorted:
    y_str = str(y)
    if y not in plain_years:
        continue
    # Available no_disturbance counterfactuals, keyed by numeric "since" year
    oldgrowth_scenario = f"{y_str}_no_disturbance_since_oldgrowth"
    since_scenarios = {}
    for earlier_year in years_sorted:
        if earlier_year < y:
            candidate = f"{y_str}_no_disturbance_since_{earlier_year + 1}"
            if candidate in scenarios:
                since_scenarios[earlier_year + 1] = candidate
    since_years = sorted(since_scenarios)
    # Each entry: (interval description, recent, baseline, output name)
    interval_entries = []
    # Oldgrowth baseline pairs first
    if oldgrowth_scenario in scenarios:
        for since in since_years:
            interval_entries.append((
                f"events since oldgrowth to {since - 1}",
                since_scenarios[since],
                oldgrowth_scenario,
                f"{y_str}_degradation_from_oldgrowth_to_{since - 1}"))
    # Earliest year baseline pairs (a gap of one year is a single-year difference)
    if since_years:
        earliest = since_years[0]
        for since in since_years[1:]:
            if since - earliest > 1:
                interval_entries.append((
                    f"events in {earliest} to {since - 1}",
                    since_scenarios[since],
                    since_scenarios[earliest],
                    f"{y_str}_degradation_from_{earliest}_to_{since - 1}"))
    for description, recent, baseline, degradation_name in interval_entries:
        print(f"  # Degradation in {y_str} from {description}")
        print(f"  ('{recent}', '{baseline}'):")
        print(f"    '{degradation_name}',")
        print("")
        degradation_interval_dictionary[(recent, baseline)] = degradation_name
print("}\n")


# 3. Degradation single year dictionary
# Pairs consecutive counterfactuals of the same year to isolate one year.
# Keys: (counterfactual_next, counterfactual_current) or (actual, counterfactual_current) for same-year
# Values: degradation_name
print("degradation_single_year_dictionary = {\n")
degradation_single_year_dictionary = {}
for y in years_sorted:
    y_str = str(y)
    if y not in plain_years:
        continue
    # Available no_disturbance counterfactuals, keyed by numeric "since" year
    since_scenarios = {}
    for earlier_year in years_sorted:
        if earlier_year < y:
            candidate = f"{y_str}_no_disturbance_since_{earlier_year + 1}"
            if candidate in scenarios:
                since_scenarios[earlier_year + 1] = candidate
    if not since_scenarios:
        continue
    since_years = sorted(since_scenarios)
    # Each entry: (effect year, first key element, second key element)
    single_year_entries = [
        (current_year, since_scenarios[next_year], since_scenarios[current_year])
        for current_year, next_year in zip(since_years, since_years[1:])
        if next_year == current_year + 1
    ]
    # Same-year case (actual vs no_disturbance_since_year); y is the largest
    # "since" year that can occur for this year
    if y in since_scenarios:
        single_year_entries.append((y, y_str, since_scenarios[y]))
    for effect_year, first, second in single_year_entries:
        degradation_name = f"{y_str}_effect_of_degradation_in_{effect_year}"
        print(f"  # Degradation in {y_str} from events in {effect_year}")
        print(f"  ('{first}', '{second}'):")
        print(f"    '{degradation_name}',")
        print("")
        degradation_single_year_dictionary[(first, second)] = degradation_name
print("}\n")


# 4. Disturbance area dictionary
# Pairs polygon-based alternate scenarios with their actual year.
# Unlike historical counterfactuals, these represent forecast scenarios of potential
# loss, so AGBD loss from deforestation is calculated.
# Keys: (alternate_scenario, actual_year)
# Values: (deforestation_name, degradation_name)
print("disturbance_area_dictionary = {\n")
disturbance_area_dictionary = {}
# Polygon names are the .gpkg file names without the extension
polygon_names = set()
if os.path.exists(polygons_dir):
    polygon_names = {filename[:-len('.gpkg')] for filename in os.listdir(polygons_dir)
                     if filename.endswith('.gpkg')}
area_based_entries = []
for scenario in scenarios:
    parts = scenario.split('_')
    # Deforestation scenarios:
    # <year>_<polygon>_deforestation_<affix>_<X>m_degradation_buffer
    if len(parts) >= 5 and parts[-2:] == ['degradation', 'buffer'] and parts[-3].endswith('m'):
        dist_type, year_affix = parts[-5], parts[-4]
        polygon_name = '_'.join(parts[1:-5])
        if dist_type == 'deforestation' and polygon_name in polygon_names:
            area_based_entries.append(
                (scenario, parts[0], f"{parts[0]}_deforestation_of_{polygon_name}_{year_affix}"))
    # Degradation scenarios: <year>_<polygon>_degradation_<YYYY>
    elif len(parts) >= 3 and parts[-2] == 'degradation' and len(parts[-1]) == 4 and parts[-1].isdigit():
        polygon_name = '_'.join(parts[1:-2])
        if polygon_name in polygon_names:
            area_based_entries.append(
                (scenario, parts[0], f"{parts[0]}_degradation_of_{polygon_name}_{parts[-1]}"))
# Sorted so the printed dictionary has a stable order
for scenario, alt_year, output_base in sorted(area_based_entries):
    deforestation_name = f"{output_base}_deforestation"
    degradation_name = f"{output_base}_degradation"
    print(f"  # Area-based disturbance: {output_base}")
    print(f"  ('{scenario}', '{alt_year}'):")
    print(f"    ('{deforestation_name}',")
    print(f"     '{degradation_name}'),")
    print("")
    disturbance_area_dictionary[(scenario, alt_year)] = (deforestation_name, degradation_name)

print("}\n")


# 5. Restoration potential dictionary
# Pairs restoration scenarios with their actual year (potential AGBD gain).
# Unlike disturbance dictionaries, output is positive (gain not loss).
# Recovery: gain in existing forest (oldgrowth_recovery scenario).
# Reforestation and recovery: gain including cleared areas (no_disturbance_since_oldgrowth).
# Keys: (restoration_scenario, actual_year)
# Values: restoration_potential_name
print("restoration_potential_dictionary = {\n")
restoration_potential_dictionary = {}
# (scenario suffix, output name suffix, printed label), in output order
restoration_kinds = (
    ("oldgrowth_recovery", "recovery_potential",
     "Recovery potential"),
    ("no_disturbance_since_oldgrowth", "reforestation_and_recovery_potential",
     "Reforestation and recovery potential"),
)
for year_a in years_sorted:
    a_str = str(year_a)
    if year_a not in plain_years:
        continue
    for scenario_suffix, output_suffix, label in restoration_kinds:
        restoration_scenario = f"{a_str}_{scenario_suffix}"
        if restoration_scenario not in scenarios:
            continue
        potential_name = f"{a_str}_{output_suffix}"
        print(f"  # {label} in {a_str}")
        print(f"  ('{restoration_scenario}', '{a_str}'):")
        print(f"    '{potential_name}',")
        print("")
        restoration_potential_dictionary[(restoration_scenario, a_str)] = potential_name
print("}\n")

In [None]:
# User selection of disturbance comparisons.
# Keys: (actual_year, counterfactual_scenario)
# Values: (deforestation_name, degradation_name) for oldgrowth baselines,
#         a single degradation_name string for year-based baselines.
# Entries that are commented out below are not part of the dictionary;
# uncomment an entry to include it.
# NOTE(review): the layout matches what the dictionary-builder cell prints, so
# this looks like pasted output of that cell -- confirm before hand-editing.
disturbance_since_dictionary = {

  # Disturbance in 2021 caused by events since oldgrowth
  ('2021', '2021_no_disturbance_since_oldgrowth'):
    ('2021_deforestation_since_oldgrowth',
     '2021_degradation_since_oldgrowth'),

  # Disturbance in 2021 caused by events since 1993
  ('2021', '2021_no_disturbance_since_1993'):
    '2021_degradation_since_1993',

  # Disturbance in 2024 caused by events since oldgrowth
  ('2024', '2024_no_disturbance_since_oldgrowth'):
    ('2024_deforestation_since_oldgrowth',
     '2024_degradation_since_oldgrowth'),

  # Disturbance in 2024 caused by events since 1996
  ('2024', '2024_no_disturbance_since_1996'):
    '2024_degradation_since_1996',

  # # Disturbance in 2024 caused by events since 1997
  # ('2024', '2024_no_disturbance_since_1997'):
  #   '2024_degradation_since_1997',

  # # Disturbance in 2024 caused by events since 1998
  # ('2024', '2024_no_disturbance_since_1998'):
  #   '2024_degradation_since_1998',

  # # Disturbance in 2024 caused by events since 1999
  # ('2024', '2024_no_disturbance_since_1999'):
  #   '2024_degradation_since_1999',

  # # Disturbance in 2024 caused by events since 2000
  # ('2024', '2024_no_disturbance_since_2000'):
  #   '2024_degradation_since_2000',

  # # Disturbance in 2024 caused by events since 2001
  # ('2024', '2024_no_disturbance_since_2001'):
  #   '2024_degradation_since_2001',

  # # Disturbance in 2024 caused by events since 2002
  # ('2024', '2024_no_disturbance_since_2002'):
  #   '2024_degradation_since_2002',

  # # Disturbance in 2024 caused by events since 2003
  # ('2024', '2024_no_disturbance_since_2003'):
  #   '2024_degradation_since_2003',

  # # Disturbance in 2024 caused by events since 2004
  # ('2024', '2024_no_disturbance_since_2004'):
  #   '2024_degradation_since_2004',

  # # Disturbance in 2024 caused by events since 2005
  # ('2024', '2024_no_disturbance_since_2005'):
  #   '2024_degradation_since_2005',

  # # Disturbance in 2024 caused by events since 2006
  # ('2024', '2024_no_disturbance_since_2006'):
  #   '2024_degradation_since_2006',

  # # Disturbance in 2024 caused by events since 2007
  # ('2024', '2024_no_disturbance_since_2007'):
  #   '2024_degradation_since_2007',

  # # Disturbance in 2024 caused by events since 2008
  # ('2024', '2024_no_disturbance_since_2008'):
  #   '2024_degradation_since_2008',

  # # Disturbance in 2024 caused by events since 2009
  # ('2024', '2024_no_disturbance_since_2009'):
  #   '2024_degradation_since_2009',

  # # Disturbance in 2024 caused by events since 2010
  # ('2024', '2024_no_disturbance_since_2010'):
  #   '2024_degradation_since_2010',

  # # Disturbance in 2024 caused by events since 2011
  # ('2024', '2024_no_disturbance_since_2011'):
  #   '2024_degradation_since_2011',

  # # Disturbance in 2024 caused by events since 2012
  # ('2024', '2024_no_disturbance_since_2012'):
  #   '2024_degradation_since_2012',

  # # Disturbance in 2024 caused by events since 2013
  # ('2024', '2024_no_disturbance_since_2013'):
  #   '2024_degradation_since_2013',

  # # Disturbance in 2024 caused by events since 2014
  # ('2024', '2024_no_disturbance_since_2014'):
  #   '2024_degradation_since_2014',

  # # Disturbance in 2024 caused by events since 2015
  # ('2024', '2024_no_disturbance_since_2015'):
  #   '2024_degradation_since_2015',

  # # Disturbance in 2024 caused by events since 2016
  # ('2024', '2024_no_disturbance_since_2016'):
  #   '2024_degradation_since_2016',

  # # Disturbance in 2024 caused by events since 2017
  # ('2024', '2024_no_disturbance_since_2017'):
  #   '2024_degradation_since_2017',

  # # Disturbance in 2024 caused by events since 2018
  # ('2024', '2024_no_disturbance_since_2018'):
  #   '2024_degradation_since_2018',

  # # Disturbance in 2024 caused by events since 2019
  # ('2024', '2024_no_disturbance_since_2019'):
  #   '2024_degradation_since_2019',

  # # Disturbance in 2024 caused by events since 2020
  # ('2024', '2024_no_disturbance_since_2020'):
  #   '2024_degradation_since_2020',

  # # Disturbance in 2024 caused by events since 2021
  # ('2024', '2024_no_disturbance_since_2021'):
  #   '2024_degradation_since_2021',

  # # Disturbance in 2024 caused by events since 2022
  # ('2024', '2024_no_disturbance_since_2022'):
  #   '2024_degradation_since_2022',

  # # Disturbance in 2024 caused by events since 2023
  # ('2024', '2024_no_disturbance_since_2023'):
  #   '2024_degradation_since_2023',

  # # Disturbance in 2024 caused by events since 2024
  # ('2024', '2024_no_disturbance_since_2024'):
  #   '2024_degradation_since_2024',

}

# User selection of multi-year degradation intervals.
# Keys: (counterfactual_recent, counterfactual_baseline)
# Values: degradation_name
# Entries that are commented out below are not part of the dictionary;
# uncomment an entry to include it.
# NOTE(review): the layout matches what the dictionary-builder cell prints, so
# this looks like pasted output of that cell -- confirm before hand-editing.
degradation_interval_dictionary = {

  # Degradation in 2021 from events since oldgrowth to 1992
  ('2021_no_disturbance_since_1993', '2021_no_disturbance_since_oldgrowth'):
    '2021_degradation_from_oldgrowth_to_1992',

  # Degradation in 2024 from events since oldgrowth to 1995
  ('2024_no_disturbance_since_1996', '2024_no_disturbance_since_oldgrowth'):
    '2024_degradation_from_oldgrowth_to_1995',

  # # Degradation in 2024 from events since oldgrowth to 1996
  # ('2024_no_disturbance_since_1997', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_1996',

  # # Degradation in 2024 from events since oldgrowth to 1997
  # ('2024_no_disturbance_since_1998', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_1997',

  # # Degradation in 2024 from events since oldgrowth to 1998
  # ('2024_no_disturbance_since_1999', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_1998',

  # # Degradation in 2024 from events since oldgrowth to 1999
  # ('2024_no_disturbance_since_2000', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_1999',

  # # Degradation in 2024 from events since oldgrowth to 2000
  # ('2024_no_disturbance_since_2001', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2000',

  # # Degradation in 2024 from events since oldgrowth to 2001
  # ('2024_no_disturbance_since_2002', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2001',

  # # Degradation in 2024 from events since oldgrowth to 2002
  # ('2024_no_disturbance_since_2003', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2002',

  # # Degradation in 2024 from events since oldgrowth to 2003
  # ('2024_no_disturbance_since_2004', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2003',

  # # Degradation in 2024 from events since oldgrowth to 2004
  # ('2024_no_disturbance_since_2005', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2004',

  # # Degradation in 2024 from events since oldgrowth to 2005
  # ('2024_no_disturbance_since_2006', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2005',

  # # Degradation in 2024 from events since oldgrowth to 2006
  # ('2024_no_disturbance_since_2007', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2006',

  # # Degradation in 2024 from events since oldgrowth to 2007
  # ('2024_no_disturbance_since_2008', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2007',

  # # Degradation in 2024 from events since oldgrowth to 2008
  # ('2024_no_disturbance_since_2009', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2008',

  # # Degradation in 2024 from events since oldgrowth to 2009
  # ('2024_no_disturbance_since_2010', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2009',

  # # Degradation in 2024 from events since oldgrowth to 2010
  # ('2024_no_disturbance_since_2011', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2010',

  # # Degradation in 2024 from events since oldgrowth to 2011
  # ('2024_no_disturbance_since_2012', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2011',

  # # Degradation in 2024 from events since oldgrowth to 2012
  # ('2024_no_disturbance_since_2013', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2012',

  # # Degradation in 2024 from events since oldgrowth to 2013
  # ('2024_no_disturbance_since_2014', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2013',

  # # Degradation in 2024 from events since oldgrowth to 2014
  # ('2024_no_disturbance_since_2015', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2014',

  # # Degradation in 2024 from events since oldgrowth to 2015
  # ('2024_no_disturbance_since_2016', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2015',

  # # Degradation in 2024 from events since oldgrowth to 2016
  # ('2024_no_disturbance_since_2017', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2016',

  # # Degradation in 2024 from events since oldgrowth to 2017
  # ('2024_no_disturbance_since_2018', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2017',

  # # Degradation in 2024 from events since oldgrowth to 2018
  # ('2024_no_disturbance_since_2019', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2018',

  # # Degradation in 2024 from events since oldgrowth to 2019
  # ('2024_no_disturbance_since_2020', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2019',

  # # Degradation in 2024 from events since oldgrowth to 2020
  # ('2024_no_disturbance_since_2021', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2020',

  # # Degradation in 2024 from events since oldgrowth to 2021
  # ('2024_no_disturbance_since_2022', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2021',

  # # Degradation in 2024 from events since oldgrowth to 2022
  # ('2024_no_disturbance_since_2023', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2022',

  # # Degradation in 2024 from events since oldgrowth to 2023
  # ('2024_no_disturbance_since_2024', '2024_no_disturbance_since_oldgrowth'):
  #   '2024_degradation_from_oldgrowth_to_2023',

  # # Degradation in 2024 from events in 1996 to 1997
  # ('2024_no_disturbance_since_1998', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_1997',

  # # Degradation in 2024 from events in 1996 to 1998
  # ('2024_no_disturbance_since_1999', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_1998',

  # # Degradation in 2024 from events in 1996 to 1999
  # ('2024_no_disturbance_since_2000', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_1999',

  # # Degradation in 2024 from events in 1996 to 2000
  # ('2024_no_disturbance_since_2001', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2000',

  # # Degradation in 2024 from events in 1996 to 2001
  # ('2024_no_disturbance_since_2002', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2001',

  # # Degradation in 2024 from events in 1996 to 2002
  # ('2024_no_disturbance_since_2003', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2002',

  # # Degradation in 2024 from events in 1996 to 2003
  # ('2024_no_disturbance_since_2004', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2003',

  # # Degradation in 2024 from events in 1996 to 2004
  # ('2024_no_disturbance_since_2005', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2004',

  # # Degradation in 2024 from events in 1996 to 2005
  # ('2024_no_disturbance_since_2006', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2005',

  # # Degradation in 2024 from events in 1996 to 2006
  # ('2024_no_disturbance_since_2007', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2006',

  # # Degradation in 2024 from events in 1996 to 2007
  # ('2024_no_disturbance_since_2008', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2007',

  # # Degradation in 2024 from events in 1996 to 2008
  # ('2024_no_disturbance_since_2009', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2008',

  # # Degradation in 2024 from events in 1996 to 2009
  # ('2024_no_disturbance_since_2010', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2009',

  # # Degradation in 2024 from events in 1996 to 2010
  # ('2024_no_disturbance_since_2011', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2010',

  # # Degradation in 2024 from events in 1996 to 2011
  # ('2024_no_disturbance_since_2012', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2011',

  # # Degradation in 2024 from events in 1996 to 2012
  # ('2024_no_disturbance_since_2013', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2012',

  # # Degradation in 2024 from events in 1996 to 2013
  # ('2024_no_disturbance_since_2014', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2013',

  # # Degradation in 2024 from events in 1996 to 2014
  # ('2024_no_disturbance_since_2015', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2014',

  # # Degradation in 2024 from events in 1996 to 2015
  # ('2024_no_disturbance_since_2016', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2015',

  # # Degradation in 2024 from events in 1996 to 2016
  # ('2024_no_disturbance_since_2017', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2016',

  # # Degradation in 2024 from events in 1996 to 2017
  # ('2024_no_disturbance_since_2018', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2017',

  # # Degradation in 2024 from events in 1996 to 2018
  # ('2024_no_disturbance_since_2019', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2018',

  # # Degradation in 2024 from events in 1996 to 2019
  # ('2024_no_disturbance_since_2020', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2019',

  # # Degradation in 2024 from events in 1996 to 2020
  # ('2024_no_disturbance_since_2021', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2020',

  # # Degradation in 2024 from events in 1996 to 2021
  # ('2024_no_disturbance_since_2022', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2021',

  # # Degradation in 2024 from events in 1996 to 2022
  # ('2024_no_disturbance_since_2023', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2022',

  # # Degradation in 2024 from events in 1996 to 2023
  # ('2024_no_disturbance_since_2024', '2024_no_disturbance_since_1996'):
  #   '2024_degradation_from_1996_to_2023',

}

# Effect on 2024 AGBD of the disturbance events of one single year.
# Keys are (recent, baseline) prediction scenario names, consumed in that order by the
# processing loop; values are the output names. For event years 1996-2023 the pair is
# ('2024_no_disturbance_since_{year + 1}', '2024_no_disturbance_since_{year}').
degradation_single_year_dictionary = {
  (f'2024_no_disturbance_since_{event_year + 1}', f'2024_no_disturbance_since_{event_year}'):
    f'2024_effect_of_degradation_in_{event_year}'
  for event_year in range(1996, 2024)
}

# Degradation in 2024 from events in 2024: compared against the plain '2024' scenario
degradation_single_year_dictionary[('2024', '2024_no_disturbance_since_2024')] = (
  '2024_effect_of_degradation_in_2024')

# Area-based disturbance pairs, keyed by (recent, baseline) scenario names.
# A tuple value requests two outputs: (deforestation name, degradation name).
disturbance_area_dictionary = {}

# Area-based disturbance: 2024_deforestation_of_road_mat_daling_2023
disturbance_area_dictionary[
  ('2024_road_mat_daling_deforestation_2023_30m_degradation_buffer', '2024')
] = (
  '2024_deforestation_of_road_mat_daling_2023_deforestation',
  '2024_deforestation_of_road_mat_daling_2023_degradation',
)

# Restoration potential pairs, keyed by (restoration scenario, actual year scenario).
# For each year two outputs are defined:
#   recovery potential:                   '{year}_oldgrowth_recovery' vs '{year}'
#   reforestation and recovery potential: '{year}_no_disturbance_since_oldgrowth' vs '{year}'
restoration_potential_dictionary = {
  (f'{year}_{scenario_suffix}', year): f'{year}_{output_suffix}'
  for year in ('2021', '2024')
  for scenario_suffix, output_suffix in (
    ('oldgrowth_recovery', 'recovery_potential'),
    ('no_disturbance_since_oldgrowth', 'reforestation_and_recovery_potential'),
  )
}

## Calculate difference

In [None]:
# Decimal places kept when rounding the mean, CI and relative uncertainty output rasters
mean_precision = ci_precision = uncertainty_precision = 2
# Confidence level (%) that appears in the "ci_{level}__" raster filenames
confidence_level = 95

def load_raster(path):
    """Open the raster at ``path`` with GDAL and return its contents as an array."""
    dataset = gdal.Open(path)
    pixels = dataset.ReadAsArray()
    # Drop the only reference to the dataset so GDAL releases the file
    del dataset
    return pixels

def round_array(arr, precision):
    """Round ``arr`` to ``precision`` decimals, returning integers when precision is 0.

    Args:
        arr: Array of values to round.
        precision: Number of decimal places passed to ``np.round``.

    Returns:
        The rounded array. When ``precision`` is 0 the result is cast to an integer
        dtype: ``int16`` if every finite value fits (the previous behaviour), otherwise
        the narrowest of ``int32`` / ``int64`` that holds the data. An unconditional
        ``int16`` cast silently wraps values outside +/-32767, which unbounded
        quantities such as relative uncertainty percentages can exceed.
    """
    rounded = np.round(arr, precision)
    if precision != 0:
        return rounded
    # Only finite values decide the dtype; non-finite entries are cast as before
    finite_values = np.asarray(rounded)[np.isfinite(rounded)]
    if finite_values.size == 0:
        return rounded.astype(np.int16)
    lowest, highest = finite_values.min(), finite_values.max()
    for candidate in (np.int16, np.int32):
        limits = np.iinfo(candidate)
        if limits.min <= lowest and highest <= limits.max:
            return rounded.astype(candidate)
    return rounded.astype(np.int64)

# Propagate uncertainty for forest AGBD loss calculations using confidence intervals.
# Measures uncertainty of forest AGBD change from disturbance events only.

# Mathematical basis: For difference Z = X - Y with confidence intervals CI_x, CI_y:
# Combined CI: CI_z = √[CI_x² + CI_y²] (IPCC 2006, Eq. 3.2; 2019, Eq. 3.2A)
# Relative uncertainty: CI_z / |Z| = CI_z / |X - Y|

# Note: Liang et al. (2023) incorrectly used |X + Y| as denominator, violating standard
# uncertainty propagation theory for differences. IPCC guidelines (2006 Section 3.2.3.1,
# 2019 Section 3.2.3.1) specify the denominator must be the absolute value of the
# difference |X - Y| for mathematically correct relative uncertainty calculations.

# Limitation: This approach assumes independence between scenario uncertainties, but
# scenarios using identical models and predictors are highly correlated. This results
# in conservative (overestimated) uncertainty bounds. Liang et al. (2023) has the same
# correlation limitation plus the mathematical error noted above.

# Forest classification from external dataset determines data availability per scenario.
# External disturbance classification determines whether forest AGBD change occurred.
# Uncertainty quantifies confidence in magnitude of forest AGBD change from disturbance.

# References:
# - IPCC (2006) Guidelines Vol.1 Ch.3: Uncertainties, Section 3.2.3.1
# - IPCC (2019) Refinement Vol.1 Ch.3: Uncertainties, Section 3.2.3.1
# - Liang et al. (2023) Remote Sensing of Environment 284:113367

# Propagate uncertainty function
def propagate_uncertainty(mean1, ci1, mean2, ci2, is_restoration=False):
    """Propagate confidence intervals through the difference ``mean1 - mean2``.

    Args:
        mean1, mean2: Forest AGBD values for two scenarios/timepoints (Mg/ha).
        ci1, ci2: Confidence interval half-widths (Mg/ha).
        is_restoration: If True, a positive difference (gain) is expected; if False,
            a negative difference (loss) is expected.

    Returns:
        Tuple ``(relative_uncertainty, ci_combined)``:
        relative_uncertainty is a percentage (ratio * 100). It is not clamped, so it
        exceeds 100 wherever the CI is larger than the reference magnitude.
        ci_combined is the absolute CI, sqrt(ci1^2 + ci2^2), for downstream calculations.
    """
    def _ratio(numerator, reference):
        # Element-wise numerator / |reference|, left at 0 wherever reference is 0
        return np.divide(numerator, np.abs(reference),
                         out=np.zeros_like(numerator, dtype=np.float64),
                         where=(reference != 0))

    delta = mean1 - mean2
    # Combine absolute uncertainties using IPCC error propagation formula
    ci_combined = np.sqrt(np.square(ci1) + np.square(ci2))
    # Forest/non-forest transitions: one side is exactly (0 mean, 0 CI), which is how the
    # callers encode nodata. Uncertainty is then that of the side that does have data.
    lost_forest = (ci1 == 0) & (mean1 == 0) & (ci2 != 0) & (mean2 != 0)
    gained_forest = (ci1 != 0) & (mean1 != 0) & (ci2 == 0) & (mean2 == 0)
    # Zero uncertainty when the sign is unexpected (gain for disturbance, loss for
    # restoration) or when there is no difference at all
    wrong_direction = (delta < 0) if is_restoration else (delta > 0)
    no_change = (np.abs(delta) == 0)
    # First matching condition wins; the default is the standard CI_z / |X - Y|
    relative_uncertainty = np.select(
        [lost_forest, gained_forest, wrong_direction | no_change],
        [_ratio(ci2, mean2), _ratio(ci1, mean1), 0],
        default=_ratio(ci_combined, delta))
    return relative_uncertainty * 100.0, ci_combined

# Calculate forest disturbance components using nodata-based decomposition.
def calculate_forest_disturbance_components(recent_mean, baseline_mean, nodata, calculate_deforestation,
                                       recent_ci=None, baseline_ci=None):
    """Split the AGBD difference between two scenarios into deforestation and degradation.

    The difference is ``fill(recent, 0) - baseline``, where recent nodata pixels are
    filled with 0 only where the baseline has data.

    Args:
        recent_mean: Scenario with more disturbance (lower AGBD).
        baseline_mean: Scenario with less disturbance (higher AGBD).
        nodata: Value marking missing pixels in both inputs and in the outputs.
        calculate_deforestation: Whether to compute the deforestation component.
        recent_ci, baseline_ci: Optional CI arrays; when recent_ci is given, CI and
            relative uncertainty are returned alongside each mean component.

    Returns:
        Without CIs: ``deg`` or, with deforestation, ``(defor, deg)``.
        With CIs: ``(deg, deg_ci, deg_unc)`` or, with deforestation,
        ``((defor, defor_ci, defor_unc), (deg, deg_ci, deg_unc))``.
        Deforestation is nodata where the baseline is nodata, the value where only the
        recent scenario is nodata, and 0 elsewhere. Degradation is nodata where either
        scenario is nodata, and the value elsewhere.
    """
    recent_missing = (recent_mean == nodata)
    baseline_missing = (baseline_mean == nodata)
    either_missing = recent_missing | baseline_missing
    # Forest-to-nonforest pixels: recent is nodata while the baseline has data
    forest_lost = recent_missing & ~baseline_missing
    recent_mean_filled = np.where(forest_lost, 0, recent_mean)

    def _split(values):
        # Returns (deforestation part or None, degradation part) of one value layer
        degradation = np.where(either_missing, nodata, values)
        if not calculate_deforestation:
            return None, degradation
        deforestation = np.where(baseline_missing, nodata,
                                 np.where(recent_missing, values, 0))
        return deforestation, degradation

    defor, deg = _split(recent_mean_filled - baseline_mean)
    if recent_ci is None:
        return (defor, deg) if calculate_deforestation else deg

    # Propagate uncertainty using the same fill as the means (filled pixels get CI 0)
    recent_ci_filled = np.where(forest_lost, 0, recent_ci)
    rel_unc, ci_combined = propagate_uncertainty(recent_mean_filled, recent_ci_filled,
                                                 baseline_mean, baseline_ci)
    defor_ci, deg_ci = _split(ci_combined)
    defor_unc, deg_unc = _split(rel_unc)
    degradation_layers = (deg, deg_ci, deg_unc)
    if not calculate_deforestation:
        return degradation_layers
    return ((defor, defor_ci, defor_unc), degradation_layers)

# Calculate restoration potential using nodata-based masking.
def calculate_restoration_potential(restoration_mean, actual_mean, nodata,
                                    restoration_ci=None, actual_ci=None):
    """Compute restoration potential as ``restoration - fill(actual, 0)``.

    Args:
        restoration_mean: Scenario representing the potential restored state (higher AGBD).
        actual_mean: Current state (lower AGBD).
        nodata: Value marking missing pixels in the inputs and in the outputs.
        restoration_ci, actual_ci: Optional CI arrays; when restoration_ci is given, CI
            and relative uncertainty are returned as well.

    Returns:
        ``pot`` without CIs, otherwise ``(pot, pot_ci, pot_unc)``. Every output is
        nodata only where the restoration scenario is nodata.
    """
    target_missing = (restoration_mean == nodata)
    # Reforestation case: actual is nodata but the restoration scenario has data
    refill = (actual_mean == nodata) & ~target_missing
    actual_mean_filled = np.where(refill, 0, actual_mean)

    def _mask_by_restoration(values):
        # Outputs follow the restoration scenario's nodata footprint only
        return np.where(target_missing, nodata, values)

    pot = _mask_by_restoration(restoration_mean - actual_mean_filled)
    if restoration_ci is None:
        return pot

    # Propagate uncertainty; filled pixels carry a CI of 0 like their mean
    rel_unc, ci_combined = propagate_uncertainty(
        restoration_mean, restoration_ci,
        actual_mean_filled, np.where(refill, 0, actual_ci),
        is_restoration=True)
    return (pot, _mask_by_restoration(ci_combined), _mask_by_restoration(rel_unc))

# Uncertainty mode is active when the source directory is the uncertainty directory
use_uncertainty = (source_dir == uncertainty_dir)

# Merge every pair dictionary into a single lookup.
# If a (recent, baseline) key occurs in more than one dictionary, the later one wins.
all_disturbance_dictionaries = {
    **disturbance_since_dictionary,
    **degradation_interval_dictionary,
    **degradation_single_year_dictionary,
    **disturbance_area_dictionary,
    **restoration_potential_dictionary,
}

# Progress label, updated in place as each pair is processed
progress_index = 0
total_operations = len(all_disturbance_dictionaries)
progress_label = widgets.Label(f"Forest disturbance calculation progress: {progress_index}/{total_operations}")
display(progress_label)

# Process all forest disturbance calculations
# Each dictionary key is a (recent, baseline) pair of prediction scenario names and each
# value names the output raster(s). Pairs whose outputs all exist are skipped, so the
# cell can be re-run after an interruption.
for (recent_key, baseline_key), value in all_disturbance_dictionaries.items():
    # Determine output type from dictionary value structure
    # A tuple value is (deforestation name, degradation name); a plain string is a single
    # output name, stored in deg_name (this includes restoration potential outputs).
    if isinstance(value, tuple):
        defor_name, deg_name = value
        has_deforestation = True
    else:
        deg_name = value
        has_deforestation = False
    # Determine calculation type
    is_restoration = (recent_key, baseline_key) in restoration_potential_dictionary
    is_interval = (recent_key, baseline_key) in degradation_interval_dictionary
    output_dir = restoration_dir if is_restoration else disturbance_dir
    # Define output paths and check existence
    # Uncertainty mode writes three rasters per component (mean, CI, relative uncertainty);
    # otherwise a single raster per component.
    if use_uncertainty:
        output_paths = {
            'deg_mean': join(output_dir, f"mean__{deg_name}__{selected_model}.tif"),
            'deg_ci': join(output_dir, f"ci_{confidence_level}__{deg_name}__{selected_model}.tif"),
            'deg_unc': join(output_dir, f"uncertainty__{deg_name}__{selected_model}.tif"),}
        if has_deforestation:
            output_paths.update({
                'defor_mean': join(output_dir, f"mean__{defor_name}__{selected_model}.tif"),
                'defor_ci': join(output_dir, f"ci_{confidence_level}__{defor_name}__{selected_model}.tif"),
                'defor_unc': join(output_dir, f"uncertainty__{defor_name}__{selected_model}.tif"),})
    else:
        output_paths = {'deg': join(output_dir, f"{deg_name}__{selected_model}.tif")}
        if has_deforestation:
            output_paths['defor'] = join(output_dir, f"{defor_name}__{selected_model}.tif")
    # Skip this pair only when every expected output already exists
    if all(exists(p) for p in output_paths.values()):
        progress_index += 1
        progress_label.value = f"Forest disturbance calculation progress: {progress_index}/{total_operations}"
        continue
    # Validate input paths
    # NOTE(review): assert statements are stripped when Python runs with -O; a missing
    # input would then surface later as a GDAL open error instead of this message.
    if use_uncertainty:
        recent_mean_path = join(predictions_dir, f"mean__{recent_key}__{selected_model}.tif")
        recent_ci_path = join(predictions_dir, f"ci_{confidence_level}__{recent_key}__{selected_model}.tif")
        baseline_mean_path = join(predictions_dir, f"mean__{baseline_key}__{selected_model}.tif")
        baseline_ci_path = join(predictions_dir, f"ci_{confidence_level}__{baseline_key}__{selected_model}.tif")
        assert exists(recent_mean_path), f"Missing: {recent_mean_path}"
        assert exists(recent_ci_path), f"Missing: {recent_ci_path}"
        assert exists(baseline_mean_path), f"Missing: {baseline_mean_path}"
        assert exists(baseline_ci_path), f"Missing: {baseline_ci_path}"
    else:
        recent_path = join(predictions_dir, f"{recent_key}__{selected_model}.tif")
        baseline_path = join(predictions_dir, f"{baseline_key}__{selected_model}.tif")
        assert exists(recent_path), f"Missing: {recent_path}"
        assert exists(baseline_path), f"Missing: {baseline_path}"
    # Interval pairs: apply actual year mask to degradation
    # The year is the leading token of the recent scenario name (e.g. '2024').
    if is_interval:
        year = recent_key.split('_')[0]
        interval_mask_arr = load_raster(join(scenario_masks_dir, f"{year}.tif"))
    # Load inputs and calculate components
    if use_uncertainty:
        recent_mean = load_raster(recent_mean_path)
        recent_ci = load_raster(recent_ci_path)
        baseline_mean = load_raster(baseline_mean_path)
        baseline_ci = load_raster(baseline_ci_path)
        # Raster passed to export_array_as_tif as the template for the outputs
        template_path = recent_mean_path if is_restoration else baseline_mean_path
        if is_restoration:
            # For restoration pairs the first key is the restoration scenario and the
            # second the actual state; the result reuses the deg_* variables.
            deg, deg_ci, deg_unc = calculate_restoration_potential(
                recent_mean, baseline_mean, nodatavalue, recent_ci, baseline_ci)
        else:
            result = calculate_forest_disturbance_components(
                recent_mean, baseline_mean, nodatavalue, has_deforestation, recent_ci, baseline_ci)
            if has_deforestation: (defor, defor_ci, defor_unc), (deg, deg_ci, deg_unc) = result
            else: deg, deg_ci, deg_unc = result
        # Interval pairs: apply actual year mask to degradation
        if is_interval:
            deg = np.where(interval_mask_arr == nodatavalue, nodatavalue, deg)
            deg_ci = np.where(interval_mask_arr == nodatavalue, nodatavalue, deg_ci)
            deg_unc = np.where(interval_mask_arr == nodatavalue, nodatavalue, deg_unc)
        # Round degradation mean, zero uncertainty where mean rounds to zero
        # (nodata pixels are unaffected: their rounded mean equals nodatavalue, not 0)
        deg_rounded = round_array(deg, mean_precision)
        deg_ci = np.where(deg_rounded == 0, 0, deg_ci)
        deg_unc = np.where(deg_rounded == 0, 0, deg_unc)
        # Export degradation
        export_array_as_tif(deg_rounded, output_paths['deg_mean'], template=template_path)
        export_array_as_tif(round_array(deg_ci, ci_precision), output_paths['deg_ci'], template=template_path)
        export_array_as_tif(round_array(deg_unc, uncertainty_precision), output_paths['deg_unc'], template=template_path)
        # Export deforestation
        # Same treatment as degradation: zero CI/uncertainty where the rounded mean is 0
        if has_deforestation:
            defor_rounded = round_array(defor, mean_precision)
            defor_ci = np.where(defor_rounded == 0, 0, defor_ci)
            defor_unc = np.where(defor_rounded == 0, 0, defor_unc)
            export_array_as_tif(defor_rounded, output_paths['defor_mean'], template=template_path)
            export_array_as_tif(round_array(defor_ci, ci_precision), output_paths['defor_ci'], template=template_path)
            export_array_as_tif(round_array(defor_unc, uncertainty_precision), output_paths['defor_unc'], template=template_path)
    else:
        # Mean-only mode: same calculations without CI inputs or outputs
        recent_arr = load_raster(recent_path)
        baseline_arr = load_raster(baseline_path)
        template_path = recent_path if is_restoration else baseline_path
        if is_restoration:
            deg = calculate_restoration_potential(recent_arr, baseline_arr, nodatavalue)
        else:
            result = calculate_forest_disturbance_components(recent_arr, baseline_arr, nodatavalue, has_deforestation)
            if has_deforestation: defor, deg = result
            else: deg = result
        # Interval pairs: apply actual year mask to degradation
        if is_interval:
            deg = np.where(interval_mask_arr == nodatavalue, nodatavalue, deg)
        if has_deforestation:
            export_array_as_tif(round_array(defor, mean_precision), output_paths['defor'], template=template_path)
        export_array_as_tif(round_array(deg, mean_precision), output_paths['deg'], template=template_path)

    # Pair finished: advance the progress label
    progress_index += 1
    progress_label.value = f"Forest disturbance calculation progress: {progress_index}/{total_operations}"
print("All forest disturbance calculations complete.\n")

# Intactness

## Percentage loss

In [None]:
# Intactness is measured as relative percentage loss of AGBD within an area of interest

# Build dictionary of degradation rasters and their corresponding baseline predictions.
# Degradation: {year}_degradation_since_{baseline}
# Baseline: {year}_no_disturbance_since_{baseline}, where {baseline} is a year or 'oldgrowth'
# The result is printed as a literal so it can be pasted and edited in the next cell.

intactness_pairs = {}

# List the predictions directory once instead of once per degradation raster:
# its contents do not change inside the loop and every listing is a directory read.
prediction_files = set(os.listdir(predictions_dir))
# Outputs for the selected model end with this suffix; rasters written for other
# models are ignored because the percentage loss cell only reads the selected model.
model_suffix = f"__{selected_model}.tif"

for deg_file in os.listdir(disturbance_dir):
    is_mean = 'mean__' in deg_file
    # In uncertainty mode only the mean rasters define scenarios (skip CI / uncertainty)
    if source_dir == uncertainty_dir and not is_mean: continue
    if '_degradation_since_' not in deg_file: continue
    if not deg_file.endswith(model_suffix): continue
    # Filename layout: [mean__]{scenario}__{model}.tif
    deg_scenario = deg_file.split('__')[1 if is_mean else 0]
    year, since = deg_scenario.split('_degradation_since_')
    baseline_scenario = f"{year}_no_disturbance_since_{since}"
    baseline_file = f"{'mean__' if is_mean else ''}{baseline_scenario}{model_suffix}"
    # Keep the pair only when the matching baseline prediction exists
    if baseline_file in prediction_files:
        intactness_pairs[deg_scenario] = baseline_scenario
print("intactness_pairs = {")
for deg, baseline in sorted(intactness_pairs.items()):
    print(f"    '{deg}': '{baseline}',")
print("}")

In [None]:
# Degradation scenario -> baseline scenario whose AGBD is the denominator
intactness_pairs = {
    '2021_degradation_since_1993': '2021_no_disturbance_since_1993',
    '2021_degradation_since_oldgrowth': '2021_no_disturbance_since_oldgrowth',
    '2024_degradation_since_1996': '2024_no_disturbance_since_1996',
    '2024_degradation_since_oldgrowth': '2024_no_disturbance_since_oldgrowth',
}

# Decimal places of the exported percentage raster
percentage_loss_precision = 0

for deg_scenario, baseline_scenario in intactness_pairs.items():
    year = deg_scenario.split('_')[0]
    base_name = f"{baseline_scenario}__{deg_scenario}"
    # One sub-directory per baseline / degradation pair
    intactness_subdir = join(intactness_dir, base_name)
    makedirs(intactness_subdir, exist_ok=True)
    percentage_filename = f"percentage_change__{base_name}__{selected_model}.tif"
    percentage_path = join(intactness_subdir, percentage_filename)

    if not exists(percentage_path):
        # Input rasters carry a "mean__" prefix only in uncertainty mode
        prefix = "mean__" if source_dir == uncertainty_dir else ""
        baseline_path = join(predictions_dir, f"{prefix}{baseline_scenario}__{selected_model}.tif")
        deg_path = join(disturbance_dir, f"{prefix}{deg_scenario}__{selected_model}.tif")
        mask_path = join(scenario_masks_dir, f"{year}.tif")

        # Read the three rasters in order: baseline, degradation, scenario mask
        baseline_arr, deg_arr, mask_arr = (
            gdal.Open(raster_path).ReadAsArray()
            for raster_path in (baseline_path, deg_path, mask_path))

        # Percentage = degradation / baseline * 100, computed only where all three
        # rasters have data; everything else stays nodatavalue
        valid = (mask_arr != nodatavalue) & (baseline_arr != nodatavalue) & (deg_arr != nodatavalue)
        zero_baseline = valid & (baseline_arr == 0)
        nonzero = valid & (baseline_arr != 0)
        percentage_arr = np.full_like(baseline_arr, nodatavalue, dtype=np.float64)
        # A zero baseline cannot be divided by: report 0% there
        percentage_arr[zero_baseline] = 0
        percentage_arr[nonzero] = (deg_arr[nonzero] / baseline_arr[nonzero]) * 100
        percentage_arr = np.round(percentage_arr, percentage_loss_precision)
        export_array_as_tif(percentage_arr, percentage_path, template=baseline_path)
        print(f"{percentage_filename} exported.")
    else:
        print(f"{percentage_filename} already exists.")

## Quantiles (relative intactness)

In [None]:
# Polygon files that are never offered as masks for relative intactness quantiles
polygons_to_exclude = ['template.gpkg', 'project_area_buffered_bbox.gpkg']

# Select baseline / disturbance pairs to measure relative intactness
# Printed as a list literal to paste into the next cell.
# The loop variable is not called `dir`: that would shadow the builtin dir() for the
# rest of the notebook session.
print("baseline_disturbance_pairs = [")
for pair_dir in os.listdir(intactness_dir):
  print(f"'{pair_dir}',")
print("]\n")

# Select polygons to mask and calculate quantiles
# Generated '*inverse*' polygons and the excluded files are left out; None is offered
# as an extra option after the polygon files.
print("mask_polygons = [")
for polygon in os.listdir(polygons_dir):
  if polygon not in polygons_to_exclude and 'inverse' not in polygon:
    print(f"'{polygon}',")
print(None)
print("]")

In [None]:
# Baseline / disturbance pair directories to score
baseline_disturbance_pairs = [
    '2021_no_disturbance_since_1993__2021_degradation_since_1993',
    '2021_no_disturbance_since_oldgrowth__2021_degradation_since_oldgrowth',
    '2024_no_disturbance_since_1996__2024_degradation_since_1996',
    '2024_no_disturbance_since_oldgrowth__2024_degradation_since_oldgrowth',
]

# Polygons used as masks; uncomment an entry to include it
mask_polygons = [
    # 'project_area.gpkg',
    # 'peninsular_malaysia.gpkg',
    # 'lu_yong.gpkg',
    # 'lu_yong_lipis.gpkg',
    # 'lu_berkelah_jerantut.gpkg',
    # 'lu_tekai_tembeling.gpkg',
    # 'lu_ais.gpkg',
    # 'lu_pa_taman_negara_krau.gpkg',
    # 'lu_tekam.gpkg',
    # 'lu_berkelah_temerloh.gpkg',
    # 'lu_remen_chereh.gpkg',
    # 'lu_berkelah_kuantan.gpkg',
    'forest_reserves.gpkg',
    'gedi_area.gpkg',
    # None
]

# When True, nodata values inside the mask become a score of 0 (non-forest areas).
# When False, non-forest and masked areas are both left as nodatavalue.
convert_non_forest_nodatavalue_to_0 = True

# Highest intactness score (e.g. 10 for a 1 - 10 scale)
top_score = 10

# The top score is reserved for 0% change, so one fewer quantile is computed
num_quantiles = top_score - 1

print(f"Calculating {num_quantiles} quantiles for negative percentage change (scores 1-{num_quantiles}), with score {top_score} reserved for 0% change.\n")

# Create polygon mask array using template tif
template = gdal.Open(template_tif_path)
template_array = template.ReadAsArray()
template = None
# All-True mask with the template's shape (not modified in this part of the cell)
polygon_mask_array = np.ones_like(template_array, dtype=bool)

# Boolean mask per polygon file: True inside the polygon, False outside it
polygon_masks = {}
template_polygon_path = join(polygons_dir, "template.gpkg")
for mask_polygon in mask_polygons:
  if mask_polygon is None:
    continue
  # Filename without its '.gpkg' extension
  polygon_stem = mask_polygon[:-5]

  # Create an inverse polygon (template extent minus the polygon) for masking.
  # An existing inverse file is reused as-is; delete it to force regeneration.
  inverse_polygon_path = join(polygons_dir, f"{polygon_stem}_inverse.gpkg")
  if not exists(inverse_polygon_path):
    polygon_path = join(polygons_dir, mask_polygon)
    template_polygon = gpd.read_file(template_polygon_path)
    polygon_read = gpd.read_file(polygon_path)
    # Merge every feature before subtracting. GeoSeries.difference(GeoSeries) works row
    # by row on aligned indexes, so subtracting the series directly would remove only
    # the first feature of a multi-feature file from the template.
    polygon_geometries = polygon_read['geometry']
    if hasattr(polygon_geometries, 'union_all'):
      merged_polygon = polygon_geometries.union_all()
    else:
      # Older geopandas releases without union_all()
      merged_polygon = polygon_geometries.unary_union
    inverse_polygon = template_polygon['geometry'].iloc[0].difference(merged_polygon)
    # Reuse the polygon's CRS object directly; rebuilding it from an EPSG code fails
    # for CRSs that have no EPSG identifier
    inverse_polygon_gdf = gpd.GeoDataFrame({'geometry': [inverse_polygon]}, crs=polygon_read.crs)
    inverse_polygon_gdf.to_file(inverse_polygon_path, driver="GPKG")
    print(f"An inverse masking polygon for {mask_polygon} has been created in {polygons_dir}.")
  else: print(f"An inverse masking polygon for {mask_polygon} already exists.")

  # Create and store individual mask for this polygon:
  # burn nodatavalue into a temporary copy of the template everywhere outside the polygon
  print(f"Creating polygon mask for {mask_polygon}.")
  temp_mask_path = join(intactness_dir, f"temp_mask_{polygon_stem}.tif")
  copyfile(template_tif_path, temp_mask_path)
  burn_polygon_to_raster(temp_mask_path, inverse_polygon_path, fixed_value=nodatavalue, all_touched=False)
  temp_mask = gdal.Open(temp_mask_path)
  temp_mask_array = temp_mask.ReadAsArray()
  # Release the dataset before deleting the temporary file
  temp_mask = None
  individual_mask = np.ones_like(template_array, dtype=bool)
  individual_mask[temp_mask_array == nodatavalue] = False
  polygon_masks[mask_polygon] = individual_mask
  os.remove(temp_mask_path)

# For every baseline / disturbance pair and every selected mask polygon:
#   1. write a polygon-masked copy of the percentage change raster (if missing),
#   2. convert percentage change to a relative intactness score raster,
#   3. write the score ranges as .csv and a histogram as .png next to it.
# Outputs that already exist are skipped (the check is on the score .tif only).
for base_dist_name in baseline_disturbance_pairs:
  intactness_baseline_dist_dir = join(intactness_dir, base_dist_name)
  percentage_filename = f"percentage_change__{base_dist_name}__{selected_model}"
  percentage_path = join(intactness_baseline_dist_dir, f"{percentage_filename}.tif")

  for mask_polygon in mask_polygons:

    if mask_polygon is not None:
      # Copy the percentage raster and burn the inverse polygon into the copy.
      # This masked copy is an output in its own right; it is not read again
      # below (scoring uses the original raster plus polygon_masks).
      percentage_masked_filename = f"{percentage_filename}__masked_{mask_polygon[:-5]}.tif"
      percentage_masked_path = join(intactness_baseline_dist_dir, percentage_masked_filename)
      if not exists(percentage_masked_path):
        print(f"Copying {percentage_filename} for masking...")
        copyfile(percentage_path, percentage_masked_path)
        print(f"Masking {percentage_filename} with {mask_polygon}...")
        inverse_polygon_path = join(polygons_dir, f"{mask_polygon[:-5]}_inverse.gpkg")
        burn_polygon_to_raster(percentage_masked_path, inverse_polygon_path, fixed_value=nodatavalue, all_touched=False)
        # Recompress the prediction after burning the polygon masks:
        # read the burned raster back and re-export it over the same path
        percentage_masked = gdal.Open(percentage_masked_path)
        percentage_masked_array = percentage_masked.ReadAsArray()
        # Drop the dataset handle before the file is overwritten
        del percentage_masked
        percentage_masked = None
        export_array_as_tif(percentage_masked_array, percentage_masked_path)
        print(f"{percentage_filename} masked.")
      else: print(f"{percentage_masked_filename} already exists.")

    # Define paths and arrays
    # The output name includes the polygon stem only when a polygon mask is used
    if mask_polygon is None: relative_intactness_name = f'intactness__{top_score}_quantiles__{base_dist_name}__{selected_model}'
    else: relative_intactness_name = f'intactness__{mask_polygon[:-5]}_{top_score}_quantiles__{base_dist_name}__{selected_model}'
    relative_intactness_path = join(intactness_baseline_dist_dir, f'{relative_intactness_name}.tif')
    if not exists(relative_intactness_path):
      # Always track originally nodata pixels from the original percentage raster
      original_percentage = gdal.Open(percentage_path)
      original_percentage_array = original_percentage.ReadAsArray()
      original_percentage = None
      originally_nodata_mask = original_percentage_array == nodatavalue

      # Apply polygon masking to percentage array using pre-created mask
      # (pixels where the mask is False are set to nodatavalue)
      if mask_polygon is None:
        percentage_array = original_percentage_array.copy()
      else:
        percentage_array = original_percentage_array.copy()
        percentage_array[~polygon_masks[mask_polygon]] = nodatavalue

      # Capture original data for histogram before conversions
      # (boolean indexing returns a copy, so the clamp below does not alter it)
      original_valid_elements = percentage_array[percentage_array != nodatavalue]

      # Output starts as all nodatavalue; scores are filled in below
      relative_intactness_array = np.full_like(percentage_array, nodatavalue, dtype=np.int16)

      # Set all values above 0 to 0, so positive change is scored like 0% change
      percentage_array[percentage_array > 0] = 0

      # Separate valid and invalid (nodatavalue) elements
      valid_elements = percentage_array[percentage_array != nodatavalue]

      # Separate zero and non-zero values, excluding originally nodata pixels from quantile calculation
      zero_elements = percentage_array == 0
      # Exclude pixels that were originally nodata from quantile calculation
      quantile_mask = (percentage_array != nodatavalue) & (~originally_nodata_mask) & (percentage_array != 0)
      non_zero_valid_elements = percentage_array[quantile_mask]

      # Calculate quantiles for non-zero valid elements only
      # linspace(...)[1:-1] keeps the num_quantiles - 1 interior cut points;
      # an empty list is used when there are no non-zero valid values
      quantiles = np.percentile(non_zero_valid_elements, np.linspace(0, 100, num_quantiles + 1)[1:-1]) if len(non_zero_valid_elements) > 0 else []

      # Assign scores 1 to num_quantiles for non-zero values
      # Score i covers (quantiles[i-2], quantiles[i-1]]; the first range is
      # open below (-inf) and the last is open above (inf)
      for i in range(1, num_quantiles + 1):
          lower_bound = quantiles[i-2] if i > 1 and len(quantiles) >= i-1 else float('-inf')
          upper_bound = quantiles[i-1] if len(quantiles) >= i else float('inf')
          relative_intactness_array[
              (percentage_array > lower_bound) & (percentage_array <= upper_bound) &
              (percentage_array != 0) & (percentage_array != nodatavalue)] = i

      # Set all zero values to top score
      relative_intactness_array[zero_elements] = top_score

      # Set areas outside polygon to nodatavalue using pre-created mask
      if mask_polygon is not None:
        relative_intactness_array[~polygon_masks[mask_polygon]] = nodatavalue

      # Convert non-forest areas inside polygon to 0
      # ("non-forest" here means pixels that were nodatavalue in the original raster)
      if convert_non_forest_nodatavalue_to_0:
        if mask_polygon is None:
          non_forest_inside_polygon = originally_nodata_mask
        else:
          non_forest_inside_polygon = originally_nodata_mask & polygon_masks[mask_polygon]
        relative_intactness_array[non_forest_inside_polygon] = 0

      export_array_as_tif(relative_intactness_array, relative_intactness_path)

      # Prepare data for CSV: Collect lower and upper bounds for each category
      ranges_data = {'Score': [], 'Lower_Bound': [], 'Upper_Bound': []}

      # Add ranges for scores 1 to num_quantiles (non-zero values)
      # Same bounds as used for scoring, except that the last non-zero score
      # reports a small negative upper bound instead of inf
      for i in range(1, num_quantiles + 1):
          lower_bound = quantiles[i-2] if i > 1 and len(quantiles) >= i-1 else float('-inf')
          if i == num_quantiles:
            upper_bound = -0.000000001
          else: upper_bound = quantiles[i-1] if len(quantiles) >= i else float('inf')
          ranges_data['Score'].append(i)
          ranges_data['Lower_Bound'].append(lower_bound)
          ranges_data['Upper_Bound'].append(upper_bound)

      # Add entry for top score (value of 0)
      ranges_data['Score'].append(top_score)
      ranges_data['Lower_Bound'].append(0)
      ranges_data['Upper_Bound'].append(0)

      # Create DataFrame and save to CSV
      relative_intactness_df = pd.DataFrame(ranges_data)
      relative_intactness_csv_path = os.path.join(intactness_baseline_dist_dir, f'{relative_intactness_name}.csv')
      relative_intactness_df.to_csv(relative_intactness_csv_path, index=False)

      # Generate and save histogram as .png
      # The bins are computed from the values captured before positive values
      # were clamped to 0
      histogram_path = join(intactness_baseline_dist_dir, f'{relative_intactness_name}.png')
      plt.figure()
      counts, bins, patches = plt.hist(original_valid_elements.flatten(), bins=100)

      # Count how many values became 0 after conversions
      zero_count_after_conversion = np.sum(valid_elements == 0)

      # Find the zero bin and set its frequency to 0
      # The plot is then redrawn as a bar chart without that bin, and the
      # number of zero values is shown as a text annotation instead
      zero_idx = next((i for i, (l, r) in enumerate(zip(bins[:-1], bins[1:])) if l <= 0 <= r), None)
      if zero_idx is not None:
          counts[zero_idx] = 0
          plt.clf()
          plt.bar(bins[:-1], counts, width=np.diff(bins), align='edge')
          x_center = (bins.min() + bins.max()) / 2
          y_max = max(counts)
          plt.text(x_center, y_max * 0.9,
                  f'0 value frequency = {zero_count_after_conversion:,}',
                  ha='center', va='center', fontweight='bold',
                  bbox=dict(boxstyle='round,pad=0.5', facecolor='white', alpha=0.9))
      plt.title(f'{relative_intactness_name} Histogram')
      plt.xlabel('Value')
      plt.ylabel('Frequency')
      # Show y-axis tick labels as integers with thousands separators
      plt.gca().yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: format(int(x), ',')))
      plt.tight_layout()
      plt.savefig(histogram_path)
      plt.close()

    else: print(f"{relative_intactness_name} already exists.")

# Disconnect runtime

In [67]:
# Useful for stopping background execution
# Unassigns the Colab runtime via google.colab.runtime (imported at the top of
# the notebook). NOTE(review): presumably anything not written to Drive is
# lost once the runtime is released - confirm before relying on it.
runtime.unassign()