<a href="https://colab.research.google.com/github/joekelly211/masfi/blob/dev/6_scenarios.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports, directories and global functions

In [None]:
# Root directory of the project on Google Drive.
# Personal drives live under '/content/drive/MyDrive/'.
# Shared drives live under '/gdrive/Shareddrives/' (the drive must be created first).

base_dir = "/gdrive/Shareddrives/masfi_asartr"
# base_dir = '/content/drive/MyDrive/masfi_asartr'

# Mount Google Drive at the mount point that matches base_dir
from google.colab import drive
import os
import sys

_shared_drive_root = '/gdrive/Shareddrives/'
_personal_drive_root = '/content/drive/MyDrive/'
if base_dir.startswith(_shared_drive_root):
  drive.mount('/gdrive', force_remount=True)
elif base_dir.startswith(_personal_drive_root):
  drive.mount('/content/drive', force_remount=True)
  # Only the personal drive directory is created automatically
  os.makedirs(base_dir, exist_ok=True)
else:
  print("Create a base_dir beginning with '/gdrive/Shareddrives/' or '/content/drive/MyDrive/'.")

# Make modules stored in the base directory importable
_path_to_add = os.path.realpath(base_dir)
if _path_to_add not in sys.path:
  sys.path.append(_path_to_add)

In [None]:
# Capture outputs
%%capture
# Installs and upgrades
!pip install astropy
!pip install geopandas
!pip install rasterio
!pip install tensorflow
!pip install xgboost --upgrade
!apt-get install -y gdal-bin

In [None]:
# Reload imports, replacing those in the cache
%reload_ext autoreload
%autoreload 2
# Imports
from astropy.convolution import convolve, Gaussian2DKernel
import geopandas as gpd
from google.colab import runtime
import itertools
import json
from os import makedirs
from os.path import join, exists
from osgeo import gdal
import ipywidgets as widgets
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
import psutil
import rasterio
from rasterio.features import rasterize
import re
from scipy import ndimage
import shutil
from shutil import copyfile
import tensorflow as tf
import xgboost as xgb

In [None]:
# Project directories, all relative to base_dir
areas_dir = join(base_dir, "1_areas")
polygons_dir = join(areas_dir, "polygons")

feature_dir = join(base_dir, "3_features")
feature_resampled_dir = join(feature_dir, "resampled")
feature_final_dir = join(feature_dir, "final")
models_dir = join(base_dir, "5_models")
scenarios_dir = join(base_dir, "6_scenarios")
masks_dir = join(scenarios_dir, "scenario_masks")

# Only the scenario output directories are created here
for _output_dir in (scenarios_dir, masks_dir):
  makedirs(_output_dir, exist_ok=True)

In [None]:
# Global function: export an array as a .tif
template_tif_path = join(areas_dir, "template.tif")
nodatavalue = -1111111
compress = True
def export_array_as_tif(input_array, output_tif, template=template_tif_path, nodatavalue=nodatavalue, compress=compress):
  """Write an array to a single-band Float32 GeoTIFF georeferenced like a template.

  Args:
    input_array: 2D array written to band 1 of the output.
    output_tif: Path of the GeoTIFF to create.
    template: Path of the raster whose band-1 size, geotransform and
      projection are copied to the output.
    nodatavalue: Value registered as 'nodata' on the output band.
    compress: If truthy, use DEFLATE compression (predictor 2, level 9);
      otherwise write an uncompressed GeoTIFF.

  Raises:
    RuntimeError: If the template cannot be opened or the output cannot be created.
  """
  template_dataset = gdal.Open(template)
  if template_dataset is None:
    raise RuntimeError(f"Could not open the template raster: {template}")
  template_band = template_dataset.GetRasterBand(1)
  template_dimensions, template_projection = template_dataset.GetGeoTransform(), template_dataset.GetProjection()
  creation_options = ["COMPRESS=DEFLATE","PREDICTOR=2","ZLEVEL=9"] if compress else []
  output_dataset = gdal.GetDriverByName("GTiff").Create(output_tif, template_band.XSize, template_band.YSize, bands=1, eType=gdal.GDT_Float32,
                                                options=creation_options)
  if output_dataset is None:
    raise RuntimeError(f"Could not create the output raster: {output_tif}")
  output_band = output_dataset.GetRasterBand(1)
  output_band.WriteArray(input_array)
  output_band.SetNoDataValue(nodatavalue)
  output_dataset.SetGeoTransform(template_dimensions)
  output_dataset.SetProjection(template_projection)
  # Flush and release the GDAL handles explicitly so the file is complete on
  # disk before the caller reads it, rather than relying on garbage collection.
  output_dataset.FlushCache()
  output_band = None
  output_dataset = None
  template_band = None
  template_dataset = None

# Burn a polygon to raster
def burn_polygon_to_raster(raster, polygon, fixed=True, fixed_value=1, column_name=None, all_touched=True):
  """Burn the geometries of a vector file into band 1 of an existing raster, in place.

  Args:
    raster: Path of the raster to update (opened in 'r+' mode).
    polygon: Path of a vector file readable by geopandas.
    fixed: If True every geometry is burned with `fixed_value`; otherwise each
      geometry is burned with its own value from `column_name`.
    fixed_value: Burn value used when `fixed` is True.
    column_name: Attribute column holding burn values when `fixed` is False.
      Defaults to the first column of the vector file.
    all_touched: Passed to rasterio's `rasterize`; burn every pixel touched by
      a geometry rather than only those whose centre is inside it.
  """
  gdf = gpd.read_file(polygon)
  if fixed:
    burn_values = [fixed_value] * len(gdf)
  else:
    if column_name is None:
      column_name = gdf.columns[0]
    # Pair each geometry with the value of its own row. Looking the value up by
    # geometry equality is quadratic and returns the wrong row's value when two
    # rows share a geometry.
    burn_values = gdf[column_name].tolist()
  with rasterio.open(raster, 'r+') as src:
    array = src.read(1)
    transform = src.transform
    # Geometries are burned one at a time, in file order, so later rows overwrite earlier ones
    for geom, burn_value in zip(gdf.geometry, burn_values):
      rasterize([(geom, burn_value)], out=array, transform=transform,
          all_touched=all_touched, dtype=src.meta['dtype'], out_shape=src.shape)
    src.write(array, 1)

# Select model and scenario area

In [None]:
# List every model directory (one containing 'model.json') as a line that can
# be pasted into the next cell to select it
model_exists = False
for model_subdir, _, model_filenames in os.walk(models_dir):
  if 'model.json' not in model_filenames:
    continue
  print(f'selected_model = "{model_subdir.split(f"{models_dir}/",1)[1]}"')
  model_exists = True
if not model_exists:
  print("No model exists.")

In [None]:
selected_model = "agbd_historic_250429_223033"
categorise_target = False # If the target was categorised in 5_models

# Paths of the selected model's files
selected_model_dir = join(models_dir, selected_model)
selected_model_json = join(selected_model_dir, "model.json")
selected_model_descr_dir = join(selected_model_dir, "model_description.json")
selected_model_dataset_path = join(selected_model_dir, f"{selected_model}.pkl")
# NOTE: unpickling executes code from the file; only load datasets produced by this project
selected_model_dataset = pd.read_pickle(selected_model_dataset_path)

# Load the description of the dataset the model was trained on
model_dataset_description_path = join(selected_model_dir, "model_dataset_description.json")
with open(model_dataset_description_path) as model_dataset_description_json:
  model_dataset_description = json.load(model_dataset_description_json)

# Unpack the description into individual variables
model_dataset_name = model_dataset_description["model_dataset_name"]
number_of_columns = model_dataset_description["number_of_columns"]
number_of_rows = model_dataset_description["number_of_rows"]
id_column = model_dataset_description["id_column"]
selected_target = model_dataset_description["selected_target"]
uncertainty = model_dataset_description["uncertainty"]
covariates_renamed = model_dataset_description["covariates_renamed"]
covariates_categorised = model_dataset_description["covariates_categorised"]
# Features used by the model: the selected features followed by the renamed covariates
selected_features = model_dataset_description["selected_features"] + covariates_renamed
categorical_columns = model_dataset_description["categorical_columns"]
descriptive_parameters = model_dataset_description["descriptive_parameters"]
filter_parameter = model_dataset_description["filter_parameter"]
filter_values_to_include = model_dataset_description["filter_values_to_include"]
sample_imported_dataset = model_dataset_description["sample_imported_dataset"]
sample_imported_dataset_by_percent = model_dataset_description["sample_imported_dataset_by_percent"]
sample_imported_dataset_value = model_dataset_description["sample_imported_dataset_value"]

# Covariate names with their first four characters (the renaming prefix) removed
covariates = [renamed_covariate[4:] for renamed_covariate in covariates_renamed]

# Directory holding every scenario of the selected model
scenarios_model_dir = join(scenarios_dir, selected_model)
makedirs(scenarios_model_dir, exist_ok=True)

# Keep a copy of model_dataset_description.json next to the scenarios
with open(join(scenarios_model_dir, "model_dataset_description.json"), "w") as description_copy:
  json.dump(model_dataset_description, description_copy)

# List existing scenario areas: every entry that is not a .csv or .json file
scenario_area_exists = False
for scenario_entry in os.listdir(scenarios_model_dir):
  if scenario_entry.endswith(('.csv', '.json')):
    continue
  print(f'selected_scenario_area = "{scenario_entry}"')
  scenario_area_exists = True
if not scenario_area_exists:
  print(f"Create a scenario area directory in {scenarios_model_dir}")

In [None]:
selected_scenario_area = "asartr"

# Define scenario area directory
scenario_area_dir = join(scenarios_model_dir,selected_scenario_area)
makedirs(scenario_area_dir, exist_ok=True)

# Define subdirectories of the scenario area
features_dir = join(scenario_area_dir, "features")
tile_templates_dir = join(scenario_area_dir, 'tile_templates')
tile_features_dir = join(scenario_area_dir, "tile_features")
tile_feature_stacks_dir = join(scenario_area_dir, "tile_feature_stacks")
tile_prediction_cache_dir = join(scenario_area_dir,"tile_prediction_cache")
scenario_predictions_unmasked_dir = join(scenario_area_dir,"scenario_predictions_unmasked")
scenario_predictions_dir = join(scenario_area_dir, "scenario_predictions")
scenario_dist_dir = join(scenario_area_dir, "scenario_disturbance")
intactness_dir = join(scenario_area_dir, 'intactness')

# Create subdirectories
for scenario_subdir in (
    features_dir,
    tile_templates_dir,
    tile_features_dir,
    tile_feature_stacks_dir,
    tile_prediction_cache_dir,
    scenario_predictions_unmasked_dir,
    scenario_predictions_dir,
    scenario_dist_dir,
    intactness_dir,
):
  makedirs(scenario_subdir, exist_ok=True)

# Copy features from the final features directory, skipping names that are
# already present. The destination is listed once up front: listing it again
# for every candidate file is slow on a mounted drive.
existing_features = set(os.listdir(features_dir))
for feature in os.listdir(feature_final_dir):
  if feature in existing_features:
    continue
  feature_original_path = join(feature_final_dir, feature)
  feature_copy_path = join(features_dir, feature)
  copyfile(feature_original_path, feature_copy_path)
print(f"All features present in the following directory have already been copied over: {feature_final_dir}")

# Define scenarios

In [None]:
# Yearly scenarios may include years after the model scenario if feature data is available later than GEDI data.
# Date of the prediction is ~December 31st, e.g. '2024' is 31/12/2024, requiring features up to 2024.

model_scenario_override = None # set if cannot be automatically determined from model features

yearly_features = ["forest_with_edge_effects", "disturbance_with_edge_effects"]

# Remove the 'fea_' prefix from each feature
model_features = sorted([feature[4:] for feature in selected_features])

# Create a list of feature years from the model's features.
# Yearly features are expected to end in a four-digit year.
model_feature_years = [
  int(feature[-4:])
  for feature in model_features
  for yearly_feature in yearly_features
  if yearly_feature in feature
]

# Determine the model scenario from the maximum year, unless overridden
if model_scenario_override is not None: model_scenario = model_scenario_override
elif model_feature_years: model_scenario = max(model_feature_years)
else: raise ValueError("No yearly features were found in the model features; set 'model_scenario_override'.")
model_scenario_filename = f"{model_scenario}.csv"
model_scenario_dir = join(scenarios_model_dir,model_scenario_filename)
print(f"The maximum year used in the model is {model_scenario}, which has been created as the first scenario.\n")
print(f"The {model_scenario} scenario feature list has been saved to:\n {model_scenario_dir}\n")
print(f"Ensure all features in this list have been copied to:\n{features_dir}\n")

# Save the model scenario features as a .csv
pd.DataFrame(model_features).to_csv(model_scenario_dir, index=False)

# Determine available feature years from file names ending in '_YYYY.tif'
final_feature_years = []
for final_feature in os.listdir(feature_final_dir):
  # The length check prevents an IndexError on short file names such as 'a.tif'
  if not final_feature.endswith('.tif') or len(final_feature) < 9 or final_feature[-9] != '_':
    continue
  year_text = final_feature[-8:-4]
  if year_text.isdecimal():
    final_feature_years.append(int(year_text))
if not final_feature_years:
  raise ValueError(f"No yearly features ending in '_YYYY.tif' were found in {feature_final_dir}")

# Find the first and last feature years
first_feature_year = min(final_feature_years)
last_feature_year = max(final_feature_years)
additional_feature_years = last_feature_year - model_scenario
print(f"The first available feature year is {first_feature_year} and the last is {last_feature_year}.\n")

# Calculate the range of scenario years and minimum scenario year.
# A model without yearly features (override set) spans zero years.
if model_feature_years: model_scenario_year_range = max(model_feature_years) - min(model_feature_years)
else: model_scenario_year_range = 0
minimum_yearly_scenario = first_feature_year + model_scenario_year_range
print(f"The earliest scenario year that can be predicted is {minimum_yearly_scenario}.")
print(f"The latest scenario year that can be predicted is {last_feature_year}.")
print(f"This is based on the number of yearly features used to train the model and the total availability of features.")

In [None]:
# Print every model feature as a list literal to paste into the next cell.
# Keep only the features that are the same in every scenario, e.g. topography.
print("constant_features = [")
for model_feature_name in model_features:
  print(f'  "{model_feature_name}",')
print("]")

In [None]:
# Features treated as identical in every scenario. All other model features
# are handled as scenario features whose year suffix shifts per scenario.
constant_features = [
  "coast_proximity_km",
  "latitude",
  "longitude",
  "topo_dtm_smooth_aspect_cosine",
  "topo_dtm_smooth_aspect_sine",
  "topo_dtm_smooth_circular_variance_aspect_03",
  "topo_dtm_smooth_circular_variance_aspect_07",
  "topo_dtm_smooth_circular_variance_aspect_11",
  "topo_dtm_smooth_deviation_mean_elevation_03",
  "topo_dtm_smooth_deviation_mean_elevation_07",
  "topo_dtm_smooth_deviation_mean_elevation_11",
  "topo_dtm_smooth_eastness",
  "topo_dtm_smooth_elevation",
  "topo_dtm_smooth_northness",
  "topo_dtm_smooth_profile_curvature",
  "topo_dtm_smooth_roughness_03",
  "topo_dtm_smooth_roughness_07",
  "topo_dtm_smooth_roughness_11",
  "topo_dtm_smooth_slope",
  "topo_dtm_smooth_stream_power_index_log10",
  "topo_dtm_smooth_surface_area_ratio",
  "topo_dtm_smooth_tangential_curvature",
  "topo_dtm_smooth_topographic_position_index_03",
  "topo_dtm_smooth_topographic_position_index_07",
  "topo_dtm_smooth_topographic_position_index_11",
  "topo_dtm_smooth_topographic_ruggedness_index",
  "topo_dtm_smooth_topographic_wetness_index",
  "topo_dtm_unsmooth_aspect_cosine",
  "topo_dtm_unsmooth_aspect_sine",
  "topo_dtm_unsmooth_circular_variance_aspect_03",
  "topo_dtm_unsmooth_circular_variance_aspect_07",
  "topo_dtm_unsmooth_circular_variance_aspect_11",
  "topo_dtm_unsmooth_deviation_mean_elevation_03",
  "topo_dtm_unsmooth_deviation_mean_elevation_07",
  "topo_dtm_unsmooth_deviation_mean_elevation_11",
  "topo_dtm_unsmooth_eastness",
  "topo_dtm_unsmooth_elevation",
  "topo_dtm_unsmooth_northness",
  "topo_dtm_unsmooth_profile_curvature",
  "topo_dtm_unsmooth_roughness_03",
  "topo_dtm_unsmooth_roughness_07",
  "topo_dtm_unsmooth_roughness_11",
  "topo_dtm_unsmooth_slope",
  "topo_dtm_unsmooth_stream_power_index_log10",
  "topo_dtm_unsmooth_surface_area_ratio",
  "topo_dtm_unsmooth_tangential_curvature",
  "topo_dtm_unsmooth_topographic_position_index_03",
  "topo_dtm_unsmooth_topographic_position_index_07",
  "topo_dtm_unsmooth_topographic_position_index_11",
  "topo_dtm_unsmooth_topographic_ruggedness_index",
  "topo_dtm_unsmooth_topographic_wetness_index",
]

In [None]:
# Note that areas that were forested in a historic yearly scenario but were reservoirs
# or flooded at the time topographic feature data was collected (~2014 for GLO-30 DEM)
# will be predicted based on a flat topography at the elevation of the water's surface.

# Set scenario features as all non-constant features
scenario_features = sorted(set(model_features) - set(constant_features))

# Create feature lists for all possible yearly scenarios
for yearly_scenario in range(minimum_yearly_scenario, last_feature_year +1):
  year_difference = model_scenario - yearly_scenario
  yearly_scenario_features = []
  for scenario_feature in scenario_features:
    year_suffix = scenario_feature[-4:]
    # Only a suffix of exactly four decimal digits is treated as a year. A bare
    # int() conversion would also accept suffixes such as '1_03' or '-123'.
    if len(year_suffix) == 4 and year_suffix.isdecimal():
      year_change = int(year_suffix) - year_difference
      yearly_scenario_features.append(scenario_feature[:-4] + str(year_change))
    else: yearly_scenario_features.append(scenario_feature)
  # Compile yearly features and save as a .csv
  yearly_scenario_features = sorted(yearly_scenario_features + constant_features)
  yearly_scenario_filename = f"{yearly_scenario}.csv"
  yearly_scenario_dir = join(scenarios_model_dir,yearly_scenario_filename)
  pd.DataFrame(yearly_scenario_features).to_csv(yearly_scenario_dir, index=False)

# Open the most recent yearly scenario feature list
most_recent_scenario_csv = join(scenarios_model_dir,f"{last_feature_year}.csv")
most_recent_scenario_features = pd.read_csv(most_recent_scenario_csv).iloc[:,0].tolist()

print(f"Lists of features for all possible yearly scenarios have been exported to {scenarios_model_dir}/.")
print(f"Ensure all features in these lists have been copied to:\n{features_dir}\n")

# Create a 'no disturbance' feature for alternate scenarios.
# Assumes the minimum possible value is present in the first scenario year.
minimum_disturbance_name = f"disturbance_with_edge_effects_0000"
minimum_disturbance_path = join(features_dir, f"{minimum_disturbance_name}.tif")
if not exists(minimum_disturbance_path):
  example_disturbance = join(features_dir, f"disturbance_with_edge_effects_{first_feature_year}.tif")
  example_disturbance_dataset = gdal.Open(example_disturbance)
  if example_disturbance_dataset is None:
    raise FileNotFoundError(f"Could not open the disturbance feature needed as an example: {example_disturbance}")
  example_disturbance_array = example_disturbance_dataset.ReadAsArray()
  # NOTE(review): the minimum is taken over every pixel, so it would be the
  # nodata value if the raster contains nodata pixels. Confirm it does not.
  minimum_disturbance_value = example_disturbance_array.min()
  # Constant raster of the same shape and dtype, filled with the minimum value
  minimum_disturbance_array = np.full_like(example_disturbance_array, minimum_disturbance_value)
  export_array_as_tif(minimum_disturbance_array, minimum_disturbance_path, template = example_disturbance)
  print(f"The minimum disturbance value is {minimum_disturbance_value},")
  print(f"which has been used to create the 'minimum disturbance' feature {minimum_disturbance_name}.")
else: print(f"The minimum disturbance feature {minimum_disturbance_name} already exists.")

In [None]:
# These alternate scenarios remove degradation for specific time ranges

# Define ranges for 'no degradation' scenarios
define_no_degradation_scenarios = True

# No degradation ranges as tuples of (start_year, end_year)
no_degradation_ranges = [
    (1991, 2014),
    (2015, 2024),
    (2022, 2022),
    (2023, 2023),
    (2024, 2024),
    (2022, 2024),
    (2023, 2024),
]

# Create a feature list for 'no degradation' scenarios
if define_no_degradation_scenarios:
  for start_year, end_year in no_degradation_ranges:
    assert end_year <= last_feature_year, "End years must be at or before the last feature year."
    assert end_year >= minimum_yearly_scenario, "End years must be at or after the minimum yearly scenario."
    assert start_year >= first_feature_year, "Start years must be at or after the first feature year."
    assert start_year >= end_year - model_scenario_year_range, "Start years must be within the model scenario range of the end year."
    assert start_year <= end_year, "The start year must be less than or equal to the end year."

    # Determine base features based on the end year of the range
    scenario_features_csv = join(scenarios_model_dir, f"{end_year}.csv")
    base_features = pd.read_csv(scenario_features_csv).iloc[:,0].tolist()

    # Swap every disturbance feature dated at or after the start year for the
    # 'minimum disturbance' feature. The base list is the end-year scenario, so
    # no explicit upper bound is checked here.
    no_degradation_features = [
      minimum_disturbance_name
      if "disturbance_with_edge_effects" in scenario_feature and int(scenario_feature[-4:]) >= start_year
      else scenario_feature
      for scenario_feature in base_features
    ]

    no_degradation_scenario_filename = f"{end_year}_no_degradation_since_{start_year}.csv"
    no_degradation_scenario_path = join(scenarios_model_dir, no_degradation_scenario_filename)
    pd.DataFrame(no_degradation_features).to_csv(no_degradation_scenario_path, index=False)
    print(f"Feature list for a scenario without degradation between {start_year} and {end_year} exported to {no_degradation_scenario_filename}.")
else:
  print("The 'no degradation' scenarios are not enabled.")

In [None]:
# These alternate scenarios replace degradation history of an 'actual year' with an 'alternate year'
# Forest extent remains the same. This allows deforestation alone (without the effect of degradation and recovery)
# To be measured by, e.g., 2024_with_2014_degradation - 2014 = 2024_deforestation_actual_since_2014.
# Otherwise can only measure deforestation assuming a year was old-growth, e.g.
# 2024_oldgrowth - 2014_oldgrowth = 2024_deforestation_oldgrowth_since_2014

# Define ranges for 'alternate degradation' scenarios
define_alternate_degradation_scenarios = True

# Alternate degradation scenarios as tuples of (alternate_year, actual_year)
alternate_degradation_ranges = [
    # (1990, 2014), # Not possible without 1990 scenario
    (2014, 2024),
    (2021, 2022),
    (2022, 2023),
    (2023, 2024),
    (2021, 2024),
    (2022, 2024),

]

# Create a feature list for 'alternate degradation' scenarios
if define_alternate_degradation_scenarios:
  for alternate_year, actual_year in alternate_degradation_ranges:
    assert actual_year <= last_feature_year, "Actual years must be at or before the last feature year."
    assert actual_year >= minimum_yearly_scenario + 1, "Actual years must be at least one year after the minimum yearly scenario."
    assert alternate_year >= first_feature_year, "Alternate years must be at or after the first feature year."
    assert alternate_year >= actual_year - model_scenario_year_range, "Alternate years must be within the model scenario range of the end year."
    assert alternate_year < actual_year, "Alternate years must be before the actual year."

    # Determine base features based on the actual year of the pair
    scenario_features_csv = join(scenarios_model_dir, f"{actual_year}.csv")
    base_features = pd.read_csv(scenario_features_csv).iloc[:,0].tolist()
    actual_alternate_diff = actual_year - alternate_year

    alternate_degradation_features = []
    for scenario_feature in base_features:
      if "disturbance_with_edge_effects" in scenario_feature:
        # Shift the disturbance year back by the gap between the actual and alternate years.
        # Only the four-character year suffix is rewritten: str.replace() on the
        # whole name could also alter matching digits elsewhere in the name.
        scenario_feature_year = int(scenario_feature[-4:])
        alternate_feature_year = scenario_feature_year - actual_alternate_diff
        alternate_feature = scenario_feature[:-4] + str(alternate_feature_year)
        alternate_degradation_features.append(alternate_feature)
      else:
        alternate_degradation_features.append(scenario_feature)

    alternate_degradation_scenario_filename = f"{actual_year}_alternate_degradation_{alternate_year}.csv"
    alternate_degradation_scenario_path = join(scenarios_model_dir, alternate_degradation_scenario_filename)
    pd.DataFrame(alternate_degradation_features).to_csv(alternate_degradation_scenario_path, index=False)
    print(f"Feature list for a {actual_year} scenario with {alternate_year} degradation exported to {alternate_degradation_scenario_filename}.")
else:
  print("The 'alternate degradation' scenarios are not enabled.")

In [None]:
# Old-growth scenarios simulate old-growth forest from a user-specified proxy area.
# Forest extent (i.e. 'no deforestation') can come from a yearly feature, or from
# all historic / potential forest area.

define_oldgrowth_scenarios = True
oldgrowth_yearly_scenarios = [1990, 2014, 2021, 2022, 2023, last_feature_year]

# Every requested year needs a matching yearly feature
for year in oldgrowth_yearly_scenarios:
  assert year in final_feature_years, "Years in 'oldgrowth_yearly_scenarios' must be available in the final yearly features."

simulate_all_oldgrowth = True
all_oldgrowth_name = "all_oldgrowth"

# Feature that best signals old-growth to the model, e.g. certain protected areas.
# For the old-growth scenarios it is modified to cover the entire scenario area.
oldgrowth_feature = 'lu_oldgrowth_with_edge_effects'

# Features that may confound the old-growth proxy, e.g. protected areas that are
# not known to be old-growth. They are removed for the old-growth scenarios.
oldgrowth_redundant_features = []

if define_oldgrowth_scenarios:
  # Expand the oldgrowth feature to the entire scenario area by writing a
  # raster of the same shape filled with the feature's maximum value
  oldgrowth_feature_all_dir = join(features_dir, f"{oldgrowth_feature}_all.tif")
  if not exists(oldgrowth_feature_all_dir):
    oldgrowth_feature_dir = join(features_dir, f"{oldgrowth_feature}.tif")
    oldgrowth_feature_array = gdal.Open(oldgrowth_feature_dir).ReadAsArray()
    oldgrowth_feature_max_value = oldgrowth_feature_array.max()
    print(f"The maximum value for the oldgrowth feature '{oldgrowth_feature}' is {oldgrowth_feature_max_value}.")
    oldgrowth_feature_all_array = np.full_like(oldgrowth_feature_array, oldgrowth_feature_max_value)
    export_array_as_tif(oldgrowth_feature_all_array, oldgrowth_feature_all_dir, template = oldgrowth_feature_dir)
    print(f"The oldgrowth proxy {oldgrowth_feature} has been expanded to the entire scenario area")
    print(f"And exported to {oldgrowth_feature_all_dir}")
  else: print(f"The oldgrowth feature '{oldgrowth_feature}_all.tif' already exists.\n")

  # Remove the redundant features from the oldgrowth scenario area by writing
  # rasters of the same shape filled with each feature's minimum value
  for redundant_feature in oldgrowth_redundant_features:
    redundant_feature_none_dir = join(features_dir, f"{redundant_feature}_none.tif")
    if not exists(redundant_feature_none_dir):
      redundant_feature_dir = join(features_dir, f"{redundant_feature}.tif")
      redundant_feature_array = gdal.Open(redundant_feature_dir).ReadAsArray()
      redundant_feature_min_value = redundant_feature_array.min()
      print(f"The minimum value for the redundant feature {redundant_feature} is {redundant_feature_min_value}.")
      redundant_feature_none_array = np.full_like(redundant_feature_array, redundant_feature_min_value)
      export_array_as_tif(redundant_feature_none_array, redundant_feature_none_dir, redundant_feature_dir)
      # Report the redundant feature and its own output path (not the oldgrowth proxy's)
      print(f"The oldgrowth redundant feature {redundant_feature} has been removed from the entire scenario area")
      print(f"And exported to {redundant_feature_none_dir}.")
    else: print(f"The oldgrowth redundant feature '{redundant_feature}_none.tif' already exists.\n")

  # Generate 'oldgrowth' scenarios for each specified year (i.e. forest extent in that year)
  for year in oldgrowth_yearly_scenarios:
    # Years by which forest features of the most recent scenario are shifted back
    old_growth_scenario_year_diff = last_feature_year - year
    oldgrowth_features = []
    for scenario_feature in most_recent_scenario_features:
      if "disturbance_with_edge_effects" in scenario_feature:
        oldgrowth_features.append(minimum_disturbance_name)
      elif "forest_with_edge_effects" in scenario_feature:
        # Replace with the specified forest cover year, never earlier than the first feature year
        scenario_feature_year = int(scenario_feature[-4:])
        forest_feature_year = max(scenario_feature_year - old_growth_scenario_year_diff, first_feature_year)
        oldgrowth_features.append(f"forest_with_edge_effects_{forest_feature_year}")
      elif scenario_feature == oldgrowth_feature:
        oldgrowth_features.append(f"{scenario_feature}_all")
      elif scenario_feature in oldgrowth_redundant_features:
        oldgrowth_features.append(f"{scenario_feature}_none")
      else: oldgrowth_features.append(scenario_feature)
    oldgrowth_filename = f"{year}_oldgrowth.csv"
    oldgrowth_dir = join(scenarios_model_dir, oldgrowth_filename)
    pd.DataFrame(oldgrowth_features).to_csv(oldgrowth_dir, index=False)
    print(f"Feature list for a scenario where all forest in {year} was old-growth")
    print(f"has been exported to {oldgrowth_filename}.\n")

  # Generate 'all_oldgrowth' features and scenarios
  if simulate_all_oldgrowth:
    # Based on the first TMF AnnualChanges land coverage
    # Create a forest feature for all land that exists in the first feature year (e.g. 1990)
    forest_all_oldgrowth_name = f"forest_with_edge_effects_{all_oldgrowth_name}"
    forest_all_oldgrowth_path = join(features_dir, f"{forest_all_oldgrowth_name}.tif")
    first_annual_changes_filename = f"tmf_AnnualChanges_Dec{first_feature_year}.tif"
    first_annual_changes_path = join(feature_resampled_dir, first_annual_changes_filename)
    if not exists(forest_all_oldgrowth_path):
      if exists(first_annual_changes_path):
        first_annual_changes_array = gdal.Open(first_annual_changes_path).ReadAsArray()
        # Convert all water values to 'nodata' and non-water values to '1'
        # NOTE(review): this array holds only `nodatavalue` and 1, so the '== 0'
        # transform below finds no target pixels and the nodata value feeds the
        # convolution. Confirm whether water should be 0 here.
        forest_all_oldgrowth_array = np.where(first_annual_changes_array == 5, nodatavalue, 1)
        # Set smoothing kernel and precision
        kernel, precision = Gaussian2DKernel(x_stddev=1, y_stddev=1), 2
        # Reclassify for binary differentiation after proximity conversion
        differentiator_array = forest_all_oldgrowth_array.copy()
        differentiator_array[differentiator_array == 1] = 10
        # Positive proximity
        positive_distances = ndimage.distance_transform_edt(forest_all_oldgrowth_array == 0) # target pixels
        positive_proximity_array = np.where(positive_distances > 2, 0, positive_distances) # max distance 2
        # Negative proximity
        negative_distances = ndimage.distance_transform_edt(forest_all_oldgrowth_array == 1) # target pixels
        negative_proximity_array = np.where(negative_distances > 2, 0, negative_distances) # max distance 2
        # Sum proximities and differentiator
        pixel_prox_summed =  differentiator_array + positive_proximity_array + negative_proximity_array
        # Reclassify for better semantic understanding of pixel proximity
        pixel_prox_reclassed = pixel_prox_summed.copy()
        pixel_prox_reclass_table = [(0, 0, -4), (1, 1, -1), (1.4, 1.5, -2), (2, 2, -3), (10, 10, 3), (11, 11, 0), (11.4, 11.5, 1), (12, 12, 2)]
        for min_value, max_value, new_value in pixel_prox_reclass_table:
          pixel_prox_reclassed[(pixel_prox_reclassed >= min_value) & (pixel_prox_reclassed <= max_value)] = new_value
        # Smooth binary array using 2D convolution
        binary_smoothed = convolve(forest_all_oldgrowth_array, kernel, boundary='extend')
        # Sum pixel proximity and smoothed binary array
        edge_effects_array = np.round(pixel_prox_reclassed + binary_smoothed, precision)
        # Export forest edge effects feature
        export_array_as_tif(edge_effects_array, forest_all_oldgrowth_path)
        print(f"{forest_all_oldgrowth_name} has been created and saved to\n{features_dir}\n")
      else: print(f"The first TMF annual changes year raster needed for comrec is not in the indicated directory:\n{first_annual_changes_path}\n")
    # Only report 'already exists' when the feature was found rather than created or missing
    else: print(f"{forest_all_oldgrowth_name} already exists in\n{features_dir}\n")
    if exists(forest_all_oldgrowth_path):
      oldgrowth_all_features = []
      for scenario_feature in most_recent_scenario_features:
        if "disturbance_with_edge_effects" in scenario_feature:
          oldgrowth_all_features.append(minimum_disturbance_name)
        elif "forest_with_edge_effects" in scenario_feature:
          oldgrowth_all_features.append(forest_all_oldgrowth_name)
        elif scenario_feature == oldgrowth_feature:
          oldgrowth_all_features.append(f"{scenario_feature}_all")
        elif scenario_feature in oldgrowth_redundant_features:
          oldgrowth_all_features.append(f"{scenario_feature}_none")
        else: oldgrowth_all_features.append(scenario_feature)
      oldgrowth_all_filename = f"{all_oldgrowth_name}.csv"
      oldgrowth_all_dir = join(scenarios_model_dir, oldgrowth_all_filename)
      pd.DataFrame(oldgrowth_all_features).to_csv(oldgrowth_all_dir, index=False)
      print(f"Feature list for {all_oldgrowth_name} has been exported to {oldgrowth_all_filename}.\n")

    # Create A forest mask for 'all oldgrowth'
    # Also forests reservoirs since the first TMF annual changes year, though topography may be wrong
    if exists(join(scenarios_model_dir, f"{all_oldgrowth_name}.csv")):
      if exists(first_annual_changes_path):
        oldgrowth_all_mask_path = join(masks_dir, f"mask_forest_{all_oldgrowth_name}.tif")
        if not exists(oldgrowth_all_mask_path):
          first_annual_changes_array = gdal.Open(first_annual_changes_path).ReadAsArray()
          # Convert all water values to 'nodata' and non-water values to '1'
          oldgrowth_all_mask_array = np.where(first_annual_changes_array == 5, nodatavalue, 1)
          export_array_as_tif(oldgrowth_all_mask_array, oldgrowth_all_mask_path)
          print(f"A mask for {all_oldgrowth_name} has been created at\n{oldgrowth_all_mask_path}")
        else: print(f"A mask for {all_oldgrowth_name} already exists at\n{oldgrowth_all_mask_path}")
      else: print(f"The {first_annual_changes_filename} raster needed to mask {all_oldgrowth_name} doesn't exist.")
    else: print(f"The scenario csv for {all_oldgrowth_name} doesn't exist.")

else: print("Old-growth scenarios are not enabled.")

# Feature verification

In [None]:
# Verify that every non-covariate feature named in every scenario .csv has a
# matching .tif in the scenario features directory
scenario_csv_list = []
all_features_exist = True # Changes to false if feature missing
for csv in os.listdir(scenarios_model_dir):
  if not csv.endswith('.csv'):
    continue
  csv_dir = join(scenarios_model_dir, csv)
  csv_feature_list = pd.read_csv(csv_dir).iloc[:,0].tolist()
  # Covariates have no raster yet, so they are not checked
  csv_feature_dir_list = [
    f"{features_dir}/{csv_feature}.tif"
    for csv_feature in csv_feature_list
    if csv_feature not in covariates
  ]
  for feature in csv_feature_dir_list:
    if exists(feature):
      continue
    all_features_exist = False
    print(f"The following feature is missing:\n{feature}\n and is required for the scenario '{csv[:-4]}'")

if all_features_exist: print("All required features are present.")
print("Covariate features e.g. 'beam' and 'sensitivity' will be added at the prediction stage.")

In [None]:
# Is the scenario area equal to the original template area?
original_template_area = True

# If not, create a new template for the scenario area and upload to:
# '6_scenarios/[model]/[scenario_area]/template.tif'
scenario_template_dir = join(areas_dir if original_template_area else scenario_area_dir, "template.tif")
print(f"The following is being used as a template to verify scenario feature dimensions and projections:\n{scenario_template_dir}")


# Read the geotransform and projection that the features are compared against
scenario_template = gdal.Open(scenario_template_dir)
scenario_template_dimensions = scenario_template.GetGeoTransform()
scenario_template_projection = scenario_template.GetProjection()

In [None]:
# Compare the geotransform and projection of every feature raster with the scenario template
feature_issue = False
for feature in os.listdir(features_dir):
  if not feature.endswith('.tif'):
    continue
  feature_dir = join(features_dir, feature)
  feature_open = gdal.Open(feature_dir)
  feature_dimensions = feature_open.GetGeoTransform()
  feature_projection = feature_open.GetProjection()
  dimensions_match = feature_dimensions == scenario_template_dimensions
  projection_match = feature_projection == scenario_template_projection
  if not dimensions_match:
    print(f"{feature} dimensions:\n{feature_dimensions}\ndo not match the scenario template dimensions:\n{scenario_template_dimensions}\n")
  if not projection_match:
    print(f"{feature} projection:\n{feature_projection}\ndoes not match the scenario template projection:\n{scenario_template_projection}\n\n")
  if not (dimensions_match and projection_match):
    feature_issue = True

if feature_issue: print("Correct and / or resample the feature(s).")
else: print(f"All features in the following directory have the correct dimensions and projection:\n{features_dir}")

# Template tiles

In [None]:
# Load the model scenario features for tile template creation
model_scenario_features = pd.read_csv(model_scenario_dir).iloc[:,0].tolist()
model_scenario_features_dirs = [features_dir + '/' + feature + '.tif' for feature in model_features]
# Create a template feature array from the first feature that isn't a covariate (these are created later).
# Covariate names are matched against the feature name only. Matching against
# the full path would reject every feature if a covariate name happened to
# occur in a directory name.
template_base_path = next(
  (feature_path for feature, feature_path in zip(model_features, model_scenario_features_dirs)
   if all(covariate not in feature for covariate in covariates)),
  None)
if template_base_path is None:
  raise ValueError("No non-covariate feature is available to use as the tile template.")
template_base = gdal.Open(template_base_path)
template_base_array = template_base.ReadAsArray()
template_base_band = template_base.GetRasterBand(1)
template_base_xsize, template_base_ysize = template_base_band.XSize, template_base_band.YSize
print(f"The template feature is {template_base_xsize} x {template_base_ysize} pixels.")

# Check existing tile parameters
template_tile_list = [
  file for file in os.listdir(tile_templates_dir)
  if file.endswith('.tif') and file.startswith('template_tile')
]
n_tiles_exist = len(template_tile_list)

if n_tiles_exist < 1: print("There are currently no template tiles. Run the next section.")
else:
  # Tiles are expected to be named template_tile_1.tif ... template_tile_<n>.tif;
  # the first gives the regular tile height and the last the remainder height
  tile_size_y_rounded_exist = gdal.Open(join(tile_templates_dir,'template_tile_1.tif')).GetRasterBand(1).YSize
  tile_size_y_remainder_exist = gdal.Open(join(tile_templates_dir,f'template_tile_{n_tiles_exist}.tif')).GetRasterBand(1).YSize
  if n_tiles_exist == 1: print(f"There is a single 'tile' with a height of {tile_size_y_rounded_exist}."); tile_size_y_remainder_exist = 0
  else: print(f"There are {n_tiles_exist} template tiles, the first {n_tiles_exist-1} having a height of {tile_size_y_rounded_exist} pixels, the last {tile_size_y_remainder_exist} pixels.")

In [None]:
# Large scenario areas and / or numbers of features may be too much for the available memory.
# This section defines how to split predictions into tiles that can then be merged.
import subprocess # Local import: gdal_translate is run without a shell (see tile generation below)

override_n_tiles = True  # Useful if the tile number has already been tested.
n_tiles_override = 1
# NOTE(review): 14910 is presumably the template height in pixels - confirm it matches template_base_ysize.
tile_size_y_rounded_override = int(14910/n_tiles_override)

memory_utilisation = 0.8 # Set to 0.8 to ensure crashes are avoided

assert memory_utilisation > 0 and memory_utilisation <= 1, "Set memory_utilisation to a value between 0 and 1"

# Calculate total size of feature stack
feature_stack_size = template_base_array.size * len(model_scenario_features_dirs)

# Calculate memory and the number of tiles required (64 bits = 8 bytes per value)
total_memory_needed = 64 / 8 * feature_stack_size
print(f'RAM required for each prediction: ~{total_memory_needed/(1024**3):.3f} GB')
print(f'RAM currently available: {psutil.virtual_memory().free / (1024**3):.3f} GB')
n_tiles_temp = int(np.ceil(total_memory_needed / (psutil.virtual_memory().free * memory_utilisation)))

# Calculate template tile size (split on the y axis only)
tile_size_y_rounded = int(np.ceil(template_base_ysize/n_tiles_temp)) # Round the number of y pixels in each tile
tile_size_y_remainder = template_base_ysize%tile_size_y_rounded # Calculate the remainder for the last tile
n_tiles = max(1, len(range(0, template_base_ysize, tile_size_y_rounded))) # Update the number of tiles to include the remainder

if override_n_tiles:
  tile_size_y_rounded = tile_size_y_rounded_override
  tile_size_y_remainder = template_base_ysize%tile_size_y_rounded
  n_tiles = n_tiles_override
  print("n_tiles has been overridden.")

print(f'The prediction template should be divided into {n_tiles} tiles to avoid crashing.')

# Check if tiles need to be changed
change_tiles = True
if override_n_tiles:
  if n_tiles == n_tiles_exist: change_tiles = False
if n_tiles == n_tiles_exist and tile_size_y_rounded == tile_size_y_rounded_exist and tile_size_y_remainder == tile_size_y_remainder_exist:
  change_tiles = False

if change_tiles:
  # Clear all files from the flat tile directories
  for tile_dir in (tile_templates_dir, tile_features_dir, tile_prediction_cache_dir):
    for tile in Path(tile_dir).glob("**/*"):
      if tile.is_file(): tile.unlink()
  # Remove each scenario's feature stack directory. Only the top level is listed (and the
  # listing is taken up front) so the tree is not walked while it is being deleted, and a
  # stray file is unlinked instead of making shutil.rmtree raise.
  for scenario_stack_dir in list(Path(tile_feature_stacks_dir).iterdir()):
    if scenario_stack_dir.is_dir(): shutil.rmtree(scenario_stack_dir)
    else: scenario_stack_dir.unlink()

  # Generate new tile templates based on available memory
  for tile_number, y_start in enumerate(range(0, template_base_ysize, tile_size_y_rounded), start=1):
    # Clamp the height to the raster extent: the last tile takes whatever rows remain,
    # including when the overridden tile size does not divide the raster height evenly.
    tile_size_y = min(tile_size_y_rounded, template_base_ysize - y_start)
    template_tile_path = join(str(tile_templates_dir), f"template_tile_{tile_number}.tif")
    # Arguments are passed as a list (no shell), so paths containing spaces are safe,
    # and check=True stops the run if gdal_translate fails instead of reporting success.
    subprocess.run(
      ["gdal_translate", "-of", "GTiff",
       "-srcwin", "0", str(y_start), str(template_base_xsize), str(tile_size_y),
       str(template_base_path), template_tile_path],
      check=True)
  print("Template tile creation complete.")

else: print("No changes to existing tiles are required.")

# Feature tiles

In [None]:
# Create feature tiles.

# Check existing tile parameters
template_tile_list = [file for file in os.listdir(tile_templates_dir) if file.endswith('.tif') and file[:13] == 'template_tile']
n_tiles = len(template_tile_list)

assert n_tiles > 0, "There are no template tiles. Run the template tiles section, even if only one is created."
tile_size_y_rounded = gdal.Open(join(tile_templates_dir,'template_tile_1.tif')).GetRasterBand(1).YSize
print(f"There are {n_tiles} template tiles.")

if n_tiles == 1: print("Feature tile creation skipped. Feature stack creation will use the original features.")
else:
  # Only rasters can be tiled; anything else in the directory is skipped
  features_to_tile = [feature for feature in os.listdir(features_dir) if feature.endswith('.tif')]

  # Progress
  n_features = len(features_to_tile)
  feature_progress_index, feature_progress_label = 0, widgets.Label(value=f"Feature progress: 0 / {n_features}")
  display(feature_progress_label)
  tile_progress_index, tile_progress_label = 0, widgets.Label(value=f"Tile progress: 0 / {n_tiles}")
  display(tile_progress_label)

  # Loop through each feature in the 6_scenarios features directory
  for feature in features_to_tile:
    feature_dir = join(features_dir, feature)
    feature_chunks = None # Read and split lazily, only if at least one tile is missing
    # The tile number advances on every iteration, so a tile that already exists
    # no longer prevents the remaining tiles of the feature from being created.
    for tile_count in range(1, n_tiles + 1):
      feature_tile_filename = f"{feature[:-4]}_{tile_count}.tif"
      feature_tile_dir = join(tile_features_dir, feature_tile_filename)
      # If feature tile does not exist:
      if not exists(feature_tile_dir):
        if feature_chunks is None:
          feature_array = gdal.Open(feature_dir).ReadAsArray()
          # Split the feature array into chunks (along rows) based on tile size
          feature_chunks = np.array_split(feature_array, np.arange(tile_size_y_rounded, len(feature_array), tile_size_y_rounded))
        template_tile_dir = join(tile_templates_dir, f"template_tile_{tile_count}.tif")
        export_array_as_tif(feature_chunks[tile_count-1], feature_tile_dir, template_tile_dir, compress = False)
      # Update tile progress
      tile_progress_index += 1
      tile_progress_label.value = f"Tile progress: {tile_progress_index} / {n_tiles}"
    tile_progress_index = 0
    # Update feature progress
    feature_progress_index += 1
    feature_progress_label.value = f"Feature progress: {feature_progress_index} / {n_features}"

# Feature stacks

In [None]:
# Create feature stack arrays for each scenario

# Every .csv in the model scenarios directory is a scenario feature list; keep the name without its extension
scenarios_list = [csv[:-4] for csv in os.listdir(scenarios_model_dir) if csv.endswith('.csv')]

# Print a ready-to-paste selection of scenarios for generating tiled feature stacks
print("scenarios_to_stack = [")
print("".join(f'  "{scenario}",\n' for scenario in sorted(scenarios_list)), end="")
print("]")

In [None]:
# Scenarios selected for tiled feature stack generation.
scenarios_to_stack = [
  "1990_oldgrowth",
  "2014",
  "2014_no_degradation_since_1991",
  "2014_oldgrowth",
  "2015",
  "2016",
  "2017",
  "2018",
  "2019",
  "2020",
  "2021",
  "2021_oldgrowth",
  "2022",
  "2022_alternate_degradation_2021",
  "2022_no_degradation_since_2022",
  "2022_oldgrowth",
  "2023",
  "2023_alternate_degradation_2022",
  "2023_no_degradation_since_2023",
  "2023_oldgrowth",
  "2024",
  "2024_alternate_degradation_2014",
  "2024_alternate_degradation_2021",
  "2024_alternate_degradation_2022",
  "2024_alternate_degradation_2023",
  "2024_no_degradation_since_2015",
  "2024_no_degradation_since_2022",
  "2024_no_degradation_since_2023",
  "2024_no_degradation_since_2024",
  "2024_oldgrowth",
  "all_oldgrowth",
]

# Stop early if any selected scenario is missing from scenarios_list
assert set(scenarios_to_stack).issubset(scenarios_list), "Not all selected scenarios exist."

# Check existing tile parameters
template_tile_list = [file for file in os.listdir(tile_templates_dir) if file.endswith('.tif') and file[:13] == 'template_tile']
n_tiles = len(template_tile_list)

assert n_tiles > 0, "There are no template tiles. Run the template tiles section, even if only one is created."
print(f"There are {n_tiles} template tiles.")

# One feature stack is written per template tile. The assert above guarantees at least one
# tile, so the former 'n_tiles == 0' fallback could never run and has been removed.
n_stacks = n_tiles

# Progress
scenario_progress_index, scenario_progress_label = 0, widgets.Label(value=f"Scenario progress: 0 / {len(scenarios_to_stack)}")
display(scenario_progress_label)
stack_progress_index, stack_progress_label = 0, widgets.Label(value=f"Tiled feature stack progress: 0 / {n_tiles}")
display(stack_progress_label)

# Loop through each scenario
for scenario in scenarios_to_stack:
  # Define directory and features
  scenario_feature_stacks_dir = join(tile_feature_stacks_dir, scenario)
  makedirs(scenario_feature_stacks_dir, exist_ok=True)
  scenario_features_csv = join(scenarios_model_dir,f"{scenario}.csv")
  scenario_features = pd.Series.tolist(pd.read_csv(scenario_features_csv).iloc[:,0])
  # The tile count matches the feature stack number
  for tile_count in range(1, n_stacks + 1):
    scenario_stack_filename = f"feature_stack_{scenario}_{tile_count}.npy"
    scenario_tile_stack_dir = join(scenario_feature_stacks_dir, scenario_stack_filename)
    # If the feature stack does not exist yet:
    if not exists(scenario_tile_stack_dir):
      # A single 'tile' uses the original features, otherwise the tiled features are used
      if n_stacks == 1: feature_tiles_dirs = [f"{features_dir}/{feature}.tif" for feature in scenario_features]
      else: feature_tiles_dirs = [f"{tile_features_dir}/{feature}_{tile_count}.tif" for feature in scenario_features]
      covariate_tile_names = [f"{cov}_{tile_count}" for cov in covariates]
      # Create feature chunks (arrays) from tiles
      feature_array_chunks = []
      for feature in feature_tiles_dirs:
        feature_name = feature.split('/')[-1].split('.')[0]
        # Covariate raster will exist and should be ignored if prediction stage has already been attempted
        if feature_name not in covariates and feature_name not in covariate_tile_names:
          feature_array_chunks.append(gdal.Open(feature).ReadAsArray())
      # Create a feature stack from chunks
      feature_stack = np.dstack(feature_array_chunks)
      feature_array_chunks = None # Flush chunks
      stack_height, stack_width, stack_n_features = feature_stack.shape
      # Convert feature stack to 2D numpy array with features as columns
      feature_stack_reshaped = feature_stack.reshape(stack_height * stack_width, stack_n_features)
      feature_stack = None # Flush stack
      # Save as a numpy file
      np.save(scenario_tile_stack_dir, feature_stack_reshaped)
      feature_stack_reshaped = None # Flush reshaped stack
    # Update progress
    stack_progress_index += 1
    stack_progress_label.value = f"Tiled feature stack progress: {stack_progress_index} / {n_stacks}"
  # Reset tile progress
  stack_progress_index = 0
  scenario_progress_index += 1
  scenario_progress_label.value = f"Scenario progress: {scenario_progress_index} / {len(scenarios_to_stack)}"
print("\nFeature stacks complete.")

# Predict scenarios

In [None]:
# This is for testing models and scenarios, or making predictions where no
# uncertainty metric for the variate (e.g. standard error or stdev) is available.
# If these are available, proceed to 7_predictions.ipynb.

# Each entry in the feature stack tiles directory is treated as an available scenario
scenario_stacks_list = list(os.listdir(tile_feature_stacks_dir))

# Print a ready-to-paste selection of scenarios to predict
print("scenarios_to_predict = [")
print("".join(f'  "{scenario}",\n' for scenario in sorted(scenario_stacks_list)), end="")
print("]")

In [None]:
# Scenarios selected for prediction.
scenarios_to_predict = [
  "1990_oldgrowth",
  "2014",
  "2014_no_degradation_since_1991",
  "2014_oldgrowth",
  "2015",
  "2016",
  "2017",
  "2018",
  "2019",
  "2020",
  "2021",
  "2021_oldgrowth",
  "2022",
  "2022_alternate_degradation_2021",
  "2022_no_degradation_since_2022",
  "2022_oldgrowth",
  "2023",
  "2023_alternate_degradation_2022",
  "2023_no_degradation_since_2023",
  "2023_oldgrowth",
  "2024",
  "2024_alternate_degradation_2014",
  "2024_alternate_degradation_2021",
  "2024_alternate_degradation_2022",
  "2024_alternate_degradation_2023",
  "2024_no_degradation_since_2015",
  "2024_no_degradation_since_2022",
  "2024_no_degradation_since_2023",
  "2024_no_degradation_since_2024",
  "2024_oldgrowth",
  "all_oldgrowth",
]

# Stop early if any selected scenario is missing from scenario_stacks_list
assert set(scenarios_to_predict).issubset(scenario_stacks_list), "Not all selected scenarios exist."

In [None]:
add_covariates = True # Adds a selected covariate value as the feature
sensitivity_value = 0.99
beam_value = 5
# 5 is the first of the full beams, with the least bias on AGBD.
# Cover beams 1 - 4 underestimate. Full beams 7 - 8 overestimate. 5 - 6 tend to give average values.

# Check for GPU
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0': print('GPU device not found')
else: print(f"Found GPU at: {device_name}")

# Load model
booster = xgb.Booster()
booster.load_model(selected_model_json)
if categorise_target: XGBPredictor = xgb.XGBClassifier()
else: XGBPredictor = xgb.XGBRegressor()
XGBPredictor._Booster = booster

# Avoids issues using dataframe from CPU
xgb.set_config(verbosity=0, use_rmm=True)

# Check existing tile parameters
template_tile_list = [file for file in os.listdir(tile_templates_dir) if file.endswith('.tif') and file[:13] == 'template_tile']
n_tiles = len(template_tile_list)

assert n_tiles > 0, "There are no template tiles. Run the template tiles section, even if only one is created."
# Tile width is read up front so it is available even when every tile is already cached
template_tile_x = gdal.Open(join(tile_templates_dir,'template_tile_1.tif')).GetRasterBand(1).XSize
print(f"There are {n_tiles} template tiles.")

# Scenario progress
scenario_progress_index = 0
scenario_progress_label = widgets.Label(f"Scenario progress: {scenario_progress_index}/{len(scenarios_to_predict)}")
display(scenario_progress_label)

# Tile progress
tile_progress_index, tile_progress_label = 0, widgets.Label(value=f"Tile progress: 0 / {n_tiles}")
display(tile_progress_label)

# Loop through each scenario
for scenario in scenarios_to_predict:
  # Define scenario filename and check if exists
  scenario_prediction_unmasked_filename = f"{scenario}__{selected_scenario_area}_{selected_model}_unmasked.tif"
  scenario_prediction_unmasked_dir = join(scenario_predictions_unmasked_dir, f"{scenario_prediction_unmasked_filename}")
  # If scenario prediction does not exist:
  if not exists(scenario_prediction_unmasked_dir):
    # Get number of stacks
    scenario_feature_stack_dir = join(tile_feature_stacks_dir, scenario)
    n_stacks = len(os.listdir(scenario_feature_stack_dir))
    # Create a tile cache directory for the prediction
    tile_cache_scenario_dir = join(tile_prediction_cache_dir, scenario_prediction_unmasked_filename[:-4])
    makedirs(tile_cache_scenario_dir, exist_ok=True)
    # The stack number matches the template tile number
    for stack in range(1, n_stacks+1):
      scenario_tile_filename = f"scenario_tile_{stack}.tif"
      scenario_tile_dir = join(tile_cache_scenario_dir, scenario_tile_filename)
      # If scenario prediction tile does not exist:
      if not exists(scenario_tile_dir):
        # Load template tile parameters
        template_tile_dir = join(tile_templates_dir, f"template_tile_{stack}.tif")
        template_tile_y = gdal.Open(template_tile_dir).GetRasterBand(1).YSize
        template_tile_x = gdal.Open(template_tile_dir).GetRasterBand(1).XSize
        # Load feature tile stack
        stack_filename = f"feature_stack_{scenario}_{stack}.npy"
        feature_stack = np.load(join(scenario_feature_stack_dir, stack_filename))
        # Add covariates (sensitivity and BEAM)
        if add_covariates: feature_stack = np.hstack((feature_stack,
                           np.full((feature_stack.shape[0], 1), beam_value, dtype=int),
                           np.full((feature_stack.shape[0], 1), sensitivity_value, dtype=float)
                           ))
        # Define prediction array and reshape
        prediction = XGBPredictor.predict(feature_stack)
        feature_stack = None # Flush feature stack
        prediction_tile = prediction.reshape((template_tile_y, template_tile_x))
        prediction = None # Flush prediction
        # Export prediction array as .tif
        export_array_as_tif(prediction_tile, scenario_tile_dir, template = template_tile_dir, compress = False)
        prediction_tile = None # Flush prediction tile
      # Update progress
      tile_progress_index += 1
      tile_progress_label.value = f"Tile progress: {tile_progress_index} / {n_stacks}"
    # Merge the tiles top to bottom in numeric tile order. os.listdir() returns files in
    # arbitrary order (and an alphabetical sort would put tile 10 before tile 2), which
    # would scramble the rows of the merged prediction.
    # The empty float array is kept first so the merged dtype is promoted as before.
    tile_arrays = [np.empty((0,template_tile_x))]
    for stack in range(1, n_stacks+1):
      tile_dir = join(tile_cache_scenario_dir, f"scenario_tile_{stack}.tif")
      tile_arrays.append(gdal.Open(tile_dir).ReadAsArray())
    prediction_array = np.vstack(tile_arrays)
    tile_arrays = None # Flush tiles
    # Define scenario template
    # NOTE(review): this takes whichever file os.listdir() returns first - confirm it is always a suitable raster.
    scenario_template = join(features_dir, os.listdir(features_dir)[0])
    export_array_as_tif(prediction_array, scenario_prediction_unmasked_dir, template = scenario_template, compress = True)
    # Delete scenario tile cache directory
    shutil.rmtree(tile_cache_scenario_dir)
  # Reset tile progress
  tile_progress_index = 0
  # Update scenario progress
  scenario_progress_index += 1
  scenario_progress_label.value = f"Scenario progress: {scenario_progress_index}/{len(scenarios_to_predict)}"
print("\nScenario predictions complete.")

# Mask scenario predictions



In [None]:
# Use polygons for masking, only areas inside the polygons will be included

# Polygons that are never offered as masks
polygons_to_exclude = ['template.gpkg', 'project_area_buffered_bbox.gpkg']

# Print a ready-to-paste list of candidate mask polygons; inverse polygons are skipped
print("mask_polygons = [")
for polygon in os.listdir(polygons_dir):
  if polygon in polygons_to_exclude or 'inverse' in polygon: continue
  print(f"  '{polygon[:-5]}',")
print("]")

In [None]:
# Polygons selected for masking (commented entries are not used)
mask_polygons = [
  # 'project_area',
  'gedi_area',
  # 'peninsular_malaysia',
  # 'pa_taman_krau',
  # 'pa_ais',
]

# Each mask polygon needs an inverse polygon (the template area minus the polygon)
template_polygon_path = join(polygons_dir, "template.gpkg")
for polygon in mask_polygons:
  inverse_polygon_path = join(polygons_dir, f"{polygon}_inverse.gpkg")
  # Nothing to do when the inverse polygon has already been written
  if exists(inverse_polygon_path):
    print(f"An inverse masking polygon for {polygon} already exists.")
    continue
  polygon_path = join(polygons_dir, f"{polygon}.gpkg")
  template_polygon = gpd.read_file(template_polygon_path)
  polygon_read = gpd.read_file(polygon_path)
  polygon_crs = polygon_read.crs.to_epsg()
  # Subtract the polygon from the template geometry and keep the first resulting geometry
  template_minus_polygon = template_polygon['geometry'].difference(polygon_read['geometry'])
  inverse_polygon = template_minus_polygon.iloc[0]
  inverse_polygon_gdf = gpd.GeoDataFrame({'geometry': [inverse_polygon]}, crs=f"EPSG:{polygon_crs}")
  inverse_polygon_gdf.to_file(inverse_polygon_path, driver="GPKG")
  print(f"An inverse masking polygon for {polygon} has been created in {polygons_dir}.")

# Collect every file in the unmasked predictions directory
unmasked_predictions = list(os.listdir(scenario_predictions_unmasked_dir))

# Determine last feature year for masking future scenarios
final_feature_years = []
for final_feature in os.listdir(feature_final_dir):
  if final_feature.endswith('.tif') and final_feature[-9] == '_':
    # Only filenames ending in '_YYYY.tif' carry a year; anything else is skipped.
    # ValueError is the only error int() raises here, so nothing else is swallowed.
    try: final_feature_years.append(int(final_feature[-8:-4]))
    except ValueError: continue
last_feature_year = max(final_feature_years)

# Binary progress
masking_progress_index = 0
masking_progress_label = widgets.Label(f"Masking progress: {masking_progress_index}/{len(unmasked_predictions)}")
display(masking_progress_label)

# Mask scenarios with the relevant mask
for scenario_prediction in unmasked_predictions: # Loop through each unmasked scenario
  # Drop the trailing '_unmasked.tif' (13 characters) to name the masked output
  scenario_masked_filename = f"{scenario_prediction[:-13]}.tif"
  scenario_masked_dir = join(scenario_predictions_dir, scenario_masked_filename)
  if not exists(scenario_masked_dir):
    mask_exists = False
    for mask in os.listdir(masks_dir):
      # Match all oldgrowth scenarios
      if 'all_oldgrowth' in mask or 'all_oldgrowth' in scenario_prediction:
        mask_matches = 'all_oldgrowth' in mask and 'all_oldgrowth' in scenario_prediction
      else:
        scenario_year = int(scenario_prediction[:4])
        mask_year = int(mask[12:16])
        # Match historic scenarios by year, and future scenarios with the most recent forest mask
        mask_matches = scenario_year == mask_year or (scenario_year > last_feature_year and last_feature_year == mask_year)
      if mask_matches:
        selected_mask_filename = mask
        selected_mask_dir = join(masks_dir, selected_mask_filename)
        mask_exists = True
    if mask_exists == False: print(f"A suitable mask for {scenario_prediction} does not exist.\n")
    else: # Mask the scenario prediction
      print(f"Masking {scenario_prediction} with {selected_mask_filename}...")
      mask_array = gdal.Open(selected_mask_dir).ReadAsArray()
      scenario_prediction_unmasked_dir = join(scenario_predictions_unmasked_dir, scenario_prediction)
      scenario_prediction_array = gdal.Open(scenario_prediction_unmasked_dir).ReadAsArray()
      # Mask where the mask array is not 1
      scenario_masked_array = np.where(mask_array != 1, nodatavalue, scenario_prediction_array)
      export_array_as_tif(scenario_masked_array, scenario_masked_dir, compress = True)
      if len(mask_polygons) > 0:
        for polygon_mask in mask_polygons:
          inverse_gedi_area_path = join(polygons_dir, f"{polygon_mask}_inverse.gpkg")
          print(f"Masking {scenario_prediction} with {polygon_mask}...")
          burn_polygon_to_raster(scenario_masked_dir, inverse_gedi_area_path, fixed_value=nodatavalue, all_touched=False)
        # Recompress the prediction after burning the polygon masks
        scenario_masked_array_2 = gdal.Open(scenario_masked_dir).ReadAsArray()
        export_array_as_tif(scenario_masked_array_2, scenario_masked_dir, compress = True)
      print(f"{scenario_masked_filename} exported.")
  # Update masking progress
  masking_progress_index += 1
  masking_progress_label.value = f"Masking progress: {masking_progress_index}/{len(unmasked_predictions)}"

# Scenario disturbance / change

In [None]:
# Tool to identify required scenarios for forest disturbance/change mapping
use_tool = True

def select_forest_scenarios():
    # Initialize variables
    calculation_note = None
    is_before_calculation = False

    # Print header
    print("\n" + "="*50)
    print("\nFOREST DISTURBANCE / CHANGE SELECTOR\n")
    print("="*50 + "\n")

    # Step 1: Collect disturbance type
    disturbance_prompt = (
        "Select disturbance type:\n"
        "1. Degradation\n"
        "2. Deforestation\n"
        "3. Degradation and deforestation (total disturbance)\n"
        "4. Change\n\n"
        "Enter your choice (1-4): "
    )
    disturbance_type = input(disturbance_prompt)

    # Handle Change type (separate workflow)
    if disturbance_type == "4":
        print("\n")
        year_of_interest = input("Enter year of interest: ")
        print("\n")
        baseline_year = input("Enter baseline year (must be before year of interest): ")

        # Validate year order
        if int(baseline_year) >= int(year_of_interest):
            print("\nError: Baseline year must be before year of interest")
            return None

        selected_difference = f"{year_of_interest}_change_{baseline_year}"
        scenario_pair = (year_of_interest, baseline_year)
        other_requirements = []

    else:
        # Step 2: Year of interest
        print("\n")
        year_of_interest = input("Enter year of interest: ")
        print("\n")

        # Step 3: Collect baseline type
        baseline_prompt = (
            "Select baseline type:\n"
            "1. Total (since a hypothetical old-growth state)\n"
            "2. Since a baseline year\n"
            "3. Before a baseline year\n\n"
            "Enter your choice (1-3): "
        )
        baseline_type = input(baseline_prompt)

        # Initialize result variables
        selected_difference = None
        scenario_pair = None
        other_requirements = []
        is_before_calculation = (baseline_type == "3")

        # Process based on baseline type
        if baseline_type == "1":  # Total (oldgrowth baseline)
            if disturbance_type == "1":  # Degradation
                selected_difference = f"{year_of_interest}_degradation_total"
                scenario_pair = (year_of_interest, f"{year_of_interest}_oldgrowth")

            elif disturbance_type == "2":  # Deforestation
                selected_difference = f"{year_of_interest}_deforestation_total"
                scenario_pair = (f"{year_of_interest}_oldgrowth", "all_oldgrowth")

            elif disturbance_type == "3":  # Degradation and deforestation
                # Define component requirements
                deg_total_diff = f"{year_of_interest}_degradation_total"
                deg_total_pair = (year_of_interest, f"{year_of_interest}_oldgrowth")
                def_total_diff = f"{year_of_interest}_deforestation_total"
                def_total_pair = (f"{year_of_interest}_oldgrowth", "all_oldgrowth")

                other_requirements.append((deg_total_diff, deg_total_pair))
                other_requirements.append((def_total_diff, def_total_pair))

                calculation_note = f"Equivalent to {year_of_interest} - all_oldgrowth"
                selected_difference = f"{year_of_interest}_degradation_deforestation_total"
                scenario_pair = (year_of_interest, "all_oldgrowth")

        elif baseline_type == "2":  # Since a baseline year
            # Get and validate baseline year
            print("\n")
            baseline_year = input("Enter baseline year (must be before year of interest): ")
            if int(baseline_year) >= int(year_of_interest):
                print("\nError: Baseline year must be before year of interest")
                return None

            baseline_year_plus1 = str(int(baseline_year) + 1)

            if disturbance_type == "1":  # Degradation
                selected_difference = f"{year_of_interest}_degradation_since_{baseline_year_plus1}"
                scenario_pair = (year_of_interest, f"{year_of_interest}_no_degradation_since_{baseline_year_plus1}")

            elif disturbance_type == "2":  # Deforestation
                selected_difference = f"{year_of_interest}_deforestation_since_{baseline_year_plus1}"
                scenario_pair = (f"{year_of_interest}_alternate_degradation_{baseline_year}", baseline_year)

            elif disturbance_type == "3":  # Degradation and deforestation since
                # Define component requirements
                degradation_since_diff = f"{year_of_interest}_degradation_since_{baseline_year_plus1}"
                degradation_since_pair = (year_of_interest, f"{year_of_interest}_no_degradation_since_{baseline_year_plus1}")

                deforestation_since_diff = f"{year_of_interest}_deforestation_since_{baseline_year_plus1}"
                deforestation_since_pair = (f"{year_of_interest}_alternate_degradation_{baseline_year}", baseline_year)

                other_requirements.append((degradation_since_diff, degradation_since_pair))
                other_requirements.append((deforestation_since_diff, deforestation_since_pair))

                selected_difference = f"{year_of_interest}_degradation_deforestation_since_{baseline_year_plus1}"
                scenario_pair = (year_of_interest, baseline_year)

        elif baseline_type == "3":  # Before a baseline year
            # Get and validate baseline year
            print("\n")
            baseline_year = input("Enter baseline year (must be before year of interest): ")
            if int(baseline_year) >= int(year_of_interest):
                print("\nError: Baseline year must be before year of interest")
                return None

            baseline_year_plus1 = str(int(baseline_year) + 1)

            if disturbance_type == "1":  # Degradation
                selected_difference = f"{year_of_interest}_degradation_before_{baseline_year_plus1}"

                # Define required difference components
                since_difference = f"{year_of_interest}_degradation_since_{baseline_year_plus1}"
                since_pair = (year_of_interest, f"{year_of_interest}_no_degradation_since_{baseline_year_plus1}")

                total_difference = f"{year_of_interest}_degradation_total"
                total_pair = (year_of_interest, f"{year_of_interest}_oldgrowth")

                other_requirements.append((since_difference, since_pair))
                other_requirements.append((total_difference, total_pair))

            elif disturbance_type == "2":  # Deforestation
                selected_difference = f"{year_of_interest}_deforestation_before_{baseline_year_plus1}"

                # Define required difference components
                since_difference = f"{year_of_interest}_deforestation_since_{baseline_year_plus1}"
                since_pair = (f"{year_of_interest}_alternate_degradation_{baseline_year}", baseline_year)

                total_difference = f"{year_of_interest}_deforestation_total"
                total_pair = (f"{year_of_interest}_oldgrowth", "all_oldgrowth")

                other_requirements.append((since_difference, since_pair))
                other_requirements.append((total_difference, total_pair))

            elif disturbance_type == "3":  # Degradation and deforestation before
                selected_difference = f"{year_of_interest}_degradation_deforestation_before_{baseline_year_plus1}"

                # Define requirements for degradation_before
                degradation_since_diff = f"{year_of_interest}_degradation_since_{baseline_year_plus1}"
                degradation_since_pair = (year_of_interest, f"{year_of_interest}_no_degradation_since_{baseline_year_plus1}")

                degradation_total_diff = f"{year_of_interest}_degradation_total"
                degradation_total_pair = (year_of_interest, f"{year_of_interest}_oldgrowth")

                # Define requirements for deforestation_before
                deforestation_since_diff = f"{year_of_interest}_deforestation_since_{baseline_year_plus1}"
                deforestation_since_pair = (f"{year_of_interest}_alternate_degradation_{baseline_year}", baseline_year)

                deforestation_total_diff = f"{year_of_interest}_deforestation_total"
                deforestation_total_pair = (f"{year_of_interest}_oldgrowth", "all_oldgrowth")

                # Add all requirements
                other_requirements.append((degradation_since_diff, degradation_since_pair))
                other_requirements.append((degradation_total_diff, degradation_total_pair))
                other_requirements.append((deforestation_since_diff, deforestation_since_pair))
                other_requirements.append((deforestation_total_diff, deforestation_total_pair))

        else:
            print("\nError: Invalid baseline type selection.")
            return None

    # Build result display
    result_text = []
    result_text.append("\n" + "="*50)
    result_text.append(f"\nSELECTED DIFFERENCE: {selected_difference}")

    # Add required differences and their scenario pairs
    if other_requirements:
        result_text.append(f"\nRequired difference and scenario pairs:")
        for diff, pair in other_requirements:
            result_text.append(f"'{diff}' {pair}")

        if calculation_note:
            result_text.append(f"\n{calculation_note}")
    # Only include scenario pair when no other requirements
    elif scenario_pair:
        result_text.append(f"\nScenario pair required: {scenario_pair}")

    result_text.append("\n" + "="*50)

    # Print results with fewer new lines
    print("\n\n")
    print("\n".join(result_text))

    # Return appropriate values
    if is_before_calculation:
        return selected_difference, other_requirements
    else:
        return selected_difference, scenario_pair, other_requirements

# Launch the interactive scenario selection tool, but only when it has been
# switched on and this code is being executed directly.
if use_tool and __name__ == "__main__":
    select_forest_scenarios()

In [None]:
# Build dictionaries of disturbance / change options based on available files.
# The cell prints three dictionary literals in copy-paste form and, in parallel,
# fills scenario_difference_dictionary with the same scenario pairs.

# Extract all available scenarios from directory. The scenario name is the
# part of each filename before the first "__".
scenarios = set()
for file in os.listdir(scenario_predictions_dir):
    scenarios.add(file.split("__")[0])

# Extract and categorize years from scenarios
years = set()
plain_years = set()  # Years as standalone scenarios (e.g. "2014")
oldgrowth_years = set()  # Years with oldgrowth variants
# Baseline years (suffix year - 1) that are only named in the suffix of a
# "<year>_no_degradation_since_<year>" scenario. Such a baseline does not need
# a scenario of its own, so it would otherwise never be visited by the
# year-to-year loop below and its 'degradation since' pair would be missed.
since_baseline_years = set()

for s in scenarios:
    if s.isdigit():
        years.add(int(s))
        plain_years.add(int(s))
    elif "_oldgrowth" in s:
        year = s.split("_oldgrowth")[0]
        if year.isdigit():
            years.add(int(year))
            oldgrowth_years.add(int(year))
    elif any(pattern in s for pattern in ["_no_degradation_since_", "_alternate_degradation_"]):
        year = s.split("_")[0]
        if year.isdigit():
            years.add(int(year))
        if "_no_degradation_since_" in s:
            since_year = s.split("_no_degradation_since_")[-1]
            if since_year.isdigit():
                since_baseline_years.add(int(since_year) - 1)

years_sorted = sorted(list(years))
# Candidate baseline years: every known year plus the suffix-only baselines
baseline_years_sorted = sorted(years | since_baseline_years)

# Track scenario availability and dependencies
deforest_since_scenarios = set()
direct_degradation_pairs = set()
direct_deforestation_pairs = set()

# Output dictionaries
scenario_difference_dictionary = {}

print("# Differences in scenario_difference_dictionary and in before_baseline_dictionary are ")
print("# calculated by subtracting the second scenario / difference from the first. The ")
print("# differences in degradation_deforestation_dictionary are summed.")
print("")
print("scenario_difference_dictionary = {")
print("")

# 1. Process oldgrowth baseline sections for direct differences
oldgrowth_entries = []
for year in years_sorted:
    y_str = str(year)

    # Only create section if all required metrics are possible
    if (year in plain_years and
        year in oldgrowth_years and
        "all_oldgrowth" in scenarios):

        if not oldgrowth_entries:
            oldgrowth_entries.append("# Disturbance using oldgrowth as a baseline")

        oldgrowth_entries.append(f"  ('{y_str}', '{y_str}_oldgrowth'):")
        oldgrowth_entries.append(f"    '{y_str}_degradation_total',")
        scenario_difference_dictionary[(y_str, f"{y_str}_oldgrowth")] = f"{y_str}_degradation_total"

        oldgrowth_entries.append(f"  ('{y_str}_oldgrowth', 'all_oldgrowth'):")
        oldgrowth_entries.append(f"    '{y_str}_deforestation_total',")
        scenario_difference_dictionary[(f"{y_str}_oldgrowth", "all_oldgrowth")] = f"{y_str}_deforestation_total"
        oldgrowth_entries.append("")

# Print oldgrowth entries if any exist
if oldgrowth_entries:
    print("\n".join(oldgrowth_entries))

# 2. Process year-to-year comparisons for direct differences by baseline year.
# year_a is the year of interest, year_b the (earlier) baseline year.
baseline_entries = {}

for year_a in years_sorted:
    for year_b in baseline_years_sorted:
        if year_a <= year_b:
            continue

        a_str, b_str = str(year_a), str(year_b)
        b_plus1 = str(year_b + 1)

        # Deforestation since [year_b+1] needs both the plain baseline year and
        # the alternate degradation scenario to exist
        deforest_since_key = f"{a_str}_deforestation_since_{b_plus1}"
        if (year_b in plain_years and f"{a_str}_alternate_degradation_{b_str}" in scenarios):
            deforest_since_scenarios.add(deforest_since_key)
        has_deforestation_since = deforest_since_key in deforest_since_scenarios

        entries = []

        # Degradation since [year_b+1]
        if (year_a in plain_years and f"{a_str}_no_degradation_since_{b_plus1}" in scenarios):
            entries.append((
                f"  ('{a_str}', '{a_str}_no_degradation_since_{b_plus1}'):",
                f"    '{a_str}_degradation_since_{b_plus1}',"
            ))
            scenario_difference_dictionary[(a_str, f"{a_str}_no_degradation_since_{b_plus1}")] = f"{a_str}_degradation_since_{b_plus1}"
            direct_degradation_pairs.add((a_str, b_plus1))

        # Deforestation since [year_b+1]
        if has_deforestation_since:
            entries.append((
                f"  ('{a_str}_alternate_degradation_{b_str}', '{b_str}'):",
                f"    '{a_str}_deforestation_since_{b_plus1}',"
            ))
            scenario_difference_dictionary[(f"{a_str}_alternate_degradation_{b_str}", b_str)] = f"{a_str}_deforestation_since_{b_plus1}"
            direct_deforestation_pairs.add((a_str, b_plus1))

        # Only add section if at least one comparison exists
        if entries:
            if b_str not in baseline_entries:
                baseline_entries[b_str] = []

            # Add section header and entries, ordered by difference name
            section_text = [f"# Disturbance by {a_str}, using {b_str} as a baseline"]
            for line1, line2 in sorted(entries, key=lambda x: x[1]):
                section_text.append(line1)
                section_text.append(line2)
            section_text.append("")

            baseline_entries[b_str].append((a_str, section_text))

# Print baseline entries if any exist, grouped by baseline year
if baseline_entries:
    for b_str in sorted(baseline_entries.keys()):
        for a_str, section in sorted(baseline_entries[b_str], key=lambda x: x[0]):
            print("\n".join(section))

# 3. Process year-to-year change sections
years_available = sorted([y for y in years_sorted if y in plain_years])

if len(years_available) >= 2:
    # A. Single-year consecutive changes
    print("# Change between single years")
    for i in range(1, len(years_available)):
        current = str(years_available[i])
        previous = str(years_available[i-1])

        print(f"  ('{current}', '{previous}'):")
        print(f"    '{current}_change_{previous}',")
        scenario_difference_dictionary[(current, previous)] = f"{current}_change_{previous}"
    print("")

    # B. Multi-year comparison (earliest to latest only)
    if len(years_available) > 2:
        earliest = str(years_available[0])
        latest = str(years_available[-1])

        print("# Change between multiple years")
        print("# Add any other desired year combinations manually using the pattern below")
        print(f"  ('{latest}', '{earliest}'):")
        print(f"    '{latest}_change_{earliest}',")
        scenario_difference_dictionary[(latest, earliest)] = f"{latest}_change_{earliest}"
        print("")

print("}")

# Dictionary for calculated 'before' differences
print("")
print("before_baseline_dictionary = {")

# Collection for degradation before metrics
degradation_before_entries = []
for year_a, year_b_plus1 in sorted(direct_degradation_pairs):
    # Check if required components exist (the total needs the oldgrowth pair)
    if (year_a, f"{year_a}_oldgrowth") in scenario_difference_dictionary:
        deg_since = f"{year_a}_degradation_since_{year_b_plus1}"
        deg_total = f"{year_a}_degradation_total"
        deg_before = f"{year_a}_degradation_before_{year_b_plus1}"

        degradation_before_entries.append(f"  '{deg_before}': ('{deg_total}', '{deg_since}'),")

# Print degradation before metrics if any exist
if degradation_before_entries:
    print("\n# Degradation before metrics (Total - Since)")
    for entry in degradation_before_entries:
        print(entry)

# Collection for deforestation before metrics
deforestation_before_entries = []
for year_a, year_b_plus1 in sorted(direct_deforestation_pairs):
    # Check if required components exist
    if (f"{year_a}_oldgrowth", "all_oldgrowth") in scenario_difference_dictionary:
        def_since = f"{year_a}_deforestation_since_{year_b_plus1}"
        def_total = f"{year_a}_deforestation_total"
        def_before = f"{year_a}_deforestation_before_{year_b_plus1}"

        deforestation_before_entries.append(f"  '{def_before}': ('{def_total}', '{def_since}'),")

# Print deforestation before metrics if any exist
if deforestation_before_entries:
    if degradation_before_entries:
        print("")
    print("# Deforestation before metrics (Total - Since)")
    for entry in deforestation_before_entries:
        print(entry)

print("}")

# Dictionary for combined degradation and deforestation calculations
print("")
print("degradation_deforestation_dictionary = {")

# Collection for combined disturbance totals
combined_total_entries = []
for year in years_sorted:
    y_str = str(year)

    # Check if required components exist
    if ((y_str, f"{y_str}_oldgrowth") in scenario_difference_dictionary and
        (f"{y_str}_oldgrowth", "all_oldgrowth") in scenario_difference_dictionary):
        deg_total = f"{y_str}_degradation_total"
        def_total = f"{y_str}_deforestation_total"
        combined = f"{y_str}_degradation_deforestation_total"

        combined_total_entries.append(f"  '{combined}': ('{deg_total}', '{def_total}'),")

# Print combined total entries if any exist
if combined_total_entries:
    print("\n# Combined degradation and deforestation totals")
    for entry in combined_total_entries:
        print(entry)

# Collection for combined since metrics: only year pairs for which both the
# degradation and the deforestation 'since' difference are available
combined_pairs = direct_degradation_pairs.intersection(direct_deforestation_pairs)
combined_since_entries = []

for year_a, year_b_plus1 in sorted(combined_pairs):
    deg_since = f"{year_a}_degradation_since_{year_b_plus1}"
    def_since = f"{year_a}_deforestation_since_{year_b_plus1}"
    combined = f"{year_a}_degradation_deforestation_since_{year_b_plus1}"

    combined_since_entries.append(f"  '{combined}': ('{deg_since}', '{def_since}'),")

# Print combined since entries if any exist
if combined_since_entries:
    if combined_total_entries:
        print("")
    print("# Combined degradation and deforestation since")
    for entry in combined_since_entries:
        print(entry)

# Collection for combined before metrics
combined_before_entries = []
for year_a, year_b_plus1 in sorted(combined_pairs):
    deg_before = f"{year_a}_degradation_before_{year_b_plus1}"
    def_before = f"{year_a}_deforestation_before_{year_b_plus1}"

    # Only include if both individual 'before' metrics would exist
    if ((year_a, f"{year_a}_oldgrowth") in scenario_difference_dictionary and
        (f"{year_a}_oldgrowth", "all_oldgrowth") in scenario_difference_dictionary):
        combined = f"{year_a}_degradation_deforestation_before_{year_b_plus1}"

        combined_before_entries.append(f"  '{combined}': ('{deg_before}', '{def_before}'),")

# Print combined before entries if any exist
if combined_before_entries:
    if combined_total_entries or combined_since_entries:
        print("")
    print("# Combined degradation and deforestation before")
    for entry in combined_before_entries:
        print(entry)

print("}")

In [None]:
# Differences in scenario_difference_dictionary and in before_baseline_dictionary are
# calculated by subtracting the second scenario / difference from the first. The
# differences in degradation_deforestation_dictionary are summed.
#
# These literals follow the format printed by the dictionary-building cell
# above, so they can be pasted from its output and then edited by hand.
# The name before "__" in each raster filename is the scenario / difference name.

# Maps (scenario1, scenario2) -> name of the difference raster (scenario1 - scenario2)
scenario_difference_dictionary = {

# Disturbance using oldgrowth as a baseline
  ('2014', '2014_oldgrowth'):
    '2014_degradation_total',
  ('2014_oldgrowth', 'all_oldgrowth'):
    '2014_deforestation_total',

  ('2021', '2021_oldgrowth'):
    '2021_degradation_total',
  ('2021_oldgrowth', 'all_oldgrowth'):
    '2021_deforestation_total',

  ('2022', '2022_oldgrowth'):
    '2022_degradation_total',
  ('2022_oldgrowth', 'all_oldgrowth'):
    '2022_deforestation_total',

  ('2023', '2023_oldgrowth'):
    '2023_degradation_total',
  ('2023_oldgrowth', 'all_oldgrowth'):
    '2023_deforestation_total',

  ('2024', '2024_oldgrowth'):
    '2024_degradation_total',
  ('2024_oldgrowth', 'all_oldgrowth'):
    '2024_deforestation_total',

# Disturbance by 2014, using 1990 as a baseline
  ('2014', '2014_no_degradation_since_1991'):
    '2014_degradation_since_1991',

# Disturbance by 2024, using 2014 as a baseline
  ('2024_alternate_degradation_2014', '2014'):
    '2024_deforestation_since_2015',
  ('2024', '2024_no_degradation_since_2015'):
    '2024_degradation_since_2015',

# Disturbance by 2022, using 2021 as a baseline
  ('2022_alternate_degradation_2021', '2021'):
    '2022_deforestation_since_2022',
  ('2022', '2022_no_degradation_since_2022'):
    '2022_degradation_since_2022',

# Disturbance by 2024, using 2021 as a baseline
  ('2024_alternate_degradation_2021', '2021'):
    '2024_deforestation_since_2022',
  ('2024', '2024_no_degradation_since_2022'):
    '2024_degradation_since_2022',

# Disturbance by 2023, using 2022 as a baseline
  ('2023_alternate_degradation_2022', '2022'):
    '2023_deforestation_since_2023',
  ('2023', '2023_no_degradation_since_2023'):
    '2023_degradation_since_2023',

# Disturbance by 2024, using 2022 as a baseline
  ('2024_alternate_degradation_2022', '2022'):
    '2024_deforestation_since_2023',
  ('2024', '2024_no_degradation_since_2023'):
    '2024_degradation_since_2023',

# Disturbance by 2024, using 2023 as a baseline
  ('2024_alternate_degradation_2023', '2023'):
    '2024_deforestation_since_2024',
  ('2024', '2024_no_degradation_since_2024'):
    '2024_degradation_since_2024',

# Change between single years
  ('2015', '2014'):
    '2015_change_2014',
  ('2016', '2015'):
    '2016_change_2015',
  ('2017', '2016'):
    '2017_change_2016',
  ('2018', '2017'):
    '2018_change_2017',
  ('2019', '2018'):
    '2019_change_2018',
  ('2020', '2019'):
    '2020_change_2019',
  ('2021', '2020'):
    '2021_change_2020',
  ('2022', '2021'):
    '2022_change_2021',
  ('2023', '2022'):
    '2023_change_2022',
  ('2024', '2023'):
    '2024_change_2023',

# Change between multiple years
# Add any other desired year combinations manually using the pattern below
  ('2024', '2014'):
    '2024_change_2014',

}

# Maps name of the 'before' difference -> (difference1, difference2), calculated
# as difference1 - difference2. Both inputs are values of
# scenario_difference_dictionary, so those differences must be created first.
before_baseline_dictionary = {

# Degradation before metrics (Total - Since)
  '2014_degradation_before_1991': ('2014_degradation_total', '2014_degradation_since_1991'),
  '2022_degradation_before_2022': ('2022_degradation_total', '2022_degradation_since_2022'),
  '2023_degradation_before_2023': ('2023_degradation_total', '2023_degradation_since_2023'),
  '2024_degradation_before_2015': ('2024_degradation_total', '2024_degradation_since_2015'),
  '2024_degradation_before_2022': ('2024_degradation_total', '2024_degradation_since_2022'),
  '2024_degradation_before_2023': ('2024_degradation_total', '2024_degradation_since_2023'),
  '2024_degradation_before_2024': ('2024_degradation_total', '2024_degradation_since_2024'),

# Deforestation before metrics (Total - Since)
  '2022_deforestation_before_2022': ('2022_deforestation_total', '2022_deforestation_since_2022'),
  '2023_deforestation_before_2023': ('2023_deforestation_total', '2023_deforestation_since_2023'),
  '2024_deforestation_before_2015': ('2024_deforestation_total', '2024_deforestation_since_2015'),
  '2024_deforestation_before_2022': ('2024_deforestation_total', '2024_deforestation_since_2022'),
  '2024_deforestation_before_2023': ('2024_deforestation_total', '2024_deforestation_since_2023'),
  '2024_deforestation_before_2024': ('2024_deforestation_total', '2024_deforestation_since_2024'),
}

# Maps name of the combined difference -> (degradation difference, deforestation
# difference), calculated as the sum of the two. The 'before' entries use keys of
# before_baseline_dictionary as inputs, so those must be created first.
degradation_deforestation_dictionary = {

# Combined degradation and deforestation totals
  '2014_degradation_deforestation_total': ('2014_degradation_total', '2014_deforestation_total'),
  '2021_degradation_deforestation_total': ('2021_degradation_total', '2021_deforestation_total'),
  '2022_degradation_deforestation_total': ('2022_degradation_total', '2022_deforestation_total'),
  '2023_degradation_deforestation_total': ('2023_degradation_total', '2023_deforestation_total'),
  '2024_degradation_deforestation_total': ('2024_degradation_total', '2024_deforestation_total'),

# Combined degradation and deforestation since
  '2022_degradation_deforestation_since_2022': ('2022_degradation_since_2022', '2022_deforestation_since_2022'),
  '2023_degradation_deforestation_since_2023': ('2023_degradation_since_2023', '2023_deforestation_since_2023'),
  '2024_degradation_deforestation_since_2015': ('2024_degradation_since_2015', '2024_deforestation_since_2015'),
  '2024_degradation_deforestation_since_2022': ('2024_degradation_since_2022', '2024_deforestation_since_2022'),
  '2024_degradation_deforestation_since_2023': ('2024_degradation_since_2023', '2024_deforestation_since_2023'),
  '2024_degradation_deforestation_since_2024': ('2024_degradation_since_2024', '2024_deforestation_since_2024'),

# Combined degradation and deforestation before
  '2022_degradation_deforestation_before_2022': ('2022_degradation_before_2022', '2022_deforestation_before_2022'),
  '2023_degradation_deforestation_before_2023': ('2023_degradation_before_2023', '2023_deforestation_before_2023'),
  '2024_degradation_deforestation_before_2015': ('2024_degradation_before_2015', '2024_deforestation_before_2015'),
  '2024_degradation_deforestation_before_2022': ('2024_degradation_before_2022', '2024_deforestation_before_2022'),
  '2024_degradation_deforestation_before_2023': ('2024_degradation_before_2023', '2024_deforestation_before_2023'),
  '2024_degradation_deforestation_before_2024': ('2024_degradation_before_2024', '2024_deforestation_before_2024'),
}

In [None]:
# Functions for differences and sums
def subtract_arrays(array1, array2):
    """Return the element-wise difference array1 - array2."""
    diff_array = array1 - array2
    return diff_array

def sum_arrays(array1, array2):
    """Return the element-wise sum array1 + array2."""
    sum_array = array1 + array2
    return sum_array

def _difference_filename(name):
    """Return the .tif filename used for a scenario or difference name."""
    return f"{name}__{selected_scenario_area}_{selected_model}.tif"

def _combine_rasters(first_path, second_path, output_path, combine_function):
    """Combine two rasters with combine_function and export the result.

    Args:
        first_path: Path of the first input .tif, also used as export template.
        second_path: Path of the second input .tif.
        output_path: Path of the .tif to create.
        combine_function: Called as combine_function(first_array, second_array),
            e.g. subtract_arrays or sum_arrays.

    Raises:
        AssertionError: If either input file does not exist.
    """
    assert exists(first_path), f"{first_path} does not exist."
    assert exists(second_path), f"{second_path} does not exist."

    # Convert input .tifs to temporary arrays
    first_array_temp = gdal.Open(first_path).ReadAsArray()
    second_array_temp = gdal.Open(second_path).ReadAsArray()

    # Fill nodata values with 0 if they are not nodata values in the other input,
    # so a pixel is only nodata in the result when it is nodata in both inputs
    first_array = np.where((first_array_temp == nodatavalue) & (second_array_temp != nodatavalue), 0, first_array_temp)
    second_array = np.where((second_array_temp == nodatavalue) & (first_array != nodatavalue), 0, second_array_temp)

    # Combine the arrays where the value is not 'nodatavalue'
    result_array = np.where(first_array == nodatavalue, nodatavalue, combine_function(first_array, second_array))
    export_array_as_tif(result_array, output_path, template = first_path)

# Set up single progress indicator for all operations
total_operations = len(scenario_difference_dictionary) + len(before_baseline_dictionary) + len(degradation_deforestation_dictionary)
progress_index = 0
progress_label = widgets.Label(f"Difference calculation progress: {progress_index}/{total_operations}")

display(progress_label)

# 1. Process direct scenario differences (inputs are scenario predictions)
for (scenario1, scenario2), difference_name in scenario_difference_dictionary.items():
    diff_dir = join(scenario_dist_dir, _difference_filename(difference_name))

    # Existing outputs are kept, not recalculated
    if not exists(diff_dir):
        _combine_rasters(
            join(scenario_predictions_dir, _difference_filename(scenario1)),
            join(scenario_predictions_dir, _difference_filename(scenario2)),
            diff_dir, subtract_arrays)

    # Update progress
    progress_index += 1
    progress_label.value = f"Difference calculation progress: {progress_index}/{total_operations}"

# 2. Process before baseline differences (require subtracting one difference from another)
for difference_name, (diff1_name, diff2_name) in before_baseline_dictionary.items():
    output_dir = join(scenario_dist_dir, _difference_filename(difference_name))

    if not exists(output_dir):
        _combine_rasters(
            join(scenario_dist_dir, _difference_filename(diff1_name)),
            join(scenario_dist_dir, _difference_filename(diff2_name)),
            output_dir, subtract_arrays)

    # Update progress
    progress_index += 1
    progress_label.value = f"Difference calculation progress: {progress_index}/{total_operations}"

# 3. Process combined degradation and deforestation (require summing two differences)
for difference_name, (diff1_name, diff2_name) in degradation_deforestation_dictionary.items():
    output_dir = join(scenario_dist_dir, _difference_filename(difference_name))

    if not exists(output_dir):
        _combine_rasters(
            join(scenario_dist_dir, _difference_filename(diff1_name)),
            join(scenario_dist_dir, _difference_filename(diff2_name)),
            output_dir, sum_arrays)

    # Update progress
    progress_index += 1
    progress_label.value = f"Difference calculation progress: {progress_index}/{total_operations}"

print("All differences calculated.")

# Intactness

In [None]:
# Print every available baseline scenario and disturbance raster as a
# ready-to-paste assignment for the next cell. The intactness percentage and
# relative intactness are calculated from the chosen pair; ideally the baseline
# is the scenario with the least disturbance and the disturbance raster is the
# difference between that and the current reality.
baseline_candidates = os.listdir(scenario_predictions_dir)
for baseline in baseline_candidates:
    print(f"selected_baseline = '{baseline}'")

dist_candidates = os.listdir(scenario_dist_dir)
for dist in dist_candidates:
    print(f"selected_dist = '{dist}'")

In [None]:
selected_baseline = 'all_oldgrowth__asartr_agbd_historic_250429_223033.tif'
selected_dist = '2024_degradation_deforestation_total__asartr_agbd_historic_250429_223033.tif'
forest_mask_year = '2024'

# Output name: percentage_change__<baseline scenario>__<disturbance>__<rest of the
# disturbance filename, including the .tif extension>
percentage_filename = f"percentage_change__{selected_baseline.split('__')[0]}__{selected_dist.split('__')[0]}__{selected_dist.split('__')[1]}"
percentage_path = join(intactness_dir, percentage_filename)

if not exists(percentage_path):
  # Define filenames and directories
  selected_baseline_path = join(scenario_predictions_dir, selected_baseline)
  selected_dist_path = join(scenario_dist_dir, selected_dist)
  selected_mask_path = join(masks_dir, f"mask_forest_{forest_mask_year}.tif")

  # Convert to arrays
  selected_baseline_array = gdal.Open(selected_baseline_path).ReadAsArray()
  selected_dist_array = gdal.Open(selected_dist_path).ReadAsArray()
  selected_mask_array = gdal.Open(selected_mask_path).ReadAsArray()

  # Pixels that are 'nodatavalue' in any of the inputs stay 'nodatavalue'. A
  # baseline of exactly 0 has no defined percentage change (division by zero
  # gives inf / NaN, which would corrupt the quantiles calculated later), so
  # those pixels are also set to 'nodatavalue'.
  no_percentage = ((selected_mask_array==nodatavalue) | (selected_baseline_array==nodatavalue) |
                   (selected_dist_array==nodatavalue) | (selected_baseline_array==0))

  # The division is evaluated for every pixel before np.where selects the valid
  # ones, so silence warnings raised by pixels that are discarded anyway
  with np.errstate(divide='ignore', invalid='ignore'):
    percentage_values = selected_dist_array/selected_baseline_array*100
  percentage_array = np.where(no_percentage, nodatavalue, percentage_values)
  export_array_as_tif(percentage_array, percentage_path, template = selected_baseline_path)
  print(f"{percentage_filename} has been exported.")

else:
  print(f"{percentage_filename} already exists.")

In [None]:
# List the polygons that can be used as an additional mask when calculating
# relative intactness quantiles, as ready-to-paste assignments. The template,
# the buffered bounding box and previously generated inverse polygons are left out.
polygons_to_exclude = ['template.gpkg', 'project_area_buffered_bbox.gpkg']

for polygon in os.listdir(polygons_dir):
    if polygon in polygons_to_exclude or 'inverse' in polygon:
        continue
    print(f"mask_polygon = '{polygon}'")

In [None]:
mask_polygon = 'asartr_phase_2.gpkg'
# mask_polygon = None

if mask_polygon is not None:
  # Create an inverse project area path for masking. The inverse polygon is the
  # template area minus the mask polygon, so burning it sets everything outside
  # the mask polygon to 'nodatavalue'.
  # NOTE(review): mask_polygon[:-5] assumes the filename ends in '.gpkg'.
  template_polygon_path = join(polygons_dir, "template.gpkg")
  inverse_polygon_path = join(polygons_dir, f"{mask_polygon[:-5]}_inverse.gpkg")
  if not exists(inverse_polygon_path):
    polygon_path = join(polygons_dir, mask_polygon)
    template_polygon = gpd.read_file(template_polygon_path)
    polygon_read = gpd.read_file(polygon_path)
    polygon_crs = polygon_read.crs.to_epsg()
    # NOTE(review): only the first geometry of the difference is kept; confirm
    # the template and mask polygon files each contain a single feature.
    inverse_polygon = template_polygon['geometry'].difference(polygon_read['geometry']).iloc[0]
    inverse_polygon_gdf = gpd.GeoDataFrame({'geometry': [inverse_polygon]}, crs=f"EPSG:{polygon_crs}")
    inverse_polygon_gdf.to_file(inverse_polygon_path, driver="GPKG")
    # Report the selected mask polygon (not the loop variable left over from
    # the polygon listing cell)
    print(f"An inverse masking polygon for {mask_polygon} has been created in {polygons_dir}.")
  else:
    print(f"An inverse masking polygon for {mask_polygon} already exists.")

  # Copy the percentage raster for potential masking
  percentage_masked_filename = f"{percentage_filename[:-4]}_masked_{mask_polygon[:-5]}.tif"
  percentage_masked_path = join(intactness_dir, percentage_masked_filename)
  if not exists(percentage_masked_path):
    print(f"Copying {percentage_filename} for masking...")
    copyfile(percentage_path, percentage_masked_path)
    print(f"Masking {percentage_filename} with {mask_polygon}...")
    burn_polygon_to_raster(percentage_masked_path, inverse_polygon_path, fixed_value=nodatavalue, all_touched=False)
    # Recompress the prediction after burning the polygon masks
    percentage_masked_array = gdal.Open(percentage_masked_path).ReadAsArray()
    export_array_as_tif(percentage_masked_array, percentage_masked_path, compress = True)
    print(f"{percentage_filename} masked.")
  else:
    print(f"{percentage_masked_path} already exists.")

else:
  print("No additional mask will be used to calculate relative intactness.")

In [None]:
# Define number of quantiles for intactness rating (e.g. 10 for 1 - 10)
num_quantiles = 10

# Define paths and arrays. The masked percentage raster is used when an
# additional mask polygon was selected.
if mask_polygon is None:
    relative_intactness_name = f'intactness__{num_quantiles}_quantiles'
    percentage_array = gdal.Open(percentage_path).ReadAsArray()
else:
    relative_intactness_name = f'intactness__{mask_polygon[:-5]}_{num_quantiles}_quantiles'
    percentage_array = gdal.Open(percentage_masked_path).ReadAsArray()
relative_intactness_path = join(intactness_dir, f'{relative_intactness_name}.tif')
# NOTE(review): object dtype is kept from the original implementation; confirm
# export_array_as_tif converts it to a numeric type.
relative_intactness_array = np.empty_like(percentage_array, dtype=object)

# Identify invalid (nodatavalue) elements before changing any values, so a
# positive 'nodatavalue' cannot be mistaken for a positive percentage
invalid_elements = percentage_array == nodatavalue

# Set all valid values above 0 to 0: only negative percentage changes count as
# a loss of intactness
percentage_array[(percentage_array > 0) & ~invalid_elements] = 0

# Separate valid elements
valid_elements = percentage_array[~invalid_elements]

# Calculate the inner quantile boundaries for valid elements
quantiles = np.percentile(valid_elements, np.linspace(0, 100, num_quantiles + 1)[1:-1]) if len(valid_elements) > 0 else []

# Lower and upper bound of each category (1 to num_quantiles). The first
# category is open towards -inf and the last towards +inf. Category 1 holds the
# most negative percentage changes.
ranges_data = {'Lower_Bound': [], 'Upper_Bound': []}
for i in range(1, num_quantiles + 1):
    lower_bound = quantiles[i-2] if i > 1 and len(quantiles) >= i-1 else float('-inf')
    upper_bound = quantiles[i-1] if len(quantiles) >= i else float('inf')
    ranges_data['Lower_Bound'].append(lower_bound)
    ranges_data['Upper_Bound'].append(upper_bound)
    relative_intactness_array[(percentage_array > lower_bound) & (percentage_array <= upper_bound)] = i

# Restore nodata once, after all categories have been assigned
relative_intactness_array[invalid_elements] = nodatavalue
export_array_as_tif(relative_intactness_array, relative_intactness_path)

# Save the category bounds to CSV
relative_intactness_df = pd.DataFrame(ranges_data)
relative_intactness_csv_path = join(intactness_dir, f'{relative_intactness_name}.csv')
relative_intactness_df.to_csv(relative_intactness_csv_path, index=False)

# Generate and save histogram of the valid percentage values as .png
histogram_path = join(intactness_dir, f'{relative_intactness_name}.png')
plt.figure()
plt.hist(valid_elements.flatten(), bins='auto')
plt.title(f'{relative_intactness_name} Histogram')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.savefig(histogram_path)
plt.show()
plt.close()

# Disconnect runtime

In [None]:
# Useful for stopping background execution: releases the Colab runtime
# (google.colab.runtime) once every cell above has finished.
runtime.unassign()