<a href="https://colab.research.google.com/github/joekelly211/masfi/blob/main/6_scenarios.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports, directories and global functions

In [None]:
# Define base directory
# (every project folder defined later in this notebook is joined onto this path)
base_dir = "/gdrive/Shareddrives/masfi"

# Mount Google Drive and set base directory
from google.colab import drive
import os
import sys
# force_remount=True asks Colab to mount again even if '/gdrive' is already mounted
drive.mount('/gdrive', force_remount=True)
# Add the resolved base directory to the import search path, at most once
_path_to_add = os.path.realpath(base_dir)
if _path_to_add not in sys.path:
    sys.path.append(_path_to_add)

In [None]:
# Capture outputs
%%capture
# Installs and upgrades
!pip install astropy
!pip install geopandas
!pip install rasterio
!pip install xgboost --upgrade
!apt-get install -y gdal-bin

In [None]:
# Reload imports, replacing those in the cache
%reload_ext autoreload
%autoreload 2
# Imports
from astropy.convolution import convolve, Gaussian2DKernel
import geopandas as gpd
from google.colab import runtime
import json
from os import makedirs
from os.path import join, exists
from osgeo import gdal
import ipywidgets as widgets
import joblib
import numpy as np
import pandas as pd
from pathlib import Path
import psutil
import rasterio
from rasterio.features import rasterize
import re
from scipy import ndimage
import shutil
from shutil import copyfile
from skranger.ensemble import RangerForestRegressor
import tensorflow as tf
import xgboost as xgb

In [None]:
# Define directories
# All paths are sub-folders of base_dir; only scenarios_dir is created by this notebook,
# the others are expected to exist already.
areas_dir = join(base_dir, "1_areas")
polygons_dir = join(areas_dir, "polygons")
masks_dir = join(areas_dir, "masks")
predictor_dir = join(base_dir, "3_predictors")
predictor_resampled_dir = join(predictor_dir, "resampled")
predictor_binary_dir = join(predictor_dir, "binary")
predictor_final_dir = join(predictor_dir, "final")
models_dir = join(base_dir, "5_models")
scenarios_dir = join(base_dir, "6_scenarios")

# Create directories
# exist_ok=True makes re-running the cell harmless
makedirs(scenarios_dir, exist_ok=True)

In [None]:
# Global function: export an array as a .tif
template_tif_path = join(areas_dir, "template.tif")
nodatavalue = -1111111
compress = True
def export_array_as_tif(input_array, output_tif, template=template_tif_path, nodatavalue=nodatavalue, compress=compress):
  """Write an array to a single-band Float32 GeoTIFF georeferenced like a template raster.

  Args:
    input_array: Array written to band 1 of the output. It is assumed to have
      the same dimensions as band 1 of the template (not checked here).
    output_tif: Path of the GeoTIFF to create.
    template: Path of the raster whose size, geotransform and projection are
      copied to the output.
    nodatavalue: Value registered as 'nodata' on the output band.
    compress: If truthy, the file is created with DEFLATE compression
      (PREDICTOR=2, ZLEVEL=9); any falsy value creates an uncompressed file.

  Raises:
    RuntimeError: If the template or the output dataset cannot be opened/created.
  """
  template_dataset = gdal.Open(template)
  if template_dataset is None:
    raise RuntimeError(f"Could not open template raster: {template}")
  template_band = template_dataset.GetRasterBand(1)
  template_dimensions, template_projection = template_dataset.GetGeoTransform(), template_dataset.GetProjection()
  # A single if/else guarantees the output dataset is always bound, including
  # for falsy values such as None that are not equal to False.
  creation_options = ["COMPRESS=DEFLATE","PREDICTOR=2","ZLEVEL=9"] if compress else []
  output_dataset = gdal.GetDriverByName("GTiff").Create(output_tif, template_band.XSize, template_band.YSize, bands=1, eType=gdal.GDT_Float32,
                                                options=creation_options)
  if output_dataset is None:
    raise RuntimeError(f"Could not create output raster: {output_tif}")
  output_band = output_dataset.GetRasterBand(1)
  output_band.WriteArray(input_array)
  output_band.SetNoDataValue(nodatavalue)
  output_dataset.SetGeoTransform(template_dimensions)
  output_dataset.SetProjection(template_projection)
  # Flush and release the GDAL handles explicitly so the file is complete on
  # disk as soon as the function returns, rather than relying on garbage collection.
  output_dataset.FlushCache()
  output_band = None
  output_dataset = None
  template_band = None
  template_dataset = None

# Burn a polygon to raster
def burn_polygon_to_raster(raster, polygon, fixed=True, fixed_value=1, column_name=None, all_touched=True):
  """Burn the features of a vector file into band 1 of an existing raster, in place.

  Args:
    raster: Path of the raster to modify (opened in 'r+' mode).
    polygon: Path of the vector file read with geopandas.
    fixed: If True every feature is burned with `fixed_value`; otherwise the
      value is taken per feature from `column_name`.
    fixed_value: Burn value used when `fixed` is True.
    column_name: Attribute column holding the burn values when `fixed` is
      False. Defaults to the first column of the vector file.
    all_touched: Passed to rasterio's `rasterize`; if True all pixels touched
      by a geometry are burned.
  """
  gdf = gpd.read_file(polygon)
  if fixed:
    burn_values = [fixed_value] * len(gdf)
  else:
    if column_name is None:
      column_name = gdf.columns[0]
    # Pair each geometry with the value from its own row. Looking the value up
    # by geometry equality is quadratic and returns the first match, which is
    # wrong whenever two features share the same geometry.
    burn_values = gdf[column_name].tolist()
  with rasterio.open(raster, 'r+') as src:
    array = src.read(1)
    transform = src.transform
    # Features are burned one at a time in file order, so later features
    # overwrite earlier ones where they overlap.
    for geom, burn_value in zip(gdf.geometry, burn_values):
      rasterize([(geom, burn_value)], out=array, transform=transform,
          all_touched=all_touched, dtype=src.meta['dtype'], out_shape=src.shape)
    src.write(array, 1)

# Select model

In [None]:
# Select a model
# Prints a ready-to-paste 'selected_model = "..."' line for every directory
# below models_dir that contains a 'model.json' file.
model_exists = False
for subdir, dirs, files in os.walk(models_dir):
  if 'model.json' not in files:
    continue
  # Path of the model directory relative to models_dir
  model_name = subdir.split(f"{models_dir}/",1)[1]
  print(f'selected_model = "{model_name}"')
  model_exists = True
if not model_exists:
  print("No model exists.")

In [None]:
# Name of the model to use (one of the 'selected_model = ...' lines printed by the previous cell)
selected_model = "agbd_240718_164421"
categorise_variate = False # If the variate was categorised in 5_models

# Define model directories
selected_model_dir = join(models_dir,selected_model)
selected_model_json = join(selected_model_dir, "model.json")
# NOTE: despite the '_dir' suffix this is the path of a .json file
selected_model_descr_dir = join(selected_model_dir, "model_description.json")
selected_model_dataset_path = join(selected_model_dir, f"{selected_model}.pkl")
# NOTE(review): read_pickle can execute arbitrary code; only load files from a trusted source
selected_model_dataset = pd.read_pickle(selected_model_dataset_path)

# Read description for model dataset attributes
with open(join(selected_model_dir,"model_dataset_description.json")) as model_dataset_description_json:
  model_dataset_description = json.load(model_dataset_description_json)
# Unpack every attribute of the description into a notebook-level variable
model_dataset_name = model_dataset_description["model_dataset_name"]
number_of_columns = model_dataset_description["number_of_columns"]
number_of_rows = model_dataset_description["number_of_rows"]
id_column = model_dataset_description["id_column"]
selected_variate = model_dataset_description["selected_variate"]
uncertainty = model_dataset_description["uncertainty"]
covariates_renamed = model_dataset_description["covariates_renamed"]
covariates_categorised = model_dataset_description["covariates_categorised"]
selected_predictors = model_dataset_description["selected_predictors"]
categorical_columns = model_dataset_description["categorical_columns"]
descriptive_parameters = model_dataset_description["descriptive_parameters"]
filter_parameter = model_dataset_description["filter_parameter"]
filter_values_to_include = model_dataset_description["filter_values_to_include"]
sample_imported_dataset = model_dataset_description["sample_imported_dataset"]
sample_imported_dataset_by_percent = model_dataset_description["sample_imported_dataset_by_percent"]
sample_imported_dataset_value = model_dataset_description["sample_imported_dataset_value"]

# Reload hyperparameters
with open(selected_model_descr_dir) as model_description_json:
  model_description = json.load(model_description_json)
# The hyperparameters are stored as a string and turned back into a Python object here.
# NOTE(review): eval executes whatever the file contains; if the stored string is a plain
# literal, ast.literal_eval would be a safer replacement - confirm the stored format first.
final_hyperparameters = eval(model_description["hyperparameters"])

# Remove early stopping and replace with mean n_estimators
if "early_stopping_rounds" in final_hyperparameters:
  final_hyperparameters = {k:v for k, v in final_hyperparameters.items() if k != "early_stopping_rounds"}
  final_hyperparameters["n_estimators"] = round(model_description["n_estimators mean"])

# Create scenarios model directory
scenarios_model_dir = join(scenarios_dir, selected_model)
makedirs(scenarios_model_dir, exist_ok=True)

# Copy model_dataset_description.json
# (re-serialised from the loaded dictionary rather than copied as a file)
with open(join(scenarios_model_dir, "model_dataset_description.json"), "w") as file:
  file.write(json.dumps(model_dataset_description))

# Define model
# The regressor is built from the stored hyperparameters, then the saved model is loaded into it
XGBPredictor = xgb.XGBRegressor(**final_hyperparameters)
XGBPredictor.load_model(fname=selected_model_json)

# Avoids issues using dataframe from CPU
xgb.set_config(verbosity=0, use_rmm=True)

# Select scenario area

In [None]:
# Select a scenario area
# Every entry of the model's scenario directory that is not a .csv or .json file
# is offered as a ready-to-paste 'selected_scenario_area = "..."' line.
scenario_area_exists = False
for subdir in os.listdir(scenarios_model_dir):
  if subdir.endswith(('.csv', '.json')):
    continue
  print(f'selected_scenario_area = "{subdir}"')
  scenario_area_exists = True
if not scenario_area_exists:
  print(f"Create a scenario area directory in {scenarios_model_dir}")

In [None]:
# Name of the scenario area (one of the names printed by the previous cell, or a new one to create)
selected_scenario_area = "terengganu"

model_scenario_override = 2022 # set if cannot be automatically determined from model predictors

# Name stems of the predictors that exist once per year (the year is the last 4 characters of the name)
yearly_predictors = ["forest_with_edge_effects", "disturbance_with_edge_effects"]

# Define scenario area directory
scenario_area_dir = join(scenarios_model_dir,selected_scenario_area)
makedirs(scenario_area_dir, exist_ok=True)

# Create subdirectories
predictors_dir = join(scenario_area_dir, "predictors")
tile_templates_dir = join(scenario_area_dir, 'tile_templates')
tile_predictors_dir = join(scenario_area_dir, "tile_predictors")
tile_predictor_stacks_dir = join(scenario_area_dir, "tile_predictor_stacks")
tile_prediction_cache_dir = join(scenario_area_dir,"tile_prediction_cache")
scenario_predictions_unmasked_dir = join(scenario_area_dir,"scenario_predictions_unmasked")
scenario_predictions_dir = join(scenario_area_dir, "scenario_predictions")

makedirs(predictors_dir, exist_ok=True)
makedirs(tile_templates_dir, exist_ok=True)
makedirs(tile_predictors_dir, exist_ok=True)
makedirs(tile_predictor_stacks_dir, exist_ok=True)
makedirs(tile_prediction_cache_dir, exist_ok=True)
makedirs(scenario_predictions_unmasked_dir, exist_ok=True)
makedirs(scenario_predictions_dir, exist_ok=True)

# Remove the 'pre_' prefix from each predictor
model_predictors = sorted([predictor[4:] for predictor in selected_predictors])

# Create a list of predictor years from the model's predictors
model_predictor_years = []
for predictor in model_predictors:
  for yearly_predictor in yearly_predictors:
    if yearly_predictor in predictor:
      model_predictor_years.append(int(predictor[-4:]))

# Determine the model scenario from the maximum year
# Values from the most recent predictor year (e.g. 2022) will be applied to the second most recent (e.g. 2021) as a proxy at the predictor stack stage
if model_scenario_override is not None:
  model_scenario = model_scenario_override
elif model_predictor_years:
  model_scenario = max(model_predictor_years) + 1
else:
  # Without any yearly predictor the scenario year cannot be derived; fail with an explicit message
  raise ValueError("No yearly predictors were found in the model predictors; set model_scenario_override.")
model_scenario_filename = f"{model_scenario}.csv"
# NOTE: despite the '_dir' suffix this is the path of the scenario's .csv predictor list
model_scenario_dir = join(scenarios_model_dir,model_scenario_filename)
print(f"The maximum year used in the model is {model_scenario}, which has been created as the first scenario.\n")
print(f"The {model_scenario} scenario predictor list has been saved to:\n {model_scenario_dir}\n")
print(f"Ensure all predictors in this list, as well as for any additional scenarios, are uploaded to:\n{predictors_dir}")

# Save the model scenario predictors as a .csv
pd.DataFrame(model_predictors).to_csv(model_scenario_dir, index=False)

# Copy predictor from the final predictors directory
# The destination is listed once up front instead of once per file, which avoids
# a directory listing on the mounted drive for every predictor.
existing_predictors = set(os.listdir(predictors_dir))
for predictor in os.listdir(predictor_final_dir):
  if predictor not in existing_predictors:
    predictor_original_path = join(predictor_final_dir, predictor)
    predictor_copy_path = join(predictors_dir, predictor)
    copyfile(predictor_original_path, predictor_copy_path)
print(f"All predictors present in the following directory have already been copied over: {predictor_final_dir}")

# Define historic scenarios

In [None]:
# Select constant predictors (which are the same in all scenarios)
print("constant_predictors = [")
for predictor in model_predictors:
  print(f'  "{predictor}",')
print("]")

In [None]:
# Predictors that are identical in every scenario. Entries left commented out are
# treated as scenario-specific: the following cells compute the scenario predictors
# as the model predictors minus this list.
constant_predictors = [
  "coast_proximity_km",
  # "disturbance_with_edge_effects_2004",
  # "disturbance_with_edge_effects_2005",
  # "disturbance_with_edge_effects_2006",
  # "disturbance_with_edge_effects_2007",
  # "disturbance_with_edge_effects_2008",
  # "disturbance_with_edge_effects_2009",
  # "disturbance_with_edge_effects_2010",
  # "disturbance_with_edge_effects_2011",
  # "disturbance_with_edge_effects_2012",
  # "disturbance_with_edge_effects_2013",
  # "disturbance_with_edge_effects_2014",
  # "disturbance_with_edge_effects_2015",
  # "disturbance_with_edge_effects_2016",
  # "disturbance_with_edge_effects_2017",
  # "disturbance_with_edge_effects_2018",
  # "disturbance_with_edge_effects_2019",
  # "disturbance_with_edge_effects_2020",
  # "disturbance_with_edge_effects_2021",
  # "forest_with_edge_effects_2005",
  # "forest_with_edge_effects_2006",
  # "forest_with_edge_effects_2007",
  # "forest_with_edge_effects_2008",
  # "forest_with_edge_effects_2009",
  # "forest_with_edge_effects_2010",
  # "forest_with_edge_effects_2011",
  # "forest_with_edge_effects_2012",
  # "forest_with_edge_effects_2013",
  # "forest_with_edge_effects_2014",
  # "forest_with_edge_effects_2015",
  # "forest_with_edge_effects_2016",
  # "forest_with_edge_effects_2017",
  # "forest_with_edge_effects_2018",
  # "forest_with_edge_effects_2019",
  # "forest_with_edge_effects_2020",
  # "forest_with_edge_effects_2021",
  "latitude",
  "longitude",
  # "pa_taman_negara_ais",
  "topo_cor_smooth_aspect_cosine",
  "topo_cor_smooth_aspect_sine",
  "topo_cor_smooth_circular_variance_aspect_03",
  "topo_cor_smooth_circular_variance_aspect_07",
  "topo_cor_smooth_circular_variance_aspect_11",
  "topo_cor_smooth_deviation_mean_elevation_03",
  "topo_cor_smooth_deviation_mean_elevation_07",
  "topo_cor_smooth_deviation_mean_elevation_11",
  "topo_cor_smooth_eastness",
  "topo_cor_smooth_elevation",
  "topo_cor_smooth_northness",
  "topo_cor_smooth_profile_curvature",
  "topo_cor_smooth_roughness_03",
  "topo_cor_smooth_roughness_07",
  "topo_cor_smooth_roughness_11",
  "topo_cor_smooth_slope",
  "topo_cor_smooth_stream_power_index_log10",
  "topo_cor_smooth_surface_area_ratio",
  "topo_cor_smooth_tangential_curvature",
  "topo_cor_smooth_topographic_position_index_03",
  "topo_cor_smooth_topographic_position_index_07",
  "topo_cor_smooth_topographic_position_index_11",
  "topo_cor_smooth_topographic_ruggedness_index",
  "topo_cor_smooth_topographic_wetness_index",
  "topo_cor_unsmooth_aspect_cosine",
  "topo_cor_unsmooth_aspect_sine",
  "topo_cor_unsmooth_circular_variance_aspect_03",
  "topo_cor_unsmooth_circular_variance_aspect_07",
  "topo_cor_unsmooth_circular_variance_aspect_11",
  "topo_cor_unsmooth_deviation_mean_elevation_03",
  "topo_cor_unsmooth_deviation_mean_elevation_07",
  "topo_cor_unsmooth_deviation_mean_elevation_11",
  "topo_cor_unsmooth_eastness",
  "topo_cor_unsmooth_elevation",
  "topo_cor_unsmooth_northness",
  "topo_cor_unsmooth_profile_curvature",
  "topo_cor_unsmooth_roughness_03",
  "topo_cor_unsmooth_roughness_07",
  "topo_cor_unsmooth_roughness_11",
  "topo_cor_unsmooth_slope",
  "topo_cor_unsmooth_stream_power_index_log10",
  "topo_cor_unsmooth_surface_area_ratio",
  "topo_cor_unsmooth_tangential_curvature",
  "topo_cor_unsmooth_topographic_position_index_03",
  "topo_cor_unsmooth_topographic_position_index_07",
  "topo_cor_unsmooth_topographic_position_index_11",
  "topo_cor_unsmooth_topographic_ruggedness_index",
  "topo_cor_unsmooth_topographic_wetness_index",
]

In [None]:
# Historic scenarios
# For every predictable historic year, write a .csv predictor list in which each
# yearly predictor of the model is shifted back by the same number of years.

# Set the minimum year that all yearly predictors are available for:
first_historic_predictor_year = 1990
# Set the maximum year that all yearly predictors are available for:
last_historic_predictor_year = 2022

# Calculate the range of scenario years and minimum historic scenario year
model_scenario_year_range = max(model_predictor_years) - (min(model_predictor_years) -1)
minimum_historic_scenario = first_historic_predictor_year + model_scenario_year_range
print(f"The minimum historic scenario year that can be predicted is {minimum_historic_scenario}.")
print(f"The maximum historic scenario year that can be predicted is {last_historic_predictor_year}.")

# Set scenario predictors as all non-constant predictors
scenario_predictors = sorted(set(model_predictors) - set(constant_predictors))

# Create predictor lists for all historic scenario years
for historic_scenario in range(minimum_historic_scenario, last_historic_predictor_year+1):
  year_difference = model_scenario - historic_scenario
  historic_scenario_predictors = []
  for scenario_predictor in scenario_predictors:
    try:
      predictor_year = int(scenario_predictor[-4:])
    except ValueError:
      # The name does not end in a 4-digit year, so it is kept unchanged.
      # Only the failed year parse is caught, so unrelated errors are no longer hidden.
      historic_scenario_predictors.append(scenario_predictor)
    else:
      year_change = predictor_year - year_difference
      historic_scenario_predictor = scenario_predictor[:-4] + str(year_change)
      historic_scenario_predictors.append(historic_scenario_predictor)
  # Compile historic predictors and save as a .csv
  historic_predictors = sorted(historic_scenario_predictors + constant_predictors)
  historic_scenario_filename = f"{historic_scenario}.csv"
  historic_scenario_dir = join(scenarios_model_dir,historic_scenario_filename)
  pd.DataFrame(historic_predictors).to_csv(historic_scenario_dir, index=False)

print(f"Lists of predictors for all historic scenarios have been exported to {scenarios_model_dir}/.")

# Further scenarios (optional)

In [None]:
# Alternate 'no deforestation' (nodef) and 'no deforestation and no disturbance' (nodist) scenarios.
# These require 'forest_xxxx' and 'disturbance_xxxx' yearly predictors

define_alternate = True

# Years to predict for the 'no deforestation' scenario, i.e. no further loss in the 'forest' predictors
# and the 'no disturbance' scenario, additionally with no further disturbance. Both assume no reforestation.
alternate_prediction_years = [
                    2027,
                    2032,
                    2037
                    ]

if define_alternate:
  # Years for which alternate predictor rasters are generated: from the last historic
  # predictor year (or later, if the earliest prediction needs nothing before that)
  # up to the latest prediction year.
  alternate_predictor_range = range(max(min(alternate_prediction_years)-model_scenario_year_range,last_historic_predictor_year), max(alternate_prediction_years)+1)

  # Disturbance cycle years for the 'no disturbance' scenario, e.g. expected interval between selective logging of forest.
  disturbance_cycle = 30

  # Set the forest extents based on the last historic predictor year
  alternate_forest_base = join(predictors_dir, f"forest_with_edge_effects_{last_historic_predictor_year}.tif")
  print(f"The starting year for the alternate scenarios is {last_historic_predictor_year}.")

  # Determine the minimum disturbance value, assuming it is present in the first historic scenario year
  example_disturbance = join(predictors_dir, f"disturbance_with_edge_effects_{first_historic_predictor_year}.tif")
  example_disturbance_array = gdal.Open(example_disturbance).ReadAsArray()
  minimum_disturbance_value = example_disturbance_array.min()
  print(f"The minimum disturbance value is {minimum_disturbance_value}, which will be used to create the 'no disturbance' predictors.")
  # Create a minimum disturbance array to export for the 'no disturbance' predictor years
  # (a constant raster: every pixel holds the minimum disturbance value)
  minimum_disturbance_array = np.full_like(example_disturbance_array, minimum_disturbance_value)

  for prediction_year in alternate_predictor_range:
    # Create alternate scenario predictors for every year until the maximum prediction year
    # Copy the forest predictor for the last historic predictor year
    forest_nodef = join(predictors_dir, f"forest_with_edge_effects_{prediction_year}_nodef.tif")
    forest_nodist = join(predictors_dir, f"forest_with_edge_effects_{prediction_year}_nodist.tif")
    if not exists(forest_nodef): shutil.copyfile(alternate_forest_base, forest_nodef)
    if not exists(forest_nodist): shutil.copyfile(alternate_forest_base, forest_nodist)
    # Determine the 'no deforestation' disturbance predictor to copy based on the disturbance cycle
    disturbance_year_to_copy = prediction_year - disturbance_cycle
    disturbance_base = join(predictors_dir, f"disturbance_with_edge_effects_{disturbance_year_to_copy}.tif")
    disturbance_nodef = join(predictors_dir, f"disturbance_with_edge_effects_{prediction_year}_nodef.tif")
    if not exists(disturbance_nodef): shutil.copyfile(disturbance_base, disturbance_nodef)
    # Export a 'no disturbance' predictor for every year up to the maximum prediction year
    disturbance_nodist = join(predictors_dir, f"disturbance_with_edge_effects_{prediction_year}_nodist.tif")
    if not exists(disturbance_nodist): export_array_as_tif(minimum_disturbance_array, disturbance_nodist, template = example_disturbance)

  # Create predictor lists for all alternate scenario years
  for prediction_year in alternate_prediction_years:
    year_difference = model_scenario - prediction_year
    nodef_scenario_predictors = []
    nodist_scenario_predictors = []
    for scenario_predictor in scenario_predictors:
      try:
        predictor_year = int(scenario_predictor[-4:])
      except ValueError:
        # The name does not end in a 4-digit year, so it is kept unchanged in both lists.
        # Only the failed year parse is caught, so unrelated errors are no longer hidden.
        nodef_scenario_predictors.append(scenario_predictor)
        nodist_scenario_predictors.append(scenario_predictor)
        continue
      year_change = predictor_year - year_difference
      if year_change > last_historic_predictor_year:
        # Years after the last historic year use the generated alternate rasters
        nodef_scenario_predictor = scenario_predictor[:-4] + str(year_change) + "_nodef"
        nodist_scenario_predictor = scenario_predictor[:-4] + str(year_change) + "_nodist"
      else:
        # Years with historic data use the ordinary yearly predictor
        nodef_scenario_predictor = scenario_predictor[:-4] + str(year_change)
        nodist_scenario_predictor = scenario_predictor[:-4] + str(year_change)
      nodef_scenario_predictors.append(nodef_scenario_predictor)
      nodist_scenario_predictors.append(nodist_scenario_predictor)
    # Compile 'no deforestation' predictors and save as a .csv
    nodef_predictors = sorted(nodef_scenario_predictors + constant_predictors)
    nodef_scenario_filename = f"{prediction_year}_nodef.csv"
    nodef_scenario_dir = join(scenarios_model_dir, nodef_scenario_filename)
    pd.DataFrame(nodef_predictors).to_csv(nodef_scenario_dir, index=False)
    # Compile 'no deforestation and no disturbance' predictors and save as a .csv
    nodist_predictors = sorted(nodist_scenario_predictors + constant_predictors)
    nodist_scenario_filename = f"{prediction_year}_nodist.csv"
    nodist_scenario_dir = join(scenarios_model_dir, nodist_scenario_filename)
    pd.DataFrame(nodist_predictors).to_csv(nodist_scenario_dir, index=False)

  print(f"Predictor lists for all alternate scenarios have been exported to {scenarios_model_dir}/.")

In [None]:
# Alternate 'no historic degradation' and 'no historic deforestation and degradation' scenarios,
# i.e. none between first and last predictor years
define_historic_alternate = True

if define_historic_alternate:
  # Minimum disturbance value, read from the first historic year's disturbance raster
  # (assumed to contain that minimum)
  example_disturbance = join(predictors_dir, f"disturbance_with_edge_effects_{first_historic_predictor_year}.tif")
  example_disturbance_array = gdal.Open(example_disturbance).ReadAsArray()
  minimum_disturbance_value = example_disturbance_array.min()
  print(f"The minimum disturbance value is {minimum_disturbance_value}, which will be used to create the 'no disturbance' predictors.")
  # Constant raster holding the minimum disturbance value in every pixel
  minimum_disturbance_array = np.full_like(example_disturbance_array, minimum_disturbance_value)
  # One shared 'no disturbance' raster serves both historic alternate scenarios
  disturbance_nodist_historic_name = f"disturbance_with_edge_effects_{last_historic_predictor_year}_nodist_historic"
  disturbance_nodist_historic_path = join(predictors_dir, f"{disturbance_nodist_historic_name}.tif")
  if not exists(disturbance_nodist_historic_path):
    export_array_as_tif(minimum_disturbance_array, disturbance_nodist_historic_path, template = example_disturbance)

  # 'No historic deforestation' keeps the forest extent of the first historic predictor year
  forest_nodef_base = join(predictors_dir, f"forest_with_edge_effects_{first_historic_predictor_year}.tif")
  forest_nodef_array = gdal.Open(forest_nodef_base).ReadAsArray()
  print(f"The baseline year for 'no historic deforestation or degradation' is {first_historic_predictor_year}.")
  # That extent is re-exported under the last historic year's name
  forest_nodef_historic_name = f"forest_with_edge_effects_{last_historic_predictor_year}_nodef_historic"
  forest_nodef_historic_path = join(predictors_dir, f"{forest_nodef_historic_name}.tif")
  if not exists(forest_nodef_historic_path):
    export_array_as_tif(forest_nodef_array, forest_nodef_historic_path, template = forest_nodef_base)

  # 'No historic degradation': every disturbance predictor is swapped for the
  # 'no disturbance' raster, everything else is kept
  nodeg_historic_scenario_predictors = [
    disturbance_nodist_historic_name if "disturbance_with_edge_effects" in scenario_predictor else scenario_predictor
    for scenario_predictor in scenario_predictors
  ]
  # Add the constant predictors, sort and save as a .csv
  nodeg_historic_scenario_predictors = sorted(nodeg_historic_scenario_predictors + constant_predictors)
  nodeg_historic_scenario_filename = f"{last_historic_predictor_year}_nodeg_historic.csv"
  nodeg_historic_scenario_path = join(scenarios_model_dir, nodeg_historic_scenario_filename)
  pd.DataFrame(nodeg_historic_scenario_predictors).to_csv(nodeg_historic_scenario_path, index=False)

  # 'No historic deforestation or degradation': disturbance predictors are swapped as
  # above and forest predictors are swapped for the baseline forest extent
  nodef_historic_scenario_predictors = [
    disturbance_nodist_historic_name if "disturbance_with_edge_effects" in scenario_predictor
    else forest_nodef_historic_name if "forest_with_edge_effects" in scenario_predictor
    else scenario_predictor
    for scenario_predictor in scenario_predictors
  ]
  # Add the constant predictors, sort and save as a .csv
  nodef_historic_scenario_predictors = sorted(nodef_historic_scenario_predictors + constant_predictors)
  nodef_historic_scenario_filename = f"{last_historic_predictor_year}_nodef_historic.csv"
  nodef_historic_scenario_path = join(scenarios_model_dir, nodef_historic_scenario_filename)
  pd.DataFrame(nodef_historic_scenario_predictors).to_csv(nodef_historic_scenario_path, index=False)

  print(f"Predictor list for the 'no historic degradation' and 'no historic deforestation or degradation' scenarios exported to {scenarios_model_dir}/.")

In [None]:
# Complete restoration (with current forest extent) and complete recovery (maximum forest extent) scenarios
# Complete recovery requires the 'forest_with_edge_effects_9999_comrec.tif' predictor, which assumes all land
# recovers to forest. It is generated below when the resampled TMF Transition Map Subtypes raster is available.

define_restoration_recovery = True

# 'Complete recovery' management proxy, usually a protected area, which will be expanded to the scenario area
comrest_comrec_proxy = 'pa_taman_negara_ais'

# 'Complete recovery' redundant management areas, which will be removed from the scenario area
comrest_comrec_redundant_areas = [
  # "pa_with_edge_effects_endau_rompin_johor",
  # "pa_with_edge_effects_endau_rompin_pahang",
  # "pa_with_edge_effects_krau",
]

if define_restoration_recovery:

  # Expand the comrec proxy management predictor to the entire scenario area
  # (a constant raster holding the proxy's maximum value in every pixel)
  comrest_comrec_proxy_dir = join(predictors_dir, f"{comrest_comrec_proxy}.tif")
  comrest_comrec_proxy_array = gdal.Open(comrest_comrec_proxy_dir).ReadAsArray()
  comrest_comrec_proxy_max_value = comrest_comrec_proxy_array.max()
  print(f"The maximum 'complete recovory' management area proxy value is {comrest_comrec_proxy_max_value}.")
  comrest_comrec_proxy_max_array = np.full_like(comrest_comrec_proxy_array, comrest_comrec_proxy_max_value)
  comrest_comrec_proxy_max_dir = join(predictors_dir, f"{comrest_comrec_proxy}_9999_comrec.tif")
  if not exists(comrest_comrec_proxy_max_dir): export_array_as_tif(comrest_comrec_proxy_max_array, comrest_comrec_proxy_max_dir, template = comrest_comrec_proxy_dir)

  # Remove the comrec redundant management predictors from the entire study area
  # (a constant raster holding each area's minimum value in every pixel)
  for redundant_area in comrest_comrec_redundant_areas:
    redundant_area_dir = join(predictors_dir, f"{redundant_area}.tif")
    redundant_area_array = gdal.Open(redundant_area_dir).ReadAsArray()
    redundant_area_min_value = redundant_area_array.min()
    print(f"The minimum 'complete recovory' redundant management area ({redundant_area}) value is {redundant_area_min_value}.")
    redundant_area_min_array = np.full_like(redundant_area_array, redundant_area_min_value)
    redundant_area_min_dir = join(predictors_dir, f"{redundant_area}_9999_comrec.tif")
    if not exists(redundant_area_min_dir): export_array_as_tif(redundant_area_min_array, redundant_area_min_dir, redundant_area_dir)

  # Determine the minimum disturbance value, assuming it is present in the first historic scenario year
  example_disturbance = join(predictors_dir, f"disturbance_with_edge_effects_{first_historic_predictor_year}.tif")
  example_disturbance_array = gdal.Open(example_disturbance).ReadAsArray()
  minimum_disturbance_value = example_disturbance_array.min()
  print(f"The minimum disturbance value is {minimum_disturbance_value}, which will be used to create the 'no disturbance' predictors.")
  # Create a minimum disturbance array to export for the 'no disturbance' predictor years
  minimum_disturbance_array = np.full_like(example_disturbance_array, minimum_disturbance_value)
  # Generate 'no disturbance' predictor for restoration and recovery
  disturbance_comrest_comrec_name = "disturbance_with_edge_effects_9999_comrest_comrec"
  disturbance_comrest_comrec_path = join(predictors_dir, f"{disturbance_comrest_comrec_name}.tif")
  if not exists(disturbance_comrest_comrec_path): export_array_as_tif(minimum_disturbance_array, disturbance_comrest_comrec_path, template = example_disturbance)

  # Generate forest predictor for restoration (unchanged from most recent historic year)
  forest_most_recent_path = join(predictors_dir, f"forest_with_edge_effects_{last_historic_predictor_year}.tif")
  forest_most_recent_array = gdal.Open(forest_most_recent_path).ReadAsArray()
  forest_comrest_name = "forest_with_edge_effects_9999_comrest"
  forest_comrest_path = join(predictors_dir, f"{forest_comrest_name}.tif")
  if not exists(forest_comrest_path): export_array_as_tif(forest_most_recent_array, forest_comrest_path, template = forest_most_recent_path)
  # Same value as forest_comrest_name; kept in case other cells refer to this name
  forest_comrest = forest_comrest_name

  def _comrest_comrec_predictor(scenario_predictor, forest_name):
    """Return the restoration/recovery replacement for one non-constant predictor name."""
    if "forest_with_edge_effects" in scenario_predictor:
      return forest_name
    if "disturbance_with_edge_effects" in scenario_predictor:
      return disturbance_comrest_comrec_name
    if scenario_predictor == comrest_comrec_proxy or scenario_predictor in comrest_comrec_redundant_areas:
      return f"{scenario_predictor}_9999_comrec"
    # Any other predictor is kept unchanged (as the other scenario cells do) so the
    # list keeps one entry per model predictor instead of silently dropping it.
    return scenario_predictor

  # Generate 'complete restoration' scenario csv
  comrest_scenario_predictors = [_comrest_comrec_predictor(scenario_predictor, forest_comrest_name)
                                 for scenario_predictor in scenario_predictors]
  # Compile 'complete restoration' predictors and save as a .csv.
  # This is done once after the list is complete (not once per predictor), so the
  # file is also written when there are no scenario predictors.
  comrest_predictors = sorted(comrest_scenario_predictors + constant_predictors)
  comrest_scenario_filename = "9999_comrest.csv"
  comrest_scenario_dir = join(scenarios_model_dir, comrest_scenario_filename)
  pd.DataFrame(comrest_predictors).to_csv(comrest_scenario_dir, index=False)

  # Create 'complete recovery' forest with edge effects (requires a resampled TMF Transition Map Subtypes)
  # The name and path are defined unconditionally so the scenario list below can be
  # built even when the raster is supplied manually instead of generated here.
  tmf_subtypes_filename = "tmf_TransitionMap_Subtypes_b1.tif"
  tmf_subtypes_path = join(predictor_resampled_dir, tmf_subtypes_filename)
  forest_comrec_name = "forest_with_edge_effects_9999_comrec"
  forest_comrec_path = join(predictors_dir, f"{forest_comrec_name}.tif")
  if not exists(forest_comrec_path):
    if exists(tmf_subtypes_path):
      tmf_subtypes_array = gdal.Open(tmf_subtypes_path).ReadAsArray()
      # Set pixels with subtype values 70-79 (treated as water) to 0 and all other pixels to 1
      comrec_forest_array = np.where((tmf_subtypes_array >= 70) & (tmf_subtypes_array <= 79), 0, 1)
      # Set smoothing kernel
      kernel = Gaussian2DKernel(x_stddev=1, y_stddev=1)
      # Set precision
      precision = 2
      # Reclassify for binary differentiation after proximity conversion
      differentiator_array = comrec_forest_array.copy()
      differentiator_array[differentiator_array == 1] = 10
      # Positive proximity
      positive_distances = ndimage.distance_transform_edt(comrec_forest_array == 0) # target pixels
      positive_proximity_array = np.where(positive_distances > 2, 0, positive_distances) # max distance 2
      # Negative proximity
      negative_distances = ndimage.distance_transform_edt(comrec_forest_array == 1) # target pixels
      negative_proximity_array = np.where(negative_distances > 2, 0, negative_distances) # max distance 2
      # Sum proximities and differentiator
      pixel_prox_summed =  differentiator_array + positive_proximity_array + negative_proximity_array
      # Reclassify for better semantic understanding of pixel proximity
      pixel_prox_reclassed = pixel_prox_summed.copy()
      pixel_prox_reclass_table = [(0, 0, -4), (1, 1, -1), (1.4, 1.5, -2), (2, 2, -3), (10, 10, 3), (11, 11, 0), (11.4, 11.5, 1), (12, 12, 2)]
      for min_value, max_value, new_value in pixel_prox_reclass_table:
        pixel_prox_reclassed[(pixel_prox_reclassed >= min_value) & (pixel_prox_reclassed <= max_value)] = new_value
      # Smooth binary array using 2D convolution
      binary_smoothed = convolve(comrec_forest_array, kernel, boundary='extend')
      # Sum pixel proximity and smoothed binary array
      edge_effects_array = np.round(pixel_prox_reclassed + binary_smoothed, precision)
      # Export edge effects predictor
      export_array_as_tif(edge_effects_array, forest_comrec_path)
    else:
      print(f"Warning: {forest_comrec_path} does not exist and could not be generated because {tmf_subtypes_path} is missing.")

  # Generate 'complete recovery' scenario csv
  comrec_scenario_predictors = [_comrest_comrec_predictor(scenario_predictor, forest_comrec_name)
                                for scenario_predictor in scenario_predictors]
  # Compile 'complete recovery' predictors and save as a .csv (once, after the list is complete)
  comrec_predictors = sorted(comrec_scenario_predictors + constant_predictors)
  comrec_scenario_filename = "9999_comrec.csv"
  comrec_scenario_dir = join(scenarios_model_dir, comrec_scenario_filename)
  pd.DataFrame(comrec_predictors).to_csv(comrec_scenario_dir, index=False)

  print(f"Predictor lists for all the complete recovery scenario has been exported to {scenarios_model_dir}/.")

In [None]:
# Force prediction of years for which earlier historic predictors used in the model do not exist.
# This will use comrec forest for yearly predictors <1990, assuming that forest edge was
# > 120 m beyond the edge of the year being predicted, and also assume minimal disturbance.
# Predictions will be increasingly over estimated the further back they are forced, but a
# cut-off of 2007 (15 years ago) should minimise a significant effect on the trend.
# NOTE(review): the cut-off quoted above is 2007 but 'force_to_historic_scenario' is set to 2008 - confirm which is intended.

force_historic = False
force_to_historic_scenario = 2008 # Earliest scenario year to force (inclusive)

if force_historic:
  # Scenario years to force: from the cut-off up to (but excluding) 'minimum_historic_scenario'
  forced_scenario_range = range(force_to_historic_scenario, minimum_historic_scenario)
  forced_historic_scenarios = list(forced_scenario_range)
  # Predictor years that need proxies: up to (but excluding) 'first_historic_predictor_year'
  forced_predictor_range = range(force_to_historic_scenario-model_scenario_year_range, first_historic_predictor_year)
  forced_predictor_years = list(forced_predictor_range)

  # The 'complete recovery' forest predictor is copied as the forest proxy for every forced year
  forced_forest_base = join(predictors_dir, "forest_with_edge_effects_9999_comrec.tif")

  # Determine the minimum disturbance value, assuming it is present in the first historic scenario year
  example_disturbance = join(predictors_dir, f"disturbance_with_edge_effects_{first_historic_predictor_year}.tif")
  example_disturbance_array = gdal.Open(example_disturbance).ReadAsArray()
  minimum_disturbance_value = example_disturbance_array.min()
  print(f"The minimum disturbance value is {minimum_disturbance_value}, which will be used to create the 'no disturbance' predictors.")
  # Create a minimum disturbance array (same shape and dtype as the example) to export for the 'no disturbance' predictor years
  minimum_disturbance_array = np.full_like(example_disturbance_array, minimum_disturbance_value)

  # Create proxy predictors for years out of data range
  for forced_predictor_year in forced_predictor_years:
    # Copy the comrec forest predictor as the forest proxy, unless one already exists
    forest_forced = join(predictors_dir, f"forest_with_edge_effects_{forced_predictor_year}.tif")
    if not exists(forest_forced): shutil.copyfile(forced_forest_base, forest_forced)
    # Export a 'no disturbance' predictor as the disturbance proxy, unless one already exists
    disturbance_forced = join(predictors_dir, f"disturbance_with_edge_effects_{forced_predictor_year}.tif")
    if not exists(disturbance_forced): export_array_as_tif(minimum_disturbance_array, disturbance_forced, template = example_disturbance)
    print(f"Proxy predictors have been generated or already exist for {forced_predictor_year}.")

  # Write one predictor list per forced scenario by shifting every yearly predictor back in time
  for forced_historic_scenario in forced_historic_scenarios:
    forced_year_difference = model_scenario - forced_historic_scenario
    forced_historic_scenario_predictors = []
    for scenario_predictor in scenario_predictors:
      try:
        # Yearly predictors are recognised by their name ending in a 4 character integer
        predictor_year = int(scenario_predictor[-4:])
      except (TypeError, ValueError):
        # No trailing year: keep the predictor unchanged
        forced_historic_scenario_predictors.append(scenario_predictor)
      else:
        forced_year_change = predictor_year - forced_year_difference
        forced_historic_scenario_predictor = scenario_predictor[:-4] + str(forced_year_change)
        forced_historic_scenario_predictors.append(forced_historic_scenario_predictor)
    # Compile forced historic predictors and save as a .csv
    forced_historic_predictors = sorted(forced_historic_scenario_predictors + constant_predictors)
    forced_historic_scenario_filename = f"{forced_historic_scenario}.csv"
    forced_historic_scenario_dir = join(scenarios_model_dir,forced_historic_scenario_filename)
    pd.DataFrame(forced_historic_predictors).to_csv(forced_historic_scenario_dir, index=False)

  print(f"Lists of predictors for all forced historic scenarios have been exported to {scenarios_model_dir}/.")

# Scenario masks

In [None]:
mask_type = 'forest' # Binary predictor filenames must contain this string to be used as a mask
complete_recovery_mask = True
use_gedi_area = False # Instead of project_area.gpkg
minimum_historic_year = 2008 # Masks are only created for binary rasters of this year or later


def _create_inverse_polygon(area_path, inverse_path):
  """Save the part of template.gpkg that lies outside the polygon at 'area_path'.

  Does nothing if 'inverse_path' already exists. The result is used to burn
  'nodata' over every mask pixel that falls outside the area of interest.
  """
  if exists(inverse_path): return
  template_polygon = gpd.read_file(join(polygons_dir, "template.gpkg"))
  area_polygon = gpd.read_file(area_path)
  # Only the first (index-aligned) geometry pair is used
  inverse_polygon = template_polygon['geometry'].difference(area_polygon['geometry']).iloc[0]
  gpd.GeoDataFrame({'geometry': [inverse_polygon]}).to_file(inverse_path, driver="GPKG")


# Create an inverse project area polygon for masking
project_area_path = join(polygons_dir, "project_area.gpkg")
inverse_project_area_path = join(polygons_dir, "project_area_inverse.gpkg")
_create_inverse_polygon(project_area_path, inverse_project_area_path)

# Create an inverse GEDI area polygon for masking. When the GEDI area is not being used
# a missing gedi_area.gpkg is tolerated instead of stopping the whole cell.
gedi_area_path = join(polygons_dir, "gedi_area.gpkg")
inverse_gedi_area_path = join(polygons_dir, "gedi_area_inverse.gpkg")
if use_gedi_area or exists(gedi_area_path):
  _create_inverse_polygon(gedi_area_path, inverse_gedi_area_path)

# Select the inverse polygon used to mask pixels outside the area of interest
if use_gedi_area: inverse_area_path = inverse_gedi_area_path
else: inverse_area_path = inverse_project_area_path

# Loop through binary rasters of the mask type
for binary in os.listdir(predictor_binary_dir):
  # Skip other predictors and any non-raster sidecar files (e.g. '.tif.aux.xml')
  if mask_type not in binary or not binary.endswith('.tif'): continue
  # The year is expected as the last four characters before the extension
  year = binary[-8:-4]
  if not year.isdecimal(): continue
  if int(year) >= minimum_historic_year:
    mask_path = join(masks_dir, f"scenario_mask_{year}.tif")
    if not exists(mask_path):
      binary_array = gdal.Open(binary_path := join(predictor_binary_dir, binary)).ReadAsArray()
      # Binary value 0 becomes 'nodata', everything else becomes 1
      mask_array = np.where(binary_array == 0, nodatavalue, 1)
      export_array_as_tif(mask_array, mask_path)
      # Mask pixels outside the project area
      burn_polygon_to_raster(mask_path, inverse_area_path, fixed_value=nodatavalue, all_touched=False)
      print(f"A mask for {year} has been created.")
    else: print(f"A mask for {year} already exists.")

# Create 'complete recovery' mask, which assumes all land is forest (requires a resampled TMF Transition Map Subtypes)
if complete_recovery_mask:
  tmf_subtypes_filename = "tmf_TransitionMap_Subtypes_b1.tif"
  tmf_subtypes_path = join(predictor_resampled_dir, tmf_subtypes_filename)
  if exists(tmf_subtypes_path):
    comrec_mask_path = join(masks_dir, "scenario_mask_comrec_9999.tif")
    if not exists(comrec_mask_path):
      tmf_subtypes_array = gdal.Open(tmf_subtypes_path).ReadAsArray()
      # Convert all water values (70 - 79) to 'nodata' and non-water values to '1'
      comrec_mask_array = np.where((tmf_subtypes_array >= 70) & (tmf_subtypes_array <= 79), nodatavalue, 1)
      export_array_as_tif(comrec_mask_array, comrec_mask_path)
      # Mask pixels outside the project area
      burn_polygon_to_raster(comrec_mask_path, inverse_area_path, fixed_value=nodatavalue, all_touched=False)
  else: print(f"The resampled TMF Transition Map Subtypes raster is not in the indicated directory: {tmf_subtypes_filename}")

# Predictor verification

In [None]:
# Confirm that every predictor named in each scenario .csv has a matching .tif in the predictors directory
scenario_csv_list = []
all_predictors_exist = True # Set to False as soon as a missing predictor is found
for csv in os.listdir(scenarios_model_dir):
  if not csv.endswith('.csv'): continue
  csv_dir = join(scenarios_model_dir, csv)
  # Predictor names are read from the first column of the .csv
  csv_predictor_list = pd.read_csv(csv_dir).iloc[:,0].tolist()
  csv_predictor_dir_list = [f"{predictors_dir}/{csv_predictor}.tif" for csv_predictor in csv_predictor_list]
  # Report each expected raster that is not on disk
  for predictor in csv_predictor_dir_list:
    if exists(predictor): continue
    all_predictors_exist = False
    print(f"The following predictor is missing:\n{predictor}\n and is required for the scenario '{csv[:-4]}'")

if all_predictors_exist: print("All required predictors are present.")

In [None]:
# Is the scenario area equal to the original template area?
original_template_area = True

# If not, create a new template for the scenario area and upload to:
# '6_scenarios/[model]/[scenario_area]/template.tif'
# Only the directory that is actually selected is evaluated.
scenario_template_dir = join(areas_dir if original_template_area else scenario_area_dir, "template.tif")
print(f"The following is being used as a template to verify scenario predictor dimensions and projections:\n{scenario_template_dir}")


# Open the template and record its geotransform and projection for later comparison
scenario_template = gdal.Open(scenario_template_dir)
scenario_template_dimensions = scenario_template.GetGeoTransform()
scenario_template_projection = scenario_template.GetProjection()

In [None]:
# Verify that every predictor .tif shares the scenario template's grid:
# geotransform (origin and pixel size), raster size in pixels, and projection.
# A matching geotransform alone does not guarantee the same number of rows and columns.
predictor_issue = False
scenario_template_size = (scenario_template.RasterXSize, scenario_template.RasterYSize)
for predictor in os.listdir(predictors_dir):
  if not predictor.endswith('.tif'): continue
  # Named '_path' so that the global 'predictor_dir' (3_predictors directory) is not overwritten
  predictor_path = join(predictors_dir, predictor)
  predictor_open = gdal.Open(predictor_path)
  if predictor_open is None:
    # gdal.Open returns None (rather than raising) when exceptions are not enabled
    print(f"{predictor} could not be opened as a raster.\n")
    predictor_issue = True
    continue
  predictor_dimensions, predictor_projection = predictor_open.GetGeoTransform(), predictor_open.GetProjection()
  predictor_size = (predictor_open.RasterXSize, predictor_open.RasterYSize)
  if predictor_dimensions != scenario_template_dimensions:
    print(f"{predictor} dimensions:\n{predictor_dimensions}\ndo not match the scenario template dimensions:\n{scenario_template_dimensions}\n")
    predictor_issue = True
  if predictor_size != scenario_template_size:
    print(f"{predictor} size in pixels (x, y):\n{predictor_size}\ndoes not match the scenario template size:\n{scenario_template_size}\n")
    predictor_issue = True
  if predictor_projection != scenario_template_projection:
    print(f"{predictor} projection:\n{predictor_projection}\ndoes not match the scenario template projection:\n{scenario_template_projection}\n\n")
    predictor_issue = True
  predictor_open = None # Close the raster

if not predictor_issue: print(f"All predictors in the following directory have the correct dimensions and projection:\n{predictors_dir}")
else: print("Correct and / or resample the predictor(s) in GIS software.")

# Template tiles

In [None]:
# Load the model scenario predictors for tile template creation
model_scenario_predictors = pd.read_csv(model_scenario_dir).iloc[:,0].tolist()
# Build the raster paths from the list loaded above, which is the same kind of list
# the predictor stacks are later built from (one .csv of predictor names per scenario)
model_scenario_predictors_dirs = [f"{predictors_dir}/{predictor}.tif" for predictor in model_scenario_predictors]
# Create a template predictor array from the first predictor
template_base = gdal.Open(model_scenario_predictors_dirs[0])
template_base_array = template_base.ReadAsArray()
template_base_xsize, template_base_ysize = template_base.GetRasterBand(1).XSize, template_base.GetRasterBand(1).YSize
print(f"The template predictor is {template_base_xsize} x {template_base_ysize} pixels.")

# Check existing tile parameters
template_tile_list = [file for file in os.listdir(tile_templates_dir) if file.endswith('.tif') and file.startswith('template_tile')]
n_tiles_exist = len(template_tile_list)

if n_tiles_exist < 1:
  # Reset so that values from a previous run are never compared against new tile parameters
  tile_size_y_rounded_exist, tile_size_y_remainder_exist = None, None
  print("There are currently no template tiles. Run the next section.")
else:
  # Tiles are numbered from 1, so the first and last tiles give the regular and final tile heights
  tile_size_y_rounded_exist = gdal.Open(join(tile_templates_dir,'template_tile_1.tif')).GetRasterBand(1).YSize
  tile_size_y_remainder_exist = gdal.Open(join(tile_templates_dir,f'template_tile_{n_tiles_exist}.tif')).GetRasterBand(1).YSize
  if n_tiles_exist == 1:
    print(f"There is a single 'tile' with a height of {tile_size_y_rounded_exist}.")
    tile_size_y_remainder_exist = 0 # A single tile has no remainder tile
  else: print(f"There are {n_tiles_exist} template tiles, the first {n_tiles_exist-1} having a height of {tile_size_y_rounded_exist} pixels, the last {tile_size_y_remainder_exist} pixels.")

In [None]:
# Large scenario areas and / or numbers of predictors may be too much for the available memory.
# This section defines how to split predictions into tiles that can then be merged.
override_n_tiles = True # Useful if the tile number has already been tested.
n_tiles_override = 1
# Tile height for the override, derived from the actual template height rather than a fixed pixel count
tile_size_y_rounded_override = int(np.ceil(template_base_ysize/n_tiles_override))

memory_utilisation = 0.5 # Ideally set to 0.5 to avoid crashes
number_of_processes = 1

# Google Colab TPU is (at present) best cost / price ratio, and faster than the standard GPU.
# Premium GPU is about 2.5x faster than TPU, but disproportionately expensive.
gpu_premium = True

assert 0 < memory_utilisation <= 1, "Set memory_utilisation to a value between 0 and 1"
print(f'There are {number_of_processes} CPUs available for parallel processing.')

# Check for GPU
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0': print('GPU device not found')
else: print(f"Found GPU at: {device_name}")
# NOTE(review): this reduction is applied whether or not a GPU was found - confirm that is intended
memory_utilisation = memory_utilisation / 4
if gpu_premium:
  memory_utilisation = memory_utilisation * 8 # More memory available if premium
  print("Using premium GPU settings. Make sure runtime is actually set to GPU Premium, otherwise set gpu_premium to False.")

# Calculate total size of predictor stack
predictor_stack_size = template_base_array.size * len(model_scenario_predictors_dirs)

# Calculate memory and the number of tiles required (64 bits = 8 bytes per value)
total_memory_needed = 64 / 8 * predictor_stack_size
print(f'RAM required for each prediction: ~{total_memory_needed/(1024**3):.3f} GB')
print(f'RAM currently available: {psutil.virtual_memory().free / (1024**3):.3f} GB')
n_tiles_temp = int(np.ceil(total_memory_needed / (psutil.virtual_memory().free * memory_utilisation / number_of_processes)))

# Calculate template tile size (split on the y axis only)
tile_size_y_rounded = int(np.ceil(template_base_ysize/n_tiles_temp)) # Round the number of y pixels in each tile
if override_n_tiles:
  tile_size_y_rounded = tile_size_y_rounded_override
  print("n_tiles has been overridden.")
tile_size_y_remainder = template_base_ysize%tile_size_y_rounded # Calculate the remainder for the last tile
# Count the tiles from the same range used to generate them, so the two can never disagree
n_tiles = len(range(0, template_base_ysize, tile_size_y_rounded))

# Check if tiles need to be changed. When the height divides evenly the last existing tile
# has the full tile height (not 0), which must also count as 'unchanged'.
# The comparisons short-circuit, so the '_exist' sizes are only read when the tile count matches.
tiles_unchanged = (
  n_tiles == n_tiles_exist
  and tile_size_y_rounded == tile_size_y_rounded_exist
  and (tile_size_y_remainder_exist == tile_size_y_remainder
       or (tile_size_y_remainder == 0 and tile_size_y_remainder_exist == tile_size_y_rounded))
)

if tiles_unchanged:
  print("No changes to tiles are required.")
else:
  # Clear all tile directories
  for tile in Path(tile_templates_dir).glob("**/*"):
    if tile.is_file(): tile.unlink()
  for tile in Path(tile_predictors_dir).glob("**/*"):
    if tile.is_file(): tile.unlink()
  # Remove each top-level entry once; listing first avoids visiting paths that were already deleted
  for scenario_stack_dir in list(Path(tile_predictor_stacks_dir).iterdir()):
    if scenario_stack_dir.is_dir(): shutil.rmtree(scenario_stack_dir)
    else: scenario_stack_dir.unlink()
  for tile in Path(tile_prediction_cache_dir).glob("**/*"):
    if tile.is_file(): tile.unlink()

  print(f'The prediction template will be divided into {n_tiles} tiles to avoid crashing.')

  # Generate new tile templates by cutting full-width windows from the first predictor
  for tile_number, y_start in enumerate(range(0, template_base_ysize, tile_size_y_rounded), start=1):
    # The last tile only covers the rows that remain
    tile_size_y = min(tile_size_y_rounded, template_base_ysize - y_start)
    template_tile_path = join(tile_templates_dir, f"template_tile_{tile_number}.tif")
    # The GDAL Python API is used instead of a shell command so paths containing spaces are safe
    template_tile = gdal.Translate(template_tile_path, model_scenario_predictors_dirs[0], format="GTiff",
                                   srcWin=[0, y_start, template_base_xsize, tile_size_y])
    if template_tile is None: raise RuntimeError(f"Template tile {tile_number} could not be created: {template_tile_path}")
    template_tile = None # Close the dataset so that it is written to disk

  print("Template tile creation complete.")

# Predictor tiles

In [None]:
# Create predictor tiles.

# Check existing tile parameters
template_tile_list = [file for file in os.listdir(tile_templates_dir) if file.endswith('.tif') and file.startswith('template_tile')]
n_tiles = len(template_tile_list)

assert n_tiles > 0, "There are no template tiles. Run the template tiles section, even if only one is created."
tile_size_y_rounded = gdal.Open(join(tile_templates_dir,'template_tile_1.tif')).GetRasterBand(1).YSize
print(f"There are {n_tiles} template tiles.")

if n_tiles == 1: print("Predictor tile creation skipped. Predictor stack creation will use the original predictors.")
else:
  # Only rasters are tiled; any other file in the directory is ignored
  predictor_filenames = [predictor for predictor in os.listdir(predictors_dir) if predictor.endswith('.tif')]

  # Progress
  n_predictors = len(predictor_filenames)
  predictor_progress_index, predictor_progress_label = 0, widgets.Label(value=f"Predictor progress: 0 / {n_predictors}")
  display(predictor_progress_label)
  tile_progress_index, tile_progress_label = 0, widgets.Label(value=f"Tile progress: 0 / {n_tiles}")
  display(tile_progress_label)

  # Loop through each predictor in the 6_scenarios predictors directory
  for predictor in predictor_filenames:
    # Named '_path' so that the global 'predictor_dir' (3_predictors directory) is not overwritten
    predictor_path = join(predictors_dir, predictor)
    # The predictor is only read and split when at least one of its tiles is missing
    predictor_chunks = None
    # The tile number always advances, so an existing tile never blocks the tiles after it
    for tile_count in range(1, n_tiles + 1):
      predictor_tile_filename = f"{predictor[:-4]}_{tile_count}.tif"
      predictor_tile_path = join(tile_predictors_dir, predictor_tile_filename)
      # If predictor tile does not exist:
      if not exists(predictor_tile_path):
        if predictor_chunks is None:
          predictor_array = gdal.Open(predictor_path).ReadAsArray()
          # Split the predictor array into chunks of rows based on tile size
          predictor_chunks = np.array_split(predictor_array, np.arange(tile_size_y_rounded, len(predictor_array), tile_size_y_rounded))
        template_tile_dir = join(tile_templates_dir, f"template_tile_{tile_count}.tif")
        export_array_as_tif(predictor_chunks[tile_count-1], predictor_tile_path, template_tile_dir, compress = False)
      # Update tile progress
      tile_progress_index += 1
      tile_progress_label.value = f"Tile progress: {tile_progress_index} / {n_tiles}"
    tile_progress_index = 0
    # Update predictor progress
    predictor_progress_index += 1
    predictor_progress_label.value = f"Predictor progress: {predictor_progress_index} / {n_predictors}"

# Predictor stacks

In [None]:
# Create predictor stack arrays for each scenario

# A scenario is available when a .csv predictor list named after it exists
scenarios_list = [csv[:-4] for csv in os.listdir(scenarios_model_dir) if csv.endswith('.csv')]

# Print a ready-to-copy list for selecting the scenarios to generate tiled predictor stacks
print("scenarios_to_stack = [")
for scenario in sorted(scenarios_list): print(f'  "{scenario}",')
print("]")

In [None]:
# Scenarios to stack (copy from the list printed by the previous cell)
scenarios_to_stack = [
  "2008",
  "2009",
  "2010",
  "2011",
  "2012",
  "2013",
  "2014",
  "2015",
  "2016",
  "2017",
  "2018",
  "2019",
  "2020",
  "2021",
  "2022",
  "2027_nodef",
  "2027_nodist",
  "2032_nodef",
  "2032_nodist",
  "2037_nodef",
  "2037_nodist",
  "9999_comrec",
  "9999_comrest",
]

assert set(scenarios_to_stack).issubset(scenarios_list), "Not all selected scenarios exist."

use_tmf_data = False


def _predictor_raster_path(predictor_name, tile_number, n_stacks):
  """Return the raster path for a predictor: the original if there is one stack, otherwise its numbered tile."""
  if n_stacks == 1: return f"{predictors_dir}/{predictor_name}.tif"
  return f"{tile_predictors_dir}/{predictor_name}_{tile_number}.tif"


# Check existing tile parameters
template_tile_list = [file for file in os.listdir(tile_templates_dir) if file.endswith('.tif') and file.startswith('template_tile')]
n_tiles = len(template_tile_list)

assert n_tiles > 0, "There are no template tiles. Run the template tiles section, even if only one is created."
print(f"There are {n_tiles} template tiles.")

# Progress
scenario_progress_index, scenario_progress_label = 0, widgets.Label(value=f"Scenario progress: 0 / {len(scenarios_to_stack)}")
display(scenario_progress_label)
stack_progress_index, stack_progress_label = 0, widgets.Label(value=f"Tiled predictor stack progress: 0 / {n_tiles}")
display(stack_progress_label)

# Loop through each scenario
for scenario in scenarios_to_stack:
  # Define directory and predictors
  scenario_predictor_stacks_dir = join(tile_predictor_stacks_dir, scenario)
  makedirs(scenario_predictor_stacks_dir, exist_ok=True)
  scenario_predictors_csv = join(scenarios_model_dir,f"{scenario}.csv")
  scenario_predictors = pd.read_csv(scenario_predictors_csv).iloc[:,0].tolist()
  # One stack per tile (n_tiles is at least 1, as asserted above)
  n_stacks = n_tiles
  for tile_count in range(1, n_stacks + 1):
    scenario_stack_filename = f"predictor_stack_{scenario}_{tile_count}.npy"
    scenario_tile_stack_dir = join(scenario_predictor_stacks_dir, scenario_stack_filename)
    # If the predictor stack does not exist:
    if not exists(scenario_tile_stack_dir):
      # Create predictor chunks (arrays) from tiles, in the order listed in the scenario .csv
      predictor_array_chunks = []
      for predictor in scenario_predictors:
        predictor_array_chunk = gdal.Open(_predictor_raster_path(predictor, tile_count, n_stacks)).ReadAsArray()
        # Add current year's disturbance / edge effects (which was excluded from training) to the previous yearly predictor as a proxy.
        # The predictor name (not its full path) is tested, so directory names cannot trigger this by accident.
        if use_tmf_data and ('disturbance' in predictor or 'forest' in predictor):
          year_match = re.search(r'\d{4}', predictor)
          current_year = int(scenario[:4])
          previous_scenario_year = current_year - 1
          if year_match is not None and int(year_match.group()) == previous_scenario_year and previous_scenario_year != 9999:
            # Swap the year inside the name, which stays correct for any number of tiles
            current_predictor = predictor[:year_match.start()] + str(current_year) + predictor[year_match.end():]
            current_predictor_array_chunk = gdal.Open(_predictor_raster_path(current_predictor, tile_count, n_stacks)).ReadAsArray()
            # Higher value denotes disturbance, lower value denotes forest edge effects
            if 'disturbance' in predictor: predictor_array_chunk = np.maximum(predictor_array_chunk, current_predictor_array_chunk)
            if 'forest' in predictor: predictor_array_chunk = np.minimum(predictor_array_chunk, current_predictor_array_chunk)
        predictor_array_chunks.append(predictor_array_chunk)
      # Create a predictor stack from chunks (predictors along the third axis)
      predictor_stack = np.dstack(predictor_array_chunks)
      predictor_array_chunks = None # Flush chunks
      stack_height, stack_width, stack_n_predictors = predictor_stack.shape
      # Convert predictor stack to 2D numpy array with predictors as columns
      predictor_stack_reshaped = predictor_stack.reshape(stack_height * stack_width, stack_n_predictors)
      predictor_stack = None # Flush stack
      # Save as a numpy file
      np.save(scenario_tile_stack_dir, predictor_stack_reshaped)
      predictor_stack_reshaped = None # Flush reshaped stack
    # Update progress
    stack_progress_index += 1
    stack_progress_label.value = f"Tiled predictor stack progress: {stack_progress_index} / {n_stacks}"
  # Reset tile progress
  stack_progress_index = 0
  scenario_progress_index += 1
  scenario_progress_label.value = f"Scenario progress: {scenario_progress_index} / {len(scenarios_to_stack)}"
print("\nPredictor stacks complete.")

# Predict scenarios

In [None]:
# Every entry in the predictor stack tiles directory is treated as an available scenario
scenario_stacks_list = os.listdir(tile_predictor_stacks_dir)

# Print a ready-to-copy list for selecting the scenarios to predict
print("scenarios_to_predict = [")
print("".join(f'  "{scenario}",\n' for scenario in sorted(scenario_stacks_list)), end="")
print("]")

In [None]:
# Scenarios to predict (copy from the list printed by the previous cell)
scenarios_to_predict = [
  "2008",
  "2009",
  "2010",
  "2011",
  "2012",
  "2013",
  "2014",
  "2015",
  "2016",
  "2017",
  "2018",
  "2019",
  "2020",
  "2021",
  "2022",
  "2027_nodef",
  "2027_nodist",
  "2032_nodef",
  "2032_nodist",
  "2037_nodef",
  "2037_nodist",
  "9999_comrec",
  "9999_comrest",
]

# Every selected scenario must have a predictor stack directory
assert set(scenarios_to_predict) <= set(scenario_stacks_list), "Not all selected scenarios exist."

In [None]:
# Covariate settings. Change these and the covariate code in the prediction loop accordingly.
add_covariates = True # Adds a selected covariate value as the predictor
sensitivity_value = 0.99
# 5 is the first of the full beams, with the least bias on AGBD.
# Cover beams 1 - 4 underestimate. Full beams 7 - 8 overestimate. 5 - 6 average.
beam_value = 5

# Report whether a GPU is available
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print(f"Found GPU at: {device_name}")
else:
  print('GPU device not found')

# Build a classifier or a regressor depending on the variate type, then load the trained model into it
_predictor_class = xgb.XGBClassifier if categorise_variate else xgb.XGBRegressor
XGBPredictor = _predictor_class(**final_hyperparameters)
XGBPredictor.load_model(fname=selected_model_json)

# Collect the existing template tiles ('template_tile*.tif')
template_tile_list = [
  tile_name for tile_name in os.listdir(tile_templates_dir)
  if tile_name.endswith('.tif') and tile_name.startswith('template_tile')
]
n_tiles = len(template_tile_list)

assert n_tiles > 0, "There are no template tiles. Run the template tiles section, even if only one is created."
# Tile width, read from the first template tile
first_template_tile_dir = join(tile_templates_dir, 'template_tile_1.tif')
template_tile_x = gdal.Open(first_template_tile_dir).GetRasterBand(1).XSize
print(f"There are {n_tiles} template tiles.")

# Progress label for scenarios
scenario_progress_index = 0
scenario_progress_label = widgets.Label(f"Scenario progress: {scenario_progress_index}/{len(scenarios_to_predict)}")
display(scenario_progress_label)

# Progress label for tiles within the current scenario
tile_progress_index = 0
tile_progress_label = widgets.Label(value=f"Tile progress: 0 / {n_tiles}")
display(tile_progress_label)

# Loop through each scenario: predict every tile, merge the tiles into one
# raster, then export it to the unmasked predictions directory.
for scenario in scenarios_to_predict:
  # Define scenario filename and check if exists
  scenario_prediction_unmasked_filename = f"{scenario}__{selected_scenario_area}_{selected_model}_unmasked.tif"
  scenario_prediction_unmasked_dir = join(scenario_predictions_unmasked_dir, scenario_prediction_unmasked_filename)
  scenario_prediction_unmasked_exists = exists(scenario_prediction_unmasked_dir)
  # If scenario prediction does not exist:
  if not scenario_prediction_unmasked_exists:
    # Get number of stacks
    scenario_predictor_stack_dir = join(tile_predictor_stacks_dir, scenario)
    n_stacks = len(os.listdir(scenario_predictor_stack_dir))
    # Create a tile cache directory for the prediction.
    # Tiles already present in the cache are skipped rather than predicted again.
    tile_cache_scenario_dir = join(tile_prediction_cache_dir, scenario_prediction_unmasked_filename[:-4])
    makedirs(tile_cache_scenario_dir, exist_ok=True)
    # Tile paths in stack order (1 ... n_stacks). The same ordered list drives
    # both prediction and merging, so tile rows are always stacked in sequence.
    scenario_tile_dirs = [
      join(tile_cache_scenario_dir, f"scenario_tile_{stack}.tif")
      for stack in range(1, n_stacks+1)
    ]
    for stack, scenario_tile_dir in enumerate(scenario_tile_dirs, start=1):
      # Check if tile already exists
      scenario_tile_exists = exists(scenario_tile_dir)
      # If scenario prediction tile does not exist:
      if not scenario_tile_exists:
        # Load template tile parameters (dataset opened once for both dimensions)
        template_tile_dir = join(tile_templates_dir, f"template_tile_{stack}.tif")
        template_tile = gdal.Open(template_tile_dir)
        template_tile_y = template_tile.GetRasterBand(1).YSize
        template_tile_x = template_tile.GetRasterBand(1).XSize
        template_tile = None # Close template tile
        # Load predictor tile stack
        stack_filename = f"predictor_stack_{scenario}_{stack}.npy"
        predictor_stack = np.load(join(scenario_predictor_stack_dir, stack_filename))
        # Add covariates as constant columns: beam first, then sensitivity.
        # NOTE(review): column order presumably has to match the order used in model training - confirm.
        if add_covariates: predictor_stack = np.hstack((predictor_stack,
                           np.full((predictor_stack.shape[0], 1), beam_value, dtype=int),
                           np.full((predictor_stack.shape[0], 1), sensitivity_value, dtype=float)
                           ))
        # Define prediction array and reshape to the template tile dimensions
        prediction = XGBPredictor.predict(predictor_stack)
        predictor_stack = None # Flush predictor stack
        prediction_tile = prediction.reshape((template_tile_y, template_tile_x))
        prediction = None # Flush prediction
        # Export prediction array as .tif
        export_array_as_tif(prediction_tile, scenario_tile_dir, template = template_tile_dir, compress = False)
        prediction_tile = None # Flush prediction tile
      # Update progress (cached tiles count as done)
      tile_progress_index += 1
      tile_progress_label.value = f"Tile progress: {tile_progress_index} / {n_stacks}"
    # Read each tile .tif as an array and stack them vertically in stack order.
    # os.listdir() order is arbitrary (and 'scenario_tile_10' sorts before
    # 'scenario_tile_2'), so the tiles are opened by their explicit paths instead.
    # The leading empty array fixes the expected width; np.empty defaults to
    # float64, so the merged array is promoted to float.
    prediction_array = np.vstack(
      [np.empty((0,template_tile_x))]
      + [gdal.Open(scenario_tile_dir).ReadAsArray() for scenario_tile_dir in scenario_tile_dirs]
    )
    # Define scenario template
    # NOTE(review): uses the first entry listed in predictors_dir - presumably any predictor raster shares the scenario extent; confirm.
    scenario_template = join(predictors_dir, os.listdir(predictors_dir)[0])
    export_array_as_tif(prediction_array, scenario_prediction_unmasked_dir, template = scenario_template, compress = True)
    prediction_array = None # Flush merged prediction
    # Delete scenario tile cache directory
    shutil.rmtree(tile_cache_scenario_dir)
  # Reset tile progress
  tile_progress_index = 0
  # Update scenario progress
  scenario_progress_index += 1
  scenario_progress_label.value = f"Scenario progress: {scenario_progress_index}/{len(scenarios_to_predict)}"
print("\nScenario predictions complete.")

# Mask scenario predictions



In [None]:
last_historic_predictor_year = '2022'
first_historic_predictor_year = '1990'
unmasked_suffix = '_unmasked.tif'

# Mask scenarios with the relevant mask in 1_areas/masks.
for scenario_prediction in os.listdir(scenario_predictions_unmasked_dir): # Loop through each unmasked prediction
  # Skip anything that is not an unmasked prediction raster (e.g. sidecar files),
  # because the filename slicing below assumes the '_unmasked.tif' suffix.
  if not scenario_prediction.endswith(unmasked_suffix): continue
  scenario_masked_filename = f"{scenario_prediction[:-len(unmasked_suffix)]}.tif"
  scenario_masked_dir = join(scenario_predictions_dir, scenario_masked_filename)
  # Skip predictions that have already been masked
  if exists(scenario_masked_dir): continue

  # Classify the scenario from its filename. The first four characters are the scenario year.
  scenario_year = scenario_prediction[:4]
  is_comrest = 'comrest' in scenario_prediction
  is_nodef_historic = 'nodef_historic' in scenario_prediction
  # A non-numeric prefix is treated as "not a future scenario" instead of raising
  try: scenario_year_number = int(scenario_year)
  except ValueError: scenario_year_number = None
  # Other alternate scenarios: later than the last historic year, excluding the 9999 scenarios
  is_future = scenario_year_number is not None and int(last_historic_predictor_year) < scenario_year_number < 9999

  # Match the scenario to a mask whose filename includes the relevant year.
  # If several masks match, the last one listed is used.
  mask_exists = False
  for mask in os.listdir(masks_dir):
    mask_matches = (
      # Historic scenarios and 9999_comrec (complete recovery) use the mask of their own year
      (scenario_year in mask and not is_comrest and not is_nodef_historic)
      # 9999_comrest uses the last historic predictor year mask (e.g. forest 2022)
      or (is_comrest and last_historic_predictor_year in mask)
      # 2022_nodef_historic uses the first historic predictor year mask (e.g. forest 1990)
      or (is_nodef_historic and first_historic_predictor_year in mask)
      # Other alternate scenarios use the last historic predictor year mask (e.g. forest 2022)
      or (is_future and last_historic_predictor_year in mask)
    )
    if mask_matches:
      selected_mask_filename = mask
      selected_mask_dir = join(masks_dir, selected_mask_filename)
      mask_exists = True

  if not mask_exists:
    print(f"A suitable mask for {scenario_prediction} does not exist.\n")
    continue

  # Mask the scenario prediction
  print(f"Masking {scenario_prediction} with {selected_mask_filename}...")
  mask_array = gdal.Open(selected_mask_dir).ReadAsArray()
  scenario_prediction_unmasked_dir = join(scenario_predictions_unmasked_dir, scenario_prediction)
  scenario_prediction_array = gdal.Open(scenario_prediction_unmasked_dir).ReadAsArray()
  # Mask where the mask array is not 1
  scenario_masked_array = np.where(mask_array != 1, nodatavalue, scenario_prediction_array)
  export_array_as_tif(scenario_masked_array, scenario_masked_dir, template = selected_mask_dir, compress = True)
  print(f"{scenario_masked_filename} exported.")

# Disconnect runtime

In [None]:
# Disconnect from the Colab runtime once the cells above have finished.
# Useful for stopping background execution
runtime.unassign()