<a href="https://colab.research.google.com/github/joekelly211/masfi/blob/dev/9_stats_ready_rasters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports, directories and functions

In [None]:
# Root directory of the project on Google Drive
# Personal drive: start the path with '/content/drive/MyDrive/'
# Shared drive: start the path with '/gdrive/Shareddrives/' (must be created first)

base_dir = "/gdrive/Shareddrives/masfi_asartr"
# base_dir = '/content/drive/MyDrive/masfi_asartr'

# Mount Google Drive
from google.colab import drive
import os
import sys

# (base_dir prefix, mount point, whether base_dir is created after mounting)
_mount_rules = (
    ('/gdrive/Shareddrives/', '/gdrive', False),
    ('/content/drive/MyDrive/', '/content/drive', True),
)
for _prefix, _mount_point, _create_base_dir in _mount_rules:
    if base_dir.startswith(_prefix):
        drive.mount(_mount_point, force_remount=True)
        if _create_base_dir:
            os.makedirs(base_dir, exist_ok=True)
        break
else:
    # No known prefix matched: nothing is mounted, only a hint is printed
    print("Create a base_dir beginning with '/gdrive/Shareddrives/' or '/content/drive/MyDrive/'.")

# Make modules stored in base_dir importable (added once, by resolved path)
_path_to_add = os.path.realpath(base_dir)
if _path_to_add not in sys.path:
    sys.path.append(_path_to_add)

In [None]:
# Capture outputs
%%capture
# Imports and upgrades
!pip install geopandas
!pip install kaleido
!pip install rasterio

In [None]:
# Reload imports, replacing those in the cache
%load_ext autoreload
%autoreload 2
# Imports
import geopandas as gpd
from google.colab import runtime
import math
import numpy as np
from os.path import exists, join
from os import makedirs
from osgeo import gdal
import pandas as pd
import plotly.graph_objects as go
import rasterio
from rasterio import mask as msk
from ipywidgets import widgets
from IPython.display import display

In [None]:
# Project sub-directories, all located directly under base_dir
areas_dir, scenarios_dir, predictions_dir, stats_ready_dir = (
    join(base_dir, folder)
    for folder in ("1_areas", "6_scenarios", "7_predictions", "9_stats_ready_rasters")
)

# Create directories (only the output directory of this notebook)
makedirs(stats_ready_dir, exist_ok=True)

In [None]:
areas_dir = join(base_dir, "1_areas")
cell_area_path = join(areas_dir, "cell_area.tif")

# Global function: export an array as a .tif
template_tif_path = join(areas_dir, "template.tif")
nodatavalue = -1111111
compress = True
def export_array_as_tif(input_array, output_tif, template=template_tif_path, nodatavalue=nodatavalue, compress=compress):
  """Write a 2D array to a single-band Float32 GeoTIFF.

  The output takes its size (XSize / YSize of band 1), geotransform and
  projection from the template raster.

  Args:
    input_array: array written to band 1 of the output.
    output_tif: path of the GeoTIFF to create.
    template: path of the raster supplying size and georeferencing.
    nodatavalue: value registered as the nodata value of band 1.
    compress: if truthy, create the file with DEFLATE compression
      (PREDICTOR=2, ZLEVEL=9); any falsy value writes it uncompressed.

  Raises:
    RuntimeError: if the template cannot be opened or the output cannot
      be created.
  """
  template_ds = gdal.Open(template)
  if template_ds is None:
    raise RuntimeError(f"Could not open template raster: {template}")
  template_band = template_ds.GetRasterBand(1)
  template_dimensions, template_projection = template_ds.GetGeoTransform(), template_ds.GetProjection()

  # A single if/else on truthiness, so values such as None cannot fall between
  # the two branches and leave the output dataset undefined
  creation_options = ["COMPRESS=DEFLATE","PREDICTOR=2","ZLEVEL=9"] if compress else []
  output_ds = gdal.GetDriverByName("GTiff").Create(output_tif, template_band.XSize, template_band.YSize, bands=1, eType=gdal.GDT_Float32,
                                                   options=creation_options)
  if output_ds is None:
    raise RuntimeError(f"Could not create output raster: {output_tif}")

  output_band = output_ds.GetRasterBand(1)
  output_band.WriteArray(input_array)
  output_band.SetNoDataValue(nodatavalue)
  output_ds.SetGeoTransform(template_dimensions)
  output_ds.SetProjection(template_projection)

  # Flush and release the handles explicitly so the file is fully written to
  # disk before the function returns
  output_ds.FlushCache()
  output_band = None
  output_ds = None
  template_band = None
  template_ds = None

# Process stats-ready rasters

In [None]:
# Select whether to source predictions from scenarios_dir or predictions_dir
source_dir = predictions_dir
# source_dir = scenarios_dir

# Text after the last underscore of the path, suffixed with '_dir'
source_dir_name = source_dir.rsplit('_', 1)[-1] + "_dir"

# Select the model: print a ready-to-paste assignment for every candidate
for subdir in os.listdir(source_dir):
    if 'scenario_masks' in subdir:
        continue
    print(f"selected_model = '{subdir}'")

In [None]:
selected_model = 'agbd_historic_250429_223033'

selected_model_dir = join(source_dir, selected_model)
# Select the prediction area: print a ready-to-paste assignment per candidate
for subdir in os.listdir(selected_model_dir):
    # Scenario source: every entry except .csv / .json files is listed as is
    if source_dir == scenarios_dir and not subdir.endswith(('.csv', '.json')):
        print(f"prediction_area = '{subdir}'")
    # Prediction source: skip 'model_iterations' and drop the first
    # len('scenarios_') == 10 characters of the entry name
    if source_dir == predictions_dir and subdir != 'model_iterations':
        print(f"prediction_area = '{subdir[len('scenarios_'):]}'")

In [None]:
prediction_area = 'asartr'

# Output directories: one model-area folder holding 'scenarios' and 'disturbance'
model_area_sr_dir = join(stats_ready_dir, f"{selected_model}_{source_dir_name}_{prediction_area}")
stats_ready_scenario_dir = join(model_area_sr_dir, 'scenarios')
stats_ready_dist_dir = join(model_area_sr_dir, 'disturbance')
for _output_dir in (model_area_sr_dir, stats_ready_scenario_dir, stats_ready_dist_dir):
    makedirs(_output_dir, exist_ok=True)

# Input directories: the folder layout depends on which source is selected
if source_dir == scenarios_dir:
    scenario_raster_dir = join(selected_model_dir, prediction_area, 'scenario_predictions')
    dist_raster_dir = join(selected_model_dir, prediction_area, 'scenario_disturbance')
if source_dir == predictions_dir:
    scenario_raster_dir = join(selected_model_dir, f'scenarios_{prediction_area}', 'statistics_masked')
    dist_raster_dir = join(selected_model_dir, f'scenarios_{prediction_area}', 'scenario_disturbance')

In [None]:
# List all raster files in source directories
def _collect_tifs(raster_dir):
    """Return (mean_paths, uncertainty_paths) for the .tif files in raster_dir.

    Both lists are sorted by path. A missing directory yields two empty lists.
    """
    mean_paths, uncertainty_paths = [], []
    if os.path.exists(raster_dir):
        for filename in os.listdir(raster_dir):
            if not filename.endswith('.tif'):
                continue
            tif_path = join(raster_dir, filename)
            if source_dir != predictions_dir:
                # scenarios_dir doesn't contain uncertainty rasters
                mean_paths.append(tif_path)
            elif 'mean__' in filename:
                mean_paths.append(tif_path)
            elif 'uncertainty__' in filename:
                uncertainty_paths.append(tif_path)
    # Sorted by path string (chronological as long as the names sort that way)
    return sorted(mean_paths), sorted(uncertainty_paths)


def _lookup_by_mean_name(uncertainty_paths):
    """Map the matching mean raster filename to each uncertainty raster path."""
    return {
        os.path.basename(path).replace('uncertainty__', 'mean__'): path
        for path in uncertainty_paths
    }


# Collect scenario and disturbance rasters
scenario_mean_rasters, scenario_uncertainty_rasters = _collect_tifs(scenario_raster_dir)
dist_mean_rasters, dist_uncertainty_rasters = _collect_tifs(dist_raster_dir)

# Create lookup dictionaries for faster uncertainty matching
scenario_uncertainty_lookup = _lookup_by_mean_name(scenario_uncertainty_rasters)
dist_uncertainty_lookup = _lookup_by_mean_name(dist_uncertainty_rasters)

# Load cell area raster once for efficiency
cell_area_ds = gdal.Open(cell_area_path)
cell_area_array = cell_area_ds.ReadAsArray()
# Convert cell area from m² to ha (1 ha = 10000 m²)
cell_area_ha = np.divide(cell_area_array, 10000, dtype='float64')

# Function to process a batch of rasters
def _read_raster_with_mask(raster_path):
    """Read a raster and return (array, nodata, valid_mask).

    nodata is the nodata value declared on band 1, or None when the raster
    declares none. valid_mask is True wherever the array holds real data.
    """
    ds = gdal.Open(raster_path)
    if ds is None:
        raise RuntimeError(f"Could not open raster: {raster_path}")
    array = ds.ReadAsArray()
    nodata = ds.GetRasterBand(1).GetNoDataValue()
    ds = None  # release the file handle

    if nodata is None:
        # No declared nodata value: every pixel counts as valid
        valid_mask = np.ones(array.shape, dtype=bool)
    elif np.isnan(nodata):
        # NaN never compares equal to itself, so it needs its own test
        valid_mask = ~np.isnan(array)
    else:
        valid_mask = (array != nodata)
    return array, nodata, valid_mask


def _export_masked_total(values, valid_mask, fill_value, output_tif, template):
    """Write `values` into the valid pixels, fill the rest, and export as .tif.

    fill_value is used both for the invalid pixels and as the nodata value
    registered on the output, so the two always agree.
    """
    total = np.full(valid_mask.shape, fill_value, dtype='float32')
    total[valid_mask] = values
    export_array_as_tif(total, output_tif, template=template, nodatavalue=fill_value)


def process_rasters(raster_paths, uncertainty_lookup, output_dir, is_disturbance=False):
    """Convert AGBD rasters into per-pixel total AGB rasters (and CI95 rasters).

    For each mean raster a total AGB raster (AGBD x cell area in ha) is
    written to output_dir. When the source is predictions_dir and a matching
    uncertainty raster exists, a CI95 raster (AGBD x uncertainty / 100 x cell
    area) is written as well. Outputs that already exist are left untouched.

    Relies on the module-level names source_dir, predictions_dir,
    cell_area_ha, nodatavalue and export_array_as_tif.

    Args:
        raster_paths: paths of the mean rasters to process.
        uncertainty_lookup: dict mapping a mean raster filename to the path
            of its uncertainty raster.
        output_dir: directory receiving the output rasters.
        is_disturbance: only changes the wording of the progress messages.

    Returns:
        The number of rasters in raster_paths.

    Raises:
        RuntimeError: if a mean or uncertainty raster cannot be opened.
    """
    progress_total = len(raster_paths)
    raster_type = "Disturbance" if is_disturbance else "Scenario"
    progress_label = widgets.Label(f"{raster_type} rasters progress: 0/{progress_total}")
    display(progress_label)

    print(f"Processing {progress_total} {raster_type.lower()} rasters...")

    for progress_index, raster_path in enumerate(raster_paths, start=1):
        base_filename = os.path.basename(raster_path)

        # Create output filenames based on source directory
        if source_dir == predictions_dir:
            name = base_filename.split('__')[1].split('.')[0]
            output_agb_mg = join(output_dir, f"agb_total_mg__{name}.tif")
            output_agb_ci95_mg = join(output_dir, f"agb_total_ci95_mg__{name}.tif")
        else:
            name = base_filename.split('__')[0].split('.')[0]
            output_agb_mg = join(output_dir, f"{name}__agb_total_mg.tif")
            output_agb_ci95_mg = join(output_dir, f"{name}__agb_total_ci95_mg.tif")

        # The mean raster is read lazily, at most once per iteration
        agbd_array = None
        nodata = None
        valid_mask = None

        # Create total AGB raster if it doesn't exist
        if not os.path.exists(output_agb_mg):
            agbd_array, nodata, valid_mask = _read_raster_with_mask(raster_path)
            # Fall back to the module default when the source declares no nodata
            fill_value = nodatavalue if nodata is None else nodata

            # Calculate total AGB (Mg) = AGBD (Mg/ha) × pixel area (ha)
            _export_masked_total(
                agbd_array[valid_mask] * cell_area_ha[valid_mask],
                valid_mask, fill_value, output_agb_mg, raster_path)

        # Process uncertainty if available and its output is still missing
        if (source_dir == predictions_dir
                and base_filename in uncertainty_lookup
                and not os.path.exists(output_agb_ci95_mg)):
            uncertainty_array, _, uncertainty_valid = _read_raster_with_mask(
                uncertainty_lookup[base_filename])

            # Read original raster if we need to
            if agbd_array is None:
                agbd_array, nodata, valid_mask = _read_raster_with_mask(raster_path)
            fill_value = nodatavalue if nodata is None else nodata

            # A pixel is only usable when both the mean and the uncertainty
            # are valid; otherwise a nodata uncertainty would be multiplied in
            ci95_mask = valid_mask & uncertainty_valid

            # Uncertainty is stored as percentage (0-100), divide by 100 to get proportion (0-1)
            uncertainty_proportion = np.divide(uncertainty_array, 100, dtype='float64')

            # Calculate total AGB CI95 (Mg) = AGBD (Mg/ha) × uncertainty proportion × area (ha)
            _export_masked_total(
                agbd_array[ci95_mask] * uncertainty_proportion[ci95_mask] * cell_area_ha[ci95_mask],
                ci95_mask, fill_value, output_agb_ci95_mg, raster_path)

        # Update progress
        progress_label.value = f"{raster_type} rasters progress: {progress_index}/{progress_total}"

    return progress_total

# Run both batches: scenario rasters first, then disturbance rasters
scenario_count = process_rasters(
    scenario_mean_rasters,
    scenario_uncertainty_lookup,
    stats_ready_scenario_dir,
)
dist_count = process_rasters(
    dist_mean_rasters,
    dist_uncertainty_lookup,
    stats_ready_dist_dir,
    is_disturbance=True,
)

# Summary of what was processed and where the outputs were written
print(
    "Total biomass raster generation complete.",
    f"Processed {scenario_count} scenario rasters and {dist_count} disturbance rasters",
    f"Results saved to:\n- {stats_ready_scenario_dir}\n- {stats_ready_dist_dir}",
    sep="\n",
)

# Disconnect runtime

In [None]:
# Unassign the Colab runtime (google.colab.runtime) as the last step.
# Useful for stopping background execution
runtime.unassign()