<a href="https://colab.research.google.com/github/joekelly211/masfi/blob/main/1_areas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports, directories and functions

In [None]:
# Base directory for the whole project.
# Personal drive: start the path with '/content/drive/MyDrive/'.
# Shared drive: start the path with '/gdrive/Shareddrives/' (the shared drive must be created first).

base_dir = "/gdrive/Shareddrives/masfi"
# base_dir = '/content/drive/MyDrive/masfi'

# Mount Google Drive
from google.colab import drive
import os
import sys
# The prefix of base_dir decides which mount point is used.
_uses_shared_drive = base_dir.startswith('/gdrive/Shareddrives/')
_uses_personal_drive = base_dir.startswith('/content/drive/MyDrive/')
if _uses_shared_drive:
  drive.mount('/gdrive', force_remount=True)
elif _uses_personal_drive:
  drive.mount('/content/drive', force_remount=True)
  # Only the personal drive folder is created automatically.
  os.makedirs(base_dir, exist_ok=True)
else:
  print("Create a base_dir beginning with '/gdrive/Shareddrives/' or '/content/drive/MyDrive/'.")

# Append the resolved base directory to the import path, once only.
_path_to_add = os.path.realpath(base_dir)
if sys.path.count(_path_to_add) == 0:
    sys.path.append(_path_to_add)

In [None]:
# Capture outputs
%%capture
# Installs and upgrades: geopandas is imported by the imports cell that follows.
!pip install geopandas

In [None]:
# Imports
import geopandas as gpd
import getpass
from google.colab import runtime
import ipywidgets as widgets
import math
import numpy as np
from os import makedirs, remove
from os.path import exists, join
from osgeo import gdal, ogr
gdal.UseExceptions()
import pandas as pd
import requests
from shapely.geometry import box
from shutil import copy
import zipfile
import warnings

In [None]:
# Folder layout underneath base_dir.
areas_dir = join(base_dir, "1_areas")
features_dir = join(base_dir, "3_features")
polygons_dir = join(areas_dir, "polygons")
dem_dir = join(areas_dir, "dem")
dem_tiles_dir = join(dem_dir, "tiles")

# Make sure the areas folders exist (features_dir is only defined here, not created).
for _required_dir in (areas_dir, polygons_dir, dem_dir, dem_tiles_dir):
    makedirs(_required_dir, exist_ok=True)

In [None]:
# Global function: read raster as array
def read_raster_as_array(path):
    """Open the raster at `path` with GDAL and return its contents as an array.

    The dataset handle is dropped before returning.
    """
    raster = gdal.Open(path)
    pixel_values = raster.ReadAsArray()
    del raster  # release the GDAL dataset handle
    return pixel_values

# Global function: write an array to a single-band GeoTIFF
template_tif_path = join(areas_dir, "template.tif")
nodatavalue = -11111
compress = True
def export_array_as_tif(input_array, output_tif, template=template_tif_path, nodatavalue=nodatavalue, compress=compress, dtype=gdal.GDT_Float32):
    """Write `input_array` to `output_tif` as a one-band GeoTIFF.

    Raster size, geotransform and projection are copied from the `template`
    raster. `nodatavalue` is set as the band's nodata value. When `compress`
    is true the file is created with ZSTD compression. `dtype` is the GDAL
    output type, except that an int16 input array is always written as Int16.
    """
    reference_ds = gdal.Open(template)
    reference_band = reference_ds.GetRasterBand(1)
    geotransform = reference_ds.GetGeoTransform()
    projection = reference_ds.GetProjection()
    # ZSTD level 1: good speed / size ratio
    creation_options = ['COMPRESS=ZSTD', 'ZSTD_LEVEL=1'] if compress else []
    # An int16 array overrides whatever dtype was requested.
    if input_array.dtype == 'int16':
        dtype = gdal.GDT_Int16
    output_ds = gdal.GetDriverByName("GTiff").Create(
        output_tif, reference_band.XSize, reference_band.YSize, 1, dtype, options=creation_options)
    output_band = output_ds.GetRasterBand(1)
    output_band.WriteArray(input_array)
    output_band.SetNoDataValue(nodatavalue)
    output_ds.SetGeoTransform(geotransform)
    output_ds.SetProjection(projection)
    # Drop the handles so the output is written and both files are closed.
    output_band = None
    reference_ds = output_ds = None

# Project area

In [None]:
# Upload 'project_area.gpkg' polygon to the 1_areas/polygons directory.
# This can be a polygon of any shape. A bounding box will be used to create the
# GEDI download area in 1_variates.ipynb. A buffered bounding box will be used
# for the raster template, to ensure all feature edge effects are included.

# Project CRS EPSG
crs_epsg = 4326

# Buffer to ~300m to account for forest / disturbance edge effects (~120m),
# the prediction area buffer (~30m) and geolocation / clipping imprecision (x2).
buffer_distance_metres = 300

project_area_path = join(polygons_dir, 'project_area.gpkg')

if exists(project_area_path):
  print("Project polygon found:\n")
  # Read project polygon
  project_area_read = gpd.read_file(project_area_path)
  display(project_area_read["geometry"].iloc[0])
  # A polygon with no CRS at all cannot match the project CRS, so it is
  # reported as needing reprojection instead of raising an AttributeError.
  project_area_crs = project_area_read.crs
  if project_area_crs is not None and project_area_crs.to_epsg() == crs_epsg:
    project_area_buffered_bbox_path = join(polygons_dir, 'project_area_buffered_bbox.gpkg')
    # Calculate the bounding box of the project polygon
    if not exists(project_area_buffered_bbox_path):
      # Suppress the warning about operating in a geographic CRS, as we account for this.
      # However larger buffers or project areas near the poles might still need to be converted.
      # The suppression is scoped to these calls so that unrelated UserWarnings
      # raised later in the session remain visible.
      with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        # Get the centroid of the (first) project polygon
        project_polygon_centroid = project_area_read.centroid.values[0]
        # Convert the buffer distance from metres to decimal degrees based on the latitude of the centroid
        buffer_distance_degrees = buffer_distance_metres / (111320 * abs(math.cos(math.radians(project_polygon_centroid.y))))
        # Buffer the polygon
        project_area_buffered = project_area_read.buffer(buffer_distance_degrees)
      # Create a bounding box polygon and save
      project_area_buffered_bbox = box(*project_area_buffered.total_bounds)
      gdf = gpd.GeoDataFrame(geometry=[project_area_buffered_bbox], crs=f"EPSG:{crs_epsg}")
      gdf.to_file(project_area_buffered_bbox_path, driver='GPKG')
      print(f"Buffered the project area to {buffer_distance_metres}m and created a bounding box: {project_area_buffered_bbox_path}")
    else: print(f"Project area has already been buffered and bound to a box: {project_area_buffered_bbox_path}")
    # Read the buffered project area bounding box
    project_area_buffered_bbox_read = gpd.read_file(project_area_buffered_bbox_path)
    bbox_bounds = project_area_buffered_bbox_read.total_bounds
    project_x_min, project_x_max = bbox_bounds[0], bbox_bounds[2]
    project_y_min, project_y_max = bbox_bounds[1], bbox_bounds[3]
    print(f"\nThe buffered polygon bounding box has the coordinates:\n{project_x_min}, {project_y_min} to {project_x_max}, {project_y_max}.")
  else: print(f"Reproject 'project_area.gpkg' to EPSG:{crs_epsg}.")
else: print("Create 'project_area.gpkg' and upload to 1_areas/polygons")

# Download DEM tiles

In [None]:
# Download Copernicus 'COP-DEM_GLO-30-DGED' DEM tiles covering the project area.
# https://dataspace.copernicus.eu/explore-data/data-collections/copernicus-contributing-missions/collections-description/COP-DEM
# Credentials for 'Copernicus Contributing Missions' in the Copernicus Data Space Ecosystem are needed first:
# https://dataspace.copernicus.eu/explore-data/data-collections/copernicus-contributing-missions/ccm-how-to-register
# When registering, check the box "I am also interested in accessing Copernicus Contributing Missions data".

# Load the buffered bounding box and express it as a WKT 'area of interest'
project_area_buffered_bbox_path = join(polygons_dir, 'project_area_buffered_bbox.gpkg')
project_area_buffered_bbox_read = gpd.read_file(project_area_buffered_bbox_path)
bbox_bounds = project_area_buffered_bbox_read.total_bounds
project_x_min, project_y_min, project_x_max, project_y_max = bbox_bounds
# Closed ring: (min, min) -> (min, max) -> (max, max) -> (max, min) -> back to start.
aoi_ring = [
    (project_x_min, project_y_min),
    (project_x_min, project_y_max),
    (project_x_max, project_y_max),
    (project_x_max, project_y_min),
    (project_x_min, project_y_min),
]
aoi_wkt = "POLYGON((" + ", ".join(f"{ring_x} {ring_y}" for ring_x, ring_y in aoi_ring) + "))"

# Prompt for credentials (input is hidden) and exchange them for an OAuth2 access token.
email = getpass.getpass("Enter Copernicus account email: ")
password = getpass.getpass("Enter Copernicus account password: ")
token_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
data = {"client_id": "cdse-public", "username": email, "password": password, "grant_type": "password"}
try:
    # The timeout stops a stalled connection from hanging the cell indefinitely.
    token_response = requests.post(token_url, data=data, timeout=60)
    token_response.raise_for_status()
    access_token = token_response.json()["access_token"]
    print("Authentication successful. Access token obtained.")
except Exception as e:
    # Report the problem, then re-raise so later steps do not run without a token.
    print("Authentication failed:", e)
    raise

# List the DEM products intersecting the AOI, preferring a local CSV cache over the catalogue API
data_collection = "CCM"
catalog_url = "https://catalogue.dataspace.copernicus.eu/odata/v1/Products"
filter_query = f"Collection/Name eq '{data_collection}' and OData.CSC.Intersects(area=geography'SRID=4326;{aoi_wkt}')"
# $top=1000 raises the page size to avoid missing tiles
catalog_api_url = f"{catalog_url}?$filter={filter_query}&$top=1000"
headers = {"Authorization": f"Bearer {access_token}"}
catalog_csv = join(dem_dir, "dem_catalogue.csv")

if not exists(catalog_csv):
    # No cache yet: follow the '@odata.nextLink' of each page until there are no more pages.
    all_products = []
    page_count = 0
    next_url = catalog_api_url
    while next_url:
        cat_response = requests.get(next_url, headers=headers)
        cat_response.raise_for_status()
        cat_json = cat_response.json()
        all_products += cat_json["value"]
        page_count += 1
        next_url = cat_json.get("@odata.nextLink")
    df = pd.DataFrame.from_dict(all_products)
    print(f"Found {len(df)} DEM products across {page_count} pages intersecting the project area.")
    # Cache the listing so later runs skip the API query.
    df.to_csv(catalog_csv, index=False)
else:
    df = pd.read_csv(catalog_csv)
    print(f"Loaded {len(df)} DEM products from local CSV.")

# Filter DEM tiles by 'Name', sort by 'ModificationDate', then drop duplicates by footprint
df_filtered = df[df["Name"].str.startswith("DEM1_SAR_DGE_30")]
print(f"Found {len(df_filtered)} 'COP-DEM_GLO-30-DGED' DEM tiles.")
df_filtered = df_filtered.sort_values("ModificationDate").copy()
# 'GeoFootprint' values are GeoJSON-style dicts when the catalogue comes directly
# from the API response, and strings when reloaded from the CSV cache. Dicts are
# unhashable, so drop_duplicates() on that column raises a TypeError on a fresh
# query. Comparing the string form works identically for both sources.
footprint_keys = df_filtered["GeoFootprint"].astype(str)
# keep="last" retains the most recently modified product for each footprint.
df_filtered_unique = df_filtered[~footprint_keys.duplicated(keep="last")]
print(f"{len(df_filtered_unique)} unique footprints will be downloaded, prioritising the most recent.")

# Build list of product IDs for download.
dem_tiles_id_list = df_filtered_unique["Id"].tolist()
if not dem_tiles_id_list:
    print("No DEM tiles found within project area bounds.")

# Download and extract the DEM .tif from each product with up to 3 attempts.
index = 0
progress_label = widgets.Label(value=f"DEM tile download progress: {index}/{len(dem_tiles_id_list)}")
display(progress_label)
download_url_base = "https://download.dataspace.copernicus.eu/odata/v1/Products"
max_attempts = 3
for product_id in dem_tiles_id_list:
    # Retrieve product row, build download URL and file paths.
    row = df_filtered.loc[df_filtered["Id"] == product_id].iloc[0]
    product_url = f"{download_url_base}({product_id})/$value"
    dem_tile_zip_filename = f'{row["Name"]}.zip'
    dem_tile_zip_path = join(dem_tiles_dir, dem_tile_zip_filename)
    # Expected archive size, if the catalogue provides one. A missing value
    # (None / NaN) must not be treated as a size mismatch.
    expected_size = row.get('ContentLength', None)
    if pd.isna(expected_size):
        expected_size = None
    # Retry loop: download .zip and extract 'DEM.tif' directly into dem_tiles_dir.
    extracted_tif_path = None
    attempts = 0
    while attempts < max_attempts:
        try:
            # Discard a previously downloaded zip whose size disagrees with the catalogue.
            if exists(dem_tile_zip_path) and expected_size and os.path.getsize(dem_tile_zip_path) != expected_size:
                remove(dem_tile_zip_path)

            if not exists(dem_tile_zip_path):
                # Stream to disk in chunks so the whole archive is never held in memory.
                # The (connect, read) timeout turns a stalled connection into a
                # failed attempt that the retry loop can handle.
                with requests.get(product_url, headers=headers, allow_redirects=True,
                                  stream=True, timeout=(30, 300)) as response:
                    response.raise_for_status()
                    with open(dem_tile_zip_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=1024 * 1024):
                            f.write(chunk)
            with zipfile.ZipFile(dem_tile_zip_path, 'r') as z:
                tif_filename = next((f for f in z.namelist() if f.endswith("DEM.tif")), None)
                if tif_filename is None:
                    raise Exception("DEM.tif not found in zip")
                # Extract flat into dem_tiles_dir, ignoring the folder structure inside the zip.
                extracted_tif_name = os.path.basename(tif_filename)
                extracted_tif_path = join(dem_tiles_dir, extracted_tif_name)
                with open(extracted_tif_path, 'wb') as out_file:
                    out_file.write(z.read(tif_filename))
            break  # Exit retry loop
        except Exception as e:
            attempts += 1
            # Remove partial or corrupt files so the next attempt starts clean.
            if exists(dem_tile_zip_path):
                remove(dem_tile_zip_path)
            if extracted_tif_path and exists(extracted_tif_path):
                remove(extracted_tif_path)
            if attempts < max_attempts:
                print(f"Attempt {attempts} failed for ID: {product_id} - {e}. Retrying...")
            else:
                print(f"Failed ID: {product_id} after {max_attempts} attempts - {e}. Moving to next product.")
    index += 1
    progress_label.value = f"DEM tile download progress: {index}/{len(dem_tiles_id_list)}"

# Merge DEM tiles

In [None]:
# Combine every downloaded DEM tile into one raster
dem_merged_path = join(dem_dir, "dem_merged.tif")

if exists(dem_merged_path):
  print(f"A merged DEM raster already exists at: {dem_merged_path}")
else:
  # Collect the .tif files found in the tiles directory
  tiles_to_merge = [join(dem_tiles_dir, file) for file in os.listdir(dem_tiles_dir) if file.endswith(".tif")]
  # Mosaic the tiles through a temporary virtual raster (VRT)
  temp_vrt = join(dem_dir, 'temp.vrt')
  gdal.BuildVRT(temp_vrt, tiles_to_merge)
  # Translate the VRT into a single compressed Float32 GeoTIFF
  merge_options = gdal.TranslateOptions(
      format='GTiff',
      outputType=gdal.GDT_Float32,
      noData=nodatavalue,
      creationOptions=['COMPRESS=ZSTD', 'ZSTD_LEVEL=1'])
  gdal.Translate(dem_merged_path, temp_vrt, options=merge_options)
  # The VRT is no longer needed once the GeoTIFF exists
  os.remove(temp_vrt)
  print(f"The merged DEM raster has been saved to: {dem_merged_path}")

# Cut the merged DEM down to the buffered project bounding box
dem_merged_clipped_path = join(dem_dir, "dem_merged_clipped.tif")

if exists(dem_merged_clipped_path):
  print(f"A clipped merged DEM raster already exists at: {dem_merged_clipped_path}")
else:
  # Load the buffered project area bounding box
  project_area_buffered_bbox_path = join(polygons_dir, 'project_area_buffered_bbox.gpkg')
  project_area_buffered_bbox_read = gpd.read_file(project_area_buffered_bbox_path)
  bbox_bounds = project_area_buffered_bbox_read.total_bounds
  # total_bounds is ordered min x, min y, max x, max y
  project_x_min, project_y_min, project_x_max, project_y_max = bbox_bounds
  # Window passed as projWin: min x, max y, max x, min y
  project_coords = [project_x_min, project_y_max, project_x_max, project_y_min]
  clip_options = gdal.TranslateOptions(
      projWin=project_coords,
      outputType=gdal.GDT_Float32,
      noData=nodatavalue,
      creationOptions=['COMPRESS=ZSTD', 'ZSTD_LEVEL=1'])
  # Write the clipped raster
  gdal.Translate(dem_merged_clipped_path, dem_merged_path, options=clip_options)
  print(f"The clipped, merged DEM raster has been saved to: {dem_merged_clipped_path}")

# Keep a copy of the clipped, merged DEM in the areas directory to use as the base DEM
base_dem_dsm_path = join(areas_dir, "base_dem_dsm.tif")

if exists(base_dem_dsm_path):
  print(f"A base DEM already exists at: {base_dem_dsm_path}")
else:
  copy(dem_merged_clipped_path, base_dem_dsm_path)
  print(f"The clipped, merged DEM has been copied for use as a base DEM: {base_dem_dsm_path}")

# Template

In [None]:
# Build the template raster from the clipped DEM
template_tif_path = join(areas_dir, "template.tif")
if exists(template_tif_path):
  print(f"A template raster already exists at: {template_tif_path}")
else:
  dem_merged_clipped_path = join(dem_dir, "dem_merged_clipped.tif")
  # Read the DEM, then replace every value with 1 (same shape and dtype)
  dem_merged_clipped_array = read_raster_as_array(dem_merged_clipped_path)
  template_array = np.ones_like(dem_merged_clipped_array)
  # The DEM itself supplies the georeferencing, since the template does not exist yet
  export_array_as_tif(template_array, template_tif_path, template=dem_merged_clipped_path)
  print(f"A template raster has been created: {template_tif_path}")

In [None]:
# Create template polygon
template_polygon_path = join(polygons_dir, "template.gpkg")
if not exists(template_polygon_path):
  # Get template raster spatial data
  template_raster = gdal.Open(template_tif_path)
  template_raster_band = template_raster.GetRasterBand(1)
  spatial_ref = ogr.osr.SpatialReference()
  spatial_ref.ImportFromWkt(template_raster.GetProjection())
  # Polygonize template raster without fields or layer name
  template_polygon_file = ogr.GetDriverByName("GPKG").CreateDataSource(template_polygon_path)
  template_polygon_layer = template_polygon_file.CreateLayer("", srs=spatial_ref, geom_type=ogr.wkbPolygon)
  # -1 means no attribute field is written with the pixel value
  gdal.Polygonize(template_raster_band, None, template_polygon_layer, -1)
  # Release the layer and datasource so the GeoPackage is flushed and closed
  # before it is read back below; then release the raster handles.
  template_polygon_layer = None
  template_polygon_file = None
  template_raster_band = None
  template_raster = None
  print(f"A template polygon has been created: {template_polygon_path}")
else: print(f"A template polygon already exists at: {template_polygon_path}")
template_polygon_read = gpd.read_file(template_polygon_path)
template_polygon_bounds = template_polygon_read.total_bounds
print(f"\nThe template polygon has the coordinates:\n{template_polygon_bounds[0]}, {template_polygon_bounds[1]} to {template_polygon_bounds[2]}, {template_polygon_bounds[3]}.")

# Inverse project area for masking: the template extent with the project area removed
inverse_project_area_path = join(polygons_dir, "project_area_inverse.gpkg")
if exists(inverse_project_area_path):
  print(f"An inverse project area already exists at: {inverse_project_area_path}")
else:
  template_polygon_path = join(polygons_dir, "template.gpkg")
  template_polygon = gpd.read_file(template_polygon_path)
  project_area_polygon = gpd.read_file(project_area_path)
  # Dissolve each layer and take its first geometry before differencing
  template_outline = template_polygon.dissolve().geometry.iloc[0]
  project_outline = project_area_polygon.dissolve().geometry.iloc[0]
  inverse_project_area_polygon = template_outline.difference(project_outline)
  inverse_project_area_polygon_gdf = gpd.GeoDataFrame(
      {'geometry': [inverse_project_area_polygon]}, crs=f"EPSG:{crs_epsg}")
  inverse_project_area_polygon_gdf.to_file(inverse_project_area_path, driver="GPKG")
  print(f"An inverse project area polygon has been created: {inverse_project_area_path}")

# Measurement rasters

In [None]:
# Measurement rasters for feature engineering and area-based statistics.
# The Earth is not flat: in a geographic coordinate system (degrees) a pixel covers
# a different metric distance and area depending on its latitude. The rasters
# created below store the dimensions of each pixel in metres.

# Open the template raster
template_path = join(areas_dir, "template.tif")
template = gdal.Open(template_path)

# The geotransform maps pixel coordinates to geographic coordinates:
# [0] longitude of top-left corner, [1] pixel width in degrees,
# [3] latitude of top-left corner, [5] pixel height in degrees (negative)
geotransform = template.GetGeoTransform()
pixel_width_degrees, pixel_height_degrees = geotransform[1], geotransform[5]

# Raster dimensions, taken from the template's array
template_array = template.ReadAsArray()
rows, cols = template_array.shape
template = None

# Column and row index grids so every pixel can be computed in one vectorised
# operation instead of an explicit loop.
col_indices = np.arange(cols, dtype=np.float64)
row_indices = np.arange(rows, dtype=np.float64)
col_grid, row_grid = np.meshgrid(col_indices, row_indices)

# Longitude of each pixel centre (the +0.5 moves from the pixel corner to its centre)
longitude_path = join(areas_dir, "longitude.tif")
if exists(longitude_path):
    print(f"Raster with cell longitude in decimal degrees already exists: {longitude_path}")
else:
    longitude_array = geotransform[0] + (col_grid + 0.5) * pixel_width_degrees
    # Wrap across the antimeridian so values stay within -180 to +180
    # (e.g. 185° becomes -175°).
    longitude_array = np.remainder(longitude_array + 180, 360) - 180
    export_array_as_tif(longitude_array.astype(np.float64), longitude_path, dtype=gdal.GDT_Float64)
    print(f"Raster with cell longitude in decimal degrees created: {longitude_path}")

# Latitude of each pixel centre, limited to the -90 to 90 range
latitude_path = join(areas_dir, "latitude.tif")
if exists(latitude_path):
    print(f"Raster with cell latitude in decimal degrees already exists: {latitude_path}")
else:
    latitude_array = geotransform[3] + (row_grid + 0.5) * pixel_height_degrees
    latitude_array = np.clip(latitude_array, -90, 90)
    export_array_as_tif(latitude_array.astype(np.float64), latitude_path, dtype=gdal.GDT_Float64)
    print(f"Raster with cell latitude in decimal degrees created: {latitude_path}")

# Snyder, J. P. (1987). Map projections--A working manual (Vol. 1395). US Government Printing Office.

# WGS84 ellipsoid parameters. The Earth bulges at the equator and is flattened at the poles.
# This shape is defined by the equatorial radius and the polar radius (both in metres).
# Eccentricity squared measures how much the ellipsoid deviates from a sphere.
# Snyder Page viii (Symbols) defines these as:
# a = equatorial radius (semimajor axis)
# b = polar radius (semiminor axis)
# The eccentricity squared calculation is defined as e**2 = 1 - (b**2/a**2)
equatorial_radius = 6_378_137.0
polar_radius = 6_356_752.31425
eccentricity_squared = 1.0 - (polar_radius**2 / equatorial_radius**2)
# Multiplying an angle in degrees by this factor gives the angle in radians.
degrees_to_radians = np.pi / 180

# Load latitude raster and convert to radians, where one radian is the angle at which
# arc length equals the radius, required for sin() and cos() functions.
# The raster is read back from disk, so this also works when latitude.tif already existed.
latitude_array = read_raster_as_array(latitude_path)
latitude_radians = np.radians(latitude_array)

# Geodetic factor W = sqrt(1 - e**2 sin**2 phi), common to ellipsoidal curvature calculations.
# Appears in Snyder eqs. 4-18 through 4-21; where e**2 = eccentricity_squared, phi = latitude.
sin_latitude_squared = np.sin(latitude_radians)**2
geodetic_factor = np.sqrt(1 - eccentricity_squared * sin_latitude_squared)

# Cell width in metres. This changes strongly with latitude because lines of
# longitude are furthest apart at the equator and meet at the poles: one degree
# of longitude spans ~111 km at the equator but nearly 0 km at the poles.
cell_size_x_path = join(areas_dir, "cell_size_x.tif")
if exists(cell_size_x_path):
    print(f"Raster with cell width in metres already exists: {cell_size_x_path}")
else:
    # Prime vertical radius of curvature: the radius of the east-west curve at a latitude.
    # Snyder equation 4-20: N = a/W where a = equatorial_radius, W = geodetic_factor.
    prime_vertical_radius = equatorial_radius / geodetic_factor
    # Arc length = radius * angle in radians; the cosine accounts for lines of
    # longitude converging towards the poles.
    # Snyder equation 4-21: L_lambda = a*cos(phi)/W for one radian, here multiplied by pixel width.
    cell_size_x_array = degrees_to_radians * prime_vertical_radius * np.cos(latitude_radians) * pixel_width_degrees
    export_array_as_tif(cell_size_x_array.astype(np.float64), cell_size_x_path, dtype=gdal.GDT_Float64)
    print(f"Raster with cell width in metres created: {cell_size_x_path}")

# Cell height in metres. This changes only slightly with latitude, due to the Earth's shape.
cell_size_y_path = join(areas_dir, "cell_size_y.tif")
if exists(cell_size_y_path):
    print(f"Raster with cell height in metres already exists: {cell_size_y_path}")
else:
    # Meridional radius of curvature: the radius of the north-south curve at a latitude.
    # Snyder equations 4-18 and 4-19: R' = a(1-e**2)/W**3 where a = equatorial_radius, W = geodetic_factor.
    meridional_radius = equatorial_radius * (1 - eccentricity_squared) / geodetic_factor**3
    # Arc length = radius * angle in radians; the absolute value is taken
    # because pixel_height_degrees is negative.
    cell_size_y_array = np.abs(degrees_to_radians * meridional_radius * pixel_height_degrees)
    export_array_as_tif(cell_size_y_array.astype(np.float64), cell_size_y_path, dtype=gdal.GDT_Float64)
    print(f"Raster with cell height in metres created: {cell_size_y_path}")

# Prediction area

In [None]:
# Create the prediction area.
# Smaller than the template area, as feature edge effects will have erroneous values.
# Marginally (1 pixel) larger than the project area extent, ensuring all pixels
# touching the project area are included.
crs_epsg = 4326
prediction_area_path = join(polygons_dir, "prediction_area.gpkg")
if not exists(prediction_area_path):
    # Obtain max pixel resolution to buffer polygons. The paths are defined here
    # so this cell does not depend on variable names from the measurement cell.
    cell_size_x_path = join(areas_dir, "cell_size_x.tif")
    cell_size_y_path = join(areas_dir, "cell_size_y.tif")
    cell_x_array = read_raster_as_array(cell_size_x_path)
    cell_y_array = read_raster_as_array(cell_size_y_path)
    buffer_distance_metres = np.round(max(np.max(cell_x_array), np.max(cell_y_array)))
    # Release the arrays once the maximum has been taken
    cell_x_array = cell_y_array = None
    # Suppress the warning about operating in a geographic CRS, as we account for this.
    # However larger buffers or project areas near the poles might still need to be converted.
    # The suppression is scoped to these calls so unrelated UserWarnings stay visible.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        # Get the centroid of the (first) project polygon
        project_polygon_centroid = project_area_read.centroid.values[0]
        # Convert the buffer distance from metres to decimal degrees based on the latitude of the centroid
        buffer_distance_degrees = buffer_distance_metres / (111320 * abs(math.cos(math.radians(project_polygon_centroid.y))))
        # Buffer the polygon
        project_area_buffered = project_area_read.buffer(buffer_distance_degrees)
    # Create a bounding box polygon and save
    prediction_area = box(*project_area_buffered.total_bounds)
    gdf = gpd.GeoDataFrame(geometry=[prediction_area], crs=f"EPSG:{crs_epsg}")
    gdf.to_file(prediction_area_path, driver='GPKG')
    print(f"Buffered the project area to {buffer_distance_metres}m and created a bounding box: {prediction_area_path}")
else: print(f"Project area has already been buffered and bound to create a prediction area: {prediction_area_path}")

# Inverse prediction area for masking: the template extent with the prediction area removed
inverse_prediction_area_path = join(polygons_dir, "prediction_area_inverse.gpkg")
if exists(inverse_prediction_area_path):
  print(f"An inverse prediction area already exists at: {inverse_prediction_area_path}")
else:
  template_polygon_path = join(polygons_dir, "template.gpkg")
  template_polygon = gpd.read_file(template_polygon_path)
  prediction_area_polygon = gpd.read_file(prediction_area_path)
  # Dissolve each layer and take its first geometry before differencing
  template_outline = template_polygon.dissolve().geometry.iloc[0]
  prediction_outline = prediction_area_polygon.dissolve().geometry.iloc[0]
  inverse_prediction_area_polygon = template_outline.difference(prediction_outline)
  inverse_prediction_area_polygon_gdf = gpd.GeoDataFrame(
      {'geometry': [inverse_prediction_area_polygon]}, crs=f"EPSG:{crs_epsg}")
  inverse_prediction_area_polygon_gdf.to_file(inverse_prediction_area_path, driver="GPKG")
  print(f"An inverse prediction area polygon has been created: {inverse_prediction_area_path}")

# Disconnect runtime

In [None]:
# Useful for stopping background execution
# Unassigns the Colab runtime for this notebook (the 'Disconnect runtime' step).
runtime.unassign()