<a href="https://colab.research.google.com/github/joekelly211/masfi/blob/main/1_areas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports, directories and functions

In [None]:
# Choose where the project lives on Google Drive.
# Personal drive: a path beginning '/content/drive/MyDrive/'
# Shared drive: a path beginning '/gdrive/Shareddrives/' (must be created first)

base_dir = "/gdrive/Shareddrives/masfi"
# base_dir = '/content/drive/MyDrive/masfi'

# Mount the Google Drive location that matches the chosen base directory
from google.colab import drive
import os
import sys

shared_drive_prefix = '/gdrive/Shareddrives/'
personal_drive_prefix = '/content/drive/MyDrive/'

if base_dir.startswith(shared_drive_prefix):
  drive.mount('/gdrive', force_remount=True)
elif base_dir.startswith(personal_drive_prefix):
  drive.mount('/content/drive', force_remount=True)
  # Only the personal drive folder is created automatically
  os.makedirs(base_dir, exist_ok=True)
else:
  print("Create a base_dir beginning with '/gdrive/Shareddrives/' or '/content/drive/MyDrive/'.")

# Append the resolved base directory to sys.path once
_path_to_add = os.path.realpath(base_dir)
if sys.path.count(_path_to_add) == 0:
    sys.path.append(_path_to_add)

In [None]:
# Capture outputs
%%capture
# Installs and upgrades
!pip install geopandas

In [None]:
# Reload imports, replacing those in the cache.
%load_ext autoreload
%autoreload 2
import geopandas as gpd
from google.colab import runtime
import ipywidgets as widgets
import math
import numpy as np
import requests
import tarfile
import warnings
from os import makedirs, remove
from os.path import exists, join
from shapely.geometry import box
from shutil import copyfile, copy
from osgeo import gdal, ogr

In [None]:
# Project directory layout, all built from base_dir.
areas_dir = join(base_dir, "1_areas")
predictors_dir = join(base_dir, "3_predictors")
polygons_dir = join(areas_dir, "polygons")
dem_dir = join(areas_dir, "dem")
dem_tiles_dir = join(dem_dir, "tiles")

# Make sure the '1_areas' tree exists (predictors_dir is defined but not created here).
for required_dir in (areas_dir, polygons_dir, dem_dir, dem_tiles_dir):
  makedirs(required_dir, exist_ok=True)

In [None]:
# Global function: export an array as a .tif
template_tif_path = join(areas_dir, "template.tif")
nodatavalue = -1111111
compress = True
def export_array_as_tif(input_array, output_tif, template=template_tif_path, nodatavalue=nodatavalue, compress=compress):
  """Write an array to a single-band Float32 GeoTIFF georeferenced like a template raster.

  Args:
    input_array: Array written to band 1 of the output.
    output_tif: Path of the GeoTIFF to create.
    template: Path of the raster whose size, geotransform and projection are
      copied to the output. Defaults to the project template raster.
    nodatavalue: Value registered as the band's nodata value.
    compress: If truthy, the file is created with DEFLATE compression
      (PREDICTOR=2, ZLEVEL=9); otherwise no creation options are passed.

  Raises:
    RuntimeError: If the template cannot be opened or the output cannot be
      created (when GDAL returns None instead of raising by itself).
  """
  template_dataset = gdal.Open(template)
  if template_dataset is None:
    raise RuntimeError(f"Could not open template raster: {template}")
  template_band = template_dataset.GetRasterBand(1)
  template_dimensions, template_projection = template_dataset.GetGeoTransform(), template_dataset.GetProjection()

  # Decide on truthiness so that values such as None or 0 also select the
  # uncompressed branch instead of leaving the output dataset undefined.
  create_kwargs = {"bands": 1, "eType": gdal.GDT_Float32}
  if compress:
    create_kwargs["options"] = ["COMPRESS=DEFLATE", "PREDICTOR=2", "ZLEVEL=9"]
  output_dataset = gdal.GetDriverByName("GTiff").Create(output_tif, template_band.XSize, template_band.YSize, **create_kwargs)
  if output_dataset is None:
    raise RuntimeError(f"Could not create output raster: {output_tif}")

  output_band = output_dataset.GetRasterBand(1)
  output_band.WriteArray(input_array)
  output_band.SetNoDataValue(nodatavalue)
  output_dataset.SetGeoTransform(template_dimensions)
  output_dataset.SetProjection(template_projection)

  # Flush and release the GDAL objects explicitly so the file is complete on
  # disk when the function returns, rather than relying on garbage collection.
  output_dataset.FlushCache()
  output_band = None
  output_dataset = None
  template_band = None
  template_dataset = None

# Project area

In [None]:
# Upload 'project_area.gpkg' polygon to the 1_areas/polygons directory.
# This can be a polygon of any shape. A bounding box will be used to create the
# GEDI download area in 1_variates.ipynb. A buffered bounding box will be used
# for the raster template, to ensure all predictor edge effects are included.

# Project CRS EPSG
crs_epsg = 4326

# Recommended to buffer at least 300 m to account for predictor edge effects
# and clipping imprecision
buffer_distance_metres = 300

project_area_path = join(polygons_dir, 'project_area.gpkg')

if exists(project_area_path):
  print("Project polygon found:\n")
  # Read project polygon
  project_area_read = gpd.read_file(project_area_path)
  display(project_area_read["geometry"].iloc[0])
  # A file without CRS information has crs None, which has no to_epsg();
  # treat it like a CRS mismatch instead of crashing.
  project_area_crs = project_area_read.crs
  if project_area_crs is not None and project_area_crs.to_epsg() == crs_epsg:
    project_area_buffered_bbox_path = join(polygons_dir, 'project_area_buffered_bbox.gpkg')
    # Calculate the bounding box of the project polygon
    if not exists(project_area_buffered_bbox_path):
      # Suppress warnings about not being a projected CRS, as we account for this.
      # The suppression is limited to these operations so that unrelated
      # UserWarnings in later cells stay visible.
      # However larger buffers or project areas near the poles might still need to be converted.
      with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        # Get the centroid of the (first) project polygon
        project_polygon_centroid = project_area_read.centroid.values[0]
        # Convert the buffer distance from metres to decimal degrees at the
        # centroid latitude. Dividing by cos(latitude) can only enlarge the
        # result, so the buffer is never smaller than metres / 111320 degrees.
        buffer_distance_degrees = buffer_distance_metres / (111320 * abs(math.cos(math.radians(project_polygon_centroid.y))))
        # Buffer the polygon
        project_area_buffered = project_area_read.buffer(buffer_distance_degrees)
      # Create a bounding box polygon and save
      project_area_buffered_bbox = box(*project_area_buffered.total_bounds)
      gdf = gpd.GeoDataFrame(geometry=[project_area_buffered_bbox], crs=f"EPSG:{crs_epsg}")
      gdf.to_file(project_area_buffered_bbox_path, driver='GPKG')
      print(f"Buffered the project area to {buffer_distance_metres} and created a bounding box: {project_area_buffered_bbox_path}")
    else: print(f"Project area has already been buffered and bound to a box: {project_area_buffered_bbox_path}")
    # Read the buffered project area bounding box
    project_area_buffered_bbox_read = gpd.read_file(project_area_buffered_bbox_path)
    bbox_bounds = project_area_buffered_bbox_read.total_bounds
    project_x_min, project_x_max = bbox_bounds[0], bbox_bounds[2]
    project_y_min, project_y_max = bbox_bounds[1], bbox_bounds[3]
    print(f"\nThe buffered polygon bounding box has the coordinates:\n{project_x_min}, {project_y_min} to {project_x_max}, {project_y_max}.")
  # Report the configured EPSG code rather than a hard-coded one
  else: print(f"Reproject 'project_area.gpkg' to EPSG:{crs_epsg}.")
else: print("Create 'project_area.gpkg' and upload to 1_areas/polygons")

# Download DEM tiles

In [None]:
# Download Copernicus DEM tiles for the project area. Currently the most recent
# product is COP-DEM_GLO-30-DGED__2023_1
# press release > https://sentinels.copernicus.eu/web/sentinel/-/copernicus-dem-new-direct-data-download-access
# Guide > https://spacedata.copernicus.eu/documents/20123/121286/Copernicus+DEM+Open+HTTPS+Access.pdf
# List of dataset tiles > https://prism-dem-open.copernicus.eu/pd-desk-open-access/publicDemURLs/COP-DEM_GLO-30-DGED__2023_1
# Sometimes the direct servers are down, in which case the '.tar' for each tile needs
# to be manually downloaded through other (ever-changing) GLO-30 data repositories
# and uploaded to '1_areas/dem/tiles'. Then continue to the 'Process DEM tiles' section.

dem_tiles_url_txt_path = join(dem_dir, "dem_tiles_url.txt")
dem_tiles_url_list = []

# If the list of DEM tiles exists as a .txt then open, otherwise download and save.
if exists(dem_tiles_url_txt_path):
  with open(dem_tiles_url_txt_path, 'r') as dem_tiles_url_txt_file:
    # Strip whitespace instead of slicing off one character, so a final line
    # without a trailing newline keeps its last character; skip blank lines.
    dem_tiles_url_list = [line.strip() for line in dem_tiles_url_txt_file if line.strip()]

if dem_tiles_url_list:
  print("DEM tile URL dowload list exists as 'dem_tiles_url.txt' in '1_areas/dem'.")
else:
  # No usable cached list (missing or empty file): fetch it from the server.
  # Get html content to search for DEM tile download URLS
  html = 'https://prism-dem-open.copernicus.eu/pd-desk-open-access/publicDemURLs/COP-DEM_GLO-30-DGED__2023_1'
  # A timeout stops the cell hanging indefinitely when the server does not respond
  response = requests.get(html, timeout=120)
  response.raise_for_status()
  html_content = response.text
  # Create list of DEM tile download URLs
  url_prefix, url_suffix = '<nativeDemUrl>', '</nativeDemUrl>'
  # Split the document once; every piece after the first starts with a URL
  for url_fragment in html_content.split(url_prefix)[1:]:
    tile_url = url_fragment.split(url_suffix)[0]
    if 'DSM_10' in tile_url: dem_tiles_url_list.append(tile_url) # avoid DSM_30
  # Do not cache an empty list, it would be reused on every later run
  if not dem_tiles_url_list:
    raise RuntimeError(f"No DEM tile URLs were found at: {html}")
  # Save the DEM tiles URL list as a .txt file
  with open(dem_tiles_url_txt_path, 'w') as dem_tiles_url_txt_file:
    dem_tiles_url_txt_file.writelines(f"{url}\n" for url in dem_tiles_url_list)
  print("DEM tile URL download list saved to 'dem_tiles_url.txt' in '1_areas/dem'.")

# Read the buffered project area bounding box
project_area_buffered_bbox_path = join(polygons_dir, 'project_area_buffered_bbox.gpkg')
project_area_buffered_bbox_read = gpd.read_file(project_area_buffered_bbox_path)
bbox_bounds = project_area_buffered_bbox_read.total_bounds
project_x_min, project_x_max = bbox_bounds[0], bbox_bounds[2]
project_y_min, project_y_max = bbox_bounds[1], bbox_bounds[3]

# Set number of degrees per tile (assuming square)
tile_size = 1

# Sign applied to the coordinate for each hemisphere letter in a tile name
tile_direction_sign = {'N': 1, 'E': 1, 'S': -1, 'W': -1}

# Filter URL list to tiles overlapped by the project polygon bounding box
dem_tiles_url_list_filtered = []

for url in dem_tiles_url_list:
  # Tile file names end with '<N|S>dd_mm_<E|W>ddd_mm.tar'. Parse the name by
  # its '_' separated fields rather than by fixed character positions, so
  # stray whitespace around the URL does not shift the fields.
  tile_name = url.strip().rsplit('/', 1)[-1].rsplit('.', 1)[0]
  try:
    tile_y_token, tile_y_minutes_token, tile_x_token, tile_x_minutes_token = tile_name.split('_')[-4:]
    tile_y_dir, tile_y_deg, tile_y_minutes = tile_y_token[0], int(tile_y_token[1:]), int(tile_y_minutes_token)
    tile_x_dir, tile_x_deg, tile_x_minutes = tile_x_token[0], int(tile_x_token[1:]), int(tile_x_minutes_token)
    if tile_y_dir not in 'NS' or tile_x_dir not in 'EW':
      raise ValueError(tile_name)
  except (ValueError, IndexError):
    # Skip names that do not follow the pattern instead of silently reusing
    # the bounds of the previous tile.
    print(f"Skipping URL with an unrecognised tile name: {url}")
    continue

  # Convert to decimal degree coordinates: the name holds the tile's minimum
  # (south-west) corner, which is negative for the S and W hemispheres.
  tile_x_min = tile_direction_sign[tile_x_dir] * (tile_x_deg + (tile_x_minutes / 60.0))
  tile_x_max = tile_x_min + tile_size
  tile_y_min = tile_direction_sign[tile_y_dir] * (tile_y_deg + (tile_y_minutes / 60.0))
  tile_y_max = tile_y_min + tile_size

  # Check whether the project bounding box overlaps the tile
  lon_check = project_x_max > tile_x_min and project_x_min < tile_x_max
  lat_check = project_y_max > tile_y_min and project_y_min < tile_y_max
  if lon_check and lat_check:
    dem_tiles_url_list_filtered.append(url)

print("DEM tile URL list filtered to project_area_buffered_bbox.gpkg.")

In [None]:
# Display progress
index = 0
progress_label = widgets.Label(value=f"DEM tile download progress: {index}/{len(dem_tiles_url_list_filtered)}")
display(progress_label)

# The download servers are sometimes down. Retry each tile a limited number of
# times with a pause in between, then move on, instead of looping forever.
import time
max_download_attempts = 5
retry_wait_seconds = 5
failed_urls = []

# Process URLs
for url in dem_tiles_url_list_filtered:
    url = url.strip()  # Remove any white space
    dem_tile_zip_filename = url.split('/')[-1]
    dem_tile_zip_path = join(dem_tiles_dir, dem_tile_zip_filename)
    for attempt in range(1, max_download_attempts + 1):
        try:
            if not exists(dem_tile_zip_path):
                request = requests.get(url, allow_redirects=True, timeout=120)
                # Do not save an HTTP error page as if it were a tile
                request.raise_for_status()
                # Context manager closes the file before it is validated below
                with open(dem_tile_zip_path, 'wb') as dem_tile_zip_file:
                    dem_tile_zip_file.write(request.content)
            with tarfile.open(dem_tile_zip_path, 'r') as tar:
                tar.getmembers()  # Check if tarball is valid
            break  # Exit loop if successful
        except Exception as e:
            if exists(dem_tile_zip_path):
                remove(dem_tile_zip_path)  # Delete file if invalid
            print(f"Failed URL: {url} - {e}")
            if attempt < max_download_attempts:
                time.sleep(retry_wait_seconds)
    else:
        # Every attempt failed: record the tile and continue with the next one
        failed_urls.append(url)
        continue
    # Update progress
    index += 1
    progress_label.value = f"DEM tile download progress: {index}/{len(dem_tiles_url_list_filtered)}"

if failed_urls:
    print(f"{len(failed_urls)} DEM tile(s) could not be downloaded after {max_download_attempts} attempts each. "
          "Re-run this cell, or upload the '.tar' files manually to '1_areas/dem/tiles':")
    for failed_url in failed_urls:
        print(failed_url)

# Process DEM tiles

In [None]:
# List the tile archives first, so the progress total reflects what is in the
# tiles directory. Tiles may have been uploaded manually, in which case the
# filtered URL list created by the download cells is not available.
dem_tile_tar_files = sorted(file for file in os.listdir(dem_tiles_dir) if file.endswith(".tar"))

# Display progress
index = 0
progress_label = widgets.Label(value=f"DEM tile extraction progress: {index}/{len(dem_tile_tar_files)}")
display(progress_label)

# Extract tile 'DEM.tif' into the DEM tiles directory if it doesn't already exist.
for file in dem_tile_tar_files:
  dem_tile_filename = f"{file[:-4]}_DEM.tif"
  dem_tile_path = join(dem_tiles_dir, dem_tile_filename)
  if not exists(dem_tile_path):
    tar_path = join(dem_tiles_dir, file)
    # Context manager closes the archive once its members have been extracted
    with tarfile.open(tar_path, 'r') as tar_file:
      for member in tar_file.getmembers():
        if dem_tile_filename in member.name:
          # Drop the archive's internal folders so the file lands directly in
          # the tiles directory
          member.name = os.path.basename(member.name)
          tar_file.extract(member, dem_tiles_dir)
  index += 1
  progress_label.value = f"DEM tile extraction progress: {index}/{len(dem_tile_tar_files)}"

In [None]:
# Merge the DEM tiles into a single raster
dem_merged_path = join(dem_dir, "dem_merged.tif")

if not exists(dem_merged_path):
  # List tiles (sorted so the merge order is the same on every run)
  tiles_to_merge = sorted(join(dem_tiles_dir, file) for file in os.listdir(dem_tiles_dir) if file.endswith(".tif"))
  if not tiles_to_merge:
    raise FileNotFoundError(f"No '.tif' DEM tiles found to merge in: {dem_tiles_dir}")
  # Create a temporary virtual file (VRT) from the tiles
  temp_vrt = join(dem_dir, 'temp.vrt')
  temp_vrt_dataset = gdal.BuildVRT(temp_vrt, tiles_to_merge)
  # Release the VRT dataset so it is written to disk before being read back
  temp_vrt_dataset = None
  # Merge the input files into a single GeoTIFF file
  merge_options = gdal.TranslateOptions(format='GTiff', outputType=gdal.GDT_Float32, noData=nodatavalue,
                                  creationOptions=['COMPRESS=DEFLATE', 'PREDICTOR=2', 'ZLEVEL=9'])
  dem_merged_dataset = gdal.Translate(dem_merged_path, temp_vrt, options=merge_options)
  # Release the output dataset so the merged raster is flushed and closed
  dem_merged_dataset = None
  # Remove the temporary VRT file
  os.remove(temp_vrt)
  print(f"The merged DEM raster has been saved to: {dem_merged_path}")
else: print(f"A merged DEM raster already exists at: {dem_merged_path}")

# Clip the merged raster to the buffered project bounding box
dem_merged_clipped_path = join(dem_dir, "dem_merged_clipped.tif")

if exists(dem_merged_clipped_path):
  print(f"A clipped merged DEM raster already exists at: {dem_merged_clipped_path}")
else:
  # Bounds of the buffered project area bounding box
  project_area_buffered_bbox_path = join(polygons_dir, 'project_area_buffered_bbox.gpkg')
  project_area_buffered_bbox_read = gpd.read_file(project_area_buffered_bbox_path)
  bbox_bounds = project_area_buffered_bbox_read.total_bounds
  project_x_min, project_y_min = bbox_bounds[0], bbox_bounds[1]
  project_x_max, project_y_max = bbox_bounds[2], bbox_bounds[3]
  # Clip window given as (x_min, y_max) followed by (x_max, y_min)
  project_coords = [project_x_min, project_y_max, project_x_max, project_y_min]
  clip_options = gdal.TranslateOptions(projWin=project_coords,
                                  outputType=gdal.GDT_Float32, noData=nodatavalue)
  # Write the clipped copy of the merged DEM
  gdal.Translate(dem_merged_clipped_path, dem_merged_path, options=clip_options)
  print(f"The clipped, merged DEM raster has been saved to: {dem_merged_clipped_path}")

# Copy the clipped, merged DEM to 'base_dem.tif' in the '1_areas' directory
# (areas_dir) to use as the base DEM. An existing base DEM is left untouched.
# NOTE(review): this comment previously named the '3_predictors' directory, but
# the destination below is built from areas_dir - confirm which is intended.
base_dem_path = join(areas_dir, "base_dem.tif")

if not exists(base_dem_path):
  copy(dem_merged_clipped_path, base_dem_path)
  print(f"The clipped, merged DEM has been copied for use as a base DEM: {base_dem_path}")
else: print(f"A base DEM already exists at: {base_dem_path}")

# Create template

In [None]:
# Create template from DEM: same grid as the clipped DEM, every cell set to 1
template_tif_path = join(areas_dir, "template.tif")
if exists(template_tif_path):
  print(f"A template raster already exists at: {template_tif_path}")
else:
  dem_merged_clipped_path = join(dem_dir, "dem_merged_clipped.tif")
  # Read the clipped DEM into an array
  dem_merged_clipped_array = gdal.Open(dem_merged_clipped_path).ReadAsArray()
  # Array of ones with the DEM's shape and dtype
  template_array = np.ones_like(dem_merged_clipped_array)
  # The clipped DEM supplies the georeferencing for the new template
  export_array_as_tif(
      template_array,
      template_tif_path,
      template=dem_merged_clipped_path,
      compress=False,
  )
  print(f"A template raster has been created: {template_tif_path}")

In [None]:
# Create template polygon
template_polygon_path = join(polygons_dir, "template.gpkg")
if not exists(template_polygon_path):
  # Get template raster spatial data
  template_raster = gdal.Open(template_tif_path)
  template_raster_band = template_raster.GetRasterBand(1)
  spatial_ref = ogr.osr.SpatialReference()
  spatial_ref.ImportFromWkt(template_raster.GetProjection())
  # Polygonize template raster without fields or layer name
  template_polygon_file = ogr.GetDriverByName("GPKG").CreateDataSource(template_polygon_path)
  template_polygon_layer = template_polygon_file.CreateLayer("", srs=spatial_ref, geom_type=ogr.wkbPolygon)
  gdal.Polygonize(template_raster_band, None, template_polygon_layer, -1)
  # Release the layer, then the data source, so the GeoPackage is flushed and
  # closed before it is read back below. As notebook globals these objects
  # would otherwise stay open for the rest of the session.
  template_polygon_layer = None
  template_polygon_file = None
  template_raster_band = None
  template_raster = None
  print(f"A template polygon has been created: {template_polygon_path}")
else: print(f"A template polygon already exists at: {template_polygon_path}")
# Read the polygon back and report its extent
template_polygon_read = gpd.read_file(template_polygon_path)
template_polygon_bounds = template_polygon_read.total_bounds
print(f"\nThe template polygon has the coordinates:\n{template_polygon_bounds[0]}, {template_polygon_bounds[1]} to {template_polygon_bounds[2]}, {template_polygon_bounds[3]}.")

# Create an inverse project area polygon for masking: the part of the template
# polygon that lies outside the project area.
inverse_project_area_path = join(polygons_dir, "project_area_inverse.gpkg")
if not exists(inverse_project_area_path):
  template_polygon_path = join(polygons_dir, "template.gpkg")
  template_polygon = gpd.read_file(template_polygon_path)
  project_area_polygon = gpd.read_file(project_area_path)
  # NOTE(review): only the first row of the difference is kept - confirm that
  # both files hold a single feature.
  inverse_project_area_polygon = template_polygon['geometry'].difference(project_area_polygon['geometry']).iloc[0]
  inverse_project_area_polygon_gdf = gpd.GeoDataFrame({'geometry': [inverse_project_area_polygon]}, crs=f"EPSG:{crs_epsg}")
  inverse_project_area_polygon_gdf.to_file(inverse_project_area_path, driver="GPKG")
  # Report the inverse polygon's own path (not the template polygon's)
  print(f"An inverse project area polygon has been created: {inverse_project_area_path}")
else: print(f"An inverse project area already exists at: {inverse_project_area_path}")

# Create measurement rasters

In [None]:
# Create measurement rasters for predictors and precise summing of pixels

# Define template
# Open 'template.tif' from the 1_areas directory and read it into an array.
# The array is unpacked as (rows, cols), so it is expected to be two-dimensional;
# these counts size the per-pixel rasters created further down in this cell.
template_path = join(areas_dir, "template.tif")
template = gdal.Open(template_path)
template_array = template.ReadAsArray()
rows, cols = template_array.shape

# Define Earth radius
equatorial_radius = 6_378_137.0 # Equatorial radius in metres
polar_radius = 6_356_752.0 # Polar radius in metres

# Function for obtaining latitude distance in meters from decimal degrees, at a specific latitude
def distance_of_decimal_degrees_latitude(latitude: float, decimal_degrees: float) -> float:
    """Return the north-south length in metres of a span of latitude.

    The length of one degree is evaluated on the ellipsoid defined by
    equatorial_radius and polar_radius at the given latitude, then scaled
    linearly by decimal_degrees.

    Args:
        latitude: Latitude in decimal degrees at which to evaluate.
        decimal_degrees: Size of the span in decimal degrees.

    Returns:
        The distance in metres as a numpy float64.
    """
    # Calculate the eccentricity squared (e2)
    e2 = (equatorial_radius**2 - polar_radius**2) / equatorial_radius**2
    # Convert latitude to radians
    latitude_rad = math.radians(latitude)
    # Calculate the meridional radius of curvature (M)
    M = equatorial_radius * (1 - e2) / (1 - e2 * math.sin(latitude_rad)**2)**(3/2)
    # Calculate the distance of one degree of latitude
    distance_per_degree = math.pi * M / 180
    # Calculate the distance of the specified decimal degrees
    distance_latitude = distance_per_degree * decimal_degrees
    return np.float64(distance_latitude) # Force high precision

# Function for obtaining longitude distance in meters from decimal degrees, at a specific latitude
def distance_of_decimal_degrees_longitude(latitude: float, decimal_degrees: float) -> float:
    """Return the east-west length in metres of a span of longitude.

    Uses the same ellipsoid as distance_of_decimal_degrees_latitude: the
    radius of the parallel is N * cos(latitude), where N is the prime-vertical
    radius of curvature. (A sphere of equatorial radius would underestimate
    the length away from the equator.)

    Args:
        latitude: Latitude in decimal degrees at which to evaluate.
        decimal_degrees: Size of the span in decimal degrees.

    Returns:
        The distance in metres as a numpy float64; 0.0 for latitudes within
        0.1 degrees of a pole.
    """
    # Handle pole proximity
    if abs(latitude) > 89.9: return np.float64(0.0)
    # Calculate the eccentricity squared (e2)
    e2 = (equatorial_radius**2 - polar_radius**2) / equatorial_radius**2
    # Convert latitude to radians
    latitude_rad = math.radians(latitude)
    # Calculate the prime-vertical radius of curvature (N)
    N = equatorial_radius / math.sqrt(1 - e2 * math.sin(latitude_rad)**2)
    # Calculate the distance of one degree of longitude at the given latitude
    distance_per_degree = (math.pi * N * math.cos(latitude_rad)) / 180
    # Calculate the distance of the specified decimal degrees
    distance_longitude = distance_per_degree * decimal_degrees
    return np.float64(distance_longitude) # Force high precision

geotransform = template.GetGeoTransform()

# Create a raster for the longitude in decimal degrees at the center of each pixel
longitude_path = join(areas_dir, "longitude.tif")
if not exists(longitude_path):
    # Pixel-centre positions in pixel units: a row vector of columns and a
    # column vector of rows, which broadcast to a (rows, cols) grid.
    longitude_centre_cols = np.arange(cols, dtype=np.float64)[np.newaxis, :] + 0.5
    longitude_centre_rows = np.arange(rows, dtype=np.float64)[:, np.newaxis] + 0.5
    # GDAL geotransform: x = gt[0] + col * gt[1] + row * gt[2]. The row term
    # uses gt[2] (gt[4] belongs to the y equation); both are 0 for a north-up
    # raster, so the result only differs for rotated rasters.
    longitude_array = geotransform[0] + (longitude_centre_cols * geotransform[1]) + (longitude_centre_rows * geotransform[2])
    # Handle antimeridian wrapping
    longitude_array = (longitude_array + 180) % 360 - 180
    export_array_as_tif(longitude_array, longitude_path)
    print(f"Raster with cell longitude in decimal degrees created: {longitude_path}")
else: print(f"Raster with cell longitude in decimal degrees already exists: {longitude_path}")

# Create a raster for the latitude in decimal degrees at the center of each pixel
latitude_path = join(areas_dir, "latitude.tif")
if not exists(latitude_path):
    # Pixel-centre positions in pixel units: a row vector of columns and a
    # column vector of rows, which broadcast to a (rows, cols) grid.
    latitude_centre_cols = np.arange(cols, dtype=np.float64)[np.newaxis, :] + 0.5
    latitude_centre_rows = np.arange(rows, dtype=np.float64)[:, np.newaxis] + 0.5
    # GDAL geotransform: y = gt[3] + col * gt[4] + row * gt[5]. The column term
    # uses gt[4] (gt[2] belongs to the x equation); both are 0 for a north-up
    # raster, so the result only differs for rotated rasters.
    latitude_array = geotransform[3] + (latitude_centre_cols * geotransform[4]) + (latitude_centre_rows * geotransform[5])
    # Clamp latitude to valid range
    latitude_array = np.clip(latitude_array, -90, 90)
    export_array_as_tif(latitude_array, latitude_path)
    print(f"Raster with cell latitude in decimal degrees created: {latitude_path}")
else: print(f"Raster with cell latitude in decimal degrees already exists: {latitude_path}")
# Always continue from the values stored in the raster file
latitude_array = gdal.Open(latitude_path).ReadAsArray()

# Create a raster for the cell width in meters
cell_size_x_path = join(areas_dir, "cell_size_x.tif")
if exists(cell_size_x_path):
    print(f"Raster with cell width in metres already exists: {cell_size_x_path}")
else:
    # Apply the scalar longitude-distance function to every pixel's latitude,
    # using the pixel width in degrees from the geotransform
    longitude_distance_per_pixel = np.vectorize(distance_of_decimal_degrees_longitude)
    cell_size_x_array = longitude_distance_per_pixel(latitude_array, geotransform[1])
    export_array_as_tif(cell_size_x_array, cell_size_x_path)
    print(f"Raster with cell width in metres created: {cell_size_x_path}")

# Create a raster for the cell height in meters
cell_size_y_path = join(areas_dir, "cell_size_y.tif")
if exists(cell_size_y_path):
    print(f"Raster with cell height in metres already exists: {cell_size_y_path}")
else:
    # Apply the scalar latitude-distance function to every pixel's latitude,
    # using the absolute pixel height in degrees from the geotransform
    latitude_distance_per_pixel = np.vectorize(distance_of_decimal_degrees_latitude)
    cell_size_y_array = latitude_distance_per_pixel(latitude_array, abs(geotransform[5]))
    export_array_as_tif(cell_size_y_array, cell_size_y_path)
    print(f"Raster with cell height in metres created: {cell_size_y_path}")

# Disconnect runtime

In [None]:
# Useful for stopping background execution
# Final step of the notebook: per the 'Disconnect runtime' heading, this call
# (from google.colab's runtime module) is meant to release the Colab runtime.
runtime.unassign()