# Preparing Shapefiles for the SAPPHIRE Data Gateway

## Imports


In [1]:
import sys
import os
from glob import glob

import geopandas as gpd
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from shapely.geometry import shape, MultiPolygon, box
from rasterio.plot import show

import rasterio
from rasterio.mask import mask
from rasterio.features import shapes
from rasterio.merge import merge

# import elevation
from tqdm import tqdm

In [2]:
# Smoke-test cell: print ten progress lines, numbered from 1.
for i, iteration in enumerate(range(1, 11)):
    print(f"Processing iteration {iteration}")

Processing iteration 1
Processing iteration 2
Processing iteration 3
Processing iteration 4
Processing iteration 5
Processing iteration 6
Processing iteration 7
Processing iteration 8
Processing iteration 9
Processing iteration 10


In [None]:
def get_elevation_bands(
    dem_sources, basin_polygon, crs, code, band_ranges=None, band_range=500
):
    """
    Polygonize the elevation bands of one basin from one or more DEMs.

    The DEMs whose bounding box intersects the basin's bounding box are
    selected (and mosaicked if there is more than one), clipped to the basin
    polygon, classified into elevation bands and converted to one
    MultiPolygon per band.

    Args:
        dem_sources: List of open rasterio dataset objects. Only band 1 of
            the clipped raster is used.
        basin_polygon: Shapely geometry of the basin. It is compared against
            the DEM bounds as-is, so it is assumed to be in the same CRS as
            the DEMs (not checked here).
        crs: Coordinate reference system assigned to the returned
            GeoDataFrame.
        code: Basin code identifier, written to the "CODE" column.
        band_ranges: Optional list of (lower, upper) elevation tuples. A
            pixel belongs to a band when lower < elevation <= upper. If
            None, bands of width `band_range` are derived starting at the
            basin's minimum elevation; in that case the first band also
            includes its lower edge so the lowest pixels are not lost.
        band_range: Elevation interval used when band_ranges is None. Must
            be positive.

    Returns:
        GeoDataFrame with columns "geometry" (MultiPolygon per band), "CODE"
        and "elevation_band". Band IDs are 1-based positions in the band
        list; band 0 collects every pixel that falls in no band (nodata,
        outside the basin polygon, or outside all supplied ranges) and is
        included in the result.

    Raises:
        ValueError: If no DEM intersects the basin, or if bands have to be
            derived and band_range is not positive.
    """
    # Determine which DEMs intersect with the basin (bounding boxes only)
    basin_box = box(*basin_polygon.bounds)  # (minx, miny, maxx, maxy)

    intersecting_dems = []
    for dem_src in dem_sources:
        dem_bounds = dem_src.bounds
        dem_box = box(
            dem_bounds.left, dem_bounds.bottom, dem_bounds.right, dem_bounds.top
        )
        if basin_box.intersects(dem_box):
            intersecting_dems.append(dem_src)

    if not intersecting_dems:
        raise ValueError(f"No DEM intersects with basin {code}")

    # filled=False makes rasterio return a masked array whose mask covers
    # both the DEM's own nodata pixels and everything outside the polygon.
    # This also works for DEMs without a declared nodata value, where the
    # default fill of 0 would otherwise be mistaken for real 0 m terrain.
    if len(intersecting_dems) > 1:
        # Merge the DEMs into one mosaic
        mosaic, mosaic_transform = merge(intersecting_dems)

        # Metadata for a temporary in-memory dataset holding the mosaic
        out_meta = intersecting_dems[0].meta.copy()
        out_meta.update(
            {
                "height": mosaic.shape[1],
                "width": mosaic.shape[2],
                "transform": mosaic_transform,
            }
        )

        from rasterio.io import MemoryFile

        with MemoryFile() as memfile:
            with memfile.open(**out_meta) as mem_dataset:
                mem_dataset.write(mosaic)
                out_image, out_transform = mask(
                    mem_dataset, [basin_polygon], crop=True, filled=False
                )
    else:
        # Single DEM - mask directly
        out_image, out_transform = mask(
            intersecting_dems[0], [basin_polygon], crop=True, filled=False
        )

    # Band 1 as floats, with every masked pixel set to NaN
    elevation = out_image[0].astype(float).filled(np.nan)

    # Calculate elevation band ranges if not provided
    auto_bands = band_ranges is None
    if auto_bands:
        if band_range <= 0:
            # A non-positive step would never advance past the maximum.
            raise ValueError(f"band_range must be positive, got {band_range}")

        band_ranges = []
        # Skip the min/max when the basin has no valid pixel at all; the
        # result then only contains band 0.
        if not np.isnan(elevation).all():
            max_elevation = np.nanmax(elevation)
            this_start = np.nanmin(elevation)
            while this_start <= max_elevation:
                band_ranges.append((this_start, this_start + band_range))
                this_start += band_range

    # Band ID per pixel; 0 means "in no band". int32 is accepted by
    # rasterio.features.shapes and does not wrap around at 255 bands.
    elevation_bands = np.zeros(elevation.shape, dtype="int32")

    for band_id, (lower, upper) in enumerate(band_ranges, start=1):
        if auto_bands and band_id == 1:
            # The first derived band starts exactly at the minimum
            # elevation, so its lower edge has to be inclusive.
            above_lower = elevation >= lower
        else:
            above_lower = elevation > lower
        # Comparisons with NaN are False, so masked pixels stay in band 0.
        elevation_bands[above_lower & (elevation <= upper)] = band_id

    # Convert the band raster to polygons, grouped by band ID
    elev_dict = {}
    for poly, value in shapes(elevation_bands, transform=out_transform):
        elev_dict.setdefault(value, []).append(shape(poly))

    elev_values = list(elev_dict)
    band_geometries = [MultiPolygon(polys) for polys in elev_dict.values()]

    return gpd.GeoDataFrame(
        {"geometry": band_geometries, "CODE": code, "elevation_band": elev_values},
        crs=crs,
    )


def create_shapefiles_with_elevation_bands(
    path_to_shp, dem_paths, elevation_bands=None, band_range=500
):
    """
    Create elevation band polygons for every basin in a shapefile.

    Each basin (row) of the shapefile is passed to get_elevation_bands
    together with all opened DEMs. The per-basin results are concatenated,
    given an "id" of the form "<CODE>_<elevation_band>", and band 0 is
    removed.

    Args:
        path_to_shp: Path to basin shapefile. Must provide a "CODE" column.
        dem_paths: List of paths to DEM raster files
        elevation_bands: Optional list of (lower, upper) elevation tuples
        band_range: Elevation interval if elevation_bands is None

    Returns:
        GeoDataFrame with combined elevation band polygons and the columns
        "geometry", "CODE", "elevation_band", "relative_area" (band area
        divided by the basin area, both computed in the shapefile's CRS)
        and "id".

    Raises:
        ValueError: If the shapefile contains no basins.
    """
    from contextlib import ExitStack

    basins_outline = gpd.read_file(path_to_shp)
    if basins_outline.empty:
        # pd.concat would otherwise fail later with an opaque message.
        raise ValueError(f"No basins found in {path_to_shp}")

    # Get CRS from basins
    crs = basins_outline.crs

    all_elev_polygons = []

    # ExitStack closes every DEM that was opened, even when opening a later
    # one or processing a basin raises.
    with ExitStack() as stack:
        dem_sources = [
            stack.enter_context(rasterio.open(dem_path)) for dem_path in dem_paths
        ]

        for _, basin in basins_outline.iterrows():
            code_basin = basin["CODE"]
            print(f"Processing basin: {code_basin}")

            elev_polygons = get_elevation_bands(
                dem_sources,
                basin["geometry"],
                crs=crs,
                code=code_basin,
                band_ranges=elevation_bands,
                band_range=band_range,
            )

            # Share of the basin area covered by each band
            elev_polygons["relative_area"] = (
                elev_polygons["geometry"].area / basin.geometry.area
            )

            all_elev_polygons.append(elev_polygons)

    combined_elev_polygons = pd.concat(all_elev_polygons, ignore_index=True)

    # astype(str) keeps the concatenation working for numeric basin codes.
    combined_elev_polygons["id"] = (
        combined_elev_polygons["CODE"].astype(str)
        + "_"
        + combined_elev_polygons["elevation_band"].astype(int).astype(str)
    )

    # Drop elevation band 0. The copy lets callers add or rename columns on
    # the result without pandas chained-assignment warnings.
    keep = combined_elev_polygons["elevation_band"] != 0
    return combined_elev_polygons[keep].copy()

## Shapefile For Tajikistan

First we need to define some paths.
The general workflow is to first create a shapefile with elevation bands from the basin outlines and the DEMs, then put it into the required format and save it.

In [None]:
# Shared path prefixes; the full paths below are assembled from these.
_dropbox_root = "/Users/sandrohunziker/hydrosolutions Dropbox/Sandro Hunziker"
_gis_dir = f"{_dropbox_root}/SAPPHIRE_Central_Asia_Technical_Work/GIS"
_reference_dem_dir = f"{_dropbox_root}/REFERENCE_CaseStudyPacks/CENTRAL_ASIA_DOMAIN/DEM"
_hru_gateway_dir = f"{_gis_dir}/HRU_Gateway"

# DEM rasters, one per country
path_to_DEM_TJK = f"{_gis_dir}/ML_Sandro/DEM/TJK_dem.tif"
path_to_DEM_KGZ = f"{_reference_dem_dir}/KGZ_dem.tif"
path_to_DEM_UZB = f"{_reference_dem_dir}/UZB_dem.tif"

# All DEM paths, in the order they are handed to the processing function
dem_paths = [path_to_DEM_TJK, path_to_DEM_KGZ, path_to_DEM_UZB]

# Basin outlines (input) and the folder the result is written to (output)
path_to_base_shp = f"{_hru_gateway_dir}/00004_no_glaciers/00004_no_glaciers.shp"
output_path = f"{_hru_gateway_dir}/TJK500m_no_glaciers"

In [None]:
# Build the elevation bands for every basin in the base shapefile, using all
# DEMs. With elevation_bands=None the band edges are derived from the DEM
# values in steps of band_range.
band_settings = {"elevation_bands": None, "band_range": 500}
combined_500m_shp = create_shapefiles_with_elevation_bands(
    path_to_base_shp, dem_paths, **band_settings
)

# Quick look at the first rows of the result
print(combined_500m_shp.head())

In [None]:
# Visual check: plot the elevation bands of a single basin
code_to_plot = "17084"

# Outline of the selected basin, read from the base shapefile
basin_outline = gpd.read_file(path_to_base_shp)
basin_outline_subset = basin_outline.loc[basin_outline["CODE"].eq(code_to_plot)]

# Elevation band polygons of the same basin
tajik_500m_shp_subset = combined_500m_shp.loc[
    combined_500m_shp["CODE"].eq(code_to_plot)
]

# Bands coloured by band ID, with the basin boundary drawn on top
fig, ax = plt.subplots(figsize=(8, 8))
tajik_500m_shp_subset.plot(column="elevation_band", ax=ax, legend=True, cmap="terrain")
basin_outline_subset.boundary.plot(ax=ax, color="black", linewidth=1)
ax.set_title(f"Elevation Bands for Basin {code_to_plot}")
plt.show()

In [None]:
# Put it into the correct format: add a constant "Z" column and rename
# "id" to "name".
# NOTE(review): the meaning of "Z" is not visible here - presumably it is
# required by the target format; confirm.
combined_500m_shp["Z"] = 1
combined_500m_shp.rename(columns={"id": "name"}, inplace=True)

# Writing fails if the target folder is missing, so create it first.
os.makedirs(output_path, exist_ok=True)

# NOTE(review): ESRI Shapefile field names are limited to 10 characters, so
# "elevation_band" and "relative_area" will be truncated on write - check
# the written column names.
combined_500m_shp.to_file(os.path.join(output_path, "TJK500m_no_glaciers.shp"))

## 500m 

In [None]:
name_of_shp = "elevation_bands_500m.shp"

# Read the existing 500m elevation band shapefile.
# NOTE(review): `path_to_shp` is not assigned in any cell visible above this
# one; it must come from elsewhere and has to end with a path separator for
# this concatenation to work - confirm.
gdf = gpd.read_file(path_to_shp + name_of_shp)

# Add a constant column and bring the column names into the target format:
# "z" becomes "Z" and "id" becomes "name".
gdf["z"] = 1
gdf.rename(columns={"z": "Z", "id": "name"}, inplace=True)

# Save the shapefile.
# NOTE(review): `output_path` as defined earlier in this notebook points to
# the TJK500m_no_glaciers folder, while this file is named KGZ - confirm the
# destination.
gdf.to_file(f"{output_path}/KGZ_elevation_bands_500m.shp")

## 1000m 

In [None]:
name_of_shp = "elevation_bands_1000m.shp"

# Read the existing 1000m elevation band shapefile.
# NOTE(review): `path_to_shp` is not assigned in any cell visible above this
# one; it must come from elsewhere and has to end with a path separator for
# this concatenation to work - confirm.
gdf = gpd.read_file(path_to_shp + name_of_shp)

# Add a constant column and bring the column names into the target format:
# "z" becomes "Z" and "id" becomes "name".
gdf["z"] = 1
gdf.rename(columns={"z": "Z", "id": "name"}, inplace=True)

# Save the shapefile.
# NOTE(review): `output_path` as defined earlier in this notebook points to
# the TJK500m_no_glaciers folder, while this file is named KGZ - confirm the
# destination.
gdf.to_file(f"{output_path}/KGZ_elevation_bands_1000m.shp")