# Bayesian biodiversity: Geodata processing

In [3]:
import numpy as np
import pygeoprocessing as pgp
import geopandas as gpd
import pandas as pd
import rasterio
import shapely
from shapely import Point, Polygon, LineString, MultiLineString
import time
from rasterstats import zonal_stats
from pyproj import Transformer
from itertools import product
from osgeo import gdal, ogr
from typing import Tuple, Union, List
from datetime import timedelta
from osgeo import gdal, ogr, osr

In [2]:
# Load black for formatting
import jupyter_black

jupyter_black.load()

# Adjust display settings for pandas
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", 100)

## Functions for performing geoprocessing steps

### Local projection, buffering and global reprojection

- TODO: Could be made more efficient by storing transformer objects in a dictionary, and doing a lookup for existing ones, but requires a different handling with the apply functions. Can probably work as a global variable.

In [387]:
def project_to_local_utm(
    geometry: Union[Point, LineString]
) -> Tuple[Union[Point, LineString], str]:
    """
    Calculates the local UTM zone for a Point or LineString and transforms the
    gemoetry coordinates into local format.

    Args:
        geometry (Point or LineString): Coordinates of e.g. a sampling site
            or road segment.

    Returns:
        local_coords (Point or LineString): The input geometry transformed to
            local coordinates.
        epsg_code (str): The local EPSG code for this tranformation, used for
            reprojection in a later stage.
    """
    assert isinstance(
        geometry, (Point, LineString)
    ), "geometry should be a Point, LineString or MultiLineString"

    try:
        # Get the coordinate point values (based on first point for Linestrings)
        first_point = geometry.coords[0]
        long, lat = first_point[0], first_point[1]

        # Determine the UTM zone and hemisphere
        zone_number = int((long + 180) // 6) + 1
        epsg_code = f"EPSG:{32700 + zone_number if lat < 0 else 32600 + zone_number}"

        # Initialize Transformer object
        utm_transformer = Transformer.from_crs("EPSG:4326", epsg_code, always_xy=True)

        # Perform transformation with approach depending on type of geometry
        if isinstance(geometry, Point):
            # Perform transformation
            local_coords = Point(utm_transformer.transform(long, lat))

        elif isinstance(geometry, LineString):
            xx, yy = geometry.coords.xy
            xx_utm, yy_utm = utm_transformer.transform(xx, yy)
            local_coords = LineString([xy for xy in zip(xx_utm, yy_utm)])

        return local_coords, epsg_code

    except (ValueError, NotImplementedError) as e:
        print(f"Error transforming {geometry}: {e}")
        return None, None

In [119]:
def buffer_points_in_utm(
    points: gpd.GeoSeries, buffer_dist: int, polygon_type: str = "square"
) -> gpd.GeoSeries:
    """
    Creates a Polygon from Point coordinates, by buffering according to the
    desired radius.

    Args:
        points (GeoSeries): A GeoSeries with all the points that should be buffered
            into Polygons.
        buffer_dist (int): Buffer radius expressed in kilometers.
        polygon_type (str): The shape of the buffered Polygon. Can be any of
            ['square', 'round', 'flat'].

    Returns:
        utm_coords_buff (GeoSeries): The buffered points.
    """
    
    assert polygon_type in [
        "square",
        "round",
        "flat",
    ], "polygon_type must be one of ['square', 'round', 'flat']"

    # Buffer array of Points into the chosen size and type
    utm_coords_buff = shapely.buffer(points, buffer_dist, cap_style=polygon_type)

    return utm_coords_buff

In [120]:
def reproject_to_global(polygon: Polygon, epsg_code: str) -> Polygon:
    """
    Takes a Polygon defined by local UTM coordinates and reprojects it to global
    EPSG 4326 coordinates.

    Args:
        polygon (Polygon): The buffered site that should be reprojected.
        epsg_code (str): The local EPSG code for this tranformation.

    Returns:
        global_polygon (Polygon): The buffered site in global coordinates.
    """

    # Initialize Transformer object
    global_transformer = Transformer.from_crs(epsg_code, "EPSG:4326", always_xy=True)

    # Get the coordinates of the Polygon and perform transformation
    xx, yy = polygon.exterior.coords.xy
    xx, yy = global_transformer.transform(xx=xx, yy=yy)

    # Create a new Polygon from the reprojected coordinates
    global_coords = [xy for xy in zip(xx, yy)]
    global_polygon = shapely.Polygon(global_coords)

    return global_polygon

In [132]:
def run_site_buffering(
    gdf: gpd.GeoDataFrame, buffer_dist: List[int]
) -> gpd.GeoDataFrame:
    """
    Runs a sequence of functions to create polygons of different sizes from
    point coordinates representing different sampling sites. The coordinates
    a first projected from global EPSG 4326 to local UTM format. They are then
    buffered into polygons. Finally, the polygon coordinates are reprojected
    into the global format.

    Args:
        gdf (gpd.GeoDataFrame): Dataframe containing coordinates of sites.
        buffer_distances (list): Radii of the output polygons.

    Returns:
        gdf (gpd.GeoDataFrame): Updated dataframe with transformed coordinates
            and polygons in both local and global formats.
    """

    # Rename geometry column for clarity, since there will be multiple ones added
    gdf = gdf.rename(columns={"geometry": "global_coord"})

    # Project each Point to UTM and return coords + EPSG codes
    gdf[["utm_coord", "epsg_code"]] = gdf.apply(
        lambda row: project_to_local_utm(row["global_coord"]),
        axis=1,
        result_type="expand",
    )

    # Buffer different sized polygons and append as new columns
    for dist in buffer_dist:
        gdf[f"utm_{dist}km"] = buffer_points_in_utm(gdf["utm_coord"], dist * 1000)

    # Reproject the polygons to global coordinate format
    for dist in buffer_dist:
        gdf[f"glob_{dist}km"] = gdf.apply(
            lambda row: reproject_to_global(row[f"utm_{dist}km"], row["epsg_code"]),
            axis=1,
        )

    return gdf

In [146]:
def save_buffered_site_files(gdf, buffer_dist):
    """Saves the global polygons of each buffer radius into a separate Shapefile."""

    for dist in buffer_dist:
        gdf_res = gdf[["SSBS", f"glob_{dist}km"]].rename(
            columns={f"glob_{dist}km": "geometry"}
        )
        gdf_res.to_file(
            f"../../data/PREDICTS/site_coordinates/site_coord_buff_{buffer_dist}km.shp"
        )

### Load sampling site data and created buffered polygons

In [277]:
gdf_sites = gpd.read_file("../../data/PREDICTS/site_coordinates/site_coord.shp")
gdf_sites.head()

Unnamed: 0,SSBS,UN_region,geometry
0,AD1_2001__Liow 1 1,Asia,POINT (103.77861 1.35194)
1,AD1_2001__Liow 1 2,Asia,POINT (103.80806 1.35472)
2,AD1_2001__Liow 1 3,Asia,POINT (103.81167 1.39472)
3,AD1_2001__Liow 1 4,Asia,POINT (103.78722 1.32694)
4,AD1_2001__Liow 1 5,Asia,POINT (103.80361 1.28278)


In [140]:
# Set buffer distances
buffer_dist_km = [1, 10, 50]

In [314]:
start = time.time()

# Run the projection-buffering-reprojection procedure
gdf_sites = run_site_buffering(gdf_sites, buffer_dist_km)

# Saved one shapefile for each buffer radius
save_buffered_site_files(gdf_sites, buffer_dist_km)

end = time.time()
runtime = str(timedelta(seconds=end - start))
print(runtime)

0:04:38.932173


In [233]:
gdf_sites.head(3)

Unnamed: 0,SSBS,UN_region,global_coord,utm_coord,epsg_code,utm_1km,utm_10km,utm_50km,glob_1km,glob_10km,glob_50km
0,AD1_2001__Liow 1 1,Asia,POINT (103.77861 1.35194),POINT (364117.2270177193 149464.9354869793),EPSG:32648,"POLYGON ((365117.2270177193 150464.9354869793,...","POLYGON ((374117.2270177193 159464.9354869793,...","POLYGON ((414117.2270177193 199464.9354869793,...","POLYGON ((103.78759 1.36099, 103.78760 1.34290...","POLYGON ((103.86844 1.44244, 103.86852 1.26153...","POLYGON ((104.22784 1.80445, 104.22812 0.89981..."
1,AD1_2001__Liow 1 2,Asia,POINT (103.80806 1.35472),POINT (367393.6320973808 149770.4037510926),EPSG:32648,"POLYGON ((368393.6320973808 150770.4037510926,...","POLYGON ((377393.6320973808 159770.4037510926,...","POLYGON ((417393.6320973808 199770.4037510926,...","POLYGON ((103.81704 1.36377, 103.81705 1.34568...","POLYGON ((103.89789 1.44522, 103.89797 1.26431...","POLYGON ((104.25729 1.80723, 104.25757 0.90258..."
2,AD1_2001__Liow 1 3,Asia,POINT (103.81167 1.39472),POINT (367797.6346832808 154192.390200358),EPSG:32648,"POLYGON ((368797.6346832808 155192.390200358, ...","POLYGON ((377797.6346832808 164192.390200358, ...","POLYGON ((417797.6346832808 204192.390200358, ...","POLYGON ((103.82065 1.40377, 103.82066 1.38568...","POLYGON ((103.90150 1.48522, 103.90158 1.30431...","POLYGON ((104.26091 1.84723, 104.26119 0.94258..."


### Extraction of raster data based on polygons

- TODO: Separate non-overlapping and overlapping polygons. Process the first group using pygeoprocessing. See: https://stackoverflow.com/questions/47471872/find-non-overlapping-polygons-in-geodataframe

In [None]:
def calculate_raster_stats(
    polygon_path: str, raster_path: str, metric: List[str] = ["mean"], bool: include_all_pixels = True
) -> List[float]:
    """
    Computes statistical metrics for raster pixels that overlap with polygons that
    should be analyzed. Uses the zonal_stats method from the rasterstats package.

    Args:
        polygon_path (str): Path to the polygon shapefile defining the areas for analysis.
        raster_path (str): Path to the raster file containing the data to be analyzed.
        metric (List[str]): The statistical metrics to compute (e.g. 'mean', 'sum'). 
        include_all_pixels (bool): Whether to include all pixels that touch the polygon 
            boundaries, or just pixels whose center points fall within the polygon.

    Returns:
        result (list): Computed statistical values, one for each polygon in the shapefile.
    """

    # Calculate zonal statistics
    stats = zonal_stats(
        vectors=polygon_path,
        raster=raster_path,
        stats=metric,
        all_touched=include_all_pixels,
    )

    # Extract stats from each dictionary in the output list
    result = [x[metric] for x in stats]

    return result

In [268]:
def run_raster_extraction(gdf, polygon_paths, raster_paths, res_col_names):
    """
    Runs the calculation / extraction of statistics from one or several pairs
    of overlapping raster datasets and polygon shapefiles. It's assumed that
    every combination of raster paths and polygon paths should be processed.
    
    Args:
        gdf (gpd.GeoDataFrame): 
        polygon_paths (List[str]):
        raster_paths (List[str]):
        res_col_names (List[str]):
    
    Returns:
        gdf_res (GeoDataFrame):
    """

    gdf_res = gdf[["SSBS", "UN_region", "global_coord"]].copy()

    i = 0
    for polygon_path in polygon_paths:
        for raster_path in pop_density_paths:
            start = time.time()

            result = calculate_raster_stats(
                polygon_path, raster_path, metric="mean", include_all_pixels=True
            )
            gdf_res.loc[:, res_col_names[i]] = result
            i += 1

            end = time.time()
            runtime = str(timedelta(seconds=end - start))
            print(runtime)

    return gdf_res

### Extract population density data from Gridded Population of the World (GPW), v4

https://sedac.ciesin.columbia.edu/data/set/gpw-v4-population-density-adjusted-to-2015-unwpp-country-totals-rev11

Unit: Number of people per square kilometer.

In [158]:
pop_density_paths = [
    "../../data/GPW/gpw_v4_2000_30_sec.tif",
    "../../data/GPW/gpw_v4_2005_30_sec.tif",
    "../../data/GPW/gpw_v4_2010_30_sec.tif",
    "../../data/GPW/gpw_v4_2015_30_sec.tif",
    "../../data/GPW/gpw_v4_2020_30_sec.tif",
]

polygon_paths = [
    "../../data/PREDICTS/site_coordinates/site_coord_buff_1km.shp",
    "../../data/PREDICTS/site_coordinates/site_coord_buff_10km.shp",
    "../../data/PREDICTS/site_coordinates/site_coord_buff_50km.shp",
]

res_col_names = [
    "_".join(combination)
    for combination in product(
        ["Pop_density"],
        ["1km", "10km", "50km"],
        ["2000", "2005", "2010", "2015", "2020"],
    )
]

In [4]:
res_col_names = [
    "_".join(combination)
    for combination in product(
        ["Pop_density"],
        ["1km", "10km", "50km"],
        ["2000", "2005", "2010", "2015", "2020"],
    )
]
res_col_names

['Pop_density_1km_2000',
 'Pop_density_1km_2005',
 'Pop_density_1km_2010',
 'Pop_density_1km_2015',
 'Pop_density_1km_2020',
 'Pop_density_10km_2000',
 'Pop_density_10km_2005',
 'Pop_density_10km_2010',
 'Pop_density_10km_2015',
 'Pop_density_10km_2020',
 'Pop_density_50km_2000',
 'Pop_density_50km_2005',
 'Pop_density_50km_2010',
 'Pop_density_50km_2015',
 'Pop_density_50km_2020']

In [269]:
gdf_pop_density = run_raster_extraction(
    gdf_sites, polygon_paths, pop_density_paths, res_col_names
)

0:01:34.456221
0:01:31.131906
0:01:29.986614
0:01:29.809366
0:01:30.760568
0:06:31.017836
0:06:29.810487
0:06:30.095684
0:04:52.414137
0:04:27.059541
0:19:55.875813
0:20:14.489325
0:20:15.382683
0:20:30.425757
1:50:05.641577


In [270]:
gdf_pop_density.head()

Unnamed: 0,SSBS,UN_region,global_coord,Pop_density_1km_2000,Pop_density_1km_2005,Pop_density_1km_2010,Pop_density_1km_2015,Pop_density_1km_2020,Pop_density_10km_2000,Pop_density_10km_2005,Pop_density_10km_2010,Pop_density_10km_2015,Pop_density_10km_2020,Pop_density_50km_2000,Pop_density_50km_2005,Pop_density_50km_2010,Pop_density_50km_2015,Pop_density_50km_2020
0,AD1_2001__Liow 1 1,Asia,POINT (103.77861 1.35194),8665.053819,10686.046875,12856.830729,14912.848958,16510.614583,6093.886228,6958.095808,7791.201098,8467.353293,8861.665669,933.654075,1093.091219,1268.813392,1454.161213,1641.065066
1,AD1_2001__Liow 1 2,Asia,POINT (103.80806 1.35472),1131.090088,1245.123128,1336.200765,1381.482259,1362.475993,7262.850962,8273.240919,9271.036325,10134.848291,10751.494658,940.373195,1101.212981,1278.487799,1465.453558,1653.969848
2,AD1_2001__Liow 1 3,Asia,POINT (103.81167 1.39472),4979.729167,5457.96875,5831.039931,6000.927951,5890.359375,6354.087041,7373.591876,8452.087041,9503.366538,10438.625725,898.962319,1050.302434,1216.123064,1389.497541,1562.316572
3,AD1_2001__Liow 1 4,Asia,POINT (103.78722 1.32694),4332.883681,4888.942708,5376.256944,5694.303819,5751.630208,6440.715232,7292.890728,8099.86755,8738.147903,9092.586093,951.244901,1114.757922,1295.39474,1486.577678,1680.202951
4,AD1_2001__Liow 1 5,Asia,POINT (103.80361 1.28278),8465.158203,9318.025391,9998.738932,10336.432943,10192.90625,6748.165722,7515.062323,8172.850567,8578.686969,8606.26983,979.929484,1149.967869,1338.408612,1538.764551,1742.833948


In [273]:
# Save the dataframe as a file
df_pop_density = pd.DataFrame(gdf_pop_density.drop(["global_coord", "UN_region"]))
df_pop_density.to_parquet("../../data/GPW/output/pop_density.parquet")

In [274]:
# Try reading the same file
df_pop_test = pd.read_parquet("../../data/GPW/output/pop_density.parquet")
df_pop_test.head()

Unnamed: 0,SSBS,Pop_density_1km_2000,Pop_density_1km_2005,Pop_density_1km_2010,Pop_density_1km_2015,Pop_density_1km_2020,Pop_density_10km_2000,Pop_density_10km_2005,Pop_density_10km_2010,Pop_density_10km_2015,Pop_density_10km_2020,Pop_density_50km_2000,Pop_density_50km_2005,Pop_density_50km_2010,Pop_density_50km_2015,Pop_density_50km_2020
0,AD1_2001__Liow 1 1,8665.053819,10686.046875,12856.830729,14912.848958,16510.614583,6093.886228,6958.095808,7791.201098,8467.353293,8861.665669,933.654075,1093.091219,1268.813392,1454.161213,1641.065066
1,AD1_2001__Liow 1 2,1131.090088,1245.123128,1336.200765,1381.482259,1362.475993,7262.850962,8273.240919,9271.036325,10134.848291,10751.494658,940.373195,1101.212981,1278.487799,1465.453558,1653.969848
2,AD1_2001__Liow 1 3,4979.729167,5457.96875,5831.039931,6000.927951,5890.359375,6354.087041,7373.591876,8452.087041,9503.366538,10438.625725,898.962319,1050.302434,1216.123064,1389.497541,1562.316572
3,AD1_2001__Liow 1 4,4332.883681,4888.942708,5376.256944,5694.303819,5751.630208,6440.715232,7292.890728,8099.86755,8738.147903,9092.586093,951.244901,1114.757922,1295.39474,1486.577678,1680.202951
4,AD1_2001__Liow 1 5,8465.158203,9318.025391,9998.738932,10336.432943,10192.90625,6748.165722,7515.062323,8172.850567,8578.686969,8606.26983,979.929484,1149.967869,1338.408612,1538.764551,1742.833948


### Calculation of road density inside polygons

- TODO: Can this be sped up by splitting the road files into smaller chunks? Or by evaluating only a subset of road linestrings for each polygon?

In [390]:
def split_multi_line_strings(road_linestrings):

    result = []
    for geometry in road_linestrings:
        if isinstance(geometry, MultiLineString):
            split_string = [LineString(string) for string in geometry.geoms]
            result += split_string
        elif isinstance(geometry, LineString):
            result.append(geometry)
        else:
            continue

    return gpd.GeoDataFrame(geometry=result)

In [401]:
def intersect_sites_and_roads(site_polygons, gdf_roads):

    # Extract geoseries containing road linestrings
    road_linestrings = MultiLineString(gdf_roads["utm_coord"].tolist())

    # List for storing results
    site_road_len = []

    # Iterate through every site polygon of this size
    for polygon in site_polygons:
        # Calculate intersection between site polygon and all road linestrings
        intersect_len = shapely.intersection(polygon, road_linestrings).length
        site_road_len.append(intersect_len)

    return site_road_len

In [402]:
def run_road_density_extraction(gdf_sites, gdf_roads, buffer_dist):

    # Check if rows contain MultiLineStrings and split where needed
    gdf_roads = split_multi_line_strings(gdf_roads["geometry"])
    gdf_roads = gdf_roads.rename(columns={"geometry": "global_coord"})

    # Project each linestring to local UTM coordinates
    gdf_roads[["utm_coord", "epsg_code"]] = gdf_roads.apply(
        lambda row: project_to_local_utm(row["global_coord"]),
        axis=1,
        result_type="expand",
    )

    # Iterate through each buffer radius column in the site dataframe
    df_result = pd.DataFrame(gdf_sites["SSBS"].copy())
    for dist in buffer_dist:
        site_polygons = gdf_sites[f"utm_{dist}km"]
        result = intersect_sites_and_roads(site_polygons, gdf_roads)
        df_result[f"Road_density_{dist}km"] = result

    return df_result

### Calculate road density from Global Roads Open Access Data Set (gROADS), v1

https://sedac.ciesin.columbia.edu/data/set/groads-global-roads-open-access-v1

One data set available for each continent.

### Metadata description

- `OBJECTID`: Object ID
- `SourceID`: Source ID
- `Picture`: Picture
- `Exs`: Existence Category. Options include 1=Definite, 2=Doubtful, 0=Unspecified
- `Notes`: Notes
- `RoadID`: Road ID
- `ONme`: Official Road Name
- `RteNme`: Route Name
- `NtlClass`: National Inventory Road Class
- `FClass`: Functional Class with options 1=Highway, 2=Primary, 3=Secondary, 4=Tertiary, 5=Local/ Urban, 6=Trail, 7=Private, 0=Unspecified
- `Crgway`: Carriageways. Options include 1=Single, 2=Dual, 0=Unspecified
- `NumLanes`: Number of lanes
- `LneWidthM`: Lane Width in meters
- `RdWidthM`: Road Width in meters
- `AxleLoadMT`: Maximum Axle Loading in MT
- `TotLoadMT`: Maximum Total Loading in MT
- `SrfTpe`: Surface Type with options 1=Paved, 2=Gravel, 3=Dirt/Sand, 4=Steel, 5=Wood, 6=Grass, 0=Unspecified
- `SrfCond`: Surface Condition with options 1=Rough (<40kph), 2=Smooth (>40kph), 3=Snow/Ice, 4=Mud, 0=Unspecified
- `SrfPrep`: Surface Preparation with options 1=Natural Compaction, 2=Traffic Compaction, 3=Engineered Compaction, 4=Uncompacted, 0=Unspecified
- `IsSeasonal`: Affected by Season. Options include 1=Yes, 2=No, 0=Unspecified
- `CurntPrac`: Current Road Practicability with options 1=Non-motorized, 2=Motorbike, 3=4WD <3.5MT, 4=Light Truck <10MT, 5=Heavy Truck <20MT, 6=Truck + Trailer >20MT, 0=Unspecified
- `GdWthrPrac`: Good Weather Road Practicability with options 1=Non-motorized, 2=Motorbike, 3=4WD <3.5MT, 4=Light Truck <10MT, 5=Heavy Truck <20MT, 6=Truck + Trailer >20MT, 0=Unspecified
- `BdWthrPrac`: Bad Weather Road Practicability. Options include 1=Non-motorized, 2=Motorbike, 3=4WD <3.5MT, 4=Light Truck <10MT, 5=Heavy Truck <20MT, 6=Truck + Trailer >20MT, 0=Unspecified
- `SpeedLimit`: Speed Limit in Km/hr
- `CurntSpeed`: Current Average Speed
- `GnralSpeed`: General Average Speed
- `IsUndrCstr`: Is under Construction / Repairs. Options include 1=Yes, 2=No, 0=Unspecified
- `CstWrkETC`: Construction Work Estimated Completion Date
- `GradDeg`: Gradient in degrees
- `Sec`: Road Security Category. Options include 1=Category A (low risk), 2=Category B (low to medium risk), 3=Category C (medium to high risk), 4=Category D (high risk), 5=Category E (critical risk), 0=Unspecified
- `HasShouldr`: Has Shoulder. Options include 1=Yes, 2=No, 0=Unspecified
- `HasSidewalk`: Has Sidewalk. Options include 1=Yes, 2=No, 0=Unspecified
- `DrivSide`: Driving Side. Options include 1=Left, 2=Right, 0=Unspecified
- `IsElevated`: Is elevated / suspended above ground/water. Options include 1=Yes, 2=No, 0=Unspecified
- `HasMedian`: Has Median. Options include 1=Yes, 2=No, 0=Unspecified
- `OpStatus`: Operational Status. Options include 1=Open, 2=Restricted, 3=Closed, 4=Abandoned/Disused, 0=Unspecified
- `Shape_Length`: Length of segment
- `Length_KM`: Length of segment in kilometers



In [282]:
# Merge the two Oceania files
oceania_e = gpd.read_file("../../data/gROADS/oceania-east/groads-v1-oceania-east.shp")
oceania_w = gpd.read_file("../../data/gROADS/oceania-west/groads-v1-oceania-west.shp")

oceania = pd.concat([oceania_e, oceania_w], ignore_index=True)
oceania.to_file("../../data/gROADS/oceania/groads-v1-oceania.shp")

In [406]:
road_paths = [
    "../../data/gROADS/oceania/groads-v1-oceania.shp",
    "../../data/gROADS/europe/groads-v1-europe.shp",
    "../../data/gROADS/africa/groads-v1-africa.shp",
    "../../data/gROADS/asia/groads-v1-asia.shp",
    "../../data/gROADS/americas/groads-v1-americas.shp",
]

un_regions = [
    "Oceania",
    "Europe",
    "Africa",
    "Asia",
    "Americas",
]

region_path_tuples = list(zip(un_subregions, road_paths))

In [288]:
# Set buffer distances
buffer_dist_km = [1, 10, 50]

In [407]:
df_road_density = pd.DataFrame()

for region, path in region_path_tuples:
    start = time.time()

    gdf_roads = gpd.GeoDataFrame(gpd.read_file(path)["geometry"])
    gdf_sites_loc = gdf_sites[gdf_sites["UN_region"] == region].copy()

    end = time.time()
    runtime = str(timedelta(seconds=end - start))
    print(f"Data for {region} loaded: {runtime}")

    start = time.time()

    df_result = run_road_density_extraction(gdf_sites_loc, gdf_roads, buffer_dist_km)
    end = time.time()
    runtime = str(timedelta(seconds=end - start))
    print(f"Data for {region} processed: {runtime}")

    df_road_density = pd.concat([df_road_density, df_result], ignore_index=True)

Data for Oceania loaded: 0:00:28.984243
Data for Oceania processed: 0:02:46.307334
Data for Europe loaded: 0:02:08.633548
Data for Europe processed: 1:09:31.489820
Data for Africa loaded: 0:02:33.634954
Data for Africa processed: 2:29:12.171623
Data for Asia loaded: 0:50:29.416524
Data for Asia processed: 4:26:11.335569
Data for Americas loaded: 0:01:58.753832
Data for Americas processed: 1:49:49.241273


In [408]:
df_road_density

Unnamed: 0,SSBS,Road_density_1km,Road_density_10km,Road_density_50km
0,AD1_2005__Blanche 1 1,2081.750094,190364.497423,2.084120e+06
1,AD1_2005__Blanche 1 2,1119.810940,187993.994423,2.072447e+06
2,AD1_2005__Blanche 1 3,0.000000,98624.640142,1.935901e+06
3,AD1_2005__Blanche 1 4,1186.374931,86832.033723,1.826060e+06
4,AD1_2005__Blanche 1 5,0.000000,68115.435147,1.757949e+06
...,...,...,...,...
35731,VK1_2013__ABMIboreal 7 5,0.000000,56520.132017,2.728339e+06
35732,VK1_2013__ABMIboreal 7 6,0.000000,73489.845850,2.718735e+06
35733,VK1_2013__ABMIboreal 7 7,2009.314410,163825.131491,2.960971e+06
35734,VK1_2013__ABMIboreal 7 8,2201.724782,87344.291912,2.803324e+06


In [409]:
# Save the dataframe as a file
df_road_density.to_parquet("../../data/gROADS/output/road_density.parquet")

In [410]:
# Try reading the same file
df_road_test = pd.read_parquet("../../data/gROADS/output/road_density.parquet")
df_road_test.head()

Unnamed: 0,SSBS,Road_density_1km,Road_density_10km,Road_density_50km
0,AD1_2005__Blanche 1 1,2081.750094,190364.497423,2084120.0
1,AD1_2005__Blanche 1 2,1119.81094,187993.994423,2072447.0
2,AD1_2005__Blanche 1 3,0.0,98624.640142,1935901.0
3,AD1_2005__Blanche 1 4,1186.374931,86832.033723,1826060.0
4,AD1_2005__Blanche 1 5,0.0,68115.435147,1757949.0


## WorldClim elevation data

https://worldclim.org/data/worldclim21.html#google_vignette

Same structure and resolution as the population density data. Can use the same approach to extract values for buffered sampling locations.

In [None]:
elevation_path = "../../data/WorldClim/Elevation/wc2.1_30s_elev.tif"
pgp.get_raster_info(elevation_path)

## WorldClim bioclimatic data

https://worldclim.org/data/bioclim.html

In [None]:
bioclim_path = "../../data/WorldClim/Bioclimatic/wc2.1_30s_bio_1.tif"
pgp.get_raster_info(bioclim_path)