In [1]:
from rgispy import snap
import pandas as pd
import geopandas as gpd
import xarray as xr

Goal: snap "real" coordinates to the best-matching WBM cell at 15min network resolution.

real_coord -> 15min_coord

In this example we know the upstream catchment area for each of our features. For every cell in the WBM network there is also an associated catchment area (labelled "SubbasinArea").


Please note that in a real application you should first snap to the highest resolution network available, then subsequently snap to coarser resolutions. 

real_coord -> 30sec_coord -> 1min_coord -> .... -> 15min_coord

In [2]:
# Network file that features are snapped to (15min resolution, per the file name).
target_network = "CONUS_Network_HydroSTN30_15min_Static.nc"
# CSV holding the "real" feature locations to snap.
real = "real_life_locations.csv"

In [3]:
# Read the raw table, then attach point geometries built from its lon/lat
# columns (created with crs=4326).
real_df = pd.read_csv(real)
real_df = gpd.GeoDataFrame(
    real_df,
    geometry=gpd.points_from_xy(
        x=real_df["lon"],
        y=real_df["lat"],
        crs=4326,
    ),
)
real_df.head()

Unnamed: 0,id,lat,lon,catchment_area,geometry
0,1,42.712,-90.96,22900,POINT (-90.96000 42.71200)
1,2,41.56,-96.685,550,POINT (-96.68500 41.56000)
2,3,38.81,-110.213,7000,POINT (-110.21300 38.81000)
3,4,37.329,-96.858,160000,POINT (-96.85800 37.32900)


In [4]:
# Open the target network file with xarray and display it.
network = xr.open_dataset(target_network)
network

In [7]:
def catchment_snap(
    gdf,
    network,
    radius=1,
    tolerance=0.05,
    adjust_outside_tolerance=False,
    target_suffix="_15min",
    source_suffix="_real",
):
    """Snap features to network cells by matching catchment area.

    Thin wrapper around ``snap.snap_gdf`` that fixes the matching target to the
    ``catchment_area`` source value against the network's ``SubbasinArea``, and
    requests the network's ``ID`` and ``Order`` values as supplementary output
    (named ``CellID`` and ``Order``).

    Parameters
    ----------
    gdf
        Features to snap; passed to ``snap.snap_gdf`` unchanged.
    network
        Object indexed with ``"SubbasinArea"``, ``"ID"`` and ``"Order"``.
    radius, tolerance, adjust_outside_tolerance, target_suffix, source_suffix
        Forwarded unchanged to ``snap.snap_gdf``.

    Returns
    -------
    tuple
        ``(snap_results, report)`` as returned by ``snap.snap_gdf``.
    """
    # Matching target: (name, source value, network values).
    area_target = ("catchment_area", "catchment_area", network["SubbasinArea"])

    # Supplementary network info appended to the results, same tuple layout.
    # The source value is None because the source data has no such field.
    cell_id = ("CellID", None, network["ID"])
    stream_order = ("Order", None, network["Order"])

    results, summary = snap.snap_gdf(
        gdf,
        area_target,
        supplement_cols=[cell_id, stream_order],
        radius=radius,
        tolerance=tolerance,
        adjust_outside_tolerance=adjust_outside_tolerance,
        target_suffix=target_suffix,
        source_suffix=source_suffix,
    )
    return results, summary


# Run the snap with the function's defaults (radius=1, tolerance=0.05).
snap_15min, report = catchment_snap(real_df, network)


In [8]:
# Display the snapped results table.
snap_15min

Unnamed: 0,id,lat,lon,catchment_area,xCoord_real,yCoord_real,catchment_area_real,xCoord_15min,yCoord_15min,catchment_area_15min,NetSymmetricDifference,is_naive,CellID_real,CellID_15min,Order_real,Order_15min,geometry
0,1,42.712,-90.96,22900,-90.96,42.712,22900,-90.875,42.625,229515.9062,0.818553,True,,179.0,,4.0,POINT (-90.87500 42.62500)
1,2,41.56,-96.685,550,-96.685,41.56,550,-96.375,41.875,575.4459,0.02261,False,,4378.0,,1.0,POINT (-96.37500 41.87500)
2,3,38.81,-110.213,7000,-110.213,38.81,7000,-110.375,38.875,7175.1196,0.012354,False,,8671.0,,3.0,POINT (-110.37500 38.87500)
3,4,37.329,-96.858,160000,-96.858,37.329,160000,-97.125,37.375,165133.375,0.015789,False,,234.0,,4.0,POINT (-97.12500 37.37500)


A naive location indicates the feature was simply snapped to the cell it overlapped with.

This happens either because:

   * that cell happened to be the best match
   * there were no better cells within the tolerance threshold and `adjust_outside_tolerance` = False

If a feature was snapped to a non-naive location, a better match was found within the neighboring cell radius.

In [18]:
# Count how many features kept their naive cell (True) vs. were moved (False).
snap_15min.is_naive.value_counts()

False    3
True     1
Name: is_naive, dtype: int64

The symmetric difference measures how close the snapped cell's target value is to the source value.

`abs( (x-y) / (x +y) )`

The closer to zero, the better the match. Our first feature in this case could not find a neighboring cell within the 5% tolerance. The other three features found appropriate matches!


In [23]:
# Source vs. snapped catchment area side by side, with the match metrics.
snap_15min[['id', 'catchment_area_real', 'catchment_area_15min', 'NetSymmetricDifference', 'is_naive']]

Unnamed: 0,id,catchment_area_real,catchment_area_15min,NetSymmetricDifference,is_naive
0,1,22900,229515.9062,0.818553,True
1,2,550,575.4459,0.02261,False
2,3,7000,7175.1196,0.012354,False
3,4,160000,165133.375,0.015789,False


Because we are snapping to such a coarse resolution, even moving over by one cell is a large physical distance.

In [24]:
# Display the summary report returned alongside the snap results.
report

{'count': 4,
 'count_adjusted': 3,
 'count_outside_tolerance': 1,
 'count_outside_tolerance_catchment_area': 1,
 'catchment_area_symdif_mean': 0.21732639517248334,
 'catchment_area_symdif_min': 0.012354012166500521,
 'catchment_area_symdif_25%': 0.014929893517149214,
 'catchment_area_symdif_50%': 0.019199068482384132,
 'catchment_area_symdif_75%': 0.22159557013771827,
 'catchment_area_symdif_max': 0.8185534315586646,
 'catchment_area_corr': 0.48341441075961605,
 'snap_distance_km_mean': 23.847885688087025,
 'snap_distance_km_std': 14.049317945265862,
 'snap_distance_km_min': 11.91458876529748,
 'snap_distance_km_25%': 14.834318135297892,
 'snap_distance_km_50%': 20.004142761491103,
 'snap_distance_km_75%': 29.017710314280237,
 'snap_distance_km_max': 43.46866846406842}