From 91ba92c86378fc0a2d1ea8381ddb23936eb5ad80 Mon Sep 17 00:00:00 2001 From: "Brendan C. Ward" Date: Thu, 5 Dec 2019 22:21:31 -0800 Subject: [PATCH] Improved join utilities, split NHD waterbody multipolygons and retain only the first --- nhdnet/nhd/extract.py | 5 +++++ nhdnet/nhd/joins.py | 41 ++++++++++++++++++++++++++++++----------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/nhdnet/nhd/extract.py b/nhdnet/nhd/extract.py index 11a17b4..e85fb07 100644 --- a/nhdnet/nhd/extract.py +++ b/nhdnet/nhd/extract.py @@ -211,6 +211,11 @@ def extract_waterbodies(gdb_path, target_crs, exclude_ftypes=[], min_area=0): ) ) + # Convert multipolygons to polygons + # those we checked that are true multipolygons are errors + idx = df.loc[df.geometry.type == "MultiPolygon"].index + df.loc[idx, "geometry"] = df.loc[idx].geometry.apply(lambda g: g[0]) + print("Converting geometry to 2D") df.geometry = df.geometry.apply(poly2D) diff --git a/nhdnet/nhd/joins.py b/nhdnet/nhd/joins.py index e3d434a..756bd12 100644 --- a/nhdnet/nhd/joins.py +++ b/nhdnet/nhd/joins.py @@ -138,15 +138,34 @@ def remove_joins(df, ids, downstream_col="downstream", upstream_col="upstream"): [type] [description] """ - # TODO: fix new dangling terminals? Set to 0 first? - # join_df = join_df.loc[~join_df.upstream.isin(coastline_idx)].copy() - # set the downstream to 0 for any that join coastlines - # this will enable us to mark these as downstream terminals in - # the network analysis later - # join_df.loc[join_df.downstream.isin(coastline_idx), "downstream"] = 0 - - # drop any duplicates (above operation sets some joins to upstream and downstream of 0) - # join_df = join_df.drop_duplicates() - - return df.loc[~(df[upstream_col].isin(ids) | (df[downstream_col].isin(ids)))].copy() + # Update any joins that would have connected to these ids + # on their downstream end + upstreams = df.loc[ + (df[downstream_col].isin(ids)) & (df[upstream_col] != 0), upstream_col + ] + has_other_joins = df.loc[ + df[upstream_col].isin(upstreams) & ~df[downstream_col].isin(ids), upstream_col + ] + + # new terminals are ones that end ONLY in these ids + new_terminals = upstreams.loc[~upstreams.isin(has_other_joins)] + ix = df.loc[df[upstream_col].isin(new_terminals)].index + df.loc[ix, downstream_col] = 0 + + # Update any joins that would have connected to these ids + # on their upstream end + downstreams = df.loc[ + df[upstream_col].isin(ids) & (df[downstream_col] != 0), downstream_col + ] + has_other_joins = df.loc[ + df[downstream_col].isin(downstreams) & ~df[upstream_col].isin(ids), + downstream_col, + ] + new_terminals = downstreams.loc[~downstreams.isin(has_other_joins)] + ix = df.loc[df[downstream_col].isin(new_terminals)].index + df.loc[ix, upstream_col] = 0 + + return df.loc[ + ~(df[upstream_col].isin(ids) | (df[downstream_col].isin(ids))) + ].drop_duplicates()