Skip to content
This repository has been archived by the owner on Apr 8, 2021. It is now read-only.

Commit

Permalink
Improved join utilities, split NHD waterbody multipolygons and retain…
Browse files Browse the repository at this point in the history
… only the first
  • Loading branch information
brendan-ward committed Dec 6, 2019
1 parent 660dc16 commit 91ba92c
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 11 deletions.
5 changes: 5 additions & 0 deletions nhdnet/nhd/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,11 @@ def extract_waterbodies(gdb_path, target_crs, exclude_ftypes=[], min_area=0):
)
)

# Convert multipolygons to polygons by keeping only the first polygon;
# the true multipolygons we checked turned out to be errors
idx = df.loc[df.geometry.type == "MultiPolygon"].index
df.loc[idx, "geometry"] = df.loc[idx].geometry.apply(lambda g: g[0])

print("Converting geometry to 2D")
df.geometry = df.geometry.apply(poly2D)

Expand Down
41 changes: 30 additions & 11 deletions nhdnet/nhd/joins.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,15 +138,34 @@ def remove_joins(df, ids, downstream_col="downstream", upstream_col="upstream"):
[type]
[description]
"""
# TODO: fix new dangling terminals? Set to 0 first?
# join_df = join_df.loc[~join_df.upstream.isin(coastline_idx)].copy()

# set the downstream to 0 for any that join coastlines
# this will enable us to mark these as downstream terminals in
# the network analysis later
# join_df.loc[join_df.downstream.isin(coastline_idx), "downstream"] = 0

# drop any duplicates (above operation sets some joins to upstream and downstream of 0)
# join_df = join_df.drop_duplicates()

return df.loc[~(df[upstream_col].isin(ids) | (df[downstream_col].isin(ids)))].copy()
# Update any joins that would have connected to these ids
# on their downstream end
upstreams = df.loc[
(df[downstream_col].isin(ids)) & (df[upstream_col] != 0), upstream_col
]
has_other_joins = df.loc[
df[upstream_col].isin(upstreams) & ~df[downstream_col].isin(ids), upstream_col
]

# new terminals are ones that end ONLY in these ids
new_terminals = upstreams.loc[~upstreams.isin(has_other_joins)]
ix = df.loc[df[upstream_col].isin(new_terminals)].index
df.loc[ix, downstream_col] = 0

# Update any joins that would have connected to these ids
# on their upstream end
downstreams = df.loc[
df[upstream_col].isin(ids) & (df[downstream_col] != 0), downstream_col
]
has_other_joins = df.loc[
df[downstream_col].isin(downstreams) & ~df[upstream_col].isin(ids),
downstream_col,
]
new_terminals = downstreams.loc[~downstreams.isin(has_other_joins)]
ix = df.loc[df[downstream_col].isin(new_terminals)].index
df.loc[ix, upstream_col] = 0

return df.loc[
~(df[upstream_col].isin(ids) | (df[downstream_col].isin(ids)))
].drop_duplicates()

0 comments on commit 91ba92c

Please sign in to comment.