Commit 660dc16: Added waterbody extract, added more flowline join utils
brendan-ward committed Dec 5, 2019 (1 parent: 014c1d5)
Showing 4 changed files with 168 additions and 54 deletions.
17 changes: 17 additions & 0 deletions nhdnet/geometry/points.py
@@ -4,6 +4,23 @@
from shapely.geometry import Point


def to2D(geometry):
"""Flatten a 3D point to 2D.
Parameters
----------
geometry : Point
Input 3D geometry
Returns
-------
Point
Output 2D geometry
"""

return Point(geometry.x, geometry.y)
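A quick usage sketch of the new helper (the coordinates are made up):

from shapely.geometry import Point

pt = Point(1501234.5, 1687432.1, 96.2)  # x, y, z in some planar CRS
flat = to2D(pt)
print(flat.has_z)  # False: the z value is dropped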


def create_points(df, x_column, y_column, crs):
"""Create a GeoDataFrame from pandas DataFrame
21 changes: 0 additions & 21 deletions nhdnet/io.py
@@ -188,29 +188,8 @@ def to_shp(df, path):
# Drop any records with missing geometries
df = df.loc[~df[geom_col].isnull()].copy()

# Convert data types to those supported by shapefile
for c in [c for c, t in df.dtypes.items() if t == "uint64"]:
df[c] = df[c].astype("float64")

df.to_file(path)

-### Original implementation, now slower than geopandas to_file()
-# geometry = df[geom_col].apply(mapping)
-# # fill missing data with None and convert to dict
-# props = df.drop(columns=[df._geometry_column_name])
-# props.replace({c: {np.nan: None} for c in prop_cols}, inplace=True)
-# props = props.apply(lambda row: row.to_dict(), axis=1)
-# # Convert features to JSON
-# features = DataFrame({"geometry": geometry, "properties": props})
-# features["type"] = "Feature"
-# features = features.apply(lambda row: row.to_dict(), axis=1)
-# schema = infer_schema(df)
-# with fiona.Env():
-# with fiona.open(
-# path, "w", driver="ESRI Shapefile", crs=df.crs, schema=schema
-# ) as writer:
-# writer.writerecords(features)
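For context on the uint64 cast retained above: the ESRI Shapefile format has no unsigned 64-bit integer field type, so columns such as NHDPlusID must be converted before writing. A minimal sketch of the same cast on a made-up frame (NHDPlusID values are well below 2**53, so the float64 cast is lossless):

import pandas as pd

df = pd.DataFrame({"NHDPlusID": pd.Series([55000800000001], dtype="uint64")})
for c in [c for c, t in df.dtypes.items() if t == "uint64"]:
    df[c] = df[c].astype("float64")
print(df.dtypes["NHDPlusID"])  # float64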


def serialize_sindex(df, path):
"""Serialize the bounding coordinates necessary to recreate a spatial index
66 changes: 57 additions & 9 deletions nhdnet/nhd/extract.py
@@ -23,20 +23,24 @@
import geopandas as gp
import pandas as pd
from shapely.geometry import MultiLineString
-from nhdnet.geometry.lines import to2D, calculate_sinuosity
+from nhdnet.geometry.lines import to2D as line2D, calculate_sinuosity
+from nhdnet.geometry.polygons import to2D as poly2D
from nhdnet.nhd.joins import index_joins, find_joins

FLOWLINE_COLS = ["NHDPlusID", "FlowDir", "FType", "GNIS_ID", "GNIS_Name", "geometry"]

# TODO: add elevation gradient info
VAA_COLS = ["NHDPlusID", "StreamOrde", "StreamCalc", "TotDASqKm"]

WATERBODY_COLS = ["NHDPlusID", "FType", "AreaSqKm", "geometry"]


def extract_flowlines(gdb_path, target_crs, extra_flowline_cols=[]):
"""
-Extracts data from NHDPlusHR data product.
-Extract flowlines, join to VAA table, and filter out any loops and coastlines.
-Extract joins between flowlines, and filter out any loops and coastlines.
+Extracts flowlines from the NHDPlusHR data product, joins them to the VAA
+table, and filters out coastlines.
+Extracts joins between flowlines, and filters out coastlines.
Parameters
----------
@@ -50,7 +54,7 @@ def extract_flowlines(gdb_path, target_crs, extra_flowline_cols=[]):
Returns
-------
-type of geopandas.GeoDataFrame
+tuple of (GeoDataFrame, DataFrame)
+(flowlines, joins)
"""

@@ -88,9 +92,6 @@ def extract_flowlines(gdb_path, target_crs, extra_flowline_cols=[]):
join_df.upstream = join_df.upstream.astype("uint64")
join_df.downstream = join_df.downstream.astype("uint64")

-# build join index
-joins = index_joins(join_df)

### Label loops for easier removal later, if we need to
# WARNING: loops may be very problematic from a network processing standpoint.
# Include with caution.
@@ -159,7 +160,7 @@ def extract_flowlines(gdb_path, target_crs, extra_flowline_cols=[]):

# Convert incoming data from XYZM to XY
print("Converting geometry to 2D")
-df.geometry = df.geometry.apply(to2D)
+df.geometry = df.geometry.apply(line2D)

print("projecting to target projection")
df = df.to_crs(target_crs)
@@ -177,3 +178,50 @@ def extract_flowlines(gdb_path, target_crs, extra_flowline_cols=[]):

return df, join_df
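A hedged usage sketch for the revised function; the geodatabase path is a placeholder, and passing an EPSG string for target_crs assumes a geopandas version that accepts one:

from nhdnet.nhd.extract import extract_flowlines

flowlines, joins = extract_flowlines(
    "data/NHDPLUS_H_0204_HU4_GDB.gdb",  # hypothetical HUC4 geodatabase path
    target_crs="EPSG:5070",             # CONUS Albers, a planar projection
)
print("{:,} flowlines, {:,} joins".format(len(flowlines), len(joins)))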


def extract_waterbodies(gdb_path, target_crs, exclude_ftypes=[], min_area=0):
"""Extract waterbodies from NHDPlusHR data product.
Parameters
----------
gdb_path : str
path to the NHD HUC4 Geodatabase
target_crs: GeoPandas CRS object
target CRS to project NHD to for analysis, like length calculations.
Must be a planar projection.
exclude_ftypes : list, optional (default: [])
list of FTypes to exclude.
min_area : float, optional (default: 0)
If provided, only waterbodies with AreaSqKm greater than or equal to this value are retained
Returns
-------
GeoDataFrame
"""
print("Reading waterbodies")
df = gp.read_file(gdb_path, layer="NHDWaterbody")[WATERBODY_COLS]
print("Read {:,} waterbodies".format(len(df)))

df = df.loc[
(df.AreaSqKm >= min_area) & (~df.FType.isin(exclude_ftypes))
].reset_index(drop=True)
print(
"Retained {:,} waterbodies after dropping those below size threshold or in exclude FTypes".format(
len(df)
)
)

print("Converting geometry to 2D")
df.geometry = df.geometry.apply(poly2D)

print("projecting to target projection")
df = df.to_crs(target_crs)

df.NHDPlusID = df.NHDPlusID.astype("uint64")
df.AreaSqKm = df.AreaSqKm.astype("float32")
df.FType = df.FType.astype("uint16")

### Add calculated fields
df["wbID"] = df.index.values.astype("uint32") + 1

return df
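A similar sketch for the new waterbody extract; the path is again a placeholder and the FType codes are illustrative only (check them against the NHD FType domain):

from nhdnet.nhd.extract import extract_waterbodies

waterbodies = extract_waterbodies(
    "data/NHDPLUS_H_0204_HU4_GDB.gdb",  # hypothetical HUC4 geodatabase path
    target_crs="EPSG:5070",
    exclude_ftypes=[361, 466],  # e.g. Playa and Swamp/Marsh, if unwanted
    min_area=0.01,              # drop waterbodies under 0.01 sq km
)
print("{:,} waterbodies retained".format(len(waterbodies)))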
118 changes: 94 additions & 24 deletions nhdnet/nhd/joins.py
@@ -1,4 +1,4 @@
def find_join(df, id, upstream="upstream", downstream="downstream"):
def find_join(df, id, downstream_col="downstream", upstream_col="upstream"):
"""Find the joins for a given segment id in a joins table.
Parameters
@@ -7,19 +7,19 @@ def find_join(df, id, upstream="upstream", downstream="downstream"):
data frame containing the joins
id : any
id to lookup in upstream or downstream columns
-upstream : str, optional (default "upstream")
-name of upstream column
-downstream : str, optional (default "downstream")
+downstream_col : str, optional (default "downstream")
name of downstream column
+upstream_col : str, optional (default "upstream")
+name of upstream column
Returns
-------
Joins that have the id as an upstream or downstream.
"""
-return df.loc[(df[upstream] == id) | (df[downstream] == id)]
+return df.loc[(df[upstream_col] == id) | (df[downstream_col] == id)]


def find_joins(df, ids, upstream="upstream", downstream="downstream"):
def find_joins(df, ids, downstream_col="downstream", upstream_col="upstream"):
"""Find the joins for a given segment id in a joins table.
Parameters
@@ -28,26 +28,19 @@ def find_joins(df, ids, upstream="upstream", downstream="downstream"):
data frame containing the joins
ids : list-like
ids to lookup in upstream or downstream columns
-upstream : str, optional (default "upstream")
-name of upstream column
-downstream : str, optional (default "downstream")
+downstream_col : str, optional (default "downstream")
name of downstream column
+upstream_col : str, optional (default "upstream")
+name of upstream column
Returns
-------
Joins that have the id as an upstream or downstream.
"""
-return df.loc[(df[upstream].isin(ids)) | (df[downstream].isin(ids))]
+return df.loc[(df[upstream_col].isin(ids)) | (df[downstream_col].isin(ids))]
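A toy joins table (ids invented) showing both lookups with the renamed keyword arguments:

import pandas as pd
from nhdnet.nhd.joins import find_join, find_joins

joins = pd.DataFrame({"upstream": [1, 2, 0], "downstream": [2, 3, 1]})

find_join(joins, 2)        # rows (1 -> 2) and (2 -> 3)
find_joins(joins, [1, 3])  # all three rows: 1 and 3 each appear in some join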

-# def find_singles(df, upstream="upstream", downstream="downstream"):
-# return df.loc[
-# ((df[upstream] == 0) & (df[downstream != 0]))
-# | ((df[downstream] == 0) & (df[upstream] != 0))
-# ]


def index_joins(df, upstream="upstream", downstream="downstream"):
def index_joins(df, downstream_col="downstream", upstream_col="upstream"):
"""Create an index of joins based on a given segment id.
Returns a dataframe indexed by id, listing the id of the next
segment upstream and the id of the next segment downstream.
@@ -61,22 +54,99 @@ def index_joins(df, upstream="upstream", downstream="downstream"):
----------
df : DataFrame
data frame containing the joins.
-upstream : str, optional (default "upstream")
-upstream column name
-downstream : str, optional (default "downstream")
+downstream_col : str, optional (default "downstream")
downstream column name
+upstream_col : str, optional (default "upstream")
+upstream column name
Returns
-------
DataFrame
"""

return (
-df.loc[df[downstream] != 0]
-.set_index(downstream)[[upstream]]
-.join(df.set_index(upstream)[[downstream]])
+df.loc[df[downstream_col] != 0]
+.set_index(downstream_col)[[upstream_col]]
+.join(df.set_index(upstream_col)[[downstream_col]])
.reset_index()
.drop_duplicates()
.set_index("index")
)
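Reusing the toy joins table from the sketch above, index_joins pivots it so each segment id lists its immediate upstream and downstream neighbors:

from nhdnet.nhd.joins import index_joins

index_joins(joins)
#        upstream  downstream
# index
# 2             1         3.0
# 3             2         NaN
# 1             0         2.0

The downstream column comes back as float because segment 3 has no downstream join, which yields NaN.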


def create_upstream_index(
df, downstream_col="downstream", upstream_col="upstream", exclude=None
):
"""Create an index of downstream ids to all their respective upstream ids.
This is so that network traversal can start from a downstream-most segment,
and then traverse upward for all segments that have that as a downstream segment.
Parameters
----------
df : DataFrame
Data frame containing the pairs of upstream_col and downstream_col that
represent the joins between segments.
downstream_col : str, optional (default "downstream")
Name of column containing downstream ids
upstream_col : str, optional (default "upstream")
Name of column containing upstream ids
exclude : list-like, optional (default None)
List-like containing segment ids to exclude from the list of upstreams.
For example, barriers that break the network should be in this list.
Otherwise, network traversal will operate from the downstream-most point
to all upstream-most points, which can be very large for some networks.
Returns
-------
dict
dictionary of downstream_id to the corresponding upstream_id(s)
"""

ix = (df[upstream_col] != 0) & (df[downstream_col] != 0)

if exclude is not None:
ix = ix & (~df[upstream_col].isin(exclude))

# NOTE: this looks backward but is correct for the way that grouping works.
return (
df.loc[ix, [downstream_col, upstream_col]]
.set_index(upstream_col)
.groupby(downstream_col)
.groups
)
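With the same toy table, the new create_upstream_index maps each downstream id to the upstream ids that flow into it (joins touching a 0 terminal are skipped):

from nhdnet.nhd.joins import create_upstream_index

create_upstream_index(joins)
# {2: [1], 3: [2]}  (values are pandas Index objects)

create_upstream_index(joins, exclude=[2])
# {2: [1]}  (segment 2 no longer appears as an upstream)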


def remove_joins(df, ids, downstream_col="downstream", upstream_col="upstream"):
"""Remove any joins to or from ids.
This sets any joins that terminate downstream to one of these ids to 0 in order to mark them
as new downstream terminals. A join that includes other downstream ids not in ids will be left as is.
Parameters
----------
df : DataFrame
Data frame containing the pairs of upstream_col and downstream_col that
represent the joins between segments.
ids : list-like
List of ids to remove from the joins
downstream_col : str, optional (default "downstream")
Name of column containing downstream ids
upstream_col : str, optional (default "upstream")
Name of column containing upstream ids
Returns
-------
DataFrame
joins with any joins to or from the given ids removed
"""
# TODO: fix new dangling terminals? Set to 0 first?
# join_df = join_df.loc[~join_df.upstream.isin(coastline_idx)].copy()

# set the downstream to 0 for any that join coastlines
# this will enable us to mark these as downstream terminals in
# the network analysis later
# join_df.loc[join_df.downstream.isin(coastline_idx), "downstream"] = 0

# drop any duplicates (above operation sets some joins to upstream and downstream of 0)
# join_df = join_df.drop_duplicates()

return df.loc[~(df[upstream_col].isin(ids) | (df[downstream_col].isin(ids)))].copy()
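Finally, remove_joins drops every join touching the given ids, for example treating segment 3 as removed (toy data again):

from nhdnet.nhd.joins import remove_joins

remove_joins(joins, [3])
#    upstream  downstream
# 0         1           2
# 2         0           1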
