In [None]:
# VBET VALLEY BOTTOM MODEL
#     The final output of the script is placed in vb_dir as "VBET_ValleyBottoms.tif"

In [16]:
if __name__ == '__main__':
    # Import necessary modules
    from shapely.geometry import Point
    from shapely.ops import linemerge
    import rasterio as rio
    import pandas as pd
    import geopandas as gpd
    # The GDAL Python bindings live in the `osgeo` package. The bare top-level
    # `import gdal, osr` form is deprecated and is rejected by current GDAL
    # releases; this spelling binds the same two names.
    from osgeo import gdal, osr
    import numpy as np
    import os, shutil
    import math
    from rasterio.merge import merge as merge_tool
    #import Utilities as utils
    import logging as localLog

    localLog.basicConfig(level=localLog.INFO)

    # overwrite: when True, existing outputs are regenerated by the processing cell.
    overwrite = False
    # cleanup: not referenced by the cells visible in this notebook.
    cleanup = False
    # Root working directory; the final raster is written here.
    # NOTE(review): absolute, machine-specific path - adjust for your environment.
    vb_dir = os.path.abspath(r"M:\Data\ValleyBottoms")

    #print("Using NHD directory %s" % nhd_dir)

    # Directory holding one sub-folder per watershed.
    watersheds_dir = os.path.join(vb_dir, "Watersheds")

    # Large Slope Threshold: the value that represents the upper limit of slopes
    # that will be included in the valley bottom for the 'large' portions of the
    # network.
    large_slope_thresh = 2
    # Medium Slope Threshold: the value that represents the upper limit of slopes
    # that will be included in the valley bottom for the 'medium' portions of the
    # network.
    medium_slope_thresh = 5
    # Small Slope Threshold: the value that represents the upper limit of slopes
    # that will be included in the valley bottom for the 'small' portions of the
    # network.
    small_slope_thresh = 22

    # High Drainage Area Threshold: the drainage area value in square meters.
    # Streams whose upstream drainage area is greater than this value will be
    # considered the "large" portion of the network, and their maximum valley
    # bottom width will be represented with the "Large Buffer Size" parameter.
    high_drainage_area_thresh = 1000000  # (m2)
    # Low Drainage Area Threshold: the drainage area value in square meters.
    # Streams whose upstream drainage area is less than this value will be
    # considered the "small" portion of the network, and their maximum valley
    # bottom width will be represented with the "Small Buffer Size" parameter.
    # Streams whose upstream drainage area is between the high and low drainage
    # area thresholds will be considered the "medium" portion of the network and
    # their maximum valley bottom width represented by the "Medium Buffer Width"
    # parameter.
    low_drainage_area_thresh = 40000     # (m2)

    #createVBETValleyBottom(vb_dir, watersheds_dir, large_slope_thresh, medium_slope_thresh, small_slope_thresh,
    #                       overwrite=True, cleanup=False)

In [None]:
#def createVBETValleyBottom(indir, watershedsDir, lrgSlopeThresh, medSlopeThresh,
#                           smSlopeThresh, overwrite=False, cleanup=False):
# Prepares the per-watershed inputs of a valley bottom built with the VBET
# methodology. Iterates the watersheds directory (one folder per HUC4
# watershed), buffers every NHD flowline by a distance derived from its total
# drainage area, writes the buffers to disk and rasterizes the buffer size onto
# the DEM grid.
#
# NOTE: this cell calls getRasterProj4() and getResAndExtent(), which are
# defined in a cell further down the notebook; that cell must be run first.
indir = vb_dir
watershedsDir = watersheds_dir
lrgSlopeThresh = large_slope_thresh
medSlopeThresh = medium_slope_thresh
smSlopeThresh = small_slope_thresh

# The final output of the script
vbet_allwatersheds = os.path.join(indir, "VBET_ValleyBottoms.tif")

# Watershed Size Column Name
watershed_col = "TotDASqKm"


def calculateBufferSize(da):
    """Convert a drainage-area value into a flowline buffer distance.

    Values <= 1 are treated as misreads and get the minimum buffer of 1.
    Otherwise the buffer is sqrt(da) / (log10(da) * 4/3). Because log10(da)
    approaches 0 as da approaches 1, inputs in (1, 2) are evaluated at da = 2
    so the result stays bounded instead of exploding.
    """
    if da <= 1:
        return 1
    effective_da = 2 if da < 2 else da
    return math.sqrt(effective_da) / (math.log(effective_da, 10) * (4 / 3))


def bufferLines(row):
    """Buffer the row's geometry by the distance stored in its BufferSize column."""
    return row.geometry.buffer(row.BufferSize)


# Only (re)build when the final raster is missing or an overwrite was requested.
if not os.path.exists(vbet_allwatersheds) or overwrite:
    localLog.info("\nValley Bottom Raster based on VBET methodology doesn't exist. Beginning creation.\n")
    # Final output file doesn't exist, begin creation

    flow_acc_thresh = 2000  # minimum flow accumulation size to identify stream
    #ValleyBottomRastersPrep.vb_prep(watershedsDir, flow_initiation_threshold=flow_acc_thresh)

    # Need to divide by 1000 because of the PercentRise calculation used in Esri's Slope Determination. Just a component of predictor variables.
    lrgSlopeThresh = lrgSlopeThresh/1000
    medSlopeThresh = medSlopeThresh/1000
    smSlopeThresh = smSlopeThresh/1000

    print(watershedsDir)

    for w_dir in os.listdir(watershedsDir):
        watershed_dir = os.path.join(watershedsDir, w_dir)
        if not os.path.isdir(watershed_dir):
            # Stray files next to the watershed folders cannot be listed below.
            continue

        localLog.info("--- BEGINNING ON WATERSHED %s ---" % w_dir)

        # Reset for every watershed so a folder that lacks one of these
        # sub-directories can never silently reuse the previous watershed's path.
        rasters_dir = None
        geodatabase = None
        for subdir in os.listdir(watershed_dir):
            if "Rasters" in subdir:
                rasters_dir = os.path.join(watershed_dir, subdir)
            if "GDB" in subdir:
                geodatabase = os.path.join(watershed_dir, subdir)

        if rasters_dir is None:
            msg = "No 'Rasters' directory found in %s" % watershed_dir
            localLog.error(msg)
            raise FileNotFoundError(msg)

        fac_raster_loc = os.path.join(rasters_dir, "fac.tif")
        preds_dir = os.path.join(rasters_dir, "Predictors")
        intermediate_preds_dir = os.path.join(rasters_dir, "RSAC_Intermediates")
        dem_ras = os.path.join(preds_dir, "elev_meters.tif")
        slope_ras = os.path.join(intermediate_preds_dir, "Slope.tif")

        missing_rasters = [path for path in (slope_ras, dem_ras) if not os.path.exists(path)]
        if missing_rasters:
            msg = ("Required raster(s) do not exist: %s. Run RSAC preprocessing script "
                   "from ArcGIS python environment." % ", ".join(missing_rasters))
            localLog.error(msg)
            raise FileNotFoundError(msg)

        flowlines_vector = os.path.join(watershed_dir, "NHD_Flowlines_buffered.gpkg")
        flowlines_raster = os.path.join(watershed_dir, "NHD_Flowlines_buffered.tif")
        flowlines_clipped_raster = os.path.join(watershed_dir, "NHD_Flowlines_buffered_clipped.tif")

        # NOTE(review): neither of these two rasters is written by this cell, so
        # the condition stays true unless another step creates them.
        if not os.path.exists(flowlines_clipped_raster) or not os.path.exists(flowlines_raster) or overwrite:
            localLog.info("%s doesn't exist. Beginning creation..." % flowlines_clipped_raster)

            if geodatabase is None:
                msg = "No 'GDB' directory found in %s" % watershed_dir
                localLog.error(msg)
                raise FileNotFoundError(msg)

            localLog.debug("Reading in NHD flowlines feature class from geodatabase...")
            flowlines = gpd.GeoDataFrame.from_file(geodatabase, layer='NHDFlowline')

            raster_crs = getRasterProj4(fac_raster_loc)
            localLog.debug("Reprojecting flowlines dataframe to FAC raster projection...")
            flowlines = flowlines.to_crs(raster_crs)

            # GET VAA TABLE AND JOIN TO FEATURE CLASS FOR WATERSHED SIZE
            flowlines_vaa = gpd.GeoDataFrame.from_file(geodatabase, layer='NHDPlusFlowlineVAA')

            # Join only the key and the drainage-area attribute. Leaving the VAA
            # table's own geometry column out of the merge keeps the flowline
            # geometry under its original name (no geometry_x / geometry_y).
            vaa_attrs = pd.DataFrame(flowlines_vaa[['NHDPlusID', watershed_col]])
            flowlines = flowlines[['NHDPlusID', 'geometry']].merge(vaa_attrs, on='NHDPlusID')

            # Keep only the drainage area and the geometry
            flowlines = gpd.GeoDataFrame(flowlines[[watershed_col, 'geometry']])

            # TODO - select only flowlines which are true in-ground streams. Do not include canals, culverts, etc
            #pipline underground  - FCODE 42803, 42804, 42807, 42808, 42812...

            localLog.debug("Finding node on flowline and buffering...")

            # get resolution
            with rio.open(fac_raster_loc) as ras:
                res_x, res_y = ras.res

            # Number of raster cells per 1000 map units along each axis
            ratio_x = 1000 / res_x
            ratio_y = 1000 / res_y

            # km^2 multiplied by cells-per-km in x and y. NOTE(review): despite the
            # column name this is a cell count (a flow-accumulation equivalent),
            # not square meters, assuming the raster units are meters - confirm.
            # The name is kept because it is written to the output file.
            flowlines["TotDASq_m"] = flowlines[watershed_col] * ratio_x * ratio_y

            flowlines["BufferSize"] = flowlines["TotDASq_m"].apply(calculateBufferSize)

            # Buffer each flowline by its watershed size
            flowlines["geometry"] = flowlines.apply(bufferLines, axis=1)

            # Write the buffered flowlines out to file
            localLog.debug("Writing out flowline buffers to file: %s" % flowlines_vector)
            print("Flowline dtypes:\n {}".format(flowlines.dtypes))

            flowlines.to_file(flowlines_vector)

            # Rasterize buffered flowlines
            # NOTE(review): the output path has no file extension - confirm the
            # format GDAL picks is the intended one.
            outbuffer_ras = os.path.join(preds_dir, "WatershedBufferSize")
            if not os.path.exists(outbuffer_ras) or overwrite:
                print("Rasterizing %s ..." % flowlines_vector)
                rasterinfo = getResAndExtent(dem_ras) # use dem_ras just for sample
                extent = " ".join(rasterinfo[2:])
                # Separate names so the numeric res_x / res_y above are not
                # overwritten with strings.
                target_res_x = str(rasterinfo[0])
                target_res_y = str(rasterinfo[1])
                opts = "-a BufferSize" + " -a_nodata -9999 -tr " + target_res_x + " " + target_res_y + " -te " + extent
                # print(opts)
                gdal.Rasterize(outbuffer_ras, flowlines_vector, options=opts)

INFO:root:
Valley Bottom Raster based on VBET methodology doesn't exist. Beginning creation.

INFO:root:--- BEGINNING ON WATERSHED 1407 ---
INFO:root:--- BEGINNING ON WATERSHED 1408 ---
INFO:root:M:\Data\ValleyBottoms\Watersheds\1408\NHD_Flowlines_buffered_clipped.tif doesn't exist. Beginning creation...


M:\Data\ValleyBottoms\Watersheds




Flowline dtypes:
 TotDASqKm     float64
geometry       object
TotDASq_m     float64
BufferSize    float64
dtype: object




In [21]:
import warnings

# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used in this notebook - consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

In [None]:
def getRasterProj4(raster):
    """Return the projection of the input raster as a PROJ.4 string.

    Parameters
    ----------
    raster : str
        Path to a raster that GDAL can open.

    Returns
    -------
    str
        PROJ.4 definition exported from the raster's WKT projection.

    Raises
    ------
    IOError
        If GDAL cannot open ``raster``.
    """
    # Make OSR raise on a bad WKT instead of returning an error code that
    # nothing below inspects.
    osr.UseExceptions()

    fac = gdal.Open(raster)
    if fac is None:
        # Without GDAL exceptions enabled, Open() reports failure by returning None.
        raise IOError("Unable to open raster: %s" % raster)

    ras_proj = fac.GetProjection()
    fac = None  # drop the reference so GDAL can close the dataset

    # Apparently osr has difficulties identifying albers projections
    prjText = ras_proj.replace('"Albers"', '"Albers_Conic_Equal_Area"')

    spatialRef = osr.SpatialReference()
    spatialRef.ImportFromWkt(prjText)
    return spatialRef.ExportToProj4()

def getRasterTransform(rasterLoc):
    """Return the affine georeferencing transform of the raster at ``rasterLoc``.

    Parameters
    ----------
    rasterLoc : str
        Path to a raster that rasterio can open.

    Returns
    -------
    The dataset's affine transform object.
    """
    with rio.open(rasterLoc) as raster:
        # `affine` is the pre-1.0 rasterio attribute. rasterio 1.0 dropped it
        # and exposes the same object as `transform`, so use whichever exists.
        t = raster.affine if hasattr(raster, "affine") else raster.transform

    return t


def calculateGeom(row):
    """Build a circular polygon around a representative vertex of a flowline.

    Is passed a row containing flowline geometry under the "geometry" key.
    For lines with other than two vertices the vertex at index
    ``num_nodes // 2`` is buffered by 10; for two-vertex lines the end vertex
    is buffered by 5. Buffer distances are in the geometry's coordinate units.

    Returns the buffered polygon.
    """
    geom = row["geometry"]
    if geom.geom_type == "MultiLineString":
        geom = linemerge(geom)
        if geom.geom_type == "MultiLineString":
            # linemerge only joins contiguous parts. Disjoint parts stay
            # multi-part, which has no single coordinate sequence, so fall back
            # to the longest part.
            geom = max(geom.geoms, key=lambda part: part.length)

    num_nodes = len(geom.coords)
    if num_nodes == 2:
        # IF TWO POINT LINESTRING, SMALL STREAM ANYWAY. TAKE THE POINT WHICH IS AT THE END OF THE LINE
        point = Point(geom.coords[-1])
        bufferSize = 5
    else:
        # IF LINESTRING IS NOT COMPOSED OF ONLY TWO NODES, GET THE NODE IN THE MIDDLE MOST OF THE LINE
        point = Point(geom.coords[num_nodes // 2])
        bufferSize = 10

    return point.buffer(bufferSize)


def getSnappedPixelLocation(geom_x, geom_y, ras_aff):
    """Snap a coordinate to the nearest pixel-grid corner of a raster.

    Parameters
    ----------
    geom_x, geom_y : float
        Coordinate to snap, in the raster's coordinate system.
    ras_aff : object
        Affine-like transform exposing ``a`` / ``e`` (pixel width / height,
        ``e`` may be negative) and ``c`` / ``f`` (grid origin x / y).

    Returns
    -------
    dict
        ``{"x": snapped_x, "y": snapped_y}``. A coordinate exactly half way
        between two grid lines snaps to the lower one.

    Raises
    ------
    ValueError
        If a snapped value does not land on the pixel grid (e.g. NaN input).
    """
    def _snap(coord, origin, pix_size, label):
        step = abs(pix_size)
        # Signed offset from the grid origin. With a positive divisor, % yields
        # the distance to the grid line at or below `coord` on either side of
        # the origin, so points left of / above the raster snap correctly too.
        # Rounded to 11 decimals to strip floating point noise.
        remainder = float("{0:.11f}".format((coord - origin) % step))

        if remainder > step / 2:
            snapped = coord + (step - remainder)
        else:
            snapped = coord - remainder

        # Check grid alignment with a tolerance: exact float equality rejects
        # values that are on the grid up to rounding error. Written as
        # `not (<=)` so NaN fails the check as well.
        residual = (snapped - origin) % step
        if not (min(residual, step - residual) <= step * 1e-6):
            raise ValueError("BAD PIXEL VALUE FOR %s - %r" % (label, snapped))
        return snapped

    return {"x": _snap(geom_x, ras_aff.c, ras_aff.a, "ULX"),
            "y": _snap(geom_y, ras_aff.f, ras_aff.e, "ULY")}


def createPoints(row):
    """Return the maximum raster value found at pixel centres around a feature.

    Snaps the bounding box of ``row["fac_poly"]`` to the pixel grid of the
    global transform ``rt``, generates the centre point of every pixel in that
    box, and passes the points lying within the polygon to
    ``getMaxRasterValues``. If no pixel centre falls within the polygon, all
    pixel centres of the box are used instead.

    Uses the module-level globals ``rowcount`` (progress counter, incremented
    on every call) and ``rt``; both must be set before the first call.

    Raises
    ------
    ValueError
        If the snapped bounding box is zero pixels wide or high.
    """
    global rowcount
    rowcount += 1
    if rowcount % 10000 == 0:
        print("Feature #: ", rowcount)

    fac_poly = row["fac_poly"]
    minx, miny, maxx, maxy = fac_poly.bounds

    ul = getSnappedPixelLocation(minx, maxy, rt)
    lr = getSnappedPixelLocation(maxx, miny, rt)

    pixel_width = abs(rt.a)
    pixel_height = abs(rt.e)

    # Both corners are snapped to the grid, so each span is a whole number of
    # pixels up to float noise. round() recovers that count; truncating the
    # span to an integer first would zero it out for pixel sizes below one unit.
    outshapex_inPixels = int(round(abs(lr["x"] - ul["x"]) / pixel_width))
    outshapey_inPixels = int(round(abs(ul["y"] - lr["y"]) / pixel_height))

    if outshapex_inPixels == 0 or outshapey_inPixels == 0:
        raise ValueError("Snapped bounding box is not correct",
                         outshapex_inPixels, outshapey_inPixels)

    half_x_size = pixel_width / 2
    half_y_size = pixel_height / 2

    polygon_internal_points = []
    polygon_external_points = []

    for x in range(outshapex_inPixels):
        pointx = (ul["x"] + half_x_size) + (x * pixel_width)
        for y in range(outshapey_inPixels):
            pointy = (ul["y"] - half_y_size) - (y * pixel_height)
            point = Point(pointx, pointy)

            if point.within(fac_poly):  # IF WITHIN THE BUFFERED POINT
                polygon_internal_points.append(point)
            else:
                polygon_external_points.append(point)

    # IF NONE ARE IN THE BUFFERED AREA, USE ALL OF THEM
    return getMaxRasterValues(polygon_internal_points or polygon_external_points)
    
def getMaxRasterValues(points):
    """Return the largest value of the global ``fac_raster`` sampled at ``points``.

    Each point is sampled individually through ``fac_raster.sample`` and the
    first element of every returned sample is compared. ``max`` raises
    ValueError when ``points`` is empty.
    """
    sampled = [
        sample[0]
        for pt in points
        for sample in fac_raster.sample([(pt.x, pt.y)])
    ]
    return max(sampled)

def getResAndExtent(raster_file):
    """Return the resolution and extent of a raster.

    The result is the list ``[resx, resy, xmin, ymin, xmax, ymax]`` where the
    two resolutions are absolute numeric values and the four extent entries
    are strings.
    """
    with rio.open(raster_file) as src:
        profile = src.profile

    # Indexing assumes affine ordering (a, b, c, d, e, f): 0 and 4 are the
    # pixel sizes, 2 and 5 the origin.
    transform = profile['transform']
    pixel_w = transform[0]
    pixel_h = transform[4]
    left = transform[2]
    top = transform[5]

    bottom = top + (profile['height'] * pixel_h)
    right = left + (profile['width'] * pixel_w)

    extent = [str(value) for value in (left, bottom, right, top)]
    return [abs(pixel_w), abs(pixel_h)] + extent
    
def bufferLines(row):
    """Buffer a flowline by a distance derived from its watershed-size value.

    Reads the value from ``row[watershed_col]`` (``watershed_col`` is a
    module-level name) and buffers ``row.geometry`` by
    sqrt(fac) / (log10(fac) * 4/3), a simple equation which correlates the
    flow accumulation value (fac), e.g. watershed size, to the appropriate
    valley bottom buffer.

    NOTE: the processing cell defines another function with this same name
    that reads a precomputed ``BufferSize`` column; whichever definition was
    executed last is the one in effect.
    """
    fac = row[watershed_col]

    # log of 1 is 0, can't divide by zero, and values just above 1 make the
    # denominator vanishingly small (an enormous buffer). Anything below 2 is
    # therefore evaluated at 2, essentially the minimum buffer size.
    if fac < 2:
        fac = 2

    buffersize = math.sqrt(fac) / (math.log(fac, 10) * (4 / 3))

    return row.geometry.buffer(buffersize)

In [55]:
# Exploratory cell: load a single watershed's geodatabase for inspection.
# NOTE(review): absolute, machine-specific path - adjust for your environment.
geodatabase = r"M:\Data\ValleyBottoms\Watersheds\1507\NHDPlus_H_1507_GDB.gdb"
# Attribute table layer that is joined to the flowlines below on NHDPlusID.
flowlines_vaa = gpd.GeoDataFrame.from_file(geodatabase, layer='NHDPlusFlowlineVAA')
# Flowline feature class.
flowlines = gpd.GeoDataFrame.from_file(geodatabase, layer='NHDFlowline')

In [56]:
# Row and column counts of the flowline layer as loaded.
print(flowlines.shape)

(99931, 19)


In [57]:
# Column names of the flowline layer before the attribute join.
flowlines.columns

Index(['Permanent_Identifier', 'FDate', 'Resolution', 'GNIS_ID', 'GNIS_Name',
       'LengthKM', 'ReachCode', 'FlowDir', 'WBArea_Permanent_Identifier',
       'FType', 'FCode', 'MainPath', 'InNetwork', 'VisibilityFilter',
       'Shape_Length', 'NHDPlusID', 'VPUID', 'Enabled', 'geometry'],
      dtype='object')

In [58]:
# Join the attribute table to the flowlines on their shared NHDPlusID key.
# NOTE(review): `flowlines` is reassigned from itself, so re-running this cell
# merges the already-merged frame again - restart from the load cell instead.
flowlines = flowlines.merge(flowlines_vaa, on='NHDPlusID')

# Merge renames geometry to geometry_x. Fix
flowlines['geometry'] = flowlines['geometry_x']
#flowlines[watershed_col]

0          0.4726
1        381.7279
2          2.1035
3         46.5598
4         46.1306
5         29.1945
6          0.2047
7          1.3311
8         15.5205
9          0.2725
10         1.5225
11         0.9207
12         0.7097
13         0.0599
14         6.4099
15         6.4071
16         0.2157
17         6.8697
18         2.8771
19         1.2221
20       287.7668
21       288.6652
22       330.3382
23       330.2896
24       714.9244
25       714.8087
26       713.9927
27         0.6057
28         3.1418
29         1.3604
           ...   
99195      0.0965
99196      1.2070
99197      7.1543
99198      1.1220
99199      0.3036
99200      1.6421
99201      0.1202
99202     12.0433
99203      0.0440
99204      0.1002
99205      0.9197
99206     17.5111
99207      0.1872
99208      9.8484
99209      0.1131
99210     26.1589
99211      0.0029
99212      0.9897
99213     43.4165
99214    475.5548
99215      4.6044
99216      1.9121
99217      0.2051
99218     10.0342
99219     

In [59]:
# List every column present after the merge (suffixed _x / _y where both
# tables had a column of the same name).
all_columns = flowlines.columns.tolist()
#print(flowlines['geometry_x'].head())
#print(flowlines['geometry'].head())
print(all_columns)

['Permanent_Identifier', 'FDate', 'Resolution', 'GNIS_ID', 'GNIS_Name', 'LengthKM', 'ReachCode_x', 'FlowDir', 'WBArea_Permanent_Identifier', 'FType', 'FCode', 'MainPath', 'InNetwork', 'VisibilityFilter', 'Shape_Length', 'NHDPlusID', 'VPUID_x', 'Enabled', 'geometry_x', 'StreamLeve', 'StreamOrde', 'StreamCalc', 'FromNode', 'ToNode', 'HydroSeq', 'LevelPathI', 'PathLength', 'TerminalPa', 'ArbolateSu', 'Divergence', 'StartFlag', 'TerminalFl', 'UpLevelPat', 'UpHydroSeq', 'DnLevel', 'DnLevelPat', 'DnHydroSeq', 'DnMinorHyd', 'DnDrainCou', 'FromMeas', 'ToMeas', 'ReachCode_y', 'RtnDiv', 'Thinner', 'VPUIn', 'VPUOut', 'AreaSqKm', 'TotDASqKm', 'DivDASqKm', 'MaxElevRaw', 'MinElevRaw', 'MaxElevSmo', 'MinElevSmo', 'Slope', 'SlopeLenKm', 'ElevFixed', 'HWType', 'HWNodeSqKm', 'StatusFlag', 'VPUID_y', 'geometry_y', 'geometry']


In [60]:
# Build the list of columns to drop: everything except the geometry and the
# watershed-size column (`watershed_col` is defined in the processing cell).
all_columns = flowlines.columns.tolist()
all_columns.remove('geometry')
all_columns.remove(watershed_col)
                
# TODO - select only flowlines which are true in-ground streams. Do not include canals, culverts, etc

# NOTE(review): in-place drop - re-running the cells above without reloading
# will not bring the dropped columns back.
flowlines.drop(all_columns, axis=1, inplace=True)

In [61]:
# Preview the first rows after dropping the unused columns.
flowlines.head()

Unnamed: 0,TotDASqKm,geometry
0,0.4726,(LINESTRING Z (-113.2393364680036 34.078086813...
1,381.7279,(LINESTRING Z (-113.2066020680543 32.541226682...
2,2.1035,(LINESTRING Z (-113.2053928680562 32.816411282...
3,46.5598,(LINESTRING Z (-113.2153698013741 32.954081082...
4,46.1306,(LINESTRING Z (-113.2150438013746 32.953932682...


In [69]:
# Hard-coded cell size for this exploratory run (the processing cell reads the
# resolution from the fac.tif raster instead).
res_x = 10
res_y = 10

# Number of raster cells per 1000 map units along each axis.
ratio_x = 1000 / res_x
ratio_y = 1000 / res_y

# NOTE(review): despite the column name, km^2 times cells-per-km in x and y is
# a cell count rather than square meters, assuming map units are meters - confirm.
flowlines["TotDASq_m"] = flowlines[watershed_col] * ratio_x * ratio_y

In [71]:
# Preview the first rows with the new TotDASq_m column.
flowlines.head()

Unnamed: 0,TotDASqKm,geometry,TotDASq_m
0,0.4726,(LINESTRING Z (-113.2393364680036 34.078086813...,4726.0
1,381.7279,(LINESTRING Z (-113.2066020680543 32.541226682...,3817279.0
2,2.1035,(LINESTRING Z (-113.2053928680562 32.816411282...,21035.0
3,46.5598,(LINESTRING Z (-113.2153698013741 32.954081082...,465598.0
4,46.1306,(LINESTRING Z (-113.2150438013746 32.953932682...,461306.0


In [103]:
def calculateBufferSize(da):
    """Convert a drainage-area value into a flowline buffer distance.

    Parameters
    ----------
    da : float
        Drainage-area value (the ``TotDASq_m`` column in this notebook).

    Returns
    -------
    int or float
        1 when ``da`` is <= 1, otherwise sqrt(da) / (log10(da) * 4/3).
        Because log10(da) approaches 0 as da approaches 1, inputs in (1, 2)
        are evaluated at da = 2 so the result stays bounded instead of
        growing without limit.
    """
    if da <= 1:
        return 1

    # Floor the input of the formula at 2 to stay clear of the log10(1) == 0 pole.
    effective_da = 2 if da < 2 else da
    return math.sqrt(effective_da) / (math.log(effective_da, 10) * (4 / 3))

In [104]:
# Derive each flowline's buffer distance from its drainage-area value.
# (The helper is named calculateBufferSize; `calculateBuffer` is not defined
# in this notebook and fails on a fresh kernel.)
flowlines["BufferSize"] = flowlines["TotDASq_m"].apply(calculateBufferSize)

In [105]:
# Preview the first rows with the new BufferSize column.
flowlines.head()

Unnamed: 0,TotDASqKm,geometry,TotDASq_m,BufferSize
0,0.4726,(LINESTRING Z (-113.2393364680036 34.078086813...,4726.0,14.03171
1,381.7279,(LINESTRING Z (-113.2066020680543 32.541226682...,3817279.0,222.636607
2,2.1035,(LINESTRING Z (-113.2053928680562 32.816411282...,21035.0,25.162458
3,46.5598,(LINESTRING Z (-113.2153698013741 32.954081082...,465598.0,90.289266
4,46.1306,(LINESTRING Z (-113.2150438013746 32.953932682...,461306.0,89.935966


In [107]:
# Spot check: a TotDASqKm of 381.7279 at the 10-unit cell size used above
# (x100 x100) should reproduce the BufferSize shown for that row.
da = 381.7279 * 100 * 100
calculateBufferSize(da)

222.6366073124281