# Create watersheds and catchments

AKSSF project has ~ 500 sites that have been shifted to the flow networks.
We need to create watersheds for each. This will make extracting spatial
and climatic covariates for modeling go much faster.

Dustin pointed out that there are fromnodes and tonodes in the NHDPlus that can be used to navigate
upstream, save all the NHDPlusIDs, then select and merge the catchments to create watersheds for each site.
This works in R so just need to transfer to python. Premise is to use a while loop to keep selecting
new stream segments that have their tonode match the fromnode of the last segment(s). Stop when
summing the ids is not greater than 0.

Create a loop and process watersheds for all the points. Start with Cook Inlet first.
Note that folders, geodbs, and merged catchments are created in the merge_grids script.
1. select catchments that intersect points to get NHDPlusID
2. create list of IDs
3. use loop to create watersheds
4. first get list of all upstream NHDPlusIDs
5. create temporary layer of catchments
6. select catchments that match the upstream IDs
7. dissolve those catchments and save to the Cook Inlet gdb and Watersheds feature dataset

In [2]:
# COOK INLET
# Steps 1 and 2: select the catchments that intersect the site points and
# collect their NHDPlusIDs into idList.
import arcpy
arcpy.env.workspace = r"W:\GIS\AKSSF\Cook_Inlet\Cook_Inlet.gdb"
arcpy.env.overwriteOutput = True

points = r"T:\Aquatic\AKSSF\AKSSF_Hydrography.gdb\sites_outside_bb_verified_DM"
cats = r"W:\GIS\AKSSF\Cook_Inlet\Cook_Inlet.gdb\cats_merge"
outcats = "cats_intersect"

# Layer over the merged catchments, narrowed to those intersecting a site and
# then copied out as its own feature class.
arcpy.MakeFeatureLayer_management(cats, "tempLayer")
arcpy.management.SelectLayerByLocation("tempLayer", "INTERSECT", points)
arcpy.CopyFeatures_management("tempLayer", outcats)

# Print the field names of the layer.
fields = arcpy.ListFields("tempLayer")
for field in fields:
    print(field.name)

# The cursor reads the layer, so only the selected catchments are returned.
with arcpy.da.SearchCursor("tempLayer", ["NHDPlusID"]) as cursor:
    idList = [row[0] for row in cursor]

print(len(idList))

# Compare against the de-duplicated IDs to spot duplicate catchments.
idset = set(idList)
print(idset)
print(len(idset))


OBJECTID
Shape
NHDPlusID
SourceFC
GridCode
AreaSqKm
VPUID
Catch_ID
Shape_Length
Shape_Area
241
{75004400010754.0, 75000200013319.0, 75004300001288.0, 75000200004615.0, 75004300002314.0, 75000200000010.0, 75000200000011.0, 75000200015883.0, 75000200015882.0, 75000300025361.0, 75000200000021.0, 75000200003094.0, 75004300002332.0, 75000200015900.0, 75000200008734.0, 75004400001568.0, 75000200003107.0, 75004300006440.0, 75004400009260.0, 75004400009261.0, 75005300022831.0, 75004300002352.0, 75004300000311.0, 75004400010810.0, 75004400011322.0, 75004300005437.0, 75004400000576.0, 75000200014914.0, 75000400014403.0, 75000200013892.0, 75000400014405.0, 75000700036166.0, 75000200011336.0, 75000200005532.0, 75004300008012.0, 75004400010320.0, 75004300003409.0, 75004400003154.0, 75004300000856.0, 75004400009308.0, 75004300004452.0, 75000200010344.0, 75000100003947.0, 75000200015469.0, 75000200017518.0, 75000500010609.0, 75004300001906.0, 75004400000627.0, 75004400009331.0, 75000200002162.0, 7500

In [3]:
# Only 114 watersheds were created for Cook Inlet, but there are 241 unique
# catchments; work out which ones are still missing.
import arcpy

# Got through 95 watersheds and all other programs froze, restarted and finding which watersheds remain.
arcpy.env.workspace = r"W:\GIS\AKSSF\Cook_Inlet\Cook_Inlet.gdb\Watersheds"

# Only consider feature classes that carry the watershed prefix and strip that
# prefix (rather than slicing a fixed character range) to recover the ID.
prefix = "wtd_"
wtds = [int(name[len(prefix):])
        for name in (arcpy.ListFeatureClasses(prefix + "*") or [])]
print(wtds)
print(len(wtds))
# idList is the list of site catchment IDs built in the steps 1 and 2 cell.
print(len(idList))

# Set membership keeps the comparison linear; numerically equal int and float
# IDs compare (and hash) equal, so float IDs in idList still match.
completed = set(wtds)
idFilter = [x for x in idList if x not in completed]
print(idFilter)
print("Original list of sites in Cook Inlet: " + str(len(idList)))
print("Watersheds completed: " + str(len(wtds)))
print("Watersheds remaining: " + str(len(idFilter)))


[75004200007057, 75004300006312, 75004300001906, 75004300000100, 75004300004983, 75004300004332, 75004300006239, 75004300004304, 75004300002332, 75004300005437, 75004300003464, 75004300005171, 75004300004701, 75004300005303, 75004300006440, 75004300004254, 75004300001207, 75004300005245, 75004300000311, 75004300008012, 75004300004192, 75004300007324, 75004300001162, 75004300004217, 75004300002207, 75004300003409, 75004300002142, 75004300002314, 75004300003171, 75004300000101, 75004300005352, 75004300000856, 75004300004341, 75004300002289, 75004300007331, 75004300006376, 75004300002278, 75004300002352, 75004300001210, 75004300004452, 75004300000105, 75004300006071, 75004300004324, 75004300001288, 75004300006013, 75004400004829, 75004400000576, 75004400006041, 75004400010402, 75004400008856, 75004400010754, 75004400010810, 75004400002950, 75004400009144, 75004400006357, 75004400007385, 75004400007948, 75004400009261, 75004400001510, 75004400007387, 75004400010320, 75004400001568, 7500440

In [None]:
# COOK INLET
# Steps 4-7: for each remaining site catchment, walk the flow network upstream,
# select the catchments with those IDs and dissolve them into one watershed.

import arcpy
import pandas as pd


arcpy.env.workspace = r"W:/GIS/AKSSF/Cook_Inlet/Cook_Inlet.gdb"
arcpy.env.overwriteOutput = True
arcpy.env.qualifiedFieldNames = False

vaa = "vaa_merge"
cats = "cats_merge"
output_SR = arcpy.Describe(cats).spatialReference
arcpy.env.outputCoordinateSystem = output_SR

#watersheds feature dataset for storing fcs
# arcpy.management.CreateFeatureDataset(r"W:\GIS\AKSSF\Cook_Inlet\Cook_Inlet.gdb", "Watersheds", output_SR)

# Flowline attributes used for navigation: segment ID plus its from/to nodes.
vaa_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(vaa, ("NHDPlusID", "FromNode", "ToNode")))

# The catchment layer is the same for every site, so build it once; each
# iteration only replaces its selection.
tempLayer = "catsLyr"
arcpy.MakeFeatureLayer_management(cats, tempLayer)

# idFilter (catchments still lacking a watershed) comes from the previous cell.
# dict.fromkeys drops repeated IDs while keeping their order.
for site_id in dict.fromkeys(idFilter):
    print("Starting watershed for: " + str(site_id))

    # Breadth-first walk upstream: a segment is upstream of the current ones
    # when its ToNode equals one of their FromNodes. Tracking visited IDs keeps
    # the ID list free of repeats and guarantees the loop ends even if the
    # network data contained a cycle.
    visited = set()
    frontier = [site_id]
    while frontier:
        visited.update(frontier)
        fromnode = vaa_df.loc[vaa_df["NHDPlusID"].isin(frontier), "FromNode"]
        upstream = vaa_df.loc[vaa_df["ToNode"].isin(fromnode), "NHDPlusID"]
        frontier = [x for x in upstream.unique().tolist() if x not in visited]

    # visited always holds at least the site catchment itself, so a headwater
    # site yields a one-element IN list.
    expression = '"NHDPlusID" IN ({0})'.format(', '.join(map(str, sorted(visited))))
    arcpy.management.SelectLayerByAttribute(tempLayer, "NEW_SELECTION", expression, None)

    outwtd = "Watersheds\\wtd_" + str(round(site_id))
    print(outwtd)
    arcpy.management.Dissolve(tempLayer, outwtd)


In [None]:
# COPPER RIVER
# Steps 1 and 2: select the catchments that intersect the site points and
# collect their NHDPlusIDs into idList.
import arcpy
arcpy.env.workspace = r"W:\GIS\AKSSF\Copper_River\Copper_River.gdb"
arcpy.env.overwriteOutput = True

points = r"T:\Aquatic\AKSSF\AKSSF_Hydrography.gdb\sites_outside_bb_verified_DM"
cats = r"W:\GIS\AKSSF\Copper_River\Copper_River.gdb\cats_merge"
outcats = "cats_intersect"

# Layer over the merged catchments, narrowed to those intersecting a site and
# then copied out as its own feature class.
arcpy.MakeFeatureLayer_management(cats, "tempLayer")
arcpy.management.SelectLayerByLocation("tempLayer", "INTERSECT", points)
arcpy.CopyFeatures_management("tempLayer", outcats)

# Print the field names of the layer.
fields = arcpy.ListFields("tempLayer")
for field in fields:
    print(field.name)

# The cursor reads the layer, so only the selected catchments are returned.
with arcpy.da.SearchCursor("tempLayer", ["NHDPlusID"]) as cursor:
    idList = [row[0] for row in cursor]

print(len(idList))

In [None]:
# COPPER RIVER
# Steps 4-7: for each site catchment, walk the flow network upstream, select
# the catchments with those IDs and dissolve them into one watershed.

import arcpy
import pandas as pd

#idList = [75004300004324]

arcpy.env.workspace = r"W:/GIS/AKSSF/Copper_River/Copper_River.gdb"
arcpy.env.overwriteOutput = True
arcpy.env.qualifiedFieldNames = False

vaa = "vaa_merge"
cats = "cats_merge"
output_SR = arcpy.Describe(cats).spatialReference
arcpy.env.outputCoordinateSystem = output_SR

# Watersheds feature dataset for storing the output feature classes. Only
# create it when it is missing so the cell can be re-run.
# NOTE(review): with overwriteOutput enabled, re-creating an existing dataset
# may fail or replace its contents - confirm; the guard avoids both.
if not arcpy.Exists(r"W:\GIS\AKSSF\Copper_River\Copper_River.gdb\Watersheds"):
    arcpy.management.CreateFeatureDataset(r"W:\GIS\AKSSF\Copper_River\Copper_River.gdb", "Watersheds", output_SR)

# Flowline attributes used for navigation: segment ID plus its from/to nodes.
vaa_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(vaa, ("NHDPlusID", "FromNode", "ToNode")))

# The catchment layer is the same for every site, so build it once; each
# iteration only replaces its selection.
tempLayer = "catsLyr"
arcpy.MakeFeatureLayer_management(cats, tempLayer)

# idList comes from the steps 1 and 2 cell. dict.fromkeys drops repeated IDs
# while keeping their order.
for site_id in dict.fromkeys(idList):
    print("Starting watershed for: " + str(site_id))

    # Breadth-first walk upstream: a segment is upstream of the current ones
    # when its ToNode equals one of their FromNodes. Tracking visited IDs keeps
    # the ID list free of repeats and guarantees the loop ends even if the
    # network data contained a cycle.
    visited = set()
    frontier = [site_id]
    while frontier:
        visited.update(frontier)
        fromnode = vaa_df.loc[vaa_df["NHDPlusID"].isin(frontier), "FromNode"]
        upstream = vaa_df.loc[vaa_df["ToNode"].isin(fromnode), "NHDPlusID"]
        frontier = [x for x in upstream.unique().tolist() if x not in visited]

    # visited always holds at least the site catchment itself, so a headwater
    # site yields a one-element IN list.
    expression = '"NHDPlusID" IN ({0})'.format(', '.join(map(str, sorted(visited))))
    arcpy.management.SelectLayerByAttribute(tempLayer, "NEW_SELECTION", expression, None)

    outwtd = "Watersheds\\wtd_" + str(round(site_id))
    print(outwtd)
    arcpy.management.Dissolve(tempLayer, outwtd)


In [None]:
# BRISTOL BAY WATERSHEDS
# Select the catchments that intersect the Bristol Bay site points and collect
# their catID values into idList.

import arcpy
import pandas as pd

arcpy.env.workspace = r"W:\GIS\AKSSF\Bristol_Bay\Bristol_Bay.gdb"
arcpy.env.overwriteOutput = True

points = r"W:\GIS\AKSSF\AKSSF_Hydrography.gdb\bb_MD_verified_DM"
cats = r"W:\GIS\AKSSF\Bristol_Bay\Bristol_Bay.gdb\cats_merge"
outcats = "cats_intersect"

# Layer over the merged catchments, narrowed to those intersecting a site and
# then copied out as its own feature class.
arcpy.MakeFeatureLayer_management(cats, "tempLayer")
arcpy.management.SelectLayerByLocation("tempLayer", "INTERSECT", points)
arcpy.CopyFeatures_management("tempLayer", outcats)

# Print the field names of the layer.
fields = arcpy.ListFields("tempLayer")
for field in fields:
    print(field.name)

# The cursor reads the layer, so only the selected catchments are returned.
with arcpy.da.SearchCursor("tempLayer", ["catID"]) as cursor:
    idList = [row[0] for row in cursor]

print(len(idList))

In [None]:
# BRISTOL BAY
# Steps 4-7: for each site catchment, follow the upCatID1/upCatID2 links
# upstream, select the catchments with those IDs and dissolve them into one
# watershed.

import arcpy
import pandas as pd
import numpy
import time

# idList = [492244] #for testing

arcpy.env.workspace = r"W:\GIS\AKSSF\Bristol_Bay\Bristol_Bay.gdb"
arcpy.env.overwriteOutput = True
arcpy.env.qualifiedFieldNames = False

streams = "streams_merge"
cats = "cats_merge"
output_SR = arcpy.Describe(cats).spatialReference
arcpy.env.outputCoordinateSystem = output_SR

#watersheds feature dataset for storing fcs
# arcpy.management.CreateFeatureDataset(r"W:\GIS\AKSSF\Bristol_Bay\Bristol_Bay.gdb", "Watersheds", output_SR)

# Stream attributes used for navigation: catchment ID and its two upstream IDs.
str_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(streams, ("catID", "upCatID1", "upCatID2")))
# Placeholder values in the upstream fields that are not real catchment IDs
# (presumably headwater markers - confirm). They are dropped during the walk
# so they never reach the selection query.
hws_codes = [999999, 1999999, 2999999, 3999999, 4999999]
placeholders = set(hws_codes)

# The catchment layer is the same for every site, so build it once; each
# iteration only replaces its selection.
tempLayer = "catsLyr"
arcpy.MakeFeatureLayer_management(cats, tempLayer)

#idList if doing ALL watersheds. dict.fromkeys drops repeated IDs while
# keeping their order.
for site_id in dict.fromkeys(idList):
    print("Starting watershed for: " + str(site_id))
    visited = set()
    frontier = [site_id]
    timed_out = False
    timeout = time.time() + 60*15 # 15 minutes from this point

    # Breadth-first walk upstream. Tracking visited IDs keeps the ID list free
    # of repeats and guarantees the loop ends even if the links form a cycle.
    while frontier:
        if time.time() > timeout:
            timed_out = True
            break
        visited.update(frontier)
        ups = str_df.loc[str_df["catID"].isin(frontier), ["upCatID1", "upCatID2"]]
        candidates = pd.concat([ups["upCatID1"], ups["upCatID2"]]).dropna().unique().tolist()
        frontier = [c for c in candidates
                    if c not in placeholders and c not in visited]

    if timed_out:
        # The watershed is still written from what was found so far, but say
        # so instead of failing silently.
        print("WARNING: upstream search for " + str(site_id)
              + " stopped at the 15 minute limit; watershed may be incomplete")

    # visited always holds at least the site catchment itself, so a headwater
    # site yields a one-element IN list.
    expression = '"catID" IN ({0})'.format(', '.join(map(str, sorted(visited))))
    arcpy.management.SelectLayerByAttribute(tempLayer, "NEW_SELECTION", expression, None)

    outwtd = "Watersheds\\wtd_" + str(round(site_id))
    arcpy.management.Dissolve(tempLayer, outwtd)
    print("Watershed created at:" + outwtd)

In [None]:
# code when trouble-shooting bb above.
import arcpy

# Got through 95 watersheds and all other programs froze, restarted and finding which watersheds remain.
arcpy.env.workspace = r"W:\GIS\AKSSF\Bristol_Bay\Bristol_Bay.gdb\Watersheds"

# Take characters 4-9 of every feature class name (the part after "wtd_") and
# convert it to an integer ID in a single pass.
wtds = [int(name[4:10]) for name in arcpy.ListFeatureClasses()]
print(wtds)
print(len(wtds))
# idList is the list of site catchment IDs built in an earlier cell.
print(len(idList))
#
# idFilter = [x for x in idList if x not in wtds]
# print(idFilter)
# print("Original list of sites in BB: " + str(len(idList)))
# print("Watersheds completed: " + str(len(wtds)))
# print("Watersheds remaining: " + str(len(idFilter)))


In [None]:
# PRINCE WILLIAM SOUND WATERSHEDS
# Select the catchments that intersect the site points and collect their
# gridcode values into idList.

import arcpy
import pandas as pd

gdb = r"W:\GIS\AKSSF\Prince_William_Sound\Prince_William_Sound.gdb"
arcpy.env.workspace = gdb
arcpy.env.overwriteOutput = True

points = r"W:\GIS\AKSSF\AKSSF_Hydrography.gdb\sites_outside_bb_verified_DM"
cats = r"W:\GIS\AKSSF\Prince_William_Sound\Prince_William_Sound.gdb\cats_merge"
outcats = gdb + "\\cats_intersect"

# One name for the layer throughout, so the selection, the copy and the
# cursor all refer to the layer created here regardless of how layer-name
# case is resolved.
layer_name = "tempLayer"
arcpy.MakeFeatureLayer_management(cats, layer_name)
arcpy.management.SelectLayerByLocation(layer_name, "INTERSECT", points)
arcpy.CopyFeatures_management(layer_name, outcats)

# Print the field names of the layer.
fields = arcpy.ListFields(layer_name)
for field in fields:
    print("{0}".format(field.name))

# The cursor reads the layer, so only the selected catchments are returned.
with arcpy.da.SearchCursor(layer_name, ["gridcode"]) as cursor:
    idList = [row[0] for row in cursor]

print(len(idList))

In [None]:
# Prince_William_Sound
# Steps 4-7: for each site catchment, follow the USLINKNO1/USLINKNO2 links
# upstream, select the catchments whose gridcode matches those IDs and dissolve
# them into one watershed.

import arcpy
import pandas as pd
import numpy

arcpy.env.workspace = r"W:\GIS\AKSSF\Prince_William_Sound\Prince_William_Sound.gdb"
arcpy.env.overwriteOutput = True
arcpy.env.qualifiedFieldNames = False

streams = "streams_merge"
cats = "cats_merge"
output_SR = arcpy.Describe(cats).spatialReference
arcpy.env.outputCoordinateSystem = output_SR

# Watersheds feature dataset for storing the output feature classes. Only
# create it when it is missing so the cell can be re-run.
# NOTE(review): with overwriteOutput enabled, re-creating an existing dataset
# may fail or replace its contents - confirm; the guard avoids both.
if not arcpy.Exists(r"W:\GIS\AKSSF\Prince_William_Sound\Prince_William_Sound.gdb\Watersheds"):
    arcpy.management.CreateFeatureDataset(r"W:\GIS\AKSSF\Prince_William_Sound\Prince_William_Sound.gdb", "Watersheds", output_SR)

# Print the field names of the streams feature class.
fields = arcpy.ListFields(streams)
for field in fields:
    print("{0}".format(field.name))

# Stream attributes used for navigation: link ID and its two upstream link IDs.
str_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(streams, ("LINKNO", "USLINKNO1", "USLINKNO2")))
# Placeholder values in the upstream fields that are not real link IDs
# (presumably headwater markers - confirm). They are dropped during the walk
# so they never reach the selection query.
hws_codes = [-1]
placeholders = set(hws_codes)

# The catchment layer is the same for every site, so build it once; each
# iteration only replaces its selection.
tempLayer = "catsLyr"
arcpy.MakeFeatureLayer_management(cats, tempLayer)

#idList if doing ALL watersheds. dict.fromkeys drops repeated IDs while
# keeping their order.
for site_id in dict.fromkeys(idList):
    print("Starting watershed for: " + str(site_id))

    # Breadth-first walk upstream. The loop ends when no unvisited IDs remain
    # (not when the IDs sum to zero), so an ID of 0 is walked like any other
    # and a cycle in the links cannot loop forever.
    visited = set()
    frontier = [site_id]
    while frontier:
        visited.update(frontier)
        ups = str_df.loc[str_df["LINKNO"].isin(frontier), ["USLINKNO1", "USLINKNO2"]]
        candidates = pd.concat([ups["USLINKNO1"], ups["USLINKNO2"]]).dropna().unique().tolist()
        frontier = [c for c in candidates
                    if c not in placeholders and c not in visited]

    # visited always holds at least the site catchment itself, so a headwater
    # site yields a one-element IN list.
    expression = '"gridcode" IN ({0})'.format(', '.join(map(str, sorted(visited))))
    arcpy.management.SelectLayerByAttribute(tempLayer, "NEW_SELECTION", expression, None)

    outwtd = "Watersheds\\wtd_" + str(round(site_id))
    print(outwtd)
    arcpy.management.Dissolve(tempLayer, outwtd)


In [None]:
# KODIAK WATERSHEDS
# Select the catchments that intersect the site points and collect their
# gridcode values into idList.

import arcpy
import pandas as pd

arcpy.env.workspace = r"W:\GIS\AKSSF\Kodiak\Kodiak.gdb"
arcpy.env.overwriteOutput = True

points = r"W:\GIS\AKSSF\AKSSF_Hydrography.gdb\sites_outside_bb_verified_DM"
cats = r"W:\GIS\AKSSF\Kodiak\Kodiak.gdb\cats_merge"
outcats = "cats_intersect"

# Layer over the merged catchments, narrowed to those intersecting a site and
# then copied out as its own feature class.
arcpy.MakeFeatureLayer_management(cats, "tempLayer")
arcpy.management.SelectLayerByLocation("tempLayer", "INTERSECT", points)
arcpy.CopyFeatures_management("tempLayer", outcats)

# Print the field names of the layer.
fields = arcpy.ListFields("tempLayer")
for field in fields:
    print(field.name)

# The cursor reads the layer, so only the selected catchments are returned.
with arcpy.da.SearchCursor("tempLayer", ["gridcode"]) as cursor:
    idList = [row[0] for row in cursor]

print(len(idList))

In [None]:
# Kodiak
# Steps 4-7: for each site catchment, follow the USLINKNO1/USLINKNO2 links
# upstream, select the catchments whose gridcode matches those IDs and dissolve
# them into one watershed.

import arcpy
import pandas as pd
import numpy

arcpy.env.workspace = r"W:\GIS\AKSSF\Kodiak\Kodiak.gdb"
arcpy.env.overwriteOutput = True
arcpy.env.qualifiedFieldNames = False

streams = "streams_merge"
cats = "cats_merge"
output_SR = arcpy.Describe(cats).spatialReference
arcpy.env.outputCoordinateSystem = output_SR

# Watersheds feature dataset for storing the output feature classes. Only
# create it when it is missing so the cell can be re-run.
# NOTE(review): with overwriteOutput enabled, re-creating an existing dataset
# may fail or replace its contents - confirm; the guard avoids both.
if not arcpy.Exists(r"W:\GIS\AKSSF\Kodiak\Kodiak.gdb\Watersheds"):
    arcpy.management.CreateFeatureDataset(r"W:\GIS\AKSSF\Kodiak\Kodiak.gdb", "Watersheds", output_SR)

# Print the field names of the streams feature class.
fields = arcpy.ListFields(streams)
for field in fields:
    print("{0}".format(field.name))

# Stream attributes used for navigation: link ID and its two upstream link IDs.
str_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(streams, ("LINKNO", "USLINKNO1", "USLINKNO2")))
# Placeholder values in the upstream fields that are not real link IDs
# (presumably headwater markers - confirm). They are dropped during the walk
# so they never reach the selection query.
hws_codes = [-1]
placeholders = set(hws_codes)

# The catchment layer is the same for every site, so build it once; each
# iteration only replaces its selection.
tempLayer = "catsLyr"
arcpy.MakeFeatureLayer_management(cats, tempLayer)

#idList if doing ALL watersheds. dict.fromkeys drops repeated IDs while
# keeping their order.
for site_id in dict.fromkeys(idList):
    print("Starting watershed for: " + str(site_id))

    # Breadth-first walk upstream. The loop ends when no unvisited IDs remain
    # (not when the IDs sum to zero), so an ID of 0 is walked like any other
    # and a cycle in the links cannot loop forever.
    visited = set()
    frontier = [site_id]
    while frontier:
        visited.update(frontier)
        ups = str_df.loc[str_df["LINKNO"].isin(frontier), ["USLINKNO1", "USLINKNO2"]]
        candidates = pd.concat([ups["USLINKNO1"], ups["USLINKNO2"]]).dropna().unique().tolist()
        frontier = [c for c in candidates
                    if c not in placeholders and c not in visited]

    # visited always holds at least the site catchment itself, so a headwater
    # site yields a one-element IN list.
    expression = '"gridcode" IN ({0})'.format(', '.join(map(str, sorted(visited))))
    arcpy.management.SelectLayerByAttribute(tempLayer, "NEW_SELECTION", expression, None)

    outwtd = "Watersheds\\wtd_" + str(round(site_id))
    print(outwtd)
    arcpy.management.Dissolve(tempLayer, outwtd)



# Watershed Summaries

1. read in all watersheds feature classes
2. create a table with the NHDPlusID/catID of the watershed and its area
3. read in catchments intersect
4. do a spatial join on catchments and sites to get a many to one between SiteIDs and catchments
5. create a table with a row for each SiteID, NHDPlusID/catID, and watershed area

In [None]:
# Build a table (wtdDf) with one row per watershed feature class: the ID taken
# from its name and its area in square kilometres.
import arcpy
import os
import pandas as pd

region = "Copper_River"
arcpy.env.workspace = "W:\\GIS\\AKSSF\\" + region + "\\" + region + ".gdb\\Watersheds"

wtds = arcpy.ListFeatureClasses()
print(wtds)

wtdList = []

for wtd in wtds:
    # Characters 4-19 of the name are the part after the "wtd_" prefix.
    wtdName = wtd[4:20]
    print("Starting wtd: " + wtdName)
    wtdPath = os.path.join(arcpy.env.workspace, wtd)

    # Add an area field and fill it from the geometry.
    arcpy.AddField_management(wtdPath, "Area_km2", "DOUBLE")
    expression1 = "!SHAPE.area@SQUAREKILOMETERS!"
    arcpy.CalculateField_management(wtdPath, "Area_km2", expression1, "PYTHON")

    # Read the area back inside a with-block so the cursor is released, and
    # store one number per watershed (summing over its rows) rather than a
    # list, so the Area_km2 column of the table is numeric.
    with arcpy.da.SearchCursor(wtdPath, ['Area_km2']) as cursor:
        wtdArea = sum(row[0] for row in cursor if row[0] is not None)
    print(wtdArea)
    wtdList.append({'Name': wtdName, 'Area_km2': wtdArea})

wtdDf = pd.DataFrame(wtdList)
print(wtdDf)

In [None]:
# Spatially join the site points to the merged catchments and load the
# SiteID / NHDPlusID pairs into a data frame (sitesDf).
import arcpy
import os
import numpy
import pandas as pd

region = "Copper_River"
arcpy.env.workspace = "W:\\GIS\\AKSSF\\{0}\\{0}.gdb".format(region)

points = r"T:\Aquatic\AKSSF\AKSSF_Hydrography.gdb\sites_outside_bb_verified_DM"
cats = os.path.join(arcpy.env.workspace, "cats_merge")
print(cats)

#note spatial join not working, the cats fields are empty!
# NOTE(review): possibly because the points layer is the same all-region site
# layer used by the other regions' cells, so sites outside this region have no
# catchment to join to - confirm (e.g. by checking the join_operation /
# join_type options of Spatial Join).
arcpy.SpatialJoin_analysis(points, cats, "sites_sj")
sites_sj = os.path.join(arcpy.env.workspace, "sites_sj")

# Pull only the two ID columns out of the join result.
site_fields = ("SiteID", "NHDPlusID")
site_records = arcpy.da.FeatureClassToNumPyArray(sites_sj, site_fields)
sitesDf = pd.DataFrame(site_records)

print(sitesDf)
# wtdDf is the watershed area table built in the previous cell.
print(wtdDf)

#merge two data frames to get area linked to nhdplusid and siteid.

# sitesList = []
# with arcpy.da.SearchCursor(sites_sj, ["SiteID", "NHDPlusID"]) as cursor:
#     for row in cursor:
#         sitesList.append({site = row[0], ID = row[1]})