# Create watersheds and catchments

AKSSF project has ~ 500 sites that have been shifted to the flow networks.
We need to create watersheds for each. This will make extracting spatial
and climatic covariates for modeling go much faster.

Dustin pointed out that there are fromnodes and tonodes in the NHDPlus that can be used to navigate
upstream, save all the NHPPlusIDs, select and merge the catchments to create watersheds for each site.
This works in R so just need to transfer to python. Premise is to use a while loop to keep selecting
new stream segments that have their tonode match the fromnode of the last segment(s). Stop when
summing the ids is not greater than 0.

Create a loop and process watersheds for all the points. Start with Cook Inlet first.
Note that folders, geodbs, and merged catchments are created in the merge_grids script.
1. select catchments that intersect points to get NHDPlusID
2. create list of IDs
3. use loop to create watersheds
4. first get list of all upstream NHDPlusIDs
5. create temporary layer of catchments
6. select catchments that match the upstream IDs
7. dissolved on those catchments and save to Cook Inlet gdb and watersheds feature dataset

In [None]:
# COOK INLET
# steps 1 and 2
# intersect points with catchments and create list of NHDPlusIDs
import arcpy
arcpy.env.workspace = r"W:\GIS\AKSSF\Cook_Inlet\Cook_Inlet.gdb"
arcpy.env.overwriteOutput = True

points = r"T:\Aquatic\AKSSF\AKSSF_Hydrography.gdb\sites_outside_bb_verified_DM"
cats = r"W:\GIS\AKSSF\Cook_Inlet\Cook_Inlet.gdb\cats_merge"
idList = []
outcats = "cats_intersect"

arcpy.MakeFeatureLayer_management(cats, "tempLayer")
arcpy.management.SelectLayerByLocation("tempLayer", "INTERSECT", points)
arcpy.CopyFeatures_management("tempLayer", outcats)

fields = arcpy.ListFields("tempLayer")
for field in fields:
    print("{0}".format(field.name))
with arcpy.da.SearchCursor("tempLayer", ["NHDPlusID"]) as cursor:
    for row in cursor:
        idList.append(row[0])

print(len(idList))

In [None]:
# COOK INLET
# steps 4-7

import arcpy
import pandas as pd

#idList = [75004300004324]

# steps 4-9 for loop to create watersheds
arcpy.env.workspace = r"W:/GIS/AKSSF/Cook_Inlet/Cook_Inlet.gdb"
arcpy.env.overwriteOutput = True
arcpy.env.qualifiedFieldNames = False

vaa = "vaa_merge"
cats = "cats_merge"
output_SR = arcpy.Describe(cats).spatialReference
arcpy.env.outputCoordinateSystem = output_SR

#watersheds feature dataset for storing fcs
arcpy.management.CreateFeatureDataset(r"W:\GIS\AKSSF\Cook_Inlet\Cook_Inlet.gdb", "Watersheds", output_SR)

vaa_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(vaa, ("NHDPlusID", "FromNode", "ToNode")))

idList = [75004300006440]

for id in idList:
    print("Starting watershed for: " + str(id))
    rec = [id]
    print(type(rec))
    up_ids = []

    while sum(rec) > 0:
        up_ids.append(rec)
        fromnode = vaa_df.loc[vaa_df["NHDPlusID"].isin(rec), "FromNode"]
        rec = vaa_df.loc[vaa_df["ToNode"].isin(fromnode), "NHDPlusID"]

    #up_ids is a list with more than numbers, use extend to only keep numeric nhdplusids
    newup_ids = []
    for x in up_ids:
        newup_ids.extend(x)

    print(type(newup_ids))
    tempLayer = "catsLyr"
    #expression = 'NHDPlusID IN {0}'.format(tuple(newup_ids))
    #trying expression to deal with one catchment (i.e. hws)
    expression = '"NHDPlusID" IN ({0})'.format(', '.join(map(str, newup_ids)) or 'NULL')
    arcpy.MakeFeatureLayer_management(cats, tempLayer)
    arcpy.management.SelectLayerByAttribute(tempLayer, "NEW_SELECTION", expression, None)

    outwtd = "Watersheds\\wtd_" + str(round(id))
    print(outwtd)
    arcpy.management.Dissolve(tempLayer, outwtd)
    #arcpy.management.CopyFeatures(tempLayer, outwtd)


In [3]:
# COPPER RIVER
# steps 1 and 2
# intersect points with catchments and create list of NHDPlusIDs
import arcpy
arcpy.env.workspace = r"W:\GIS\AKSSF\Copper_River\Copper_River.gdb"
arcpy.env.overwriteOutput = True

points = r"T:\Aquatic\AKSSF\AKSSF_Hydrography.gdb\sites_outside_bb_verified_DM"
cats = r"W:\GIS\AKSSF\Copper_River\Copper_River.gdb\cats_merge"
idList = []
outcats = "cats_intersect"

arcpy.MakeFeatureLayer_management(cats, "tempLayer")
arcpy.management.SelectLayerByLocation("tempLayer", "INTERSECT", points)
arcpy.CopyFeatures_management("tempLayer", outcats)

fields = arcpy.ListFields("tempLayer")
for field in fields:
    print("{0}".format(field.name))
with arcpy.da.SearchCursor("tempLayer", ["NHDPlusID"]) as cursor:
    for row in cursor:
        idList.append(row[0])

print(len(idList))

OBJECTID
Shape
NHDPlusID
SourceFC
GridCode
AreaSqKm
VPUID
HUC8_ID
Shape_Length
Shape_Area
28


In [None]:
# COPPER RIVER
# steps 4-7

import arcpy
import pandas as pd

#idList = [75004300004324]

# steps 4-9 for loop to create watersheds
arcpy.env.workspace = r"W:/GIS/AKSSF/Copper_River/Copper_River.gdb"
arcpy.env.overwriteOutput = True
arcpy.env.qualifiedFieldNames = False

vaa = "vaa_merge"
cats = "cats_merge"
output_SR = arcpy.Describe(cats).spatialReference
arcpy.env.outputCoordinateSystem = output_SR

#watersheds feature dataset for storing fcs
arcpy.management.CreateFeatureDataset(r"W:\GIS\AKSSF\Copper_River\Copper_River.gdb", "Watersheds", output_SR)

vaa_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(vaa, ("NHDPlusID", "FromNode", "ToNode")))

for id in idList:
    print("Starting watershed for: " + str(id))
    rec = [id]
    print(type(rec))
    up_ids = []

    while sum(rec) > 0:
        up_ids.append(rec)
        fromnode = vaa_df.loc[vaa_df["NHDPlusID"].isin(rec), "FromNode"]
        rec = vaa_df.loc[vaa_df["ToNode"].isin(fromnode), "NHDPlusID"]

    #up_ids is a list with more than numbers, use extend to only keep numeric nhdplusids
    newup_ids = []
    for x in up_ids:
        newup_ids.extend(x)

    print(type(newup_ids))
    tempLayer = "catsLyr"
    #expression = 'NHDPlusID IN {0}'.format(tuple(newup_ids))
    #trying expression to deal with one catchment (i.e. hws)
    expression = '"NHDPlusID" IN ({0})'.format(', '.join(map(str, newup_ids)) or 'NULL')
    arcpy.MakeFeatureLayer_management(cats, tempLayer)
    arcpy.management.SelectLayerByAttribute(tempLayer, "NEW_SELECTION", expression, None)

    outwtd = "Watersheds\\wtd_" + str(round(id))
    print(outwtd)
    arcpy.management.Dissolve(tempLayer, outwtd)


In [11]:
# BRISTOL BAY WATERSHEDS

import arcpy
import pandas as pd

arcpy.env.workspace = r"W:\GIS\AKSSF\Bristol_Bay\Bristol_Bay.gdb"
arcpy.env.overwriteOutput = True

points = r"W:\GIS\AKSSF\AKSSF_Hydrography.gdb\bb_MD_verified_DM"
cats = r"W:\GIS\AKSSF\Bristol_Bay\Bristol_Bay.gdb\cats_merge"
idList = []
outcats = "cats_intersect"

arcpy.MakeFeatureLayer_management(cats, "tempLayer")
arcpy.management.SelectLayerByLocation("tempLayer", "INTERSECT", points)
arcpy.CopyFeatures_management("tempLayer", outcats)

fields = arcpy.ListFields("tempLayer")
for field in fields:
    print("{0}".format(field.name))
with arcpy.da.SearchCursor("tempLayer", ["catID"]) as cursor:
    for row in cursor:
        idList.append(row[0])

print(len(idList))

OBJECTID
Shape
gridcode
catID
Shape_Length
Shape_Area
114


In [3]:
# BRISTOL BAY
# steps 4-7

import arcpy
import pandas as pd
import numpy
import time

# idList = [492244] #for testing

# steps 4-9 for loop to create watersheds
arcpy.env.workspace = r"W:\GIS\AKSSF\Bristol_Bay\Bristol_Bay.gdb"
arcpy.env.overwriteOutput = True
arcpy.env.qualifiedFieldNames = False

streams = "streams_merge"
cats = "cats_merge"
output_SR = arcpy.Describe(cats).spatialReference
arcpy.env.outputCoordinateSystem = output_SR

#watersheds feature dataset for storing fcs
# arcpy.management.CreateFeatureDataset(r"W:\GIS\AKSSF\Bristol_Bay\Bristol_Bay.gdb", "Watersheds", output_SR)

str_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(streams, ("catID", "upCatID1", "upCatID2")))
hws_codes = [999999, 1999999, 2999999, 3999999, 4999999]

#idList if doing ALL watersheds.
for id in idList:
    print("Starting watershed for: " + str(id))
    rec = [id]
    up_ids = []
    sum_rec = sum(rec)
    timeout = time.time() + 60*15 # 15 minutes from this point

    while(sum_rec > 0):
        if time.time() > timeout:
            break
        up_ids.append(rec)
        rec = str_df.loc[str_df["catID"].isin(rec), ("upCatID1", "upCatID2")]
        rec = rec.replace(hws_codes, 0)
        rec = pd.concat([rec['upCatID1'], rec['upCatID2']])
        # print(rec)
        sum_rec = sum(rec)
    # print(up_ids)


    #up_ids is a list with more than numbers, use extend to only keep numeric nhdplusids
    newup_ids = []
    for x in up_ids:
        newup_ids.extend(x)

    # print(type(newup_ids))
    # print(newup_ids)
    tempLayer = "catsLyr"
    #expression = 'NHDPlusID IN {0}'.format(tuple(newup_ids))
    #trying expression to deal with one catchment (i.e. hws)
    expression = '"catID" IN ({0})'.format(', '.join(map(str, newup_ids)) or 'NULL')
    arcpy.MakeFeatureLayer_management(cats, tempLayer)
    arcpy.management.SelectLayerByAttribute(tempLayer, "NEW_SELECTION", expression, None)

    outwtd = "Watersheds\\wtd_" + str(round(id))
    arcpy.management.Dissolve(tempLayer, outwtd)
    print("Watershed created at:" + outwtd)

Starting watershed for: 1023044
Watershed created at:Watersheds\wtd_1023044
Starting watershed for: 2041471
Watershed created at:Watersheds\wtd_2041471
Starting watershed for: 2065755
Watershed created at:Watersheds\wtd_2065755
Starting watershed for: 2065914
Watershed created at:Watersheds\wtd_2065914
Starting watershed for: 2066924
Watershed created at:Watersheds\wtd_2066924
Starting watershed for: 2066955
Watershed created at:Watersheds\wtd_2066955
Starting watershed for: 2067494
Watershed created at:Watersheds\wtd_2067494
Starting watershed for: 2068072
Watershed created at:Watersheds\wtd_2068072
Starting watershed for: 2068584
Watershed created at:Watersheds\wtd_2068584
Starting watershed for: 2070402
Watershed created at:Watersheds\wtd_2070402
Starting watershed for: 2071934
Watershed created at:Watersheds\wtd_2071934
Starting watershed for: 2072993
Watershed created at:Watersheds\wtd_2072993
Starting watershed for: 2073464
Watershed created at:Watersheds\wtd_2073464
Starting wat

In [7]:
# code when trouble-shooting bb above.
import arcpy

# Got through 95 watersheds and all other programs froze, restarted and finding which watersheds remain.
arcpy.env.workspace = r"W:\GIS\AKSSF\Bristol_Bay\Bristol_Bay.gdb\Watersheds"
wtds = arcpy.ListFeatureClasses()
#just get numeric part
wtds = [x[4:10] for x in wtds]
#convert to numeric
wtds = [int(i) for i in wtds]
print(wtds)
print(len(wtds))
print(len(idList))
#
# idFilter = [x for x in idList if x not in wtds]
# print(idFilter)
# print("Original list of sites in BB: " + str(len(idList)))
# print("Watersheds completed: " + str(len(wtds)))
# print("Watersheds remaining: " + str(len(idFilter)))


[102304, 204147, 206575, 206591, 206692, 206695, 206749, 206807, 206858, 207040, 207193, 207299, 207346, 207442, 207823, 207828, 208237, 208239, 208471, 208564, 208596, 208791, 208816, 208825, 208832, 208858, 302304, 302434, 303095, 303366, 303395, 303412, 400088, 402019, 402105, 404661, 404804, 405105, 405105, 405161, 405207, 405412, 405420, 405533, 405901, 405973, 406003, 406113, 406319, 406377, 406405, 406484, 406858, 406945, 406949, 406958, 407103, 407125, 407162, 407337, 407418, 407450, 407472, 407522, 407547, 407571, 407650, 407657, 407667, 407823, 408004, 408087, 408222, 408435, 408491, 408679, 408701, 408722, 408867, 408907, 408938, 408939, 408957, 409116, 409142, 409224, 409308, 409530, 409537, 409606, 409645, 409811, 409870, 409959, 409969, 410112, 410127, 410212, 410457, 410520, 410557, 410584, 410690, 410746, 411567, 500870, 502079, 502147, 503036, 503070, 503247, 503309, 503489, 503837]
114
114


In [10]:
# PRINCE WILLIAM SOUND WATERSHEDS

import arcpy
import pandas as pd

gdb = r"W:\GIS\AKSSF\Prince_William_Sound\Prince_William_Sound.gdb"
arcpy.env.workspace = gdb
arcpy.env.overwriteOutput = True

points = r"W:\GIS\AKSSF\AKSSF_Hydrography.gdb\sites_outside_bb_verified_DM"
cats = r"W:\GIS\AKSSF\Prince_William_Sound\Prince_William_Sound.gdb\cats_merge"
idList = []
outcats = gdb + "\\cats_intersect"

arcpy.MakeFeatureLayer_management(cats, "tempLayer")
arcpy.management.SelectLayerByLocation("templayer", "INTERSECT", points)
arcpy.CopyFeatures_management("templayer", outcats)

fields = arcpy.ListFields("tempLayer")
for field in fields:
    print("{0}".format(field.name))
with arcpy.da.SearchCursor("tempLayer", ["gridcode"]) as cursor:
    for row in cursor:
        idList.append(row[0])

print(len(idList))

OBJECTID
Shape
gridcode
Shape_Length
Shape_Area
19


In [28]:
# Prince_William_Sound
# steps 4-7

import arcpy
import pandas as pd
import numpy

arcpy.env.workspace = r"W:\GIS\AKSSF\Prince_William_Sound\Prince_William_Sound.gdb"
arcpy.env.overwriteOutput = True
arcpy.env.qualifiedFieldNames = False

streams = "streams_merge"
cats = "cats_merge"
output_SR = arcpy.Describe(cats).spatialReference
arcpy.env.outputCoordinateSystem = output_SR

#watersheds feature dataset for storing fcs
arcpy.management.CreateFeatureDataset(r"W:\GIS\AKSSF\Prince_William_Sound\Prince_William_Sound.gdb", "Watersheds", output_SR)

fields = arcpy.ListFields(streams)
for field in fields:
    print("{0}".format(field.name))

str_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(streams, ("LINKNO", "USLINKNO1", "USLINKNO2")))
hws_codes = [-1]

#idList if doing ALL watersheds.
for id in idList:
    print("Starting watershed for: " + str(id))
    rec = [id]
    up_ids = []
    sum_rec = sum(rec)

    while(sum_rec > 0):
        up_ids.append(rec)
        rec = str_df.loc[str_df["LINKNO"].isin(rec), ("USLINKNO1", "USLINKNO2")]
        rec = rec.replace(hws_codes, 0)
        rec = pd.concat([rec['USLINKNO1'], rec['USLINKNO2']])
        # print(rec)
        sum_rec = sum(rec)
    # print(up_ids)


    #up_ids is a list with more than numbers, use extend to only keep numeric nhdplusids
    newup_ids = []
    for x in up_ids:
        newup_ids.extend(x)

    # print(type(newup_ids))
    # print(newup_ids)
    tempLayer = "catsLyr"
    #expression = 'NHDPlusID IN {0}'.format(tuple(newup_ids))
    #trying expression to deal with one catchment (i.e. hws)
    expression = '"gridcode" IN ({0})'.format(', '.join(map(str, newup_ids)) or 'NULL')
    arcpy.MakeFeatureLayer_management(cats, tempLayer)
    arcpy.management.SelectLayerByAttribute(tempLayer, "NEW_SELECTION", expression, None)

    outwtd = "Watersheds\\wtd_" + str(round(id))
    print(outwtd)
    arcpy.management.Dissolve(tempLayer, outwtd)


OBJECTID
Shape
LINKNO
DSLINKNO
USLINKNO1
USLINKNO2
DSNODEID
strmOrder
Length
Magnitude
DSContArea
strmDrop
Slope
StraightL
USContArea
WSNO
DOUTEND
DOUTSTART
DOUTMID
Shape_Length
Starting watershed for: 18457
Watersheds\wtd_18457
Starting watershed for: 26464
Watersheds\wtd_26464
Starting watershed for: 28086
Watersheds\wtd_28086
Starting watershed for: 29854
Watersheds\wtd_29854
Starting watershed for: 30884
Watersheds\wtd_30884
Starting watershed for: 31865
Watersheds\wtd_31865
Starting watershed for: 36645
Watersheds\wtd_36645
Starting watershed for: 37815
Watersheds\wtd_37815
Starting watershed for: 38993
Watersheds\wtd_38993
Starting watershed for: 40285
Watersheds\wtd_40285
Starting watershed for: 41515
Watersheds\wtd_41515
Starting watershed for: 42563
Watersheds\wtd_42563
Starting watershed for: 43055
Watersheds\wtd_43055
Starting watershed for: 43185
Watersheds\wtd_43185
Starting watershed for: 43933
Watersheds\wtd_43933
Starting watershed for: 43973
Watersheds\wtd_43973
Starti

In [5]:
# KODIAK WATERSHEDS

import arcpy
import pandas as pd

arcpy.env.workspace = r"W:\GIS\AKSSF\Kodiak\Kodiak.gdb"
arcpy.env.overwriteOutput = True

points = r"W:\GIS\AKSSF\AKSSF_Hydrography.gdb\sites_outside_bb_verified_DM"
cats = r"W:\GIS\AKSSF\Kodiak\Kodiak.gdb\cats_merge"
idList = []
outcats = "cats_intersect"

arcpy.MakeFeatureLayer_management(cats, "tempLayer")
arcpy.management.SelectLayerByLocation("tempLayer", "INTERSECT", points)
arcpy.CopyFeatures_management("tempLayer", outcats)

fields = arcpy.ListFields("tempLayer")
for field in fields:
    print("{0}".format(field.name))
with arcpy.da.SearchCursor("tempLayer", ["gridcode"]) as cursor:
    for row in cursor:
        idList.append(row[0])

print(len(idList))

OBJECTID
Shape
gridcode
proc_reg
Shape_Length
Shape_Area
28


In [6]:
# Kodiak
# steps 4-7

import arcpy
import pandas as pd
import numpy

arcpy.env.workspace = r"W:\GIS\AKSSF\Kodiak\Kodiak.gdb"
arcpy.env.overwriteOutput = True
arcpy.env.qualifiedFieldNames = False

streams = "streams_merge"
cats = "cats_merge"
output_SR = arcpy.Describe(cats).spatialReference
arcpy.env.outputCoordinateSystem = output_SR

#watersheds feature dataset for storing fcs
arcpy.management.CreateFeatureDataset(r"W:\GIS\AKSSF\Kodiak\Kodiak.gdb", "Watersheds", output_SR)

fields = arcpy.ListFields(streams)
for field in fields:
    print("{0}".format(field.name))

str_df = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(streams, ("LINKNO", "USLINKNO1", "USLINKNO2")))
hws_codes = [-1]

#idList if doing ALL watersheds.
for id in idList:
    print("Starting watershed for: " + str(id))
    rec = [id]
    up_ids = []
    sum_rec = sum(rec)

    while(sum_rec > 0):
        up_ids.append(rec)
        rec = str_df.loc[str_df["LINKNO"].isin(rec), ("USLINKNO1", "USLINKNO2")]
        rec = rec.replace(hws_codes, 0)
        rec = pd.concat([rec['USLINKNO1'], rec['USLINKNO2']])
        # print(rec)
        sum_rec = sum(rec)
    # print(up_ids)


    #up_ids is a list with more than numbers, use extend to only keep numeric nhdplusids
    newup_ids = []
    for x in up_ids:
        newup_ids.extend(x)

    # print(type(newup_ids))
    # print(newup_ids)
    tempLayer = "catsLyr"
    #expression = 'NHDPlusID IN {0}'.format(tuple(newup_ids))
    #trying expression to deal with one catchment (i.e. hws)
    expression = '"gridcode" IN ({0})'.format(', '.join(map(str, newup_ids)) or 'NULL')
    arcpy.MakeFeatureLayer_management(cats, tempLayer)
    arcpy.management.SelectLayerByAttribute(tempLayer, "NEW_SELECTION", expression, None)

    outwtd = "Watersheds\\wtd_" + str(round(id))
    print(outwtd)
    arcpy.management.Dissolve(tempLayer, outwtd)



OBJECTID
Shape
LINKNO
DSLINKNO
USLINKNO1
USLINKNO2
DSNODEID
strmOrder
Length
Magnitude
DSContArea
strmDrop
Slope
StraightL
USContArea
WSNO
DOUTEND
DOUTSTART
DOUTMID
proc_reg
Shape_Length
Starting watershed for: 48267
Watersheds\wtd_48267
Starting watershed for: 49617
Watersheds\wtd_49617
Starting watershed for: 50197
Watersheds\wtd_50197
Starting watershed for: 64593
Watersheds\wtd_64593
Starting watershed for: 72144
Watersheds\wtd_72144
Starting watershed for: 76954
Watersheds\wtd_76954
Starting watershed for: 77794
Watersheds\wtd_77794
Starting watershed for: 90346
Watersheds\wtd_90346
Starting watershed for: 93176
Watersheds\wtd_93176
Starting watershed for: 94216
Watersheds\wtd_94216
Starting watershed for: 97276
Watersheds\wtd_97276
Starting watershed for: 99516
Watersheds\wtd_99516
Starting watershed for: 100826
Watersheds\wtd_100826
Starting watershed for: 101556
Watersheds\wtd_101556
Starting watershed for: 103096
Watersheds\wtd_103096
Starting watershed for: 103196
Watersheds\

# Watershed Summaries

1. read in all watersheds feature classes
2. create a table with the NHDPlusID/catID of the watershed and its area
3. read in catchments intersect
4. do a spatial join on catchments and sites to get a many to one between SiteIDs and catchments
5. create a table with a row for each SiteID, NHDPlusID/catID, and watershed area

In [18]:
import arcpy
import os
import pandas as pd

region = "Copper_River"
arcpy.env.workspace = "W:\\GIS\\AKSSF\\" + region + "\\" + region + ".gdb\\Watersheds"

wtds = arcpy.ListFeatureClasses()
print(wtds)

wtdList = []

for wtd in wtds:
    wtdName = wtd[4:20]
    print("Starting wtd: " + wtdName)
    wtdPath = os.path.join(arcpy.env.workspace, wtd)
    arcpy.AddField_management(wtdPath, "Area_km2", "DOUBLE")
    expression1 = "{0}".format("!SHAPE.area@SQUAREKILOMETERS!")
    arcpy.CalculateField_management(wtdPath, "Area_km2", expression1, "PYTHON", )
    # wtdArea = [i for i in arcpy.da.SearchCursor(wtdPath, ['Area_km2'])][0]
    wtdArea = [row[0] for row in arcpy.da.SearchCursor(wtdPath, ['Area_km2'])]
    print(wtdArea)
    wtdList.append({'Name': wtdName, 'Area_km2': wtdArea})

wtdDf = pd.DataFrame(wtdList)
print(wtdDf)

['wtd_75019800000406', 'wtd_75019800010313', 'wtd_75019800014348', 'wtd_75019800001957', 'wtd_75019800019692', 'wtd_75019600118138', 'wtd_75019700004190', 'wtd_75019700004084', 'wtd_75019700017692', 'wtd_75019700001794', 'wtd_75019700003889', 'wtd_75003900062338', 'wtd_75003900033524', 'wtd_75003900054316', 'wtd_75003900055039', 'wtd_75003900023942', 'wtd_75003900058380', 'wtd_75003900028507', 'wtd_75003900027489', 'wtd_75003900044936', 'wtd_75003900023855', 'wtd_75003900044738', 'wtd_75003900055694', 'wtd_75003900023674', 'wtd_75003900062264', 'wtd_75003900055316', 'wtd_75003900039073', 'wtd_75003900027771']
Starting wtd: wtd_75019800000406
[7.35572511884198]
Starting wtd: wtd_75019800010313
[562.020966844164]
Starting wtd: wtd_75019800014348
[35.1819733428367]
Starting wtd: wtd_75019800001957
[12.7505996974426]
Starting wtd: wtd_75019800019692
[1623.2292910112]
Starting wtd: wtd_75019600118138
[4551.25551949044]
Starting wtd: wtd_75019700004190
[370.471499680301]
Starting wtd: wtd_75

In [19]:
import arcpy
import os
import numpy
import pandas as pd

region = "Copper_River"
arcpy.env.workspace = "W:\\GIS\\AKSSF\\" + region + "\\" + region + ".gdb"

cats = os.path.join(arcpy.env.workspace, "cats_merge")
print(cats)
points = r"T:\Aquatic\AKSSF\AKSSF_Hydrography.gdb\sites_outside_bb_verified_DM"

#note spatial join not working, the cats fields are empty!
arcpy.SpatialJoin_analysis(points, cats, "sites_sj")
sites_sj = os.path.join(arcpy.env.workspace, "sites_sj")

sitesDf = pd.DataFrame(arcpy.da.FeatureClassToNumPyArray(sites_sj, ("SiteID", "NHDPlusID")))

print(sitesDf)
print(wtdDf)

#merge two data frames to get area linked to nhdplusid and siteid.

# sitesList = []
# with arcpy.da.SearchCursor(sites_sj, ["SiteID", "NHDPlusID"]) as cursor:
#     for row in cursor:
#         sitesList.append({site = row[0], ID = row[1]})

           SiteID  NHDPlusID
0     kdk_busrv01        NaN
1     kdk_doscr01        NaN
2     kdk_karrv01        NaN
3     kdk_doscr02        NaN
4    kdk_olgcr01a        NaN
..            ...        ...
358      15238450        NaN
359      15238984        NaN
360      15238986        NaN
361      15239070        NaN
362      15241600        NaN

[363 rows x 2 columns]
              Name             Area_km2
0   75019800000406   [7.35572511884198]
1   75019800010313   [562.020966844164]
2   75019800014348   [35.1819733428367]
3   75019800001957   [12.7505996974426]
4   75019800019692    [1623.2292910112]
5   75019600118138   [4551.25551949044]
6   75019700004190   [370.471499680301]
7   75019700004084   [33.5466237772944]
8   75019700017692   [700.424577657103]
9   75019700001794   [141.764803175074]
10  75019700003889   [28.2315497416145]
11  75003900062338   [51084.2691317829]
12  75003900033524   [54.0521498871808]
13  75003900054316   [52.1643999462117]
14  75003900055039   [3.4403