This script will be for linking the siteIDs to the catchment IDs - which are either NHDPlusIDs,
gridcodes, or new catIDs made from the gridcodes for BB. This will give us a way to link the
thermal sensitivity responses by year to the covariates. This script also includes code to
check the watersheds. The watersheds are individual feature classes to avoid overlapping
polygons so the best way to check they are correct is to 1) check that every siteID/catID
has a watershed, and 2) check that the watershed area approximately matches the watershed
area from the flow accumulation grid. They won't be exact for sites that are not close to
the catchment outlet and could be quite different for very small watersheds in some cases.

# Watershed Summaries

1. read in all watersheds feature classes
2. create a table with the NHDPlusID/catID of the watershed name, region, and watershed area
3. merge original point feature classes (inside and outside bb)
4. do a spatial join with merged catchments by region to get catID and region on the points dataset
5. clip to the region and save in each regional gdb
6. merge all point feature classes with siteIDs and catIDs into one table
7. join the watershed area from the watershed fcs to the points table using the catID
NOT DONE and not sure we need it: 8. join the watershed area from the fac grid to the points table using the catID
9. compare the results to ensure that watersheds have been created correctly

In [21]:
# steps 1 and 2

import arcpy
import os
import pandas as pd

regions = ["Kodiak", "Copper_River", "Prince_William_Sound", "Cook_Inlet", "Bristol_Bay"]
wtdList = []

for region in regions:
    local_gdb = "W:\\GIS\\AKSSF\\" + region + "\\" + region + ".gdb\\Watersheds"
    arcpy.env.workspace = local_gdb
    wtds = arcpy.ListFeatureClasses()
    print(region + ": " + str(len(wtds)) + " watersheds")

    for wtd in wtds:
        wtdName = wtd[4:20]
        print("Starting wtd: " + wtdName)
        wtdPath = os.path.join(arcpy.env.workspace, wtd)
        # field_names = [f.name for f in arcpy.ListFields(wtdPath)]
        # print(field_names)
        # if "Area_km2" in field_names:
        #     print("Area already calculated")
        # else:
        #     arcpy.AddField_management(wtdPath, "Area_km2", "DOUBLE")
        #     expression1 = "{0}".format("!SHAPE.area@SQUAREKILOMETERS!")
        #     arcpy.CalculateField_management(wtdPath, "Area_km2", expression1, "PYTHON", )
        wtdArea = [row[0] for row in arcpy.da.SearchCursor(wtdPath, ['Area_km2'])]
        # print("wtdName: " + str(wtdArea))
        wtdList.append({'Region': region, 'cat_ID': wtdName, 'Area_km2': wtdArea})

wtdDf = pd.DataFrame(wtdList)
print(wtdDf)


Kodiak: 28 watersheds


TypeError: can only concatenate str (not "list") to str

In [4]:
#merge watersheds into one feature class for each region

import arcpy
import os
import pandas as pd
arcpy.env.overwriteOutput = True

regions = ["Kodiak", "Copper_River", "Prince_William_Sound", "Cook_Inlet", "Bristol_Bay"]

for region in regions:
    local_gdb = "W:\\GIS\\AKSSF\\" + region + "\\" + region + ".gdb"
    arcpy.env.workspace = local_gdb + "\\Watersheds"
    wtds = arcpy.ListFeatureClasses()
    print(region + ": " + str(len(wtds)) + " watersheds")

    #add cat_ID to each watershed before merging
    for wtd in wtds:
        wtdName = wtd[4:20]
        wtdPath = os.path.join(arcpy.env.workspace, wtd)
        arcpy.AddField_management(wtdPath, "cat_ID", "DOUBLE")
        # expression1 = "{0}".format("!SHAPE.area@SQUAREKILOMETERS!")
        arcpy.CalculateField_management(wtdPath, "cat_ID", wtdName, "PYTHON")

    arcpy.env.workspace = local_gdb
    wtdMerge = [local_gdb + "\\Watersheds\\" + s for s in wtds]
    wtd_output = "wtds_merge"
    arcpy.Merge_management(wtdMerge, wtd_output)


Kodiak: 28 watersheds
Copper_River: 28 watersheds
Prince_William_Sound: 19 watersheds
Cook_Inlet: 241 watersheds
Bristol_Bay: 114 watersheds


In [None]:
print(len(wtdDf))
print(len(wtdList))


# step 3 - done

import arcpy

gdb = "W:\\GIS\\AKSSF\\AKSSF_Hydrography.gdb"
arcpy.env.workspace = gdb
bb_pts = gdb + "\\bb_md_verified_DM"
other_pts = gdb + "\\sites_outside_bb_verified_DM"

output = "akssf_pts_verified"
arcpy.Merge_management([bb_pts, other_pts], output)

In [None]:
# steps 4-6

import arcpy
import os
import numpy
import pandas as pd

arcpy.env.overwriteOutput = True
regions_dict = {"Kodiak": '!gridcode!', "Copper_River": '!NHDPlusID!', "Prince_William_Sound": '!gridcode!', "Cook_Inlet": '!NHDPlusID!', "Bristol_Bay": '!catID!'}
# regions_dict = {"Bristol_Bay": '!catID!'}

points = "W:\\GIS\\AKSSF\\AKSSF_Hydrography.gdb\\akssf_pts_verified"
# sites_lst = []
cats_lst = []

for key, value in regions_dict.items():
    arcpy.env.workspace = "W:\\GIS\\AKSSF\\" + key + "\\" + key + ".gdb"
    cats = os.path.join(arcpy.env.workspace, "cats_merge")
    # arcpy.Clip_analysis(points, cats, r"memory\sites_clip")
    # arcpy.SpatialJoin_analysis(r"memory\sites_clip", cats, "sites_sj")
    # # #note that catID is in the BB cats_merge, but only a LONG, need a DOUBLE to account for NHDPlusIDs
    # arcpy.AddField_management("sites_sj", "cat_ID", "DOUBLE")
    # arcpy.CalculateField_management("sites_sj", "cat_ID", value)
    # sites_fc = os.path.join(arcpy.env.workspace, "sites_sj")
    # count = arcpy.GetCount_management(sites_fc)
    # print('{} has {} records'.format(sites_fc, count[0]))
    # sites_lst.append(sites_fc)
    cats_lst.append(cats)

print(sites_lst)
cats_outfile = "W:\\GIS\\AKSSF\\AKSSF_Hydrography.gdb\\all_cats"
arcpy.Merge_management(cats_lst, cats_outfile)

Also want to intersect sites with maximum flow accumulation in small buffer.

In [20]:
import arcpy
import pandas as pd
cats_outfile = "W:\\GIS\\AKSSF\\AKSSF_Hydrography.gdb\\all_cats"

#merge all sites_sj into one point file with all the catIDs.
outfile = "W:\\GIS\\AKSSF\\AKSSF_Hydrography.gdb\\all_sites_catid"
arcpy.Merge_management(sites_lst, outfile, "", "ADD_SOURCE_INFO")

#buffer sites
buffer = r"memory\sites_buffer"
arcpy.Buffer_analysis(outfile, buffer, "30 meters")

#this makes sure the buffer does not extend outside of the catchment
clip_outfile = r"memory\sites_clip"
arcpy.Clip_analysis(buffer, cats_outfile, clip_outfile)

#zonal stats on buffer intersection with flow accumulation grids (need to loop through regions for this)
# to get maximum flow accumulation bc some points may not exactly fall on stream grid
tbl_lst = []

for region in regions:
    arcpy.env.workspace = "W:\\GIS\\AKSSF\\" + region + "\\"
    fac = os.path.join(arcpy.env.workspace, "fac.tif")
    outtable = region + "_maxfac.dbf"
    arcpy.ZonalStatisticsAsTable_ra(clip_outfile, fac, outtable, "SiteID", "DATA","MAXIMUM")
    tbl_lst.append(outtable)

arr_lst = []
for tbl in tbl_lst:
    arr = arcpy.da.TableToNumPyArray(outtable, ("siteID", "cat_ID"))
    arr_lst.append(arr)
sitesDF = pd.concat(arr_lst)

ExecuteError: Failed to execute. Parameters are not valid.
Local raster dataset is not supported in this parameter. Please specify a portal item or image service url.
Failed to execute (ZonalStatisticsAsTable).


In [None]:
# steps 7-9
import arcpy
import pandas as pd

arcpy.env.workspace = "W:\\GIS\\AKSSF"
# print(wtdDf)
# print(sitesDF)

print(wtdDf.dtypes)
print(sitesDF.dtypes)

#fix mismatched data types for merge field.
# wtdDf.cat_ID.astype('int64')
wtdDf['cat_ID'] = wtdDf['cat_ID'].astype('int64')
#merge sitesDf with wtdDf to get watershed area linked to siteID
join = sitesDF.merge(wtdDf, on = 'cat_ID', how = 'left', indicator = True)
print(join)

join.to_csv("W:\\GIS\\AKSSF\\site_wtd_join.csv")
