# MEP Preprocessing

In this notebook I collect the watershed boundary layers used for the Massachusetts Estuaries Project (MEP) and process them in four steps:
1. Regroup subwatershed layers that were split by travel time.
2. Classify subwatersheds by elevation percentile (5%).
3. Split subwatersheds by elevation percentile.
4. Classify land use within subwatersheds.

Publication:
Carlson, C.S., Masterson, J.P., Walter, D.A., and Barbaro, J.R., 2017, Development of simulated groundwater-contributing areas to selected streams, ponds, coastal water bodies, and production wells in the Plymouth-Carver region and Cape Cod, Massachusetts: U.S. Geological Survey Data Series 1074, 17 p. https://doi.org/10.3133/ds1074

Dataset: 
Carlson, C.S., Masterson, J.P., Walter, D.A., and Barbaro, J.R., 2017, Simulated groundwater-contributing areas to selected streams, ponds, coastal water bodies, and production wells, Plymouth-Carver region and Cape Cod, Massachusetts: U.S. Geological Survey data release, https://doi.org/10.5066/F7V69H2Z.


In [4]:
# Load dependencies ***************************************
import glob
import os
import re
import sys
import zipfile

import arcpy
import arcpy as ap
import numpy as np
import pandas as pd
#import scipy.stats

# Set up arcpy environment ******************************
# input data directory
idr = "C:\\Workspace\\Geodata\\Massachusetts\\"
# working directory; this should be where the ArcGIS project is located
wdr = "C:\\Workspace\\Geodata\\Nload\\"
# output directory
odr = os.path.join(wdr,'outputs\\MEP')
# later cells save rasters and shapefiles into odr, so make sure it exists
os.makedirs(odr, exist_ok=True)
# arcpy settings -- the module is imported as `ap`, so use that alias consistently
ap.env.workspace = wdr # set arcpy environment working directory
aprx = ap.mp.ArcGISProject("Current") # current project
ap.CheckOutExtension("Spatial") # check out spatial extension
ap.env.outputCoordinateSystem = ap.SpatialReference("NAD 1983 UTM Zone 19N") # define coordinate system
# Allow overwriting of output
ap.env.overwriteOutput = True
#ap.SetProgressor("step", "processing...", step_value = 5)

In [3]:
def fn_regex_search_0 (string,pattern,noneVal="NA"):
    '''
    Search `string` for the regular expression `pattern`.

    Returns the full text of the first match, or `noneVal` when the
    pattern does not occur anywhere in `string`.
    '''
    import re
    found = re.search(pattern,string)
    return(noneVal if found is None else found[0])
# test function: the first run of word characters ending in "10" is "GT10"
# (raw string, so \w reaches the regex engine instead of being an invalid string escape)
assert fn_regex_search_0('Mystic Lake GT10 E',r'\w+10') == 'GT10'
def fn_regex_search_replace(string,pattern,replacement):
    '''
    Substitute every match of `pattern` in `string` with `replacement`
    and hand back the resulting string.
    '''
    import re
    return(re.compile(pattern).sub(replacement,string))

# quick check: stripping the travel-time tag should display 'MysticLakeE'
# (raw string, so \w is not read as an invalid string escape)
fn_regex_search_replace('MysticLakeGT10E',r'\wT10','')
#fn_regex_search_replace('Mystic Lake  E','  ',' ')

'MysticLakeE'

In [22]:
def fn_recursive_glob_search (startDir=None,
                             fileExt="csv"):
    '''returns:
           list of file paths whose names end with fileExt, found in
           startDir and all of its subdirectories
       inputs:
           startDir = root or parent directory to start search;
                      None (default) means the current working directory
           fileExt = file name suffix to match, e.g. ".csv" ".xlsx" ".shp"
                     (used verbatim, so the default "csv" matches any
                     name ending in "csv", with or without a dot)
    '''
    import glob, os
    if startDir is None:
        # call getcwd(); the bare function object cannot be joined into a path
        startDir = os.getcwd()
    glbsearch = os.path.join(startDir,'**/*'+fileExt)
    # "**" together with recursive=True descends through every subdirectory
    return(glob.glob(glbsearch, recursive=True))

In [None]:
# Folder that is searched for USGS area shapefiles
USGS_startDir = r'C:\Workspace\Geodata\Massachusetts\USGS_\SimulatedGround\original_USGS_areas\original_USGS_areas'
# Collect every path under that folder (subfolders included) whose name ends in ".shp"
All_USGS_paths = fn_recursive_glob_search(USGS_startDir,'.shp')

In [26]:
# select tax parcel data and uncompress
TaxDir = r'C:\Workspace\Geodata\Massachusetts\TaxParcels'
TaxDirNames = fn_recursive_glob_search(TaxDir,'.zip')
print(TaxDirNames)
# Only the first archive is processed (slice kept as in the trial run);
# widen the slice to unpack more of them.
for file in TaxDirNames[0:1]:
    # Every entry is a .zip archive, so unpack it next to itself with zipfile.
    # (The earlier call passed the placeholder "london.gdb" and never used `file`.)
    with zipfile.ZipFile(file) as archive:
        archive.extractall(os.path.dirname(file))

['C:\\Workspace\\Geodata\\Massachusetts\\TaxParcels\\M003_parcels_gdb.zip', 'C:\\Workspace\\Geodata\\Massachusetts\\TaxParcels\\M020_parcels_gdb.zip', 'C:\\Workspace\\Geodata\\Massachusetts\\TaxParcels\\M036_parcels_gdb.zip', 'C:\\Workspace\\Geodata\\Massachusetts\\TaxParcels\\M041_parcels_gdb.zip', 'C:\\Workspace\\Geodata\\Massachusetts\\TaxParcels\\M042_parcels_gdb.zip', 'C:\\Workspace\\Geodata\\Massachusetts\\TaxParcels\\M052_parcels_gdb.zip', 'C:\\Workspace\\Geodata\\Massachusetts\\TaxParcels\\M055_parcels_gdb.zip', 'C:\\Workspace\\Geodata\\Massachusetts\\TaxParcels\\M062_parcels_gdb.zip', 'C:\\Workspace\\Geodata\\Massachusetts\\TaxParcels\\M072_parcels_gdb.zip', 'C:\\Workspace\\Geodata\\Massachusetts\\TaxParcels\\M075_parcels_gdb.zip', 'C:\\Workspace\\Geodata\\Massachusetts\\TaxParcels\\M082_parcels_gdb.zip', 'C:\\Workspace\\Geodata\\Massachusetts\\TaxParcels\\M086_parcels_gdb.zip', 'C:\\Workspace\\Geodata\\Massachusetts\\TaxParcels\\M089_parcels_gdb.zip', 'C:\\Workspace\\Geodata\

In [5]:
# Work on a copy of the MEP subwatersheds shapefile; later cells add fields to the copy
original = r"C:\Workspace\Geodata\Massachusetts\MEP\CC_MV_Subwatersheds\Subwatersheds.shp"
copyfile = r"C:\Workspace\Geodata\Nload\outputs\MEP\MEP_Subwatersheds.shp"
arcpy.management.Copy(in_data=original,
                      out_data=copyfile,
                      data_type="ShapeFile",
                      associated_data=None)

In [6]:
# Dissolve the working copy of the MEP subwatersheds, grouping on the FID field
# NOTE(review): FID is normally unique per feature, so this may merge nothing -- confirm the intended dissolve field
outfile = os.path.join(odr,"MEP_Subwatersheds_Dissolve")
arcpy.management.Dissolve(in_features=copyfile,
                          out_feature_class=outfile,
                          dissolve_field="FID",
                          statistics_fields=None,
                          multi_part="MULTI_PART",
                          unsplit_lines="DISSOLVE_LINES")

Make a new field holding the subwatershed travel-time tag (e.g. GT10), extracted from the subwatershed attribute text.

In [None]:
# Fill the "Travel_Tim" text field of the working copy with the travel-time tag
# (any word character followed by "T10", e.g. "GT10"), or "NA" when no tag is present.
# fn_string is the code block handed to CalculateField; it repeats the helper
# because the field calculator evaluates it separately from this notebook.
fn_string = """def fn_regex_search_0 (string,pattern,noneVal="NA"):
    '''
    returns the first match of a regular expression pattern search on a string
    '''
    import re
    x = re.search(pattern,string)
    if x is None: 
        x= [noneVal]    
    return(x[0])
    """
# Raw strings (outer and inner) keep \w intact as a regex token rather than an
# invalid string escape.
# NOTE(review): this cell reads !SUBWATER_D! while the SUBW_NAME cell reads
# !SUBWATER_N! -- confirm which source field is intended.
arcpy.management.CalculateField(copyfile,
                                "Travel_Tim",
                                r"fn_regex_search_0(!SUBWATER_D!,r'\wT10','NA')",
                                "PYTHON3",
                                fn_string, "TEXT", "NO_ENFORCE_DOMAINS")

Make a new subwatershed name field that excludes the travel-time tag.

In [6]:
# Fill the "SUBW_NAME" text field with the subwatershed name minus its travel-time
# tag: everything from the first "<word char>T10" to the end of the value is removed.
# fn_string is the code block handed to CalculateField; it repeats the helper
# because the field calculator evaluates it separately from this notebook.
fn_string = """def fn_regex_search_replace(string,pattern,replacement):
    '''
    returns the a string with a pattern substituted by a replacement
    '''
    import re
    x = re.sub(pattern,replacement,string)
    return(x)"""
newField = "SUBW_NAME"
# Raw strings (outer and inner) keep \w intact as a regex token rather than an
# invalid string escape.
# NOTE(review): a value such as "Mystic Lake GT10 E" becomes "Mystic Lake " with a
# trailing space -- confirm that it still groups with "Mystic Lake" when dissolving.
arcpy.management.CalculateField(copyfile,
                                newField,
                                r"""fn_regex_search_replace(!SUBWATER_N!,r"\wT10.*","")""",
                                "PYTHON3",
                                fn_string,
                                "TEXT",
                                "NO_ENFORCE_DOMAINS")

Use zonal statistics to calculate the 5th percentile of elevation in each subwatershed.

In [None]:
# Clip the 2016 land cover / land use polygons to the "MEP_Subwatersheds" layer
arcpy.analysis.Clip(
    in_features=r"C:\Workspace\Geodata\Massachusetts\lclu_gdb\MA_LCLU2016.gdb\LANDCOVER_LANDUSE_POLY",
    clip_features="MEP_Subwatersheds",
    out_feature_class=r"C:\Workspace\Geodata\Nload\outputs\MEP\LCLU2016_MEP_Clip",
    cluster_tolerance=None)

In [1]:
# Extract the cells of the LiDAR DEM that fall inside the MEP subwatersheds
raster ="LiDAR_DEM_INT_16bit"
mask = "MEP_Subwatersheds"
outname = "lidar_extr"
with arcpy.EnvManager(scratchWorkspace=r"C:\Workspace\Geodata\Nload\Default.gdb", workspace=r"C:\Workspace\Geodata\Nload\Default.gdb"):
    # pass the raster and mask defined above; the call had no arguments, which is a syntax error
    lidar_extr = arcpy.sa.ExtractByMask(raster, mask)
    # persist the extracted raster in the output directory
    lidar_extr.save(os.path.join(odr,outname))

In [None]:
# Zonal statistics of the extracted LiDAR raster over the subwatershed polygons,
# with zones defined by the SUBW_NAME field.
raster = "lidar_extr"
poly = "MEP_Subwatersheds"
zonefield = "SUBW_NAME"
outname = "lid_sub_zs"
with arcpy.EnvManager(scratchWorkspace=r"C:\Workspace\Geodata\Nload\Default.gdb", 
                      workspace=r"C:\Workspace\Geodata\Nload\Default.gdb"):
    # statistic type "PERCENTILE" with value 5, i.e. the 5th percentile per zone
    Lid_Sub_ZS = arcpy.ia.ZonalStatistics(poly, zonefield, raster, "PERCENTILE", "DATA", "CURRENT_SLICE", 5, "AUTO_DETECT"); 
    # save the result under odr as "lid_sub_zs"
    Lid_Sub_ZS.save(os.path.join(odr,outname))

In [14]:
# raster calculator to 
a = "lidar_extr"
b = "Lid_Sub_ZS"
outname = "lidar_le5pct"
lidar_le5pct = arcpy.ia.RasterCalculator([a,b],
                                          ["a", "b"],
                                          "a<=b"); 
lidar_le5pct.save(os.path.join(odr,outname))

RuntimeError: Failed to apply Raster Function: 'RasterCalculator' (The parameter is incorrect. 
Parameter 'Rasters' is missing or invalid. Bind failed in function 'Raster Calculator Function' [Raster Calculator Function].)

In [18]:
# Full paths, inside the output directory, of the two rasters to compare
a = os.path.join(odr,"lidar_extr")
b = os.path.join(odr,"lid_sub_zs")
# show the resolved paths as a sanity check
print(a,b)

C:\Workspace\Geodata\Nload\outputs\MEP\lidar_extr C:\Workspace\Geodata\Nload\outputs\MEP\lid_sub_zs


In [21]:
# Compare raster a with raster b using arcpy.ia.LessThanEqual and save the result
with arcpy.EnvManager(scratchWorkspace=r"C:\Workspace\Geodata\Nload\Default.gdb", 
                      workspace=r"C:\Workspace\Geodata\Nload\Default.gdb"):
    # presumably 1 where a <= b and 0 elsewhere (a later cell treats gridcode 1 as "LE5%") -- TODO confirm
    lidar_le5pct = arcpy.ia.LessThanEqual(a,b); 
    # save under odr as "lidar_le5pct"
    lidar_le5pct.save(os.path.join(odr,"lidar_le5pct"))

In [15]:
# Turn the lidar_le5pct raster into simplified polygons written to le5pct_poly.shp
outfile = os.path.join(odr,"le5pct_poly.shp")
poly = arcpy.conversion.RasterToPolygon(
    in_raster="lidar_le5pct",
    out_polygon_features=outfile,
    simplify="SIMPLIFY",
    raster_field="VALUE",
    create_multipart_features="SINGLE_OUTER_PART",
    max_vertices_per_feature=None)

In [17]:
# Label every polygon with an elevation class in the text field "ele5pct":
# gridcode 1 -> "LE5%", any other gridcode -> "GT5%"
arcpy.management.CalculateField(
    in_table=outfile,
    field="ele5pct",
    expression="fn(!gridcode!)",
    expression_type="PYTHON3",
    code_block="""def fn(x):
    y = "GT5%"
    if x == 1: y = "LE5%"
    return(y)""",
    field_type="TEXT",
    enforce_domains="NO_ENFORCE_DOMAINS")
#arcpy.management.AlterField(outfile, 'gridcode', 'ElevLE5pct', 'Elev <= 5% percentile')

In [18]:
# Merge the elevation polygons into one multipart feature per "ele5pct" class
arcpy.management.Dissolve(
    in_features=os.path.join(odr,"le5pct_poly"),
    out_feature_class=os.path.join(odr,"le5pct_poly_diss"),
    dissolve_field="ele5pct",
    statistics_fields=None,
    multi_part="MULTI_PART",
    unsplit_lines="DISSOLVE_LINES")

In [6]:
# dissolve subwatersheds by subwatershed name.
# Requires the SUBW_NAME field on MEP_subwatersheds; the recorded run failed with
# ERROR 000728 because the field was missing.
# NOTE(review): presumably the copy cell was re-run after the SUBW_NAME cell,
# replacing the file without the field -- re-run the SUBW_NAME cell first.
arcpy.management.Dissolve(os.path.join(odr,"MEP_subwatersheds"),
                          os.path.join(odr,"MEP_SUBW_NAME"), 
                          "SUBW_NAME", None, "MULTI_PART", "DISSOLVE_LINES")

ExecuteError: Failed to execute. Parameters are not valid.
ERROR 000728: Field SUBW_NAME does not exist within table
Failed to execute (Dissolve).


In [19]:
# Overlay the dissolved elevation-class polygons with the MEP subwatersheds
# (Identity keeps the input features and splits them by the identity features)
infeat = os.path.join(odr,"le5pct_poly_diss")
identfeat = os.path.join(odr,"MEP_Subwatersheds")
outname = os.path.join(odr,"MEP_le5pct")
arcpy.analysis.Identity(in_features=infeat,
                        identity_features=identfeat,
                        out_feature_class=outname,
                        join_attributes="ALL",
                        cluster_tolerance=None,
                        relationship="NO_RELATIONSHIPS")

In [5]:
# compute the identity (intersection) of elevation poly and watershed poly
# Same overlay as the MEP_le5pct cell, but against the name-dissolved layer MEP_SUBW_NAME.
# MEP_SUBW_NAME is the output of the dissolve-by-SUBW_NAME cell; the recorded run
# failed with ERROR 000732 because that dataset did not exist.
infeat = os.path.join(odr,"le5pct_poly_diss")
identfeat = os.path.join(odr,"MEP_SUBW_NAME")
outname = os.path.join(odr,"MEP_el5p_SUBW")
arcpy.analysis.Identity(infeat, identfeat, 
                        outname, "ALL", None, "NO_RELATIONSHIPS")

ExecuteError: Failed to execute. Parameters are not valid.
ERROR 000732: Identity Features: Dataset C:\Workspace\Geodata\Nload\outputs\MEP\MEP_SUBW_NAME does not exist or is not supported
Failed to execute (Identity).


Make a new feature class of subwatershed IDs excluding travel time.

Make a new subwatershed layer that combines subwatersheds that were split by travel time.

In [None]:
# Make a point feature layer from monitoring coordinates
# Set the local variables
# in_table points at the "Coords$" sheet inside the Excel workbook
# NOTE(review): absolute path inside a user profile -- the cell only runs on that machine
in_table = r"C:\Users\Adrian.Wiegman\OneDrive - USDA\Research\Nload\MEP\MEP_Summary4_AW.xlsx\Coords$"
#in_table = r"C:\Users\Adrian.Wiegman\OneDrive - USDA\Research\Nload\MEP\MEP_Monitoring_Site_Coords.csv"
out_feature_class = "MEP_Monitoring_Site_Coords"
# columns of the table that hold the x and y coordinates
x_coords = "Lon"
y_coords = "Lat"

# Make the XY event layer...
# no coordinate system is passed, so the tool's default applies -- TODO confirm it matches the Lon/Lat values
arcpy.management.XYTableToPoint(in_table=in_table, 
                                out_feature_class=out_feature_class,
                                x_field=x_coords, 
                                y_field=y_coords)

# Print the total rows
print(arcpy.management.GetCount(out_feature_class))
#arcpy.management.AddJoin(out_feature_class, "OBJECTID", r"C:\Users\Adrian.Wiegman\OneDrive - USDA\Research\Nload\MEP\MEP_Monitoring_Site_Coords.csv", "OID", "KEEP_ALL", "NO_INDEX_JOIN_FIELDS")

In [None]:
# Appendix

In [None]:
## Unused code snippets