#### Arcpy + Geopandas combination

In [1]:
#Must be opened from ArcGIS Python Command Prompt (juptyter lab)
# Must have signed into ArcGIS online? (run ArcGIS Pro)

import arcpy
from arcpy import env
from arcpy.sa import *

import pandas as pd
import geopandas as gpd

from pathlib import Path
from datetime import datetime

In [2]:
def renameShapeField(inshp,infieldname,outfieldname):
    '''renames a field in a shapefile using geopandas because arcpy doesn't support this.'''
    ingdf = gpd.read_file(inshp)
    ingdf.rename(columns={infieldname: outfieldname}, inplace=True)
    ingdf.to_file(inshp)
    print(f'Fieldname {infieldname} changed to {outfieldname} in shapefile.')

In [3]:
##inputs
#======================= Brandy 2019-2018
#output dir
demdiff_dir = Path(r"D:\Whiskeytown\dem_diff\brandy_creek\demdiff19-18")
#DEMS
dem1 = Path(r"D:\Whiskeytown\dem_diff\brandy_creek\dems\orig\brandy_dem2018.tif")
dem2 = Path(r"D:\Whiskeytown\dem_diff\brandy_creek\dems\orig\brandy_dem2019.tif")
#Stable polygons
stablepolyshp = Path(r"D:\Whiskeytown\dem_diff\brandy_creek\shp\Stable_poly_18-19_expanded.shp")
#Output dod name stem
dodnamestem = r"DoD_19-18"
detrendnamestem = r"Detrend_19-18_polyn"
#region
aoi = 'brandy'

# #======================= Brandy 2020-2019
# #output dir
# demdiff_dir = Path(r"D:\Whiskeytown\dem_diff\brandy_creek\demdiff20-19")
# #DEMS
# dem1 = Path(r"D:\Whiskeytown\dem_diff\brandy_creek\dems\orig\brandy_dem2019.tif")
# dem2 = Path(r"D:\Whiskeytown\dem_diff\brandy_creek\dems\orig\brandy_dem2020.tif")
# #Stable polygons
# stablepolyshp = Path(r"D:\Whiskeytown\dem_diff\brandy_creek\shp\Stable_poly_19-20_expanded.shp")
# #Output dod name stem
# dodnamestem = r"DoD_20-19"
# detrendnamestem = r"Detrend_20-19_polyn"
# #region
# aoi = 'brandy'

# #======================= Boulder 2019-2018
# #output dir
# demdiff_dir = Path(r"D:\Whiskeytown\dem_diff\boulder_creek\demdiff19-18")
# #DEMS
# dem1 = Path(r"D:\Whiskeytown\dem_diff\boulder_creek\dem\original\boulder_dem2018.tif")
# dem2 = Path(r"D:\Whiskeytown\dem_diff\boulder_creek\dem\original\boulder_dem2019.tif")
# #Stable polygons
# stablepolyshp = Path(r"D:\Whiskeytown\dem_diff\boulder_creek\shp\Stable_poly_18-19.shp")
# #Output dod name stem
# dodnamestem = r"DoD_19-18"
# detrendnamestem = r"Detrend_19-18_polyn"
# #region
# aoi = 'boulder'

In [6]:
#Set standard output dir and create dir
outdod_dir = Path(demdiff_dir, r"dod")
outdod_unadj_dir  = Path(demdiff_dir, r"dod\unadj")
outdod_adj_dir  = Path(demdiff_dir, r"dod\adj")
outdod_pt_dir  = Path(demdiff_dir, r"dod\shp")
outtrendraster_dir = Path(demdiff_dir, r"detrend")
outtrend_pt_dir = Path(demdiff_dir, r"detrend\shp")
outcoregdem_dir = Path(demdiff_dir, r"co-reg-dem")
scratch_dir = Path(demdiff_dir, r"arcpyscratch")

#create parent if doesn't exist
demdiff_dir.mkdir(parents=True, exist_ok=True)
#create subdir if don't exist
for direc in [outdod_dir, outdod_unadj_dir, outdod_adj_dir, outdod_pt_dir, outtrendraster_dir, outtrend_pt_dir, outcoregdem_dir, scratch_dir]:
    direc.mkdir(parents=True, exist_ok=True)



In [7]:
# Set environment settings
arcpy.env.workspace = str(scratch_dir)
arcpy.env.overwriteOutput = True
arcpy.env.compression = "LZW"

env.extent = str(dem1)
env.snapRaster = str(dem1)

# Check out the ArcGIS Spatial Analyst extension license
arcpy.CheckOutExtension("Spatial");

### 1. Make Unadjusted DoD 
(DEM2 - DEM1 = Unadjusted_DoD)

In [8]:
#Make unadjusted DoD
print('Creating unadjusted DoD ...')
outunadjdod = Path(outdod_unadj_dir,dodnamestem + '_' + aoi + r'_unadj.tif')

dod = RasterCalculator([str(dem1), str(dem2)], ["dem1", "dem2"],
                                       "dem2-dem1", "FirstOf", "FirstOf")

dod.save(str(outunadjdod))


Creating unadjusted DoD ...


### 2. Create point shapefile of unadjusted DoD, then clip with stable area polygons 


In [9]:
#Convert DoD to point shapefile, then clip with stable polygons and rename field to 'dod_unadj'
#inputs
outstablept_shp = str(Path(outdod_pt_dir, outunadjdod.stem + r'_stable_pts.shp')) #same as DoD, but '_stable_pts.shp')

#covert to points scratchfile
print('Coverting DoD to points ...')
arcpy.RasterToPoint_conversion(dod, r"memory\tempRasPt", "VALUE");
#clip scratchfile
print('Clipping points to stable areas ...')
arcpy.Clip_analysis(r"memory\tempRasPt", str(stablepolyshp), r"memory\tempRasPtClip", "");
#get polygon "type (paved, unpaved)" onto att table via spatial join
print('Spatial join to get polygon type attribute to points ...')
arcpy.analysis.SpatialJoin(r"memory\tempRasPtClip", str(stablepolyshp), outstablept_shp);

#rename field
print('Renaming field in point shapefile ...')
renameShapeField(outstablept_shp,'grid_code','dod_unadj')


Coverting DoD to points ...


ModuleNotFoundError: No module named 'XTools'

Clipping points to stable areas ...
Spatial join to get polygon type attribute to points ...
Renaming field in point shapefile ...
Fieldname grid_code changed to dod_unadj in shapefile.


#### 3. Calculate Error Trend Surfaces using residual values in stable area polygons (which should be zero) 
- Polynomial order 0 = uniform correction (mean of residuals)
- Polynomial order 1 = sloped plane
- Polynomial order 2 = quadratic surface
- Polynomial order 3 = cubic surface
- Polynomial order 4 = quartic surface


In [10]:
#Calculate trend surface using all stable points (polynomial 0, 1 and 2) (and 3, 4?)

#Inputs/outputs
stablept_shp = outstablept_shp #(from cell above)
outtrendrasterstem = str(Path(outtrendraster_dir, detrendnamestem))

zField = "dod_unadj"
cellSize = 0.25
# PolynomialOrder = 2 (set in loop)
regressionType = "LINEAR"

for polyn_order in [0, 1, 2, 3, 4]:
    # Execute Trend
    print(f'Creating trend surface polynomial order {polyn_order}...')
    outTrend = Trend(stablept_shp, zField, cellSize, 
                     polyn_order, regressionType)
    outTrend.save(outtrendrasterstem + str(polyn_order) + ".tif")


Creating trend surface polynomial order 0...
Creating trend surface polynomial order 1...
Creating trend surface polynomial order 2...
Creating trend surface polynomial order 3...
Creating trend surface polynomial order 4...


#### 4. Apply Error Trend surfaces to DoD to create Adjusted DoD for visual comparisons.
Adjustment order is:  
 DoD - TrendRaster = AdjustedDoD
   
This is equivalent to adjusting DEM1:  
 DEM2 - DEM1 - TrendRaster = AdjustedDoD
 
Since  
TrendRaster = DoD residual which should be zero.   

For example:  
DEM2 = 3, DEM1 = 5, DoD = -2, TrendRaster = -2  
AdjustedDoD =  DEM2 - DEM1 - TrendRaster = 0  
             =  3 - 5 -(-2) = 0  
             =  DoD - TrendRaster = 0  
             =  -2 -(-2) = 0 


In [11]:
# '''
# Apply to DoD to evaluate visually
# Adjustment order is:
#     DoD - TrendRaster = AdjustedDoD
# This is equivalent to adjusting DEM1:
#      DEM2 - DEM1 - TrendRaster = AdjustedDoD
# Since TrendRaster = DoD residual which should be zero.
# For example:
# DEM2 = 3, DEM1 = 5, DoD = -2, TrendRaster = -2
# AdjustedDoD =  DEM2 - DEM1 - TrendRaster = 0
#             =  3 - 5 -(-2) = 0
#             =  DoD - TrendRaster = 0
#             =  -2 -(-2) = 0
# '''

#inputs/outputs
inunadjdod = str(outunadjdod) #from above cell
intrendrasterstem = outtrendrasterstem #from cell above
outadjdodstem = str(Path(outdod_adj_dir, dodnamestem + '_' + aoi + r'_adj_polyn_'))

#loop through detrend surfaces and apply to DoD
for polyn_order in [0,1,2,3,4]:
    #apply adjustment
    trendraster = intrendrasterstem + str(polyn_order) + '.tif'
    adj = RasterCalculator([inunadjdod, trendraster], ["dod", "trendraster"],
                                       "dod-trendraster", "FirstOf", "FirstOf")
    adj.save(outadjdodstem + str(polyn_order) + '.tif')
    

### 5. Sanity check!  Make sure that ErrorTrendSurface is being correctly applied to DoD.

In [12]:
#Sanity check to double check that DoD error correction is being done with teh right signs
PolynomialOrderSurface = 3

#inputs
intrendrasterstem = outtrendrasterstem #from cell above
trendraster = intrendrasterstem + str(PolynomialOrderSurface) + '.tif'
print(f'DEM1: {dem1}\n'
      f'DEM2: {dem2}\n'
      f'ErrorTrendRaster: {trendraster}\n\n'
      f'DoD = DEM2 - DEM1\n')

#Adjust DoD with trend raster
print(f'Method1:\n'
      f'AdjustedDoD = UnadjustedDoD - ErrorTrendRaster\n')
unadjdod = RasterCalculator([str(dem1), str(dem2)], ["dem1", "dem2"],
                                       "dem2 - dem1", "FirstOf", "FirstOf")
adjdod = RasterCalculator([unadjdod, trendraster], ["dod", "trendraster"],
                                       "dod - trendraster", "FirstOf", "FirstOf")
#Alternatively, Co-Register DEM1 to DEM2 using ErrorTrendRaster, then derive adjusted DoD.  
#Should be identical
print(f'Method2 (should be identical to Method1): \n'
      f'Co-RegisteredDEM1 = DEM1 + ErrorTrendRaster\n'
      f'AltAdjustedDoD = DEM2 - Co-RegisteredDEM1\n')
coregdem1 = RasterCalculator([str(dem1), trendraster], ["dem1", "trendraster"],
                                       "dem1 + trendraster", "FirstOf", "FirstOf")
altadjdod = RasterCalculator([coregdem1, str(dem2)], ["coregdem1", "dem2"],
                                       "dem2 - coregdem1", "FirstOf", "FirstOf")

doddiff = RasterCalculator([adjdod, altadjdod], ["adjdod", "altadjdod"],
                                       "adjdod - altadjdod", "FirstOf", "FirstOf")

#write output to get statistics
doddiff.save(arcpy.env.workspace + '\DoDDiffSanityCheck.tif')
print(f'Difference between Method1 and Method2 rasters (should be zero)')
#Get stats of doddiff
for stat in ['MEAN', 'STD', 'MAXIMUM', 'MINIMUM']:
    doddiff_result = arcpy.management.GetRasterProperties(arcpy.env.workspace + '\DoDDiffSanityCheck.tif', stat);
    outstat = doddiff_result.getOutput(0);
    print(f'{stat} = {str(outstat)}')

#clean up
arcpy.management.Delete(arcpy.env.workspace + '\DoDDiffSanityCheck.tif');


DEM1: D:\Whiskeytown\dem_diff\brandy_creek\dems\orig\brandy_dem2018.tif
DEM2: D:\Whiskeytown\dem_diff\brandy_creek\dems\orig\brandy_dem2019.tif
ErrorTrendRaster: D:\Whiskeytown\dem_diff\brandy_creek\demdiff19-18\detrend\Detrend_19-18_polyn3.tif

DoD = DEM2 - DEM1

Method1:
AdjustedDoD = UnadjustedDoD - ErrorTrendRaster

Method2 (should be identical to Method1): 
Co-RegisteredDEM1 = DEM1 + ErrorTrendRaster
AltAdjustedDoD = DEM2 - Co-RegisteredDEM1

Difference between Method1 and Method2 rasters (should be zero)
MEAN = 0
STD = 0
MAXIMUM = 0
MINIMUM = 0


### 6. Decide which trend surface to use

In [22]:
#Extract residual values from each adjusted DoD trend surface and evaluate bulk points

stablept_shp = str(Path(outdod_pt_dir, outunadjdod.stem + r'_stable_pts.shp')) #same as DoD, but '_pts.shp')
inadjdodstem = str(Path(outdod_adj_dir, dodnamestem + '_' + aoi + r'_adj_polyn_'))
outdetrendevalptshp = str(Path(outtrend_pt_dir, Path(outdod_pt_dir, outunadjdod.stem + r'_pts.shp') .stem + r'_detrend_eval.shp'))

#copy stable points to eval file
arcpy.Copy_management(stablept_shp, outdetrendevalptshp);

#derive list of adjusted dods and output fieldnames for multi-raster extract
inras_outfield_list = [[inadjdodstem + str(n) + '.tif', 'poly' + str(n) + 'resid'] for n in [0,1,2,3,4]]

#extract adjusted dod residual values to points
ExtractMultiValuesToPoints(outdetrendevalptshp, inras_outfield_list, "NONE")

#read into gdf to evaluate
tempgdf = gpd.read_file(outdetrendevalptshp)

#evaluate paved/unpaved
pavedgdfstat = tempgdf[tempgdf['type'] == 'paved'].drop(columns=['geometry','pointid', 'TARGET_FID','Join_Count','id']).describe([.025, .25, .5, .75, .975])
pavedgdfstat['type'] = 'paved'
pavedgdfstat['stat'] = pavedgdfstat.index
unpavedgdfstat = tempgdf[tempgdf['type'] == 'unpaved'].drop(columns=['geometry','pointid', 'TARGET_FID','Join_Count','id']).describe([.025, .25, .5, .75, .975])
unpavedgdfstat['type'] = 'unpaved'
unpavedgdfstat['stat'] = unpavedgdfstat.index
tempgdfstat = tempgdf.drop(columns=['geometry','pointid', 'TARGET_FID','Join_Count','id']).describe([.025, .25, .5, .75, .975])
tempgdfstat['type'] = 'all'
tempgdfstat['stat'] = tempgdfstat.index

dfstat = pd.concat([tempgdfstat, pavedgdfstat, unpavedgdfstat], ignore_index=True)

# Evaluate bulk residual values and write to file
dfstat.to_csv(Path(outtrendraster_dir, detrendnamestem + r'bulk_pt_adjustment_eval.csv'))

# Evaluate bulk residual values grouped by polygon and write to file
tempgdf.groupby(['id'])['dod_unadj','poly0resid','poly1resid','poly2resid','poly3resid','poly4resid'].agg(['mean','std']).describe([.025, .25, .5, .75, .975]).to_csv(
    Path(outtrendraster_dir, detrendnamestem + r'bulk_pt_adjustment_bypolygon_eval.csv'))

print(dfstat)


        dod_unadj    poly0resid    poly1resid     poly2resid     poly3resid  \
0   336555.000000  3.365550e+05  3.365550e+05  336555.000000  336555.000000   
1       -0.183162  1.779189e-09  4.230353e-09      -0.000147      -0.000056   
2        0.212388  2.123879e-01  1.908409e-01       0.181654       0.181654   
3       -6.121185 -5.938023e+00 -5.933700e+00      -5.915316      -5.915233   
4       -0.634526 -4.513637e-01 -3.619353e-01      -0.349134      -0.349045   
5       -0.275787 -9.262498e-02 -9.024726e-02      -0.084849      -0.084756   
6       -0.149628  3.353469e-02  1.134843e-03       0.006973       0.007062   
7       -0.059570  1.235921e-01  9.501396e-02       0.090649       0.090741   
8        0.137848  3.210103e-01  3.674790e-01       0.327802       0.327895   
9        0.970703  1.153866e+00  1.291804e+00       1.191322       1.191415   
10  264783.000000  2.647830e+05  2.647830e+05  264783.000000  264783.000000   
11      -0.166343  1.681918e-02  4.446373e-03      -



In [23]:
#Trend surface chosen

#For Brandy Creek 2018-2019
SelectedPolynomialOrderSurface = 3  #Based on visual inspection of trend surface and comparison of Adjusted DoDs

# #For Boulder Creek 2018-2019
# SelectedPolynomialOrderSurface = 3  #Based on visual inspection of trend surface and comparison of Adjusted DoDs

### 7. Make co-registered DEMs for archival

In [24]:
#Apply Error Trend Surface to DEM1 Create co-registered DEM for archiving
PolynomialOrderSurface = SelectedPolynomialOrderSurface

#inputs
intrendrasterstem = outtrendrasterstem #from cell above
trendraster = intrendrasterstem + str(PolynomialOrderSurface) + '.tif'
print(f'DEM to be coregistered, DEM1: {dem1}\n'
      f'ErrorTrendRaster: {trendraster}\n\n'
      f'Co-RegisteredDEM1 = DEM1 + ErrorTrendRaster\n')

#Copy DEM2 (unaltered) to directory
arcpy.Copy_management(str(dem2), str(Path(outcoregdem_dir , str(dem2.name))));


#Co-Register DEM1 to DEM2 using ErrorTrendRaster
coregdem1 = RasterCalculator([str(dem1), trendraster], ["dem1", "trendraster"],
                                       "dem1 + trendraster", "FirstOf", "FirstOf")

#write output 
coregdem1.save(str(Path(outcoregdem_dir , str(dem1.stem) + '_coreg_errortrendpolyn_' + str(PolynomialOrderSurface) + '.tif')))
print(f'{dem2.name} copied to {str(Path(outcoregdem_dir , str(dem2.name)))}\n')
outcoregpath = str(Path(outcoregdem_dir , str(dem1.stem) + '_coreg_errortrendpolyn_' + str(PolynomialOrderSurface) + '.tif'))
print(f'Co-registered {dem1.name} copied to {outcoregpath}')



DEM to be coregistered, DEM1: D:\Whiskeytown\dem_diff\brandy_creek\dems\orig\brandy_dem2018.tif
ErrorTrendRaster: D:\Whiskeytown\dem_diff\brandy_creek\demdiff19-18\detrend\Detrend_19-18_polyn3.tif

Co-RegisteredDEM1 = DEM1 + ErrorTrendRaster

brandy_dem2019.tif copied to D:\Whiskeytown\dem_diff\brandy_creek\demdiff19-18\co-reg-dem\brandy_dem2019.tif

Co-registered brandy_dem2018.tif copied to D:\Whiskeytown\dem_diff\brandy_creek\demdiff19-18\co-reg-dem\brandy_dem2018_coreg_errortrendpolyn_3.tif


In [25]:
# Subdivide stable polygons to use as test/train sets for residual error evaluation, then get poly and subdiv poly id onto points via intersection

#inputs
# target_subpoly_area = 9 # (9 = 144 points)size of subpolygons in m^2 (too small may result in overfitting of trend surface to validation points?)
# target_subpoly_area = 100 # (100 = 1600 pts) size of subpolygons in m^2(too small may result in overfitting of trend surface to validation points?
target_subpoly_area = 500 #larger polygons to  avoid overfitting of trend surface to validation points?
#points
stablept_shp = str(Path(outdod_pt_dir, outunadjdod.stem + r'_stable_pts.shp')) #same as DoD, but '_pts.shp')
outpointswithsubpolyid_str =  str(Path(outdod_pt_dir, outunadjdod.stem + r'_stable_pts' + str(target_subpoly_area) + r'm_subpolyid.shp'))

#polygons

stablepolyshp_str = str(stablepolyshp)
outstablepolysubdivided_str = str(Path(str(stablepolyshp.with_suffix('')) + '_' + str(target_subpoly_area) + 'm.shp'))

#subdivide polygon
print('Subdividing polygons ...')
arcpy.SubdividePolygon_management(
    stablepolyshp_str, outstablepolysubdivided_str, "EQUAL_AREAS","", target_subpoly_area, "", "", 
    "STACKED_BLOCKS");

#add subdiv poly id to poly attr table
polygdf = gpd.read_file(outstablepolysubdivided_str)
polygdf['subpolyid'] = polygdf.index
polygdf.to_file(outstablepolysubdivided_str)

#spatial join stable points with subdiv polygons to get polyid and subdividedpolyid on points for later test/train filter.
print('Joining sub-polygon id to stable points ...')
ptgdf = gpd.read_file(stablept_shp)

ptwithpolygdf = gpd.sjoin(ptgdf, polygdf, how="left", op='intersects')
ptwithpolygdf.drop(columns=['id_right', 'type_right','index_right'], inplace=True)
ptwithpolygdf.rename(columns = {'id_left': 'id', 'type_left': 'type'}, inplace=True)

#write output shp
ptwithpolygdf.to_file(outpointswithsubpolyid_str)

print(f'Subpolygons written to: \n     {outstablepolysubdivided_str}')
print(f'Stable points with sub-polygon id written to: \n     {outpointswithsubpolyid_str}')

Subdividing polygons ...
Joining sub-polygon id to stable points ...
Subpolygons written to: 
     D:\Whiskeytown\dem_diff\brandy_creek\shp\Stable_poly_18-19_expanded_500m.shp
Stable points with sub-polygon id written to: 
     D:\Whiskeytown\dem_diff\brandy_creek\demdiff19-18\dod\shp\DoD_19-18_brandy_unadj_stable_pts500m_subpolyid.shp


### Evaluate Residual Error by splitting original polygons into test/train subsets
- Withhold one polygon at a time for test set
- Use remaining polygons to generate trend surface
- Evaluate using withheld test polygon
- Iterate for each polygon, then generate stats

In [26]:
#Evaluate Residual Error by splitting original polygons into test/train subsets, withholding one polygon at a time for test.
#Create new trend surface with train subset, evaluate residual offset with test subset and write output to csv

#order of polynomial for trend surface
PolynomialOrder = SelectedPolynomialOrderSurface

#Iterate and do test/train splits to evaluate by subpolygon
print(datetime.now().strftime("%Y/%d/%m %H:%M:%S"))    

#inputs
#stable pooint shapefile with polygon "id" field for filtering
stablept_shp = str(Path(outdod_pt_dir, outunadjdod.stem + r'_stable_pts.shp')) #same as DoD, but '_pts.shp')
#stable polygons, not subdivided
stablepoly_str = str(stablepolyshp) #from input cell above

#load points with subpoly id into gdf to be filtered
ptwithpolygdf = gpd.read_file(stablept_shp)

#trend surface details
zField = "dod_unadj"
cellSize = 0.25
regressionType = "LINEAR"

#result df
resultsdf = pd.DataFrame(columns = ['bulk_pt_mean','bulk_pt_std','all_poly_mean','all_poly_std','paved_poly_mean','paved_poly_std','unpaved_poly_mean','unpaved_poly_std'])
# #percent of poly to use for train 
# trainfrac = 0.6

#read stable poly
stablepolygdf = gpd.read_file(stablepoly_str)


for i in range(len(stablepolygdf)):
    #clean up scratch dir first (leave results of final iteration in scratch to let user see them)
    for fn in [r"\temp_trainpoly.shp", r"\temp_valpoly.shp", r"\temp_trainpoint.shp", r"\temp_valpoint.shp", r"\temp_valpoint_withdetrend.shp",r"\temp_trendraster.tif"]:
        arcpy.management.Delete(arcpy.env.workspace + fn)
    print(f'Beginning iteration {str(i)} of {str(len(stablepolygdf))} ...')
    
    #Move one polygon at a time into test data set
    trainpolygdf = stablepolygdf.drop([i])
    valpolygdf = stablepolygdf.iloc[i]
    
    #use polyid to select train/val points
    trainptgdf = ptwithpolygdf[ptwithpolygdf['id'].isin(trainpolygdf['id'].tolist())]
    valptgdf = ptwithpolygdf[ptwithpolygdf['id'] == valpolygdf['id']]
    
    #write to temp
    trainptgdf.to_file(arcpy.env.workspace + r"\temp_trainpoint.shp")
    valptgdf.to_file(arcpy.env.workspace + r"\temp_valpoint.shp")
    
    #create trend surface
    # Execute Trend
    print('Creating trend surface...')
    outTrend = Trend(arcpy.env.workspace + r"\temp_trainpoint.shp", zField, cellSize, 
                     PolynomialOrder, regressionType)
    outTrend.save(arcpy.env.workspace + r"\temp_trendraster.tif")
    
    #Sample raster on validate points
    print('Sampling trend surface...')
    ExtractValuesToPoints(arcpy.env.workspace + r"\temp_valpoint.shp", 
                          arcpy.env.workspace + r"\temp_trendraster.tif", 
                          arcpy.env.workspace + r"\temp_valpoint_withdetrend.shp",
                          "NONE", "VALUE_ONLY")
    
    #read into gdf to eval
    print('Evaluating trend surface...')
    evalgdf = gpd.read_file(arcpy.env.workspace + r"\temp_valpoint_withdetrend.shp")
    #apply to DoD value (dod_unadj - trendRasterValue)
    evalgdf['resid'] = evalgdf['dod_unadj'] - evalgdf['RASTERVALU']
    
    #gather residuals grouped by polygons
    data = {'bulk_pt_mean': [evalgdf['resid'].mean()],
            'bulk_pt_std': [evalgdf['resid'].std()],
            'all_poly_mean': [evalgdf.groupby('id')['resid'].mean().mean()],
            'all_poly_std': [evalgdf.groupby('id')['resid'].mean().std()],
            'paved_poly_mean': [evalgdf[evalgdf['type'] == 'paved'].groupby('id')['resid'].mean().mean()],
            'paved_poly_std': [evalgdf[evalgdf['type'] == 'paved'].groupby('id')['resid'].mean().std()],
            'unpaved_poly_mean': [evalgdf[evalgdf['type'] == 'unpaved'].groupby('id')['resid'].mean().mean()],
            'unpaved_poly_std': [evalgdf[evalgdf['type'] == 'unpaved'].groupby('id')['resid'].mean().std()]
           }
    resultsdf = resultsdf.append(pd.DataFrame(data, 
                                              columns = ['bulk_pt_mean','bulk_pt_std','all_poly_mean','all_poly_std','paved_poly_mean','paved_poly_std','unpaved_poly_mean','unpaved_poly_std']), 
                                 ignore_index=True)
    
    
    print(datetime.now().strftime("%Y/%d/%m %H:%M:%S"))  
    
    #write intermediate output, overwrite to save progress
    resultsdf.to_csv(Path(outtrendraster_dir, detrendnamestem + r'_order_' + str(PolynomialOrder) + 
                                  r'_test-train_eval_by_whole_polygon.csv'))
    resultsdf.describe([.025, .25, .5, .75, .975]).to_csv(Path(outtrendraster_dir, detrendnamestem + r'_order_' + str(PolynomialOrder) + 
                                  r'_test-train_eval_by_whole_polygon_summary_stats.csv'))



2021/03/02 13:23:42
Beginning iteration 0 of 13 ...
Creating trend surface...
Sampling trend surface...
Evaluating trend surface...
2021/03/02 13:31:43
Beginning iteration 1 of 13 ...
Creating trend surface...
Sampling trend surface...
Evaluating trend surface...
2021/03/02 13:38:57
Beginning iteration 2 of 13 ...
Creating trend surface...
Sampling trend surface...
Evaluating trend surface...
2021/03/02 13:45:39
Beginning iteration 3 of 13 ...
Creating trend surface...
Sampling trend surface...
Evaluating trend surface...
2021/03/02 13:52:21
Beginning iteration 4 of 13 ...
Creating trend surface...
Sampling trend surface...
Evaluating trend surface...
2021/03/02 13:59:14
Beginning iteration 5 of 13 ...
Creating trend surface...
Sampling trend surface...
Evaluating trend surface...
2021/03/02 14:05:54
Beginning iteration 6 of 13 ...
Creating trend surface...
Sampling trend surface...
Evaluating trend surface...
2021/03/02 14:12:59
Beginning iteration 7 of 13 ...
Creating trend surface..

### Evaluate Residual Error by splitting subdivided polygons into test/train subsets.
- Withhold 40% of polygons for test set
- Use remaining 60% of polygons to generate trend surface
- Evaluate residual using withheld subpolygons
- Iterate 100 times, then generate stats

In [13]:
#Evaluate Residual Error by splitting subdivided polygons into test/train subsets.
#Create new trend surface with train subset, evaluate residual offset with test subset
#group evaluations by both "subpoly" and "whole poly", and write output to csv

#order of polynomial for trend surface
PolynomialOrder = SelectedPolynomialOrderSurface

#Iterate and do test/train splits to evaluate by subpolygon
print(datetime.now().strftime("%Y/%d/%m %H:%M:%S"))    
#number of iterations (~2-3 minutes each?)
num_loops = 100

#inputs
pointswithsubpolyid = outpointswithsubpolyid_str #from cell above
stablepolysubdivided = outstablepolysubdivided_str #from cell above

#load points with subpoly id into gdf to be filtered
ptwithpolygdf = gpd.read_file(pointswithsubpolyid)

#trend surface details
zField = "dod_unadj"
cellSize = 0.25
regressionType = "LINEAR"

#result df
resultsdf_subpoly = pd.DataFrame(columns = ['bulk_pt_mean','bulk_pt_std','all_poly_mean','all_poly_std',
                                            'paved_poly_mean','paved_poly_std','unpaved_poly_mean','unpaved_poly_std'])
resultsdf_poly = pd.DataFrame(columns = ['bulk_pt_mean','bulk_pt_std','all_poly_mean','all_poly_std',
                                         'paved_poly_mean','paved_poly_std','unpaved_poly_mean','unpaved_poly_std'])
#percent of poly to use for train 
trainfrac = 0.6

for i in range(num_loops):
    #clean up scratch dir first (leave results of final iteration in scratch to let user see them)
    for fn in [r"\temp_trainpoly.shp", r"\temp_valpoly.shp", r"\temp_trainpoint.shp", r"\temp_valpoint.shp", 
               r"\temp_valpoint_withdetrend.shp",r"\temp_trendraster.tif"]:
        arcpy.management.Delete(arcpy.env.workspace + fn)
    print(f'Beginning iteration {i} ...')
    #subset poly to test/train
    arcpy.ga.SubsetFeatures(stablepolysubdivided, 
                            arcpy.env.workspace + r"\temp_trainpoly.shp", 
                            arcpy.env.workspace + r"\temp_valpoly.shp", 60, "PERCENTAGE_OF_INPUT")
    #load each to get poly id
    trainpolygdf = gpd.read_file(arcpy.env.workspace + r"\temp_trainpoly.shp")
    valpolygdf = gpd.read_file(arcpy.env.workspace + r"\temp_valpoly.shp")
    
    #use subpolyid to select train/val points
    trainptgdf = ptwithpolygdf[ptwithpolygdf['subpolyid'].isin(trainpolygdf['subpolyid'].tolist())]
    valptgdf = ptwithpolygdf[ptwithpolygdf['subpolyid'].isin(valpolygdf['subpolyid'].tolist())]
    
    #write to temp
    trainptgdf.to_file(arcpy.env.workspace + r"\temp_trainpoint.shp")
    valptgdf.to_file(arcpy.env.workspace + r"\temp_valpoint.shp")
    
    #create trend surface
    # Execute Trend
    print('Creating trend surface...')
    outTrend = Trend(arcpy.env.workspace + r"\temp_trainpoint.shp", zField, cellSize, 
                     PolynomialOrder, regressionType)
    outTrend.save(arcpy.env.workspace + r"\temp_trendraster.tif")
    
    #Sample raster on validate points
    print('Sampling trend surface...')
    ExtractValuesToPoints(arcpy.env.workspace + r"\temp_valpoint.shp", 
                          arcpy.env.workspace + r"\temp_trendraster.tif", 
                          arcpy.env.workspace + r"\temp_valpoint_withdetrend.shp",
                          "NONE", "VALUE_ONLY")
    
    #read into gdf to eval
    print('Evaluating trend surface...')
    evalgdf = gpd.read_file(arcpy.env.workspace + r"\temp_valpoint_withdetrend.shp")
    #apply to DoD value (dod_unadj - trendRasterValue)
    evalgdf['resid'] = evalgdf['dod_unadj'] - evalgdf['RASTERVALU']
    
    #gather residuals grouped by subpolygons
    data_subpoly = {'bulk_pt_mean': [evalgdf['resid'].mean()],
            'bulk_pt_std': [evalgdf['resid'].std()],
            'all_poly_mean': [evalgdf.groupby('subpolyid')['resid'].mean().mean()],
            'all_poly_std': [evalgdf.groupby('subpolyid')['resid'].mean().std()],
            'paved_poly_mean': [evalgdf[evalgdf['type'] == 'paved'].groupby('subpolyid')['resid'].mean().mean()],
            'paved_poly_std': [evalgdf[evalgdf['type'] == 'paved'].groupby('subpolyid')['resid'].mean().std()],
            'unpaved_poly_mean': [evalgdf[evalgdf['type'] == 'unpaved'].groupby('subpolyid')['resid'].mean().mean()],
            'unpaved_poly_std': [evalgdf[evalgdf['type'] == 'unpaved'].groupby('subpolyid')['resid'].mean().std()]
           }
    resultsdf_subpoly = resultsdf_subpoly.append(pd.DataFrame(data_subpoly, 
                                              columns = ['bulk_pt_mean','bulk_pt_std','all_poly_mean','all_poly_std',
                                                         'paved_poly_mean','paved_poly_std','unpaved_poly_mean','unpaved_poly_std']), 
                                 ignore_index=True)
    
    #gather residuals grouped by entire polygons
    data_poly = {'bulk_pt_mean': [evalgdf['resid'].mean()],
            'bulk_pt_std': [evalgdf['resid'].std()],
            'all_poly_mean': [evalgdf.groupby('id')['resid'].mean().mean()],
            'all_poly_std': [evalgdf.groupby('id')['resid'].mean().std()],
            'paved_poly_mean': [evalgdf[evalgdf['type'] == 'paved'].groupby('id')['resid'].mean().mean()],
            'paved_poly_std': [evalgdf[evalgdf['type'] == 'paved'].groupby('id')['resid'].mean().std()],
            'unpaved_poly_mean': [evalgdf[evalgdf['type'] == 'unpaved'].groupby('id')['resid'].mean().mean()],
            'unpaved_poly_std': [evalgdf[evalgdf['type'] == 'unpaved'].groupby('id')['resid'].mean().std()]
           }
    resultsdf_poly = resultsdf_poly.append(pd.DataFrame(data_poly, 
                                              columns = ['bulk_pt_mean','bulk_pt_std','all_poly_mean','all_poly_std',
                                                         'paved_poly_mean','paved_poly_std','unpaved_poly_mean','unpaved_poly_std']), 
                                 ignore_index=True)
    
    
    print(datetime.now().strftime("%Y/%d/%m %H:%M:%S"))   
        
    #write intermediate output, overwrite to save progress
    resultsdf_subpoly.to_csv(Path(outtrendraster_dir, detrendnamestem + r'_order_' + str(PolynomialOrder) + 
                                  r'_adjustment_eval_by_' + str(target_subpoly_area) + r'm_polygon.csv'))
    resultsdf_subpoly.describe([.025, .25, .5, .75, .975]).to_csv(Path(outtrendraster_dir, detrendnamestem + r'_order_' + str(PolynomialOrder) + 
                                     r'_adjustment_eval_by_' + str(target_subpoly_area) + r'm_polygon_summary_stats.csv'))
    
    resultsdf_poly.to_csv(Path(outtrendraster_dir, detrendnamestem + r'_order_' + str(PolynomialOrder) + r'_adjustment_eval_by_whole_polygon.csv'))
    resultsdf_poly.describe([.025, .25, .5, .75, .975]).to_csv(Path(outtrendraster_dir, detrendnamestem + r'_order_' + str(PolynomialOrder) + 
                                     r'_adjustment_eval_by_whole_polygon_summary_stats.csv'))



2021/02/02 21:52:42
Beginning iteration 0 ...
Creating trend surface...
Sampling trend surface...
Evaluating trend surface...
2021/02/02 22:01:11
Beginning iteration 1 ...
Creating trend surface...
Sampling trend surface...
Evaluating trend surface...
2021/02/02 22:09:58
Beginning iteration 2 ...
Creating trend surface...
Sampling trend surface...
Evaluating trend surface...
2021/02/02 22:18:48
Beginning iteration 3 ...
Creating trend surface...
Sampling trend surface...
Evaluating trend surface...
2021/02/02 22:27:21
Beginning iteration 4 ...
Creating trend surface...
Sampling trend surface...
Evaluating trend surface...
2021/02/02 22:35:50
Beginning iteration 5 ...
Creating trend surface...
Sampling trend surface...
Evaluating trend surface...
2021/02/02 22:44:37
Beginning iteration 6 ...
Creating trend surface...
Sampling trend surface...
Evaluating trend surface...
2021/02/02 22:53:10
Beginning iteration 7 ...
Creating trend surface...
Sampling trend surface...
Evaluating trend sur

PermissionError: [Errno 13] Permission denied: 'D:\\Whiskeytown\\dem_diff\\boulder_creek\\demdiff19-18\\detrend\\Detrend_19-18_polyn_order_3_adjustment_eval_by_500m_polygon_summary_stats.csv'