This script takes exlu feature classes (derived from the Leon County Property Appraiser annual parcel database update) from 2009 onward & converts them into hexbins with various attributes derived from parcel attributes. Each year's exlu is converted into a new hexbin feature class, and then all hexbin feature classes are combined into one feature class. 

author: Cherie Bryant for Geog 778 (U of Wisconsin-Madison Cartography)

NEED TO FIGURE OUT HOW TO HANDLE WHEN A CELL GOES FROM 0 UNITS TO MORE; RIGHT NOW THE PERCENT CHANGE EXPRESSION THROWS OUT CELLS WHERE THE PRIOR YEAR IS 0 SO AS NOT TO CAUSE A DIVISION BY ZERO ERROR

**SETUP WORKSPACE**

In [None]:
import arcpy
# import pandas as pd
# import geopandas as gpd


# Geoprocessing environment: allow outputs from earlier runs to be overwritten,
# and set the default geodatabase for dataset names used by the tools.
arcpy.env.overwriteOutput = True
arcpy.env.workspace = r"C:\Users\cheri\Documents\geog778\ResidentialUnitTimeAnalysis\ResidentialUnitTimeAnalysis.gdb"

# Years are entered by hand because each year is run individually: a run takes a
# long time, errors specific to one year are easier to catch, and in the future
# only the newest year will need to be processed.
yr, prior_yr, base_yr = '2022', '2021', '2009'

# Full path to this year's exlu feature class.
# NOTE: this only builds the path string - no copy or feature layer is created.
temp_exlu_lyr = f'C:/Users/cheri/Documents/geog778/ResidentialUnitTimeAnalysis/ResidentialUnitTimeAnalysis.gdb/original_exlu_FCs/exlu_{yr}'

# copy exlu{yr} into a temporary feature layer 
# temp_exlu_lyr = f'exlu_{yr}'


Step 1: Pre-process exlu

In [None]:
# convert to equal area projection - tests showed that, without conversion, hexagon bin sizes were within .000001 acre

In [None]:
# RENAME THE HX{yr} FIELD TO 'HX'
#
# Per the original notes: from 2010 the field is already named 'HX', the
# year-specific name picks up again in 2018-2019, and it changes to 'HZ' in 2020.
# Any field whose name begins with 'HX' or 'HZ' is renamed (name and alias) to 'HX'.

new_field_name = 'HX'
new_field_alias = 'HX'

# every field currently on the exlu feature class
fieldList = arcpy.ListFields(temp_exlu_lyr)

for field in fieldList:
    if not field.name.startswith(('HX', 'HZ')):
        continue
    arcpy.management.AlterField(
        in_table=temp_exlu_lyr,
        field=field.name,
        new_field_name=new_field_name,
        new_field_alias=new_field_alias,
    )

In [None]:
# KEEP ONLY THE NECESSARY FIELDS
# DeleteField with method='KEEP_FIELDS' removes every field NOT named in `fields`.
# NOTE: this edits the dataset that temp_exlu_lyr points to in place.

in_table = temp_exlu_lyr
fields = [
    'resunits', 'PYR_MARKET', 'PYR_TAXES', 'PRICE_S1', 'PRICE_S2', 'HX',
    'ZONING', 'ZONED', 'CALC_ACREA', 'exlanduse', 'PROP_USE',
    'BASE_SQ_FT', 'AUX_SQ_FT', 'SALEDTE_S1', 'SALEDTE_S2', 'pattern',
]

arcpy.management.DeleteField(in_table=in_table, drop_field=fields, method='KEEP_FIELDS')

In [None]:
# CLIP THE PARCELS TO THE URBAN SERVICE AREA BOUNDARY & SAVE AS parcels_{yr}

in_features = in_table                                          # pre-processed exlu for this year
clip_features = 'USA_Boundary_8_22_22'                          # urban service area boundary
out_feature_class = f'/intermediate_parcel_FCs/parcels_{yr}'    # permanent output feature class

# Alternative kept for reference: temporarily point the workspace at the
# intermediate parcel feature dataset while clipping.
# intermediate_FCs = r"C:\Users\cheri\Documents\geog778\ResidentialUnitTimeAnalysis\ResidentialUnitTimeAnalysis.gdb\intermediate_parcel_FCs"
# with arcpy.EnvManager(workspace=intermediate_FCs):
#     arcpy.analysis.Clip(in_features, clip_features, out_feature_class)

arcpy.analysis.Clip(
    in_features=in_features,
    clip_features=clip_features,
    out_feature_class=out_feature_class,
)

Step 2: Calculate Needed Fields

In [None]:
# CALCULATE NEW FIELD 'homestead'
# 1 when HX is "X", otherwise 0 (stored as DOUBLE)

inTable_hmstead = f'/intermediate_parcel_FCs/parcels_{yr}'
fieldName_hmstead = 'homestead'

# helper evaluated by Calculate Field once per row
codeblock_hmstead = '''
def calc_hmstead_integer(HX):
    homestead = 0
    if HX == "X":
        homestead = 1
    return homestead'''
expression_hmstead = 'calc_hmstead_integer(!HX!)'

# populate the field on this year's parcel feature class
arcpy.management.CalculateField(
    in_table=inTable_hmstead,
    field=fieldName_hmstead,
    expression=expression_hmstead,
    expression_type="PYTHON3",
    code_block=codeblock_hmstead,
    field_type="DOUBLE",
)

In [None]:
# CALCULATE 'nonResSF'
# Non-residential square footage = BASE_SQ_FT + AUX_SQ_FT for parcels whose
# exlanduse is one of the non-residential categories listed in the code block;
# 0 for every other parcel. NULL square-footage values are treated as 0.

inTable_nonResSF = f'/intermediate_parcel_FCs/parcels_{yr}'
fieldName_nonResSF = 'nonResSF'
expression_nonResSF = 'calc_nonResSF(!exlanduse!, !BASE_SQ_FT!, !AUX_SQ_FT!)'
# helper evaluated by Calculate Field once per row; None checks use `is`
codeblock_nonResSF = '''
def calc_nonResSF(exlanduse, baseSF, auxSF):
    nonResSF = 0
    if baseSF is None:
        baseSF = 0
    if auxSF is None:
        auxSF = 0
    if exlanduse in ('Retail', 'Office', 'Warehouse', 'Religious/Non-profit', 'School', 'Motel/Hospital/Clinic', 'Government'):
        nonResSF = baseSF + auxSF
    return nonResSF'''

# calculate the new field
arcpy.management.CalculateField(inTable_nonResSF, fieldName_nonResSF, expression_nonResSF, "PYTHON3", codeblock_nonResSF, field_type="DOUBLE")

In [None]:
# CALCULATE COUNT OF SALES FOR 'yr'
# (intended to be based on 'SALEDTE_S1' / 'SALEDTE_S2')

### STARTING IN 2020 there is a change to SALEDTE_S1 & SALEDTE_S2. Sales counts are
### not needed for this project at this point, so a filler 0 is written for every
### row; THIS WILL NEED TO BE REDONE FOR 2020 onward if the count is added back in.

inTable_numSales = f'/intermediate_parcel_FCs/parcels_{yr}'
fieldName_numSales = 'numSales'

# placeholder helper: ignores both sale dates and always returns 0
codeblock_numSales = '''
def calc_numSales(sales_1, sales_2):
    numSales = 0
    return numSales'''
expression_numSales = 'calc_numSales(!SALEDTE_S1!, !SALEDTE_S2!)'

# Earlier draft kept for reference.
# NOTE(review): as written the draft would not work - it overwrites each sale date
# with 0 before calling .endswith(), the two None tests are inconsistent (!= vs ==),
# and `yr` is not defined inside the code block.
# codeblock_numSales = '''
# def calc_numSales(sales_1, sales_2):
#     numSales = 0
#     if sales_1 != None:
#         sales_1 = 0
#         if sales_1.endswith(yr):
#             numSales += 1
#     if sales_2 == None:
#         sales_2 = 0
#         if sales_2.endswith(yr):
#             numSales += 1
#     return numSales'''

# populate the field on this year's parcel feature class
arcpy.management.CalculateField(
    in_table=inTable_numSales,
    field=fieldName_numSales,
    expression=expression_numSales,
    expression_type="PYTHON3",
    code_block=codeblock_numSales,
    field_type="DOUBLE",
)


Step 3: Place the Parcel Data Into Hexbins

""" Summarize Within (Geoprocessing) with a bin size of 224.2677 feet* with the following summary fields: (*subsequent years will use hexBin_09 polygons as inputs instead of bins)"""

In [None]:
# ************ TO DO ****************
# - add a variable at the top for the summary_polygons location so it is not
#   buried in code if it needs to change in the future
# - add variables at the top for 'initialRun' (yes/no), 'finalRun' (yes/no),
#   initialRunYr = '09' and finalRunYr = '22' (or newer), then use conditional
#   statements for the blocks that run only with the initial or final year

summarized_layer = f'/intermediate_parcel_FCs/parcels_{yr}'
output_fc = f'/hexBin_FCs/hexBin_{yr}'
# must be the prior year's bins, not the base year's, because the result joins
# all previous fields and not just the bin geometry
sum_polygons = f'/hexBin_FCs/hexBin_{prior_yr}'
# one [field, 'MEAN', 'Count'] entry per parcel attribute being summarized
sum_fields = [
    [attribute, 'MEAN', 'Count']
    for attribute in (
        'resunits', 'PYR_MARKET', 'PYR_TAXES', 'PRICE_S1',
        'PRICE_S2', 'homestead', 'nonResSF', 'numSales',
    )
]

##################
# FIRST RUN ONLY - generates the hexagon bins
##################
# NOTE(review): bin_size here is '224.667 Feet' but the Step 3 description says
# 224.2677 feet - confirm which value was actually used before re-running.
# arcpy.gapro.SummarizeWithin(summarized_layer, output_fc, polygon_or_bin='BIN', bin_type='HEXAGON', bin_size='224.667 Feet', sum_shape='ADD_SUMMARY', shape_units='ACRES', weighted_summary_fields=sum_fields)


##################
# SUBSEQUENT RUNS - summarize into the prior year's hexbin polygons
##################
arcpy.gapro.SummarizeWithin(
    summarized_layer,
    output_fc,
    polygon_or_bin='POLYGON',
    bin_type='HEXAGON',
    summary_polygons=sum_polygons,
    sum_shape='ADD_SUMMARY',
    shape_units='ACRES',
    weighted_summary_fields=sum_fields,
)


# ADD ['resunits_allowed', 'SUM'] TO SUMMARY FIELDS FOR PHASE II

Step 5: Calculate New Fields for the Hexbin Feature Class (Step 4 is skipped until Phase 2)

In [None]:
# ***FOR FIRST YEAR ONLY*** ASSIGN A 'bin_ID' NUMBER (can copy ObjectID) 

# inTable_makeBinID = f'/hexBin_FCs/hexBin_{yr}'
# fieldName_makeBinID = 'bin_ID'
# expression_makeBinID = '!OBJECTID!'

# # calculate the new field
# arcpy.management.CalculateField(inTable_makeBinID, fieldName_makeBinID, expression_makeBinID, "PYTHON3")

In [None]:
# CALCULATE VALUATION PER UNIT (PYR_MARKET/resunits)
# REMOVING THESE FIELDS FOR NOW - WITH SO MANY LESS THAN 1 UNIT CELLS & NOT ACCOUNTING FOR CELLS WITH NONRES SF AS WELL, THE NUMBERS ARE RIDICULOUSLY HIGH FOR MANY CELLS; REEVALUATE HOW TO MEASURE THIS LATER

# inTable_valPerUnit = f'/hexBin_FCs/hexBin_{yr}'
# fieldName_valPerUnit = f'valPerUnit_{yr}'
# expression_valPerUnit = 'calc_valPerUnit(!pSUM_PYR_MARKET!, !pSUM_resunits!)'
# codeblock_valPerUnit = '''
# def calc_valPerUnit(pyr_market, resunits):
#     if resunits > 0:
#         valPerUnit = pyr_market/resunits
#         return valPerUnit'''

# # calculate the new field
# arcpy.management.CalculateField(inTable_valPerUnit, fieldName_valPerUnit, expression_valPerUnit, "PYTHON3", codeblock_valPerUnit, field_type="DOUBLE")

In [None]:
# CALCULATE TAXES PER UNIT (PYR_TAXES/resunits)
# REMOVING THESE FIELDS FOR NOW - WITH SO MANY LESS THAN 1 UNIT CELLS & NOT ACCOUNTING FOR CELLS WITH NONRES SF AS WELL, THE NUMBERS ARE RIDICULOUSLY HIGH FOR MANY CELLS; REEVALUATE HOW TO MEASURE THIS LATER

# inTable_taxPerUnit = f'/hexBin_FCs/hexBin_{yr}'
# fieldName_taxPerUnit = f'taxPerUnit_{yr}'
# expression_taxPerUnit = 'calc_taxPerUnit(!pSUM_PYR_TAXES!, !pSUM_resunits!)'
# codeblock_taxPerUnit = '''
# def calc_taxPerUnit(pyr_taxes, resunits):
#     if resunits > 0:
#         taxPerUnit = pyr_taxes/resunits
#         return taxPerUnit'''

# # calculate the new field
# arcpy.management.CalculateField(inTable_taxPerUnit, fieldName_taxPerUnit, expression_taxPerUnit, "PYTHON3", codeblock_taxPerUnit, field_type="DOUBLE")

Step 6: Update the Analysis Field Names to Include the Year

In [None]:
# APPEND '_{yr}_NU' TO THE NAME AND ALIAS OF EACH NEWLY ADDED SUMMARY FIELD

inTable_prep = f'/hexBin_FCs/hexBin_{yr}'
# pSUM variant kept for reference:
# analysis_fields = ['pSUM_resunits', 'pSUM_PYR_MARKET', 'pSUM_PYR_TAXES', 'pSUM_PRICE_S1', 'pSUM_PRICE_S2', 'pSUM_homestead', 'pSUM_nonResSF', 'pSUM_numSales']
analysis_fields = [
    'pMEAN_resunits', 'pMEAN_PYR_MARKET', 'pMEAN_PYR_TAXES', 'pMEAN_PRICE_S1',
    'pMEAN_PRICE_S2', 'pMEAN_homestead', 'pMEAN_nonResSF', 'pMEAN_numSales',
]

for field in analysis_fields:
    # name and alias are the same string, e.g. 'pMEAN_resunits_2022_NU'
    new_field_name = new_field_alias = f'{field}_{yr}_NU'
    arcpy.management.AlterField(
        in_table=inTable_prep,
        field=field,
        new_field_name=new_field_name,
        new_field_alias=new_field_alias,
    )

## IF GO BACK TO pSUM, NEED TO UPDATE CELL BELOW AS WELL

In [None]:
# REMOVE EXTRANEOUS 'pMEAN_' PREFIX FROM THE NEWLY ADDED FIELDS
#
# The prefix is removed by slicing rather than with str.strip('pMEAN_'):
# strip() treats its argument as a SET of characters and removes any of
# 'p', 'M', 'E', 'A', 'N', '_' from BOTH ends of the name, so it would also eat
# leading/trailing letters of a field name that happened to start or end with
# one of those characters.

pmean_prefix = 'pMEAN_'

# get a list of the fields
fieldList_prep = arcpy.ListFields(inTable_prep)

for field in fieldList_prep:
    if field.name.startswith(pmean_prefix):
        # drop exactly the leading prefix and nothing else
        new_field_name = field.name[len(pmean_prefix):]
        new_field_alias = new_field_name
        arcpy.management.AlterField(inTable_prep, field.name, new_field_name, new_field_alias)

Step 7: Calculate fields for percentage change (could be done in JS/browser but this will enhance performance)

In [None]:
# FOR EACH FIELD OF INTEREST, CALCULATE PERCENT CHANGE FROM THE PRIOR YEAR
# analysis_fields = [f'resunits'_{yr}, f'PYR_MARKET_{yr}', f'PYR_TAXES_{yr}', f'PRICE_S1_{yr}', f'PRICE_S2_{yr}', f'homestead_{yr}', f'nonResSF_{yr}', f'numSales_{yr}', f'valPerUnit_{yr}', f'taxPerUnit_{yr}']
analysis_fields = ['resunits', 'PYR_MARKET', 'PYR_TAXES', 'PRICE_S1', 'PRICE_S2', 'homestead', 'nonResSF', 'numSales']
# since valPerUnit & taxPerUnit will generate 'None' if there are no resunits, need to filter out the 'None' values with 'if' statements; tool will still run, but will lag the machine & throw a bunch of errors
inTable_annChange = f'/hexBin_FCs/hexBin_{yr}'
# Helper evaluated once per row. Returns None (no value written) when either
# input is None or when the prior-year value is not greater than 0, which also
# avoids a division by zero.
codeblock_annChange = '''
def calc_annChange(prior_yr_field, yr_field):
    if prior_yr_field is None or yr_field is None:
        return None
    if prior_yr_field > 0:
        return ((yr_field - prior_yr_field) / prior_yr_field) * 100
    return None'''

for field in analysis_fields:
    fieldName_annChange = f'{field}_{yr}_APC' # 'APC' stands for Annual Percentage Change
    expression_annChange = f'calc_annChange(!{field}_{prior_yr}_NU!, !{field}_{yr}_NU!)'
    # "PYTHON3" matches the expression type used by the other Calculate Field calls
    arcpy.management.CalculateField(inTable_annChange, fieldName_annChange, expression_annChange, "PYTHON3", codeblock_annChange, field_type="DOUBLE")

In [None]:
# FOR EACH FIELD OF INTEREST, CALCULATE PERCENT CHANGE FROM THE BASE YEAR
analysis_fields = ['resunits', 'PYR_MARKET', 'PYR_TAXES', 'PRICE_S1', 'PRICE_S2', 'homestead', 'nonResSF', 'numSales']

inTable_bypChange = f'/hexBin_FCs/hexBin_{yr}'
# since valPerUnit & taxPerUnit will generate 'None' if there are no resunits, need to filter out the 'None' values with 'if' statements; tool will still run, but will lag the machine & throw a bunch of errors
# Helper evaluated once per row. Returns None (no value written) when either
# input is None or when the base-year value is not greater than 0, which also
# avoids a division by zero.
codeblock_bypChange = '''
def calc_baseYrChange(base_yr_field, yr_field):
    if base_yr_field is None or yr_field is None:
        return None
    if base_yr_field > 0:
        return ((yr_field - base_yr_field) / base_yr_field) * 100
    return None'''

for field in analysis_fields:
    fieldName_bypChange = f'{field}_{yr}_CPC' # 'CPC' stands for Cumulative Percentage Change
    expression_bypChange = f'calc_baseYrChange(!{field}_{base_yr}_NU!, !{field}_{yr}_NU!)'
    # "PYTHON3" matches the expression type used by the other Calculate Field calls
    arcpy.management.CalculateField(inTable_bypChange, fieldName_bypChange, expression_bypChange, "PYTHON3", codeblock_bypChange, field_type="DOUBLE")

Step 8: For each field that ends with 'APC' or 'CPC', assign percentage categories to values - this is also to help the browser render faster

In [None]:
inTable_cat = f'/hexBin_FCs/hexBin_{yr}'

#  obtain a list of the fields
# fieldList_all = [i.name for i in arcpy.ListFields(inTable_cat)] # not needed since use .name in next step but saving for reference
fieldList_all = arcpy.ListFields(inTable_cat)

# select only this year's fields, i.e. names ending with '{yr}_APC' or '{yr}_CPC'
apc_fields = f'{yr}_APC'
cpc_fields = f'{yr}_CPC'
fieldList_cat = [i.name for i in fieldList_all if i.name.endswith((apc_fields, cpc_fields))]


# Helper evaluated once per row: maps a percentage to a category label.
# Each bin includes its lower bound and excludes its upper bound; a None
# input yields None (no category written).
codeblock_cat = '''
def calc_perc_cat(perc_field):
    if perc_field is None:
        return None
    if perc_field < -50:
        return 'neg50plus'
    elif -50 <= perc_field < -25:
        return 'neg50toNeg25'
    elif -25 <= perc_field < 0:
        return 'neg25toZero'
    elif 0 <= perc_field < 25:
        return 'zeroTo25'
    elif 25 <= perc_field < 50:
        return 'twenty5To50'
    elif 50 <= perc_field < 75:
        return 'fiftyTo75'
    elif 75 <= perc_field < 100:
        return 'seventy5to100'
    elif perc_field >= 100:
        return 'over100'
'''

# loop over fields & create a new '<field>cat' text field holding the category
for field in fieldList_cat:
    fieldName_cat = f'{field}cat'
    expression_cat = f'calc_perc_cat(!{field}!)'
    # "PYTHON3" matches the expression type used by the other Calculate Field calls
    arcpy.management.CalculateField(inTable_cat, fieldName_cat, expression_cat, "PYTHON3", codeblock_cat, field_type="TEXT")

Step 9: Perform Hot-Spot Analysis for Fields of Interest --- **ONLY FOR LAST RUN**  

In [None]:
# TRANSFORM 'hexBin_ALL_{yr}' SO THERE IS A ROW FOR EACH FEATURE FOR EACH YEAR

In [None]:
# LOOP TO DO ANALYSIS FOR FIELDS OF INTEREST

NOT NEEDED - Step ??: Overwrite the Prior Year 'hexBins_ALL_{final_yr}' With the New One

In [None]:
# source_FC = f'/hexBin_FCs/hexBin_{yr}'
# destination_FC = f'/hexBin_FCs/hexBin_ALL_{final_yr}'

# # delete existing version (regular overwrite doesn't work here) - the last year's hexBin_{yr} will be identical
# if arcpy.Exists(destination_FC):
#     arcpy.Delete_management(destination_FC)

# # copy new version
# arcpy.Copy_management(source_FC, destination_FC)

For Phase II - Land Use Analysis 

In [None]:
# # calculate 'resUnitsAllowed' (using zoning category dictionary & CALC_ACREA)


# # TODO: Need to add in additional zoning districts & make assumption for PUD/UPUD OR go  by future land use 

# densityDict = {
#         'AC': 45,
#         'ASN-A': 50,
#         'ASN-B': 50,
#         'ASN-C': 75,
#         'ASN-D': 100,
#         'C-1': 16,
#         'C-2': 16,
#         'CC': 150,
#         'CM': 20,
#         'CP': 16,
#         'CU-12': 12,
#         'CU-18': 18,
#         'CU-26': 26,
#         'CU-45': 45,
#         'IC': 16,
#         'LP': 0.5,
#         'MCN': 12,
#         'MH': 8,
#         'MR-1': 20,
#         'NB-1': 18,
#         'NBO': 8,
#         'OR-1': 8,
#         'OR-2': 16,
#         'OR-3': 20,
#         'RA': 1,
#         'R-1': 3.63,
#         'R-2': 4.84,
#         'R-3': 8,
#         'R-4': 10,
#         'R-5': 8,
#         'R': 0.1,
#         'RP': 6,
#         'RP-1': 3.6,
#         'RP-2': 6,
#         'RP-MH': 6,
#         'SCD': 6,
#         'UP-1': 16,
#         'UP-2': 20,
#         'UT': 50,
#         'UV': 100
#         }

# inTable_unitsAllowed = f'/intermediate_parcel_FCs/parcels_{yr}'
# fieldName_unitsAllowed = 'unitsAllowed'
# expression_unitsAllowed = 'calc_unitsAllowed(!ZONING!, !CALC_ACREA!)'
# codeblock_unitsAllowed = '''
# def calc_unitsAllowed(zoning, acreage):
#     for district in densityDict:
#         if zoning == district:
#             return acreage * densityDict[district]'''

# # calculate the new field
# arcpy.management.CalculateField(inTable_unitsAllowed, fieldName_unitsAllowed, expression_unitsAllowed, "PYTHON3", codeblock_unitsAllowed, field_type="DOUBLE")  

In [None]:
# WILL BE STEP 4 & WILL NEED TO CHANGE THE NAME OF 'hexBin_{yr}' TO 'hexBin_preJoin_{yr}' IN STEP 3

# THIS ISN'T NEEDED UNTIL PHASE 2 - HOLD FOR NOW

# Step 4: Spatial Join to exlu{yr} to Populate Fields by Largest Overlap

# Spatial Join between 'parcels_{yr}' & 'hexBin_preJoin_{yr}' based on Largest Overlap to populate the following fields:
# target_features = f'/hexBin_FCs/hexBin_preJoin_{yr}'
# join_features = f'/intermediate_parcel_FCs/parcels_{yr}'
# out_feature_class = f'/hexBin_FCs/hexBin_{yr}'

# arcpy.analysis.SpatialJoin(target_features, join_features, out_feature_class, match_option='LARGEST_OVERLAP')

# ExLandUse
# Zoning
# pattern
# YR_BLT
# SALEDTE_S1
# SALEDTE_S2


# maybe later add 'PROP_USE'

In [1]:
## STAND ALONE SCRIPT TO ADJUST THE CLASS BREAKS
## Recomputes the '<field>cat' category field for every field whose name ends
## with '_APC' or '_CPC' (all years) using a revised set of class breaks.

import arcpy
arcpy.env.workspace = r"C:\Users\cheri\Documents\geog778\ResidentialUnitTimeAnalysis\hexBin_4_newClasses.gdb"
arcpy.env.overwriteOutput = True

inTable_cat = 'hexBin_2022GDB_newClasses'

#  obtain a list of the fields
fieldList_all = arcpy.ListFields(inTable_cat)

# select only fields that end with 'APC' or 'CPC'
apc_fields = '_APC'
cpc_fields = '_CPC'
fieldList_cat = [i.name for i in fieldList_all if i.name.endswith((apc_fields, cpc_fields))]


# Helper evaluated once per row: maps a percentage to a category label.
# Each bin includes its lower bound and excludes its upper bound; values from
# -4 up to (but not including) 4 are labelled 'noChange'. A None input yields
# None (no category written).
codeblock_cat = '''
def calc_perc_cat(perc_field):
    if perc_field is None:
        return None
    if perc_field < -75:
        return 'lessNeg75'
    elif -75 <= perc_field < -50:
        return 'lessNeg50'
    elif -50 <= perc_field < -4:
        return 'lessNeg4'
    elif -4 <= perc_field < 4:
        return 'noChange'
    elif 4 <= perc_field < 25:
        return 'less25'
    elif 25 <= perc_field < 50:
        return 'less50'
    elif 50 <= perc_field < 75:
        return 'less75'
    elif 75 <= perc_field < 100:
        return 'less100'
    elif perc_field >= 100:
        return 'over100'
'''

# loop over fields & create a new '<field>cat' text field holding the category
for field in fieldList_cat:
    fieldName_cat = f'{field}cat'
    expression_cat = f'calc_perc_cat(!{field}!)'
    # "PYTHON3" is the expression type used by the main workflow's field calculations
    arcpy.management.CalculateField(inTable_cat, fieldName_cat, expression_cat, "PYTHON3", codeblock_cat, field_type="TEXT")