In [1]:
import os
import re
import random
import string
import requests

import numpy as np
import pandas as pd
import geopandas as gpd

import arcpy
import arcpy.sa as sa
import arcpy.mp as mp
arcpy.CheckOutExtension("Spatial")

'CheckedOut'

#### Define a function to run zonal statistics. The variables are:  
##### zone_fc - feature class to act as the aggregator, for us that's trimmed CBG polygons  
##### zone_field - the field that acts as a unique identifier, for us that will be GEOIDFQ  
##### raster_path - the path to the raster file used for the analysis
##### output_folder - the path to the output folder for the DBF file containing statistics
##### stat_type - the type of statistics, MEAN, MAXIMUM, MINIMUM, etc  
#####
#### Coordinate system must match between polygon and rasters being analyzed!!

In [2]:
def func_zonal_stats(zone_fc, zone_field, raster_path, output_folder, stat_type):
    """Run Zonal Statistics as Table for one raster and return the DBF path.

    Args:
        zone_fc: Zone dataset handed to the tool as ``in_zone_data``.
        zone_field: Field of ``zone_fc`` that identifies each zone.
        raster_path: Value raster to summarise inside each zone.
        output_folder: Folder in which the output DBF table is written.
        stat_type: Statistic keyword (e.g. "MEAN", "MAXIMUM"). It is passed to
            the tool unchanged; its lower-cased form becomes the file suffix.

    Returns:
        ``<output_folder>/<raster name>_<stat_type lower-cased>.dbf``.
    """
    import os
    import arcpy
    import arcpy.sa as sa

    arcpy.CheckOutExtension("Spatial")
    arcpy.env.overwriteOutput = True  # re-running replaces an existing table

    # Table name = raster file name without extension, blanks turned into
    # underscores, cut to 32 characters, followed by the statistic keyword.
    stem, _extension = os.path.splitext(os.path.basename(raster_path))
    table_stem = stem.replace(" ", "_")[:32]
    dbf_path = os.path.join(output_folder, f"{table_stem}_{stat_type.lower()}.dbf")

    sa.ZonalStatisticsAsTable(
        in_zone_data=zone_fc,
        zone_field=zone_field,
        in_value_raster=raster_path,
        out_table=dbf_path,
        statistics_type=stat_type,
        ignore_nodata="DATA",
    )

    # Handing the path back lets the notebook display it as confirmation.
    return dbf_path

In [3]:
def func_vector_match(zone_fc, zone_field, vector_path, output_folder, match_type):
    """Flag each zone with 1/0 depending on whether it matches a vector layer.

    The zones are spatially joined (one output row per zone, unmatched zones
    kept) to ``vector_path``; a SHORT field named after the match type is set
    to 1 when ``Join_Count`` is non-zero and 0 otherwise, and the joined table
    is exported to a DBF in ``output_folder``.

    Args:
        zone_fc: Zone feature class used as the join target.
        zone_field: Identifier field of the zones. Not used by this function;
            kept so the signature parallels the other helpers.
        vector_path: Feature class joined to the zones.
        output_folder: Folder that receives the output DBF.
        match_type: Spatial Join match option (e.g. "INTERSECT", "WITHIN").
            Upper-cased for the tool, lower-cased for the file and field name.

    Returns:
        ``<output_folder>/<vector name>_<match_type lower-cased>.dbf``.
    """
    import os
    import arcpy
    arcpy.env.overwriteOutput = True

    # Output name: vector file stem (blanks -> underscores, max 32 chars)
    # plus the lower-cased match type.
    vector_base = os.path.splitext(os.path.basename(vector_path))[0]
    vector_base_clean = vector_base.replace(" ", "_")[:32]
    match_type_clean = match_type.lower()
    out_table_name = f"{vector_base_clean}_{match_type_clean}.dbf"
    out_table_path = os.path.join(output_folder, out_table_name)

    zone_layer = "zone_layer"
    vector_layer = "vector_layer"
    temp_join = "in_memory/temp_join"
    scratch_items = (temp_join, zone_layer, vector_layer)

    # DBF field names are limited to 10 characters.
    flag_field = match_type_clean[:10]

    try:
        arcpy.MakeFeatureLayer_management(zone_fc, zone_layer)
        arcpy.MakeFeatureLayer_management(vector_path, vector_layer)

        arcpy.analysis.SpatialJoin(
            target_features=zone_layer,
            join_features=vector_layer,
            out_feature_class=temp_join,
            join_operation="JOIN_ONE_TO_ONE",
            join_type="KEEP_ALL",
            match_option=match_type.upper()  # INTERSECT, WITHIN, CONTAINS, etc.
        )

        # The flag field is added last, so it is the right-most column of the
        # exported table (func_combine_tables reads the right-most field).
        arcpy.management.AddField(temp_join, flag_field, "SHORT")
        arcpy.management.CalculateField(
            in_table=temp_join,
            field=flag_field,
            expression="0 if !Join_Count! is None or !Join_Count! == 0 else 1",
            expression_type="PYTHON3"
        )

        arcpy.conversion.TableToTable(temp_join, output_folder, out_table_name)
    finally:
        # Release the in-memory join result and the temporary layers even when
        # a tool fails, so they do not linger for the rest of the session.
        for item in scratch_items:
            if arcpy.Exists(item):
                arcpy.management.Delete(item)

    return out_table_path

In [4]:
def func_vector_percent_overlap(zone_fc, zone_field, vector_path, output_folder, match_type):
    """Compute, for every zone, the percentage of its area covered by a vector layer.

    All intermediate data lives in the ``in_memory`` workspace and uses field
    names of at most 10 characters, so:

    * the 10-character field-name limit of shapefile/DBF outputs (ERROR 000313
      on AddField) is never hit,
    * the caller's ``zone_fc`` is never modified (no fields are added or joined
      onto it),
    * no helper DBFs are left in ``output_folder``, where func_combine_tables
      would merge them as extra columns.

    Args:
        zone_fc: Zone polygon feature class.
        zone_field: Field that uniquely identifies each zone (e.g. GEOIDFQ).
        vector_path: Polygon feature class whose overlap with the zones is measured.
        output_folder: Folder that receives the output DBF.
        match_type: Not used; accepted so the signature parallels
            func_vector_match.

    Returns:
        ``<output_folder>/<vector name>_pct.dbf``. The table holds the zone
        attributes plus Z_AREA, SUM_I_AREA and, as the right-most field, PCT.

    Notes:
        Areas are planar and in the units of each dataset's coordinate system,
        so both inputs should share one projected coordinate system.
        NOTE(review): overlapping features inside ``vector_path`` are summed
        individually, so PCT can exceed 100 - dissolve the layer first if that
        matters.
    """
    import os
    import arcpy

    arcpy.env.overwriteOutput = True

    # Output name: vector file stem (blanks -> underscores, max 32 chars) + "_pct".
    vector_base = os.path.splitext(os.path.basename(vector_path))[0]
    vector_base_clean = vector_base.replace(" ", "_")[:32]
    out_table_name = f"{vector_base_clean}_pct.dbf"
    out_table_path = os.path.join(output_folder, out_table_name)

    # Scratch datasets (removed in the finally block below).
    zones_copy = "in_memory/pct_zones"
    intersect_fc = "in_memory/pct_intersect"
    summary_table = "in_memory/pct_summary"
    scratch_items = (zones_copy, intersect_fc, summary_table)

    # Every field name is <= 10 characters so it survives the DBF export intact.
    zone_area_field = "Z_AREA"
    part_area_field = "I_AREA"
    sum_field = f"SUM_{part_area_field}"
    pct_field = "PCT"

    try:
        # Step 1: work on a private copy of the zones and record each zone's area.
        arcpy.management.CopyFeatures(zone_fc, zones_copy)
        arcpy.management.AddField(zones_copy, zone_area_field, "DOUBLE")
        arcpy.management.CalculateGeometryAttributes(zones_copy, [[zone_area_field, "AREA"]])

        # Step 2: intersect. "ALL" keeps the zone attributes (including
        # zone_field), which the summary in step 3 groups by.
        arcpy.analysis.Intersect([zones_copy, vector_path], intersect_fc, "ALL")
        arcpy.management.AddField(intersect_fc, part_area_field, "DOUBLE")
        arcpy.management.CalculateGeometryAttributes(intersect_fc, [[part_area_field, "AREA"]])

        # Step 3: total intersected area per zone.
        arcpy.analysis.Statistics(
            intersect_fc,
            summary_table,
            [[part_area_field, "SUM"]],
            zone_field,
        )

        # Step 4: attach the totals to the zone copy.
        arcpy.management.JoinField(zones_copy, zone_field, summary_table, zone_field, [sum_field])

        # Step 5: percentage. Zones without any overlap have a null sum, which
        # is treated as 0; a zero zone area yields 0 instead of dividing by zero.
        arcpy.management.AddField(zones_copy, pct_field, "FLOAT")
        arcpy.management.CalculateField(
            zones_copy,
            pct_field,
            expression=(
                f"((!{sum_field}! or 0) / !{zone_area_field}! * 100) "
                f"if !{zone_area_field}! else 0"
            ),
            expression_type="PYTHON3",
        )

        # Step 6: export. PCT was added last, so it is the right-most field.
        arcpy.conversion.TableToTable(zones_copy, output_folder, out_table_name)
    finally:
        for item in scratch_items:
            if arcpy.Exists(item):
                arcpy.management.Delete(item)

    return out_table_path

#### Define a function to combine the multiple DBF files into one CSV. The field name in the CSV is taken from the DBF name, which originally was taken from the raster:

In [5]:
def func_combine_tables(output_folder_path):
    """Merge the statistic column of every DBF in a folder into one CSV.

    For each ``*.dbf`` file that contains the key field ``GEOIDFQ`` and at
    least one other non-geometry, non-OID field, the right-most field is read
    together with the key, renamed to the DBF's base name, and outer-merged
    with the columns collected so far.

    Args:
        output_folder_path: Folder holding the DBF tables; the CSV is written
            to the same folder as ``combined_zonal_stats.csv``.

    Returns:
        Path of the CSV that was written.

    Raises:
        ValueError: If no DBF in the folder contributes a column.
    """
    key_field = "GEOIDFQ"
    output_csv = "combined_zonal_stats.csv"

    # os.listdir returns entries in arbitrary order; sorting keeps the CSV
    # column order identical from run to run.
    dbf_files = sorted(
        f for f in os.listdir(output_folder_path) if f.lower().endswith(".dbf")
    )
    merged_df = None

    for dbf in dbf_files:
        dbf_path = os.path.join(output_folder_path, dbf)

        # Get all non-geometry, non-OID fields
        fields = [f.name for f in arcpy.ListFields(dbf_path) if f.type not in ("Geometry", "OID")]

        if key_field not in fields or len(fields) < 2:
            continue  # Skip if missing key or only one useful field

        stat_field = fields[-1]  # Use rightmost field
        if stat_field == key_field:
            # The key is the last column, so there is no statistic to read;
            # requesting the key twice would only produce a duplicate column.
            continue

        table = arcpy.da.TableToNumPyArray(dbf_path, [key_field, stat_field])
        df = pd.DataFrame(table)

        # Rename stat field to the DBF base name
        stat_name = os.path.splitext(dbf)[0]
        df = df.rename(columns={stat_field: stat_name})

        if merged_df is None:
            merged_df = df
        else:
            merged_df = pd.merge(merged_df, df, on=key_field, how="outer")

    # Save combined CSV
    if merged_df is None:
        raise ValueError("No valid DBFs found or no data to merge.")

    output_csv_path = os.path.join(output_folder_path, output_csv)
    merged_df.to_csv(output_csv_path, index=False)
    return output_csv_path

#### Get the current working directory (assumed to be the folder containing this notebook) and use it to build the paths for the input shapefile and the output folder:

In [6]:
# All paths are resolved against the current working directory, which is
# expected to be the folder that contains this notebook.
current_dir = os.getcwd()
# Built with os.path.join rather than a backslash literal so the relative
# path uses the separator of whatever platform runs the notebook.
shapefile_cbg = os.path.join("input", "cbg_kontur.shp")
shapefile_cbg_path = os.path.join(current_dir, shapefile_cbg)
output_folder_path = os.path.join(current_dir, "output")


print(f"Location of this notebook: {current_dir}")
print(f"Location of CBG SHP: {shapefile_cbg_path}")
print(f"Location for output: {output_folder_path}")

Location of this notebook: C:\GITHUB\CCSVI\Scripts\Spatial_Analysis
Location of CBG SHP: C:\GITHUB\CCSVI\Scripts\Spatial_Analysis\input\cbg_kontur.shp
Location for output: C:\GITHUB\CCSVI\Scripts\Spatial_Analysis\output


#### Define output folder and create it if it doesn't exist

In [7]:
# Reuse output_folder_path (the variable the analysis cells below pass to the
# helper functions) so the folder that gets created is the one that gets used.
output_folder = output_folder_path
os.makedirs(output_folder, exist_ok=True)

#### The following lines run the zonal statistics function for each raster independently.  
#### The outputs are all generated as DBF files with names originating from the rasters.  
#### As many of these lines can be added as necessary, just change the inputs for the appropriate raster and statistic:

In [8]:
# Mean of the staterf_inann raster per zone; the raster path is built from current_dir instead of a machine-specific absolute path.
func_zonal_stats(shapefile_cbg_path, "GEOIDFQ", os.path.join(current_dir, "input", "environmental", "staterf_inann.tif"), output_folder_path, "MEAN")


'C:\\GITHUB\\CCSVI\\Scripts\\Spatial_Analysis\\output\\staterf_inann_mean.dbf'

In [9]:
# Maximum of the igtn_prob_test raster per zone; the raster path is built from current_dir instead of a machine-specific absolute path.
func_zonal_stats(shapefile_cbg_path, "GEOIDFQ", os.path.join(current_dir, "input", "environmental", "igtn_prob_test.tif"), output_folder_path, "MAXIMUM")

'C:\\GITHUB\\CCSVI\\Scripts\\Spatial_Analysis\\output\\igtn_prob_test_maximum.dbf'

In [10]:
# Maximum of the n10_landslide_susc raster per zone; the raster path is built from current_dir instead of a machine-specific absolute path.
func_zonal_stats(shapefile_cbg_path, "GEOIDFQ", os.path.join(current_dir, "input", "environmental", "n10_landslide_susc.tif"), output_folder_path, "MAXIMUM")

'C:\\GITHUB\\CCSVI\\Scripts\\Spatial_Analysis\\output\\n10_landslide_susc_maximum.dbf'

In [11]:
# Maximum of the cat4_mom_slosh_hightide raster per zone; the raster path is built from current_dir instead of a machine-specific absolute path.
func_zonal_stats(shapefile_cbg_path, "GEOIDFQ", os.path.join(current_dir, "input", "environmental", "cat4_mom_slosh_hightide.tif"), output_folder_path, "MAXIMUM")

'C:\\GITHUB\\CCSVI\\Scripts\\Spatial_Analysis\\output\\cat4_mom_slosh_hightide_maximum.dbf'

In [12]:
# 1/0 flag per zone for intersection with the slrxa_3pt2ft layer; the layer path is built from current_dir instead of a machine-specific absolute path.
func_vector_match(shapefile_cbg_path, "GEOIDFQ", os.path.join(current_dir, "input", "environmental", "slrxa_3pt2ft.shp"), output_folder_path, "INTERSECT")

In [13]:
#func_vector_percent_overlap(shapefile_cbg_path, "GEOIDFQ", r"C:\GITHUB\CCSVI\Scripts\Spatial_Analysis\input\environmental\slrxa_3pt2ft.shp", output_folder_path, "PERCENT")

ExecuteError: Failed to execute. Parameters are not valid.
ERROR 000313: The length of Field Name must not be larger than 10
Failed to execute (AddField).


#### This takes the previously generated DBF files and merges them all to one CSV file with fieldnames from the DBFs:

In [None]:
# Merge the DBF tables found in the output folder into combined_zonal_stats.csv (written to the same folder).
func_combine_tables(output_folder_path)