# Create classified Overture building footprints
3/14/2025

In [None]:
import arcpy
from arcpy.sa import *
import pandas as pd
import numpy as np
import os
import urllib.request
import zipfile
import time

from tqdm import tqdm
import tempfile
from datetime import datetime

from arcgis.features import GeoAccessor, GeoSeriesAccessor

# Root folder that holds every input dataset (placeholder; set before running).
data_path = "your_path"
# Scratch workspace that receives intermediate feature classes (placeholder).
scratch_gdb = "your_scratch_path"
# Folder that receives the final geodatabase and the Parquet exports (placeholder).
results_path = "your_results_path"

# Geoprocessing environment shared by all cells below.
arcpy.env.workspace = data_path
# Intermediate outputs reuse the same names for every state, so they must be overwritable.
arcpy.env.overwriteOutput = True
arcpy.env.outputCoordinateSystem = None
arcpy.env.parallelProcessingFactor = "100%"

## Set up

In [None]:
# Input geodatabases; each one is read through per-state feature classes whose
# names are built from the state abbreviation in the classification loop.
input_nsi_gdb = os.path.join(data_path, "NSI/nsi_2022.gdb")
input_mbd_gdb = os.path.join(data_path, "microsoft_buildings/microsoft_buildings.gdb")
input_usa_struct_gdb = os.path.join(data_path, "USA_structures/usa_structures.gdb")
input_overture_gdb = os.path.join(data_path, "overture/overture_2024_11_13.gdb")

# Dictionary mapping Census state FIPS codes to state abbreviations
# (Alaska and Hawaii are commented out, so they are not processed.)
# NOTE(review): several active entries carry the remark "No state layer
# available"; it is not clear from this file which layer that refers to.
fips_to_abbreviation = {
    '01': 'AL',  # Alabama
    # '02': 'AK',  # Alaska
    '04': 'AZ',  # Arizona: No state layer available
    '05': 'AR',  # Arkansas
    '06': 'CA',  # California
    '08': 'CO',  # Colorado: No state layer available
    '09': 'CT',  # Connecticut
    '10': 'DE',  # Delaware
    '11': 'DC',  # District of Columbia
    '12': 'FL',  # Florida: No state layer available
    '13': 'GA',  # Georgia
    # '15': 'HI',  # Hawaii
    '16': 'ID',  # Idaho
    '17': 'IL',  # Illinois: No state layer available
    '18': 'IN',  # Indiana
    '19': 'IA',  # Iowa
    '20': 'KS',  # Kansas
    '21': 'KY',  # Kentucky
    '22': 'LA',  # Louisiana
    '23': 'ME',  # Maine
    '24': 'MD',  # Maryland
    '25': 'MA',  # Massachusetts
    '26': 'MI',  # Michigan
    '27': 'MN',  # Minnesota
    '28': 'MS',  # Mississippi: No state layer available
    '29': 'MO',  # Missouri
    '30': 'MT',  # Montana
    '31': 'NE',  # Nebraska
    '32': 'NV',  # Nevada
    '33': 'NH',  # New Hampshire
    '34': 'NJ',  # New Jersey
    '35': 'NM',  # New Mexico: No state layer available
    '36': 'NY',  # New York
    '37': 'NC',  # North Carolina
    '38': 'ND',  # North Dakota
    '39': 'OH',  # Ohio
    '40': 'OK',  # Oklahoma
    '41': 'OR',  # Oregon
    '42': 'PA',  # Pennsylvania
    '44': 'RI',  # Rhode Island
    '45': 'SC',  # South Carolina
    '46': 'SD',  # South Dakota
    '47': 'TN',  # Tennessee
    '48': 'TX',  # Texas: No state layer available
    '49': 'UT',  # Utah
    '50': 'VT',  # Vermont
    '51': 'VA',  # Virginia
    '53': 'WA',  # Washington
    '54': 'WV',  # West Virginia
    '55': 'WI',  # Wisconsin
    '56': 'WY'   # Wyoming
}

# Despite its name, `stfips` holds the two-letter abbreviations (the dict
# values), which is what the per-state layer names are built from.
# The [0:50] slice keeps at most the first 50 entries; with the 49 active
# entries above it keeps all of them.
stfips = list(fips_to_abbreviation.values())[0:50]
stfips

## Run classification workflow for contiguous US states

In [None]:
# One iteration per state; `st` is the two-letter abbreviation used in layer names.
for st in stfips:
    print(f"Working on {st}")
    start_time = time.time()

    # load input data
    # Each source is wrapped in a named feature layer so that selections can be applied to it.
    nsi_layer = arcpy.management.MakeFeatureLayer(os.path.join(input_nsi_gdb, f"{st}_nsi_2022"), "nsi_layer")
    mbd_layer = arcpy.management.MakeFeatureLayer(os.path.join(input_mbd_gdb, f"{st}_mbd"), "mbd_layer")
    usa_struct_layer = arcpy.management.MakeFeatureLayer(os.path.join(input_usa_struct_gdb, f"{st}_Structures"), "usa_struct_layer")
    overture_layer = arcpy.management.MakeFeatureLayer(os.path.join(input_overture_gdb, f"{st}_buildings"), "overture_layer")
        

    # INTERSECT combined with INVERT selects the NSI features that do NOT
    # touch any Overture building.
    print("Selecting NSI that does not overlap Overture buildings")
    arcpy.management.SelectLayerByLocation(
        in_layer=nsi_layer,
        overlap_type="INTERSECT",
        select_features=overture_layer,
        search_distance=None,
        selection_type="NEW_SELECTION",
        invert_spatial_relationship="INVERT"
    )

    # Despite the variable name, this is a feature class path in the scratch
    # workspace, not a layer.
    nsi_selected_layer = os.path.join(scratch_gdb, "nsi_no_overture_intersect")
    
    # Persist the current selection of nsi_layer to that scratch feature class.
    arcpy.management.CopyFeatures(nsi_layer, nsi_selected_layer)

    # Create dynamic field mappings for the spatial joins later.
    # Each mapping string is a ';'-separated list of per-field entries. The
    # entries are written one per line and joined, which yields the same string
    # values as the former single-line literals (one of which was split across
    # two physical lines and therefore was not a valid string literal).
    # NOTE(review): none of the three FieldMappings objects is referenced again
    # in the visible part of this workflow; confirm whether they are still needed.
    mbd_src = f"{input_mbd_gdb}/{st}_mbd"
    usa_src = f"{input_usa_struct_gdb}/{st}_Structures"
    overture_src = f"{input_overture_gdb}/{st}_buildings"
    nsi_src = f"{input_nsi_gdb}/{st}_nsi_2022"

    # NSI attributes appended to every mapping. fd_id deliberately keeps the
    # bare table name, exactly as in the original strings.
    nsi_field_entries = [
        f'fd_id "fd_id" true true false 2000 Text 0 0,Join,",",{st}_nsi_2022,fd_id,-1,-1',
        f'bid "bid" true true false 2147483647 Text 0 0,First,#,{nsi_src},bid,0,2147483646',
        f'cbfips "cbfips" true true false 2147483647 Text 0 0,First,#,{nsi_src},cbfips,0,2147483646',
        f'st_damcat "st_damcat" true true false 2147483647 Text 0 0,Join,",",{nsi_src},st_damcat,0,2147483646',
        f'occtype "occtype" true true false 2147483647 Text 0 0,Join,",",{nsi_src},occtype,0,2147483646',
        f'num_story "num_story" true true false 8 Double 0 0,First,#,{nsi_src},num_story,-1,-1',
        f'sqft "sqft" true true false 20000 Text 0 0,Join,",",{nsi_src},sqft,-1,-1',
        f'ftprntid "ftprntid" true true false 2147483647 Text 0 0,First,#,{nsi_src},ftprntid,0,2147483646',
        f'ftprntsrc "ftprntsrc" true true false 2147483647 Text 0 0,Join,",",{nsi_src},ftprntsrc,0,2147483646',
        f'source "source" true true false 2147483647 Text 0 0,Join,",",{nsi_src},source,0,2147483646',
    ]

    # Microsoft Building footprint attributes.
    mbd_field_entries = [
        f'release "release" true true false 8 Double 0 0,Range,#,{mbd_src},release,-1,-1',
        f'capture_dates_range "capture_dates_range" true true false 2000000000 Text 0 0,Join,",",{mbd_src},capture_dates_range,0,1999999999',
        f'Shape_Length "Shape_Length" false true true 8 Double 0 0,First,#,{st}_mbd,Shape_Length,-1,-1,{mbd_src},Shape_Length,-1,-1',
        f'Shape_Area "Shape_Area" false true true 8 Double 0 0,First,#,{mbd_src},Shape_Area,-1,-1',
    ]
    mbd_fieldmappings_string = ";".join(mbd_field_entries + nsi_field_entries)
    mbd_fieldmappings = arcpy.FieldMappings()
    mbd_fieldmappings.loadFromString(mbd_fieldmappings_string)

    # USA Structures attributes: (output field, source field, trailing "start,end" pair).
    usa_field_specs = [
        ("usa_id", "BUILD_ID", "-1,-1"),
        ("usa_height", "HEIGHT", "-1,-1"),
        ("usa_image_date", "IMAGE_DATE", "-1,-1"),
        ("usa_occ_cls", "OCC_CLS", "0,19"),
        ("usa_prim_occ", "PRIM_OCC", "0,34"),
        ("usa_prop_addr", "PROP_ADDR", "0,79"),
        ("usa_prop_city", "PROP_CITY", "0,49"),
        ("usa_prop_st", "PROP_ST", "0,49"),
        ("usa_prop_zip", "PROP_ZIP", "0,49"),
        ("usa_sec_occ", "SEC_OCC", "0,12"),
        ("usa_source", "SOURCE", "0,49"),
    ]
    usa_field_entries = [
        f'{out_name} "{out_name}" true true false 255 Text 0 0,Join,",",{usa_src},{src_field},{char_range}'
        for out_name, src_field, char_range in usa_field_specs
    ]
    usa_fieldmappings_string = ";".join(usa_field_entries + nsi_field_entries)
    usa_fieldmappings = arcpy.FieldMappings()
    usa_fieldmappings.loadFromString(usa_fieldmappings_string)

    # Overture building attributes.
    overture_field_entries = [
        f'overture_id "overture_id" true true false 255 Text 0 0,Join,",",{overture_src},id,0,31',
        f'overture_source "overture_source" true true false 255 Text 0 0,Join,",",{overture_src},source,0,99',
        f'overture_class "overture_class" true true false 255 Text 0 0,Join,",",{overture_src},class,0,99',
        f'overture_height "overture_height" true true false 255 Text 0 0,Join,",",{overture_src},height,-1,-1',
        f'overture_num_floors "overture_num_floors" true true false 255 Long 0 0,First,#,{overture_src},num_floors,-1,-1',
        f'overture_subtype "overture_subtype" true true false 255 Text 0 0,Join,",",{overture_src},subtype,0,99',
        f'overture_update_time "overture_update_time" true true false 255 Text 0 0,Join,",",{overture_src},update_time,0,99',
    ]
    overture_fieldmappings_string = ";".join(overture_field_entries + nsi_field_entries)
    overture_fieldmappings = arcpy.FieldMappings()
    overture_fieldmappings.loadFromString(overture_fieldmappings_string)

    # nsi_selected_layer is a feature class path. When Select Layer By Attribute
    # is given a path it puts the selection on a new layer that it returns, so a
    # discarded return value means the filter never reaches later tools. Build
    # one layer over the NSI subset and apply the attribute selections to it, so
    # that the location selections below actually honor them.
    nsi_unmatched_layer = arcpy.management.MakeFeatureLayer(nsi_selected_layer, "nsi_no_overture_layer")

    # Select 'Bing' footprint sources, and find the MBD footprints that align
    # with those NSI points.
    print("Selecting NSI points that align with Microsoft Building footprints")
    arcpy.management.SelectLayerByAttribute(
        in_layer_or_view=nsi_unmatched_layer,
        selection_type="NEW_SELECTION",
        where_clause="ftprntsrc = 'Bing' And source <> 'X'",
        invert_where_clause=None
    )

    arcpy.management.SelectLayerByLocation(
        in_layer=mbd_layer,
        overlap_type="HAVE_THEIR_CENTER_IN",
        select_features=nsi_unmatched_layer,
        search_distance="0.5 Meters",
        selection_type="NEW_SELECTION",
        invert_spatial_relationship="NOT_INVERT"
    )

    # Drop any selected MBD footprint that touches an Overture building.
    arcpy.management.SelectLayerByLocation(
        in_layer=mbd_layer,
        overlap_type="INTERSECT",
        select_features=overture_layer,
        search_distance=None,
        selection_type="REMOVE_FROM_SELECTION",
        invert_spatial_relationship="NOT_INVERT"
    )

    # Capture the current MBD selection as its own layer and export it to scratch.
    arcpy.management.MakeFeatureLayer(mbd_layer, "nsi_mbd_ftprnt", where_clause="")
    arcpy.conversion.ExportFeatures("nsi_mbd_ftprnt", os.path.join(scratch_gdb, "nsi_mbd_ftprnt"))

    # Switch the NSI selection to the points whose footprint source is NGA or ORNL.
    arcpy.management.SelectLayerByAttribute(
        in_layer_or_view=nsi_unmatched_layer,
        selection_type="NEW_SELECTION",
        where_clause="ftprntsrc IN ('NGA', 'ORNL') And source <> 'X'",
        invert_where_clause=None
    )

    print("Selecting NSI points that align with USA Structures")
    arcpy.management.SelectLayerByLocation(
        in_layer=usa_struct_layer,
        overlap_type="HAVE_THEIR_CENTER_IN",
        select_features=nsi_unmatched_layer,
        search_distance="0.5 Meters",
        selection_type="NEW_SELECTION",
        invert_spatial_relationship="NOT_INVERT"
    )

    # Drop any selected USA Structures footprint that touches an Overture building.
    arcpy.management.SelectLayerByLocation(
        in_layer=usa_struct_layer,
        overlap_type="INTERSECT",
        select_features=overture_layer,
        search_distance=None,
        selection_type="REMOVE_FROM_SELECTION",
        invert_spatial_relationship="NOT_INVERT"
    )

    # Capture the current USA Structures selection as its own layer and export it to scratch.
    arcpy.management.MakeFeatureLayer(usa_struct_layer, "nsi_usa_struc_ftprnt", where_clause="")
    arcpy.conversion.ExportFeatures("nsi_usa_struc_ftprnt", os.path.join(scratch_gdb, "nsi_usa_struc_ftprnt"))

    # Combine the Overture buildings with the two supplemental footprint layers
    # ("nsi_mbd_ftprnt" and "nsi_usa_struc_ftprnt") into one scratch feature class.
    # The explicit field map keeps the Overture attributes plus the release/capture
    # fields from the MBD layer and the date fields from the USA Structures layer.
    print("Merging all buildings")
    arcpy.management.Merge(
        inputs="overture_layer;nsi_mbd_ftprnt;nsi_usa_struc_ftprnt",
        output=os.path.join(scratch_gdb, "overture_layer_Merge"),
        field_mappings='id "id" true true false 32 Text 0 0,First,#,overture_layer,id,0,31;height "height" true true false 4 Float 0 0,First,#,overture_layer,height,-1,-1;source "source" true true false 2147483647 Text 0 0,First,#,overture_layer,source,0,99;update_time "update_time" true true false 100 Text 0 0,First,#,overture_layer,update_time,0,99;subtype "subtype" true true false 100 Text 0 0,First,#,overture_layer,subtype,0,99;class "class" true true false 100 Text 0 0,First,#,overture_layer,class,0,99;level "level" true true false 4 Long 0 0,First,#,overture_layer,level,-1,-1;num_floors "num_floors" true true false 4 Long 0 0,First,#,overture_layer,num_floors,-1,-1;release "release" true true false 8 Double 0 0,First,#,nsi_mbd_ftprnt,release,-1,-1;capture_dates_range "capture_dates_range" true true false 2000000000 Text 0 0,First,#,nsi_mbd_ftprnt,capture_dates_range,0,1999999999;PROD_DATE "PROD_DATE" true true false 8 Date 0 0,First,#,nsi_usa_struc_ftprnt,PROD_DATE,-1,-1;IMAGE_DATE "IMAGE_DATE" true true false 8 Date 0 0,First,#,nsi_usa_struc_ftprnt,IMAGE_DATE,-1,-1;Shape_Length "Shape_Length" false true true 8 Double 0 0,First,#,overture_layer,Shape_Length,-1,-1,nsi_mbd_ftprnt,Shape_Length,-1,-1,nsi_usa_struc_ftprnt,Shape_Length,-1,-1;Shape_Area "Shape_Area" false true true 8 Double 0 0,First,#,overture_layer,Shape_Area,-1,-1,nsi_mbd_ftprnt,Shape_Area,-1,-1,nsi_usa_struc_ftprnt,Shape_Area,-1,-1',
        add_source="NO_SOURCE_INFO",
        field_match_mode="MANUAL_EDIT"
    )

    # Layer over the merged feature class; this is the target of the joins that follow.
    overture_merged = arcpy.management.MakeFeatureLayer(os.path.join(scratch_gdb, "overture_layer_Merge"), "overture_merged")

    # Clear the selections left on the USA Structures and NSI layers so the
    # joins below see every feature of those layers.
    arcpy.management.SelectLayerByAttribute(
        in_layer_or_view=usa_struct_layer,
        selection_type="CLEAR_SELECTION",
        where_clause="",
        invert_where_clause=None
    )
    
    arcpy.management.SelectLayerByAttribute(
        in_layer_or_view=nsi_layer,
        selection_type="CLEAR_SELECTION",
        where_clause="",
        invert_where_clause=None
    )

    # Scratch outputs of the two attribute-transfer joins.
    usa_join_fc = os.path.join(scratch_gdb, "buildings_class")
    nsi_join_fc = os.path.join(scratch_gdb, "buildings_class_nsi")

    # First join: summarize the USA Structures attributes of every structure
    # intersecting a merged building (one output row per building).
    print("Joining overture and USA structures layers")
    arcpy.gapro.JoinFeatures(
        target_layer=overture_merged,
        join_layer=usa_struct_layer,
        output=usa_join_fc,
        join_operation="JOIN_ONE_TO_ONE",
        spatial_relationship="INTERSECTS",
        spatial_near_distance=None,
        temporal_relationship="",
        temporal_near_distance=None,
        attribute_relationship=None,
        summary_fields="OCC_CLS MAX;PRIM_OCC MAX;IMAGE_DATE MAX;HEIGHT MAX;UUID MAX",
        join_condition="",
        keep_all_target_features="KEEP_ALL",
        include_distance=None,
        distance_unit=""
    )

    # Second join: add the NSI damage category and building id from NSI
    # features within 10 meters of each building.
    print("Joining overture and nsi layers")
    arcpy.gapro.JoinFeatures(
        target_layer=usa_join_fc,
        join_layer=nsi_layer,
        output=nsi_join_fc,
        join_operation="JOIN_ONE_TO_ONE",
        spatial_relationship="NEAR",
        spatial_near_distance="10 Meters",
        temporal_relationship="",
        temporal_near_distance=None,
        attribute_relationship=None,
        summary_fields="st_damcat MAX;bid MAX",
        join_condition=None,
        keep_all_target_features="KEEP_ALL",
        include_distance=None,
        distance_unit=""
    )

    # Layer over the output of the NSI join; input to the classification below.
    overture_classified = arcpy.management.MakeFeatureLayer(os.path.join(scratch_gdb, "buildings_class_nsi"), "overture_classified")
    
    # Create final layer with res_or_not
    # The Arcade expression assigns 1, 0 or -1:
    #   * subtype present:
    #       - subtype "residential": 0 for class garage/garages/parking; otherwise 1
    #         when MAX_OCC_CLS or MAX_st_damcat is present, else 1 if update_time
    #         is before "2021-01-01" and -1 if not
    #       - any other subtype: 0
    #   * subtype missing:
    #       - MAX_OCC_CLS present and not "Unclassified": 1 when it is "Residential"
    #         and MAX_PRIM_OCC is not "Temporary Lodging", else 0
    #       - otherwise MAX_st_damcat decides: 1 for "RES", 0 for other values,
    #         -1 when it is missing too
    # NOTE(review): -1 presumably means "could not be classified" - confirm.
    print("Calculating Residential or Not")
    arcpy.gapro.CalculateField(
        input_layer=overture_classified,
        output=os.path.join(scratch_gdb, "buildings_class_w_stacks"),
        field_to_calculate="NEW_FIELD",
        field_name="res_or_not",
        existing_field=None,
        field_type="INTEGER",
        expression='IIf($feature.subtype != null, IIf($feature.subtype == "residential", IIf($feature.class == "garage" || $feature.class == "garages" || $feature.class == "parking", 0, IIf($feature.MAX_OCC_CLS != null || $feature.MAX_st_damcat != null, 1, IIf($feature.update_time < "2021-01-01", 1, -1))),0), IIf($feature.MAX_OCC_CLS != null && $feature.MAX_OCC_CLS != "Unclassified", IIf($feature.MAX_OCC_CLS == "Residential" && $feature.MAX_PRIM_OCC != "Temporary Lodging", 1, 0), IIf($feature.MAX_st_damcat != null, IIf($feature.MAX_st_damcat == "RES", 1, 0), -1)))',
        track_aware=None,
        track_fields=None,
        time_boundary_split=None,
        time_boundary_reference=None
    )

    # Re-point the "overture_classified" layer at the classified output.
    overture_classified = arcpy.management.MakeFeatureLayer(os.path.join(scratch_gdb, "buildings_class_w_stacks"), "overture_classified")
    
    # Union the layer with itself so that overlapping buildings are split into
    # pieces that the FindIdentical step below compares by shape.
    print("Unioning buildings")
    with arcpy.EnvManager(parallelProcessingFactor="100%"):
        arcpy.analysis.Union(
            in_features=overture_classified,
            out_feature_class=os.path.join(scratch_gdb, "buildings_class_w_stacks_union"),
            join_attributes="ALL",
            cluster_tolerance=None,
            gaps="GAPS"
        )

    # NOTE(review): this layer reuses the name "overture_classified", so it takes
    # over the layer name created just above; both variables then refer to a
    # layer called "overture_classified".
    overture_classified_union = arcpy.management.MakeFeatureLayer(os.path.join(scratch_gdb, "buildings_class_w_stacks_union"), "overture_classified")
    
    # Write a table of the features whose SHAPE is identical to another feature's.
    print("Finding identical geometries in buildings - these are overlaps or duplicates")
    arcpy.management.FindIdentical(
        in_dataset=overture_classified_union,
        out_dataset=os.path.join(scratch_gdb, "overture_class_FindIdentical"),
        fields="SHAPE",
        xy_tolerance=None,
        z_tolerance=0,
        output_record_option="ONLY_DUPLICATES"
    )
    
    # Input layer and identical table
    input_layer = overture_classified_union
    identical_table = os.path.join(scratch_gdb, "overture_class_FindIdentical")

    # Group the feature ids reported by FindIdentical by FEAT_SEQ; every group
    # is one set of geometrically identical features.
    identical_dict = {}
    with arcpy.da.SearchCursor(identical_table, ["FEAT_SEQ", "IN_FID"]) as cursor:
        for feat_seq, in_fid in cursor:
            identical_dict.setdefault(feat_seq, []).append(in_fid)

    # List to store OBJECTID_1 values of features to delete
    features_to_delete = []

    print("Deleting identicals, prioritizing residential and more recent polygons")
    # Load (OBJECTID_1, res_or_not, update_time) once, keyed by OBJECTID_1, for quick lookup.
    # NOTE(review): IN_FID values are looked up against OBJECTID_1 here - confirm
    # that OBJECTID_1 carries the ids FindIdentical reports for this layer.
    with arcpy.da.SearchCursor(input_layer, ["OBJECTID_1", "res_or_not", "update_time"]) as search_cursor:
        features_data = {row[0]: row for row in search_cursor}

    for fids in identical_dict.values():
        # Collect all features in the current FEAT_SEQ group
        features = [features_data[fid] for fid in fids if fid in features_data]

        # Keep the best feature of the group:
        # 1. highest res_or_not first (a missing value ranks with -1)
        # 2. then the most recent update_time (a missing value ranks last)
        # 3. ties keep their original order, because the sort is stable even with reverse=True
        # update_time is compared as text so that missing values never get compared
        # with values of another type; ISO-style timestamps order chronologically
        # as strings.
        features.sort(
            key=lambda row: (
                row[1] if row[1] is not None else -1,
                str(row[2]) if row[2] is not None else "",
            ),
            reverse=True,
        )

        # Mark features to delete except the highest-priority one
        features_to_delete.extend(row[0] for row in features[1:])

    # Delete marked features in bulk using a SQL query
    if features_to_delete:
        # Convert list of IDs to a comma-separated string
        delete_ids = ", ".join(map(str, features_to_delete))

        # Build a SQL query to select features for deletion
        sql_query = f"OBJECTID_1 IN ({delete_ids})"

        # Use MakeFeatureLayer and DeleteFeatures for bulk deletion
        arcpy.MakeFeatureLayer_management(input_layer, "temp_layer", sql_query)
        arcpy.DeleteFeatures_management("temp_layer")

        print(f"Deleted {len(features_to_delete)} duplicate features.")

    # Fix invalid geometries in place and drop features whose geometry is null.
    print("Repairing geometries")
    arcpy.management.RepairGeometry(
        in_features=overture_classified_union,
        delete_null="DELETE_NULL",
        validation_method="OGC"
    )

    # Full dataset of buildings for this state.
    buildings_layer = overture_classified_union

    # Flood hazard inputs: the per-state SFHA layer and the CONUS estimated floodplain.
    floodplain_layer = "{}/floodplain/nfhl/sfha_dec_12_2024.gdb/{}_sfha".format(data_path, st)
    est_floodplain_layer = os.path.join(data_path, "estimated_floodplain/estimated_floodplain_CONUS.gdb/estimated_floodplain_CONUS")

    # Temporary clip outputs in the scratch workspace.
    sfha_output = os.path.join(scratch_gdb, "sfha_scratch")
    est_sfha_output = os.path.join(scratch_gdb, "est_sfha_scratch")

    # Census inputs: national blocks, the per-state block output, and state boundaries.
    census_blocks = os.path.join(data_path, "census/blocks/tlgdb_2024_a_us_block.gdb/Block20_proj")
    state_cb = os.path.join(data_path, "census/blocks/blocks_by_state.gdb", f"{st}_blocks")
    all_state_bounds = os.path.join(data_path, "census/state_boundaries/tl_2024_us_state_proj.shp")
    state_bounds = os.path.join(scratch_gdb, "state_bounds")

    # Extract the boundary polygon of the current state.
    print("Selecting the state")
    arcpy.analysis.Select(
        in_features=all_state_bounds,
        out_feature_class=state_bounds,
        where_clause=f"STUSPS = '{st}'"
    )

    # Cut the national census blocks down to the current state.
    print("Clipping census blocks to the state bounds")
    arcpy.analysis.PairwiseClip(
        in_features=census_blocks,
        clip_features=state_bounds,
        out_feature_class=state_cb
    )
    
    print("Clipping the buildings with the SFHA")
    arcpy.gapro.ClipLayer(buildings_layer, floodplain_layer, sfha_output)

    # Extract OBJECTID_1 values from the clip result for SFHA
    print("Finding objectid values of clip")
    with arcpy.da.SearchCursor(sfha_output, ["OBJECTID_1"]) as cursor:
        objectid_list = [row[0] for row in cursor]

    # Make sure "within_sfha" exists even when nothing was clipped: the spatial
    # join further down maps this field for every state, so the schema has to be
    # the same whether or not any building lies in the SFHA.
    field_names = [f.name for f in arcpy.ListFields(buildings_layer)]
    if "within_sfha" not in field_names:
        print("Adding field 'within_sfha'")
        arcpy.AddField_management(buildings_layer, "within_sfha", "SHORT")

    # Set "within_sfha" to 1 for every building that appears in the clip result
    if objectid_list:
        # Build a SQL query to select buildings within the intersection
        objectid_query = ", ".join(map(str, objectid_list))
        sql_query = f"OBJECTID_1 IN ({objectid_query})"

        # Use an UpdateCursor to set the "within_sfha" field to 1
        print("Updating buildings layers")
        with arcpy.da.UpdateCursor(buildings_layer, ["OBJECTID_1", "within_sfha"], sql_query) as cursor:
            for row in cursor:
                row[1] = 1
                cursor.updateRow(row)

        print(f"Updated {len(objectid_list)} buildings with 'within_sfha = 1'.")
    else:
        print("No buildings found within the floodplain.")

    # Repeat the same flagging with the estimated floodplain, first cut down to the state.
    print("Clipping est sfha to state")
    state_est_sfha = os.path.join(scratch_gdb, "state_est_sfha")
    arcpy.analysis.PairwiseClip(est_floodplain_layer, state_bounds, state_est_sfha)

    print("Clipping the buildings to state est sfha")
    arcpy.gapro.ClipLayer(buildings_layer, state_est_sfha, est_sfha_output)

    # Extract OBJECTID_1 values from the clip result for the estimated SFHA
    print("Finding objectid values of clip")
    with arcpy.da.SearchCursor(est_sfha_output, ["OBJECTID_1"]) as cursor:
        objectid_list = [row[0] for row in cursor]

    # As above, "within_est_sfha" is created unconditionally so the schema does
    # not depend on whether any building was clipped.
    field_names = [f.name for f in arcpy.ListFields(buildings_layer)]
    if "within_est_sfha" not in field_names:
        print("Adding field 'within_est_sfha'")
        arcpy.AddField_management(buildings_layer, "within_est_sfha", "SHORT")

    # Set "within_est_sfha" to 1 for every building that appears in the clip result
    if objectid_list:
        # Build a SQL query to select buildings within the intersection
        objectid_query = ", ".join(map(str, objectid_list))
        sql_query = f"OBJECTID_1 IN ({objectid_query})"

        # Use an UpdateCursor to set the "within_est_sfha" field to 1
        print("Updating buildings layers")
        with arcpy.da.UpdateCursor(buildings_layer, ["OBJECTID_1", "within_est_sfha"], sql_query) as cursor:
            for row in cursor:
                row[1] = 1
                cursor.updateRow(row)

        print(f"Updated {len(objectid_list)} buildings with 'within_est_sfha = 1'.")
    else:
        print("No buildings found within the floodplain.")

    # Attach the census block id (GEOID20) to every building and write the
    # per-state classified feature class.
    # The field map is assembled from one entry per output field and joined with
    # ';'. This yields the same string as the former single literal, which was
    # split across two physical lines and therefore not a valid string literal.
    union_src = "buildings_class_w_stacks_union"
    census_join_field_entries = [
        f'OBJECTID "OBJECTID" true true false 4 Long 0 0,First,#,{union_src},FID_buildings_class_w_stacks,-1,-1',
        f'id "id" true true false 32 Text 0 0,First,#,{union_src},id,0,31',
        f'height "height" true true false 4 Float 0 0,First,#,{union_src},height,-1,-1',
        f'source "source" true true false 100 Text 0 0,First,#,{union_src},source,0,99',
        f'update_time "update_time" true true false 100 Text 0 0,First,#,{union_src},update_time,0,99',
        f'subtype "subtype" true true false 100 Text 0 0,First,#,{union_src},subtype,0,99',
        f'class "class" true true false 100 Text 0 0,First,#,{union_src},class,0,99',
        f'level "level" true true false 4 Long 0 0,First,#,{union_src},level,-1,-1',
        f'num_floors "num_floors" true true false 4 Long 0 0,First,#,{union_src},num_floors,-1,-1',
        f'MAX_OCC_CLS "MAX_OCC_CLS" true true false 10485758 Text 0 0,First,#,{union_src},MAX_OCC_CLS,0,10485757',
        f'MAX_PRIM_OCC "MAX_PRIM_OCC" true true false 10485758 Text 0 0,First,#,{union_src},MAX_PRIM_OCC,0,10485757',
        f'MAX_UUID "MAX_UUID" true true false 10485758 Text 0 0,First,#,{union_src},MAX_UUID,0,10485757',
        f'MAX_IMAGE_DATE "MAX_IMAGE_DATE" true true false 8 Date 0 0,First,#,{union_src},MAX_IMAGE_DATE,-1,-1',
        f'MAX_HEIGHT "MAX_HEIGHT" true true false 4 Float 0 0,First,#,{union_src},MAX_HEIGHT,-1,-1',
        f'MAX_st_damcat "MAX_st_damcat" true true false 10485758 Text 0 0,First,#,{union_src},MAX_st_damcat,0,10485757',
        f'MAX_bid "MAX_bid" true true false 10485758 Text 0 0,First,#,{union_src},MAX_bid,0,10485757',
        f'res_or_not "res_or_not" true true false 4 Long 0 0,First,#,{union_src},res_or_not,-1,-1',
        f'within_sfha "within_sfha" true true false 2 Short 0 0,First,#,{union_src},within_sfha,-1,-1',
        f'within_est_sfha "within_est_sfha" true true false 2 Short 0 0,First,#,{union_src},within_est_sfha,-1,-1',
        f'GEOID20 "GEOID20" true true false 15 Text 0 0,First,#,{state_cb},GEOID20,0,14',
        f'SHAPE_Length "SHAPE_Length" false true true 8 Double 0 0,First,#,{union_src},SHAPE_Length,-1,-1',
        f'SHAPE_Area "SHAPE_Area" false true true 8 Double 0 0,First,#,{union_src},SHAPE_Area,-1,-1',
    ]

    print("Spatial joining census blocks with buildings")
    arcpy.analysis.SpatialJoin(
        target_features=overture_classified_union,
        join_features=state_cb,
        out_feature_class=os.path.join(data_path, f"overture/overture_classified.gdb/{st}_buildings_class"),
        join_operation="JOIN_ONE_TO_ONE",
        join_type="KEEP_ALL",
        field_mapping=";".join(census_join_field_entries),
        match_option="HAVE_THEIR_CENTER_IN",
        search_radius=None,
        distance_field_name="",
        match_fields=None
    )

    # List of fields to delete
    # (bookkeeping columns added by the spatial join)
    fields_to_delete = ["Join_Count", "TARGET_FID"]
    
    # Delete extra fields
    print("Deleting extra fields")
    arcpy.management.DeleteField(os.path.join(data_path, f"overture/overture_classified.gdb/{st}_buildings_class"), fields_to_delete)

    # Remove this state's intermediates before the next iteration.
    # The first two calls delete scratch feature classes by path; the others are
    # given MakeFeatureLayer results or scratch output paths.
    # NOTE(review): overture_classified_union and overture_classified were both
    # created with the layer name "overture_classified", so the last Delete may
    # target a layer that is already gone - confirm this is harmless.
    arcpy.management.Delete(os.path.join(scratch_gdb, "buildings_class"))
    arcpy.management.Delete(os.path.join(scratch_gdb, "buildings_class_nsi"))
    arcpy.management.Delete(overture_merged)
    arcpy.management.Delete(overture_classified_union)
    arcpy.management.Delete(sfha_output)
    arcpy.management.Delete(est_sfha_output)
    arcpy.management.Delete(identical_table)
    arcpy.management.Delete(overture_classified)
    
    # Report the wall-clock time spent on this state, in minutes.
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(round(elapsed_time/60, 2), "minutes")

## Run final operation to tag buildings if they are in a FEMA study area

In [None]:
# National NFHL footprint and the scratch location of its per-state clip.
nfhl_footprint = os.path.join(data_path, "floodplain/nfhl/nfhl_footprints.gdb/nfhl_footprint_simple")
state_nfhl_footprint = os.path.join(scratch_gdb, "state_nfhl_footprint")

# State boundary source and the scratch location of the selected state polygon.
all_state_bounds = os.path.join(data_path, "census/state_boundaries/tl_2024_us_state_proj.shp")
state_bounds = os.path.join(scratch_gdb, "state_bounds")

# Join columns that are not wanted in the final output.
fields_to_delete = ["Join_Count", "TARGET_FID", "OBJECTID", "Shape_Length_1", "Shape_Area_1"]

for st in stfips:
    print(f"Working on {st}")
    start_time = time.time()

    # Classified buildings produced by the previous cell, and the final output.
    buildings_layer = os.path.join(data_path, f"overture/overture_classified.gdb/{st}_buildings_class")
    output_buildings = os.path.join(results_path, f"building_footprints.gdb/{st}_buildings_fp")

    print("Selecting the state")
    arcpy.analysis.Select(
        in_features=all_state_bounds,
        out_feature_class=state_bounds,
        where_clause=f"STUSPS = '{st}'"
    )

    print("Clipping NFHL footprint to the state bounds")
    arcpy.analysis.PairwiseClip(
        in_features=nfhl_footprint,
        clip_features=state_bounds,
        out_feature_class=state_nfhl_footprint
    )

    # Every building is kept; the NFHL footprint attributes are attached where
    # the HAVE_THEIR_CENTER_IN match applies.
    print("Spatial joining nfhl footprint with buildings")
    arcpy.analysis.SpatialJoin(
        target_features=buildings_layer,
        join_features=state_nfhl_footprint,
        out_feature_class=output_buildings,
        join_operation="JOIN_ONE_TO_ONE",
        join_type="KEEP_ALL",
        match_option="HAVE_THEIR_CENTER_IN",
        search_radius=None,
        distance_field_name="",
        match_fields=None
    )

    print("Deleting extra fields")
    arcpy.management.DeleteField(output_buildings, fields_to_delete)

    # Give OBJECTID_1 the alias OBJECTID when that column is present.
    fields = [field.name for field in arcpy.ListFields(output_buildings)]
    if "OBJECTID_1" not in fields:
        print("'OBJECTID_1' column not found.")
    else:
        print("Renaming alias of 'OBJECTID_1'")
        arcpy.management.AlterField(output_buildings, "OBJECTID_1", new_field_alias="OBJECTID")
        print("Renaming successful.")

    # Report the wall-clock time spent on this state, in minutes.
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(round(elapsed_time / 60, 2), "minutes")

## Export tabular data to Parquet files for analysis in R

In [None]:
# Set the file geodatabase path
# (the geodatabase the FEMA study-area cell writes its per-state outputs to)
gdb_path = os.path.join(results_path, "building_footprints.gdb")

# List all feature classes in the geodatabase
# The workspace is switched first, so the bare names in `feature_classes`
# refer to this geodatabase.
arcpy.env.workspace = gdb_path
feature_classes = arcpy.ListFeatureClasses()

# Folder that receives one Parquet file per feature class.
output_folder = os.path.join(results_path, "building_footprint_dfs")

# Function to export a feature class attribute table to a Parquet file
def feature_class_to_dataframe(fc):
    """Write the attribute table of one feature class to a Parquet file.

    All non-geometry fields are read into a pandas DataFrame, which is saved as
    ``<prefix>_buildings_fp.parquet`` in the module-level ``output_folder``,
    where ``<prefix>`` is the part of ``fc`` before the first underscore.

    Args:
        fc: Feature class name, resolved against the current arcpy workspace.

    Returns:
        None. Despite the function name, the DataFrame is not returned; the
        Parquet file is the only output.
    """
    state_name = fc.split("_")[0]

    # Skip the geometry field; only tabular attributes are exported.
    fields = [f.name for f in arcpy.ListFields(fc) if f.type != "Geometry"]
    with arcpy.da.SearchCursor(fc, fields) as cursor:
        data = list(cursor)

    # Create the destination folder on first use so the write cannot fail on a
    # missing directory.
    os.makedirs(output_folder, exist_ok=True)
    pd.DataFrame(data, columns=fields).to_parquet(os.path.join(output_folder, state_name + "_buildings_fp.parquet"))
    print(f"Fininshed {state_name}")
    return

# Export every feature class to Parquet. feature_class_to_dataframe returns
# None, so this dict maps each feature class name to None; the Parquet files
# written as a side effect are the actual output.
dataframes = {fc: feature_class_to_dataframe(fc) for fc in feature_classes}