# Download Overture building footprints

In [5]:
import arcpy
from arcpy.sa import *
import pandas as pd
import numpy as np
import os
import urllib.request
import zipfile
from datetime import datetime
import duckdb

# Let geoprocessing tools overwrite an existing output dataset instead of
# failing when the output name is already taken.
arcpy.env.overwriteOutput = True

def getNow():
    """Return the current local time as a ``YYYY-MM-DD HH:MM:SS`` string."""
    current = datetime.now()
    return f"{current:%Y-%m-%d %H:%M:%S}"
    
# try:
#     import duckdb
# except:
#     !pip install duckdb==1.0.0
#     import duckdb

# Placeholders: set both before running.
# data_path   - root folder; the state boundary shapefile path and the output
#               geodatabase path are built by joining onto it.
# scratch_gdb - workspace in which the temporary "scratch_buildings" feature
#               class is created (and deleted) for each state.
data_path = "your_path"
scratch_gdb = "your_scratch_path"

In [6]:
# Connect to an in-memory DuckDB database and enable the two extensions this
# notebook relies on: `spatial` for the ST_* functions and `httpfs` for
# reading Parquet straight from S3.
conn = duckdb.connect()
conn.sql("install spatial;load spatial;")
conn.sql("install httpfs;load httpfs;")
# The Overture bucket (overturemaps-us-west-2) is hosted in us-west-2; set the
# region explicitly so the S3 reads do not depend on whatever default region
# the local environment happens to provide.
conn.sql("SET s3_region='us-west-2';")

In [7]:
# Census state FIPS code -> two-letter postal abbreviation (50 states + DC).
# NOTE(review): some entries carry a "No state layer available" remark; every
# entry is still included in `stfips`, so confirm whether the remark is still
# relevant.
fips_to_abbreviation = {
    "01": "AL",  # Alabama
    "02": "AK",  # Alaska
    "04": "AZ",  # Arizona: No state layer available
    "05": "AR",  # Arkansas
    "06": "CA",  # California
    "08": "CO",  # Colorado: No state layer available
    "09": "CT",  # Connecticut
    "10": "DE",  # Delaware
    "11": "DC",  # District of Columbia
    "12": "FL",  # Florida: No state layer available
    "13": "GA",  # Georgia
    "15": "HI",  # Hawaii
    "16": "ID",  # Idaho
    "17": "IL",  # Illinois: No state layer available
    "18": "IN",  # Indiana
    "19": "IA",  # Iowa
    "20": "KS",  # Kansas
    "21": "KY",  # Kentucky
    "22": "LA",  # Louisiana
    "23": "ME",  # Maine
    "24": "MD",  # Maryland
    "25": "MA",  # Massachusetts
    "26": "MI",  # Michigan
    "27": "MN",  # Minnesota
    "28": "MS",  # Mississippi: No state layer available
    "29": "MO",  # Missouri
    "30": "MT",  # Montana
    "31": "NE",  # Nebraska
    "32": "NV",  # Nevada
    "33": "NH",  # New Hampshire
    "34": "NJ",  # New Jersey
    "35": "NM",  # New Mexico: No state layer available
    "36": "NY",  # New York
    "37": "NC",  # North Carolina
    "38": "ND",  # North Dakota
    "39": "OH",  # Ohio
    "40": "OK",  # Oklahoma
    "41": "OR",  # Oregon
    "42": "PA",  # Pennsylvania
    "44": "RI",  # Rhode Island
    "45": "SC",  # South Carolina
    "46": "SD",  # South Dakota
    "47": "TN",  # Tennessee
    "48": "TX",  # Texas: No state layer available
    "49": "UT",  # Utah
    "50": "VT",  # Vermont
    "51": "VA",  # Virginia
    "53": "WA",  # Washington
    "54": "WV",  # West Virginia
    "55": "WI",  # Wisconsin
    "56": "WY",  # Wyoming
}

# FIPS codes to process, in the dictionary's insertion order.
stfips = list(fips_to_abbreviation)

# Path to the Census state boundaries shapefile (WGS84). The processing loop
# filters it by the STATEFP attribute to fetch one state's outline at a time.
state_bounds = os.path.join(data_path, "census/state_boundaries/tl_2024_us_state_wgs84.shp")

In [None]:
# For every state: look up its outline, select the Overture buildings that
# intersect it through DuckDB, stage them in a scratch feature class, project
# the result into the output geodatabase, then remove the scratch layer.

# The scratch workspace is the same for every state, so set it once.
arcpy.env.workspace = scratch_gdb

# Attribute columns copied from the DuckDB view into the feature class.
# The same list drives the SELECT and the InsertCursor so the two cannot
# drift apart; the geometry column is appended last in both.
building_columns = ["id", "height", "source", "update_time",
                    "subtype", "class", "level", "num_floors"]
column_list = ", ".join(building_columns)

scratch_fc = os.path.join(scratch_gdb, "scratch_buildings")
output_gdb = os.path.join(data_path, "overture/overture_2024_11_13.gdb")

for st in stfips:

    # Get the correct state abbreviation
    state_name = fips_to_abbreviation[st]
    print("Working on " + state_name)
    sql_query = f"STATEFP = '{st}'"

    # Reset per state so a missing boundary can never reuse the previous
    # state's outline or bounding box.
    state_wkt = None
    xmin, ymin, xmax, ymax = None, None, None, None

    print("- Finding state outline")
    # Use a search cursor to get the state's WKT and bounding box
    with arcpy.da.SearchCursor(state_bounds, ["SHAPE@"], sql_query) as cursor:
        for row in cursor:
            geometry = row[0]  # "SHAPE@" returns the geometry object

            # WKT is embedded in the DuckDB query below
            state_wkt = geometry.WKT

            # Bounding box (extent) of the geometry
            extent = geometry.extent
            xmin, ymin = extent.XMin, extent.YMin
            xmax, ymax = extent.XMax, extent.YMax

            # Only the first matching record is needed
            break

    if state_wkt is None:
        # Without an outline the query would contain "None" values; skip.
        print("- No boundary found for " + state_name + ", skipping")
        continue

    print("- Creating buildings_view")
    # Create a view over the building dataset, within the bounding box and
    # intersecting the state. The bbox comparison is a cheap prefilter that
    # keeps buildings whose bbox lies inside the state's extent; ST_Intersects
    # then does the exact test against the state outline.
    # NOTE(review): a building whose bbox crosses the extent edge is dropped
    # by the prefilter even if it intersects the state - confirm intended.
    sql = f"""create or replace view buildings_view as select *,
            json_extract_string(sources, '$[0].dataset') AS source,
            substr(json_extract_string(sources, '$[0].update_time'), 1, 10) AS update_time,
            ST_AsWKB(geometry) AS geometry_wkb
          from read_parquet('s3://overturemaps-us-west-2/release/2024-11-13.0/theme=buildings/type=building/*',filename=false, hive_partitioning=1)
          where bbox.xmin >= {xmin}
            and bbox.ymin >= {ymin}
            and bbox.xmax <= {xmax}
            and bbox.ymax <= {ymax}
            and ST_Intersects(ST_GeomFromText('{state_wkt}'), geometry);
            """
    conn.sql(sql)

    # Create a feature class in the scratch GDB to write into from DuckDB
    print("- Creating scratch layer")
    arcPlaces = arcpy.management.CreateFeatureclass(out_path=arcpy.env.workspace,
                                                    out_name="scratch_buildings",
                                                    geometry_type="POLYGON",
                                                    has_m="DISABLED",
                                                    has_z="DISABLED",
                                                    spatial_reference=arcpy.SpatialReference(4326)).getOutput(0)

    arcpy.management.AddField(in_table=arcPlaces,field_name="id",field_type="TEXT",field_length=32)
    arcpy.management.AddField(in_table=arcPlaces,field_name="height",field_type="FLOAT")
    arcpy.management.AddField(in_table=arcPlaces,field_name="source",field_type="TEXT",field_length=100)
    arcpy.management.AddField(in_table=arcPlaces,field_name="update_time",field_type="TEXT",field_length=100)
    arcpy.management.AddField(in_table=arcPlaces,field_name="subtype",field_type="TEXT",field_length=100)
    arcpy.management.AddField(in_table=arcPlaces,field_name="class",field_type="TEXT",field_length=100)
    arcpy.management.AddField(in_table=arcPlaces,field_name="level",field_type="LONG")
    # field_length only applies to text fields, so it is omitted here.
    arcpy.management.AddField(in_table=arcPlaces,field_name="num_floors",field_type="LONG")

    print("- Getting buildings from duckdb")
    # num_floors is selected too, so the field created above is populated.
    sql = f"""select {column_list},
                     geometry_wkb
                     from buildings_view;"""
    duckPlaces = conn.sql(sql)

    print("- Inserting buildings to scratch")
    inserted = 0
    with arcpy.da.InsertCursor(arcPlaces, building_columns + ["shape@"]) as iCursor:
        # fetchone() returns None once the result set is exhausted.
        for row in iter(duckPlaces.fetchone, None):
            row = list(row)
            # Last column is the WKB geometry; convert it to an arcpy geometry
            row[-1] = arcpy.FromWKB(row[-1])
            iCursor.insertRow(row)
            inserted += 1
            # Report after the insert so the count matches rows written
            if inserted % 100000 == 0:
                print('- Inserted {} Building rows at {}'.format(str(inserted),getNow()))
    print("- Inserted {} Building rows in total".format(inserted))

    print("- Projecting " + state_name)
    arcpy.management.Project(scratch_fc,
                             os.path.join(output_gdb, state_name + "_buildings"),
                             arcpy.SpatialReference("USA Contiguous Albers Equal Area Conic USGS"))

    print("- Cleaning up " + state_name)
    arcpy.management.Delete(scratch_fc)
    