# Download USA Structures

In [2]:
import arcpy
from arcpy.sa import *  # star import stays first so names imported below are never shadowed by it

import os
import urllib.request
import zipfile
from urllib.parse import urljoin

import numpy as np
import pandas as pd
import requests
from arcgis.features import GeoAccessor, GeoSeriesAccessor
from bs4 import BeautifulSoup

# Folder placeholders: replace both with real locations before running
scratch_folder = "your_scratch_path"  # holds the temporary download and the extracted archives
data_path = "your_path"               # also used as the arcpy workspace below

# Geoprocessing environment shared by every arcpy call in this notebook
arcpy.env.overwriteOutput = True               # existing outputs are replaced on re-run
arcpy.env.parallelProcessingFactor = "100%"
arcpy.env.outputCoordinateSystem = None        # no environment-level output coordinate system
arcpy.env.workspace = data_path

In [4]:
# Timeout, in seconds, passed to every `requests.get` call in this notebook (30 minutes)
TIMEOUT = 30 * 60

# Index page that lists the USA Structures downloads
url = "https://disasters.geoplatform.gov/USA_Structures/"

# Fetch the index page and stop on an HTTP error status, so an error page is
# never parsed as if it were the real listing
response = requests.get(url, timeout=TIMEOUT)
response.raise_for_status()
html = response.text

# Parse the HTML
soup = BeautifulSoup(html, "html.parser")

# Collect every hyperlink target. Each href is resolved against the page URL so
# that relative links become absolute, directly downloadable URLs; hrefs that
# are already absolute pass through unchanged.
links = [urljoin(url, anchor["href"]) for anchor in soup.find_all("a", href=True)]

In [6]:
# Census state FIPS code -> two-letter postal abbreviation, for the 50 states
# plus the District of Columbia. The trailing notes are carried over from the
# original notebook.
fips_to_abbreviation = {
    "01": "AL",  # Alabama
    "02": "AK",  # Alaska
    "04": "AZ",  # Arizona (no state layer available)
    "05": "AR",  # Arkansas
    "06": "CA",  # California
    "08": "CO",  # Colorado (no state layer available)
    "09": "CT",  # Connecticut
    "10": "DE",  # Delaware
    "11": "DC",  # District of Columbia
    "12": "FL",  # Florida (no state layer available)
    "13": "GA",  # Georgia
    "15": "HI",  # Hawaii
    "16": "ID",  # Idaho
    "17": "IL",  # Illinois (no state layer available)
    "18": "IN",  # Indiana
    "19": "IA",  # Iowa
    "20": "KS",  # Kansas
    "21": "KY",  # Kentucky
    "22": "LA",  # Louisiana
    "23": "ME",  # Maine
    "24": "MD",  # Maryland
    "25": "MA",  # Massachusetts
    "26": "MI",  # Michigan
    "27": "MN",  # Minnesota
    "28": "MS",  # Mississippi (no state layer available)
    "29": "MO",  # Missouri
    "30": "MT",  # Montana
    "31": "NE",  # Nebraska
    "32": "NV",  # Nevada
    "33": "NH",  # New Hampshire
    "34": "NJ",  # New Jersey
    "35": "NM",  # New Mexico (no state layer available)
    "36": "NY",  # New York
    "37": "NC",  # North Carolina
    "38": "ND",  # North Dakota
    "39": "OH",  # Ohio
    "40": "OK",  # Oklahoma
    "41": "OR",  # Oregon
    "42": "PA",  # Pennsylvania
    "44": "RI",  # Rhode Island
    "45": "SC",  # South Carolina
    "46": "SD",  # South Dakota
    "47": "TN",  # Tennessee
    "48": "TX",  # Texas (no state layer available)
    "49": "UT",  # Utah
    "50": "VT",  # Vermont
    "51": "VA",  # Virginia
    "53": "WA",  # Washington
    "54": "WV",  # West Virginia
    "55": "WI",  # Wisconsin
    "56": "WY",  # Wyoming
}

# NOTE: despite its name, `stfips` holds the postal abbreviations (the dict
# values), in the same order as the mapping above -- not the FIPS codes.
stfips = [abbreviation for abbreviation in fips_to_abbreviation.values()]

In [None]:
# One row per scraped link. The regex captures the two capital letters that sit
# immediately before a trailing ".zip"; links without that pattern yield NaN.
link_table = pd.DataFrame({"url": links})
state_codes = link_table["url"].str.extract(r"([A-Z]{2})\.zip$", expand=False)

# Flag rows whose captured code is a known state abbreviation, keep only those
# rows, and renumber the index from zero.
usa_struc_df = (
    link_table
    .assign(state=state_codes, valid_state=state_codes.isin(stfips))
    .loc[lambda frame: frame["valid_state"]]
    .reset_index(drop=True)
)


In [33]:
# Scratch locations: every state's archive is downloaded to the same zip file
# (rewritten for each state) and extracted under `unzip_path`
download_path = os.path.join(scratch_folder, "download_struc.zip")
unzip_path = os.path.join(scratch_folder, "usa_strucs_downloads")

# Folder that will contain the output file geodatabase
gdb_folder = os.path.join(data_path, "USA_structures")

# Ensure directories exist. The geodatabase folder is created here as well,
# because CreateFileGDB needs an existing output folder and nothing else in the
# notebook creates it.
os.makedirs(unzip_path, exist_ok=True)
os.makedirs(gdb_folder, exist_ok=True)

# Make the gdb. With arcpy.env.overwriteOutput enabled, re-running this cell
# is expected to replace an existing geodatabase of the same name.
arcpy.management.CreateFileGDB(gdb_folder, "usa_structures")

# Path of the geodatabase just created, built from components so the separator
# matches the platform
out_gdb = os.path.join(gdb_folder, "usa_structures.gdb")

In [None]:
# Download, extract, project and repair the structures layer of every state
# listed in `usa_struc_df`.

# The target spatial reference is identical for all states, so build it once
target_sr = arcpy.SpatialReference("USA Contiguous Albers Equal Area Conic USGS")

# Number of bytes written to disk per streamed chunk (1 MiB)
CHUNK_SIZE = 1024 * 1024

for state, file_url in zip(usa_struc_df["state"], usa_struc_df["url"]):
    print(f"Processing {state}: {file_url}")

    # Download file. The body is streamed to disk chunk by chunk instead of
    # being held in memory as a whole, and an HTTP error status aborts here
    # rather than leaving an error page on disk that fails later as a bad zip.
    with requests.get(file_url, stream=True, timeout=TIMEOUT) as response:
        response.raise_for_status()
        with open(download_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                file.write(chunk)

    # Unzip file into a folder of its own, so the walk below only visits this
    # state's data and not everything extracted by earlier iterations
    state_unzip_path = os.path.join(unzip_path, state)
    with zipfile.ZipFile(download_path, "r") as zip_ref:
        zip_ref.extractall(state_unzip_path)

    # Look for the polygon feature class named "<state>_Structures"
    target_name = f"{state}_Structures"
    output_fc = os.path.join(out_gdb, state + "_Structures")
    found_layer = False

    walk = arcpy.da.Walk(state_unzip_path, datatype="FeatureClass", type="Polygon")

    for dirpath, dirnames, filenames in walk:
        for filename in filenames:

            if filename == target_name:
                fc_path = os.path.join(dirpath, filename)

                # Project into the output geodatabase, then repair geometry in place
                arcpy.management.Project(fc_path, output_fc, target_sr)
                arcpy.management.RepairGeometry(output_fc)
                print(f"- Projected and repaired {filename} to {output_fc}")
                found_layer = True

    # Make a missing layer visible instead of silently reporting success
    if not found_layer:
        print(f"- WARNING: no polygon feature class named {target_name} found for {state}")

    print(f"- Finished processing {state}")