In [25]:
import time
import requests
import pandas as pd
import geopandas as gpd

In [26]:
# Resolve the Feature Service URL from the ArcGIS Item ID
ITEM_ID = "2b245b7f816044d7a779a61a5844be23"

item_url = f"https://www.arcgis.com/sharing/rest/content/items/{ITEM_ID}"
item_json = requests.get(item_url, params={"f": "json"}).json()

# Many ArcGIS items have a 'url' that points to the service
service_url = item_json.get("url")
service_url

'https://services.arcgis.com/P3ePLMYs2RVChkJx/arcgis/rest/services/USA_Flood_Hazard_Reduced_Set_gdb/FeatureServer'

In [27]:
# Pick the sublayer (0) and confirm fields
LAYER_ID = 0
layer_url = f"{service_url}/{LAYER_ID}"

layer_info = requests.get(layer_url, params={"f": "json"}).json()
layer_info.keys(), layer_info.get("name")

(dict_keys(['currentVersion', 'id', 'name', 'inDedicatedHosting', 'preferredHost', 'type', 'serviceItemId', 'cacheMaxAge', 'displayField', 'description', 'copyrightText', 'defaultVisibility', 'editingInfo', 'relationships', 'isDataVersioned', 'hasContingentValuesDefinition', 'supportsAppend', 'supportsCalculate', 'supportsASyncCalculate', 'supportsTruncate', 'supportsAttachmentsByUploadId', 'supportsAttachmentsResizing', 'supportsRollbackOnFailureParameter', 'supportsStatistics', 'supportsExceedsLimitStatistics', 'supportsAdvancedQueries', 'supportsValidateSql', 'supportsCoordinatesQuantization', 'supportsLayerOverrides', 'supportsTilesAndBasicQueriesMode', 'supportsFieldDescriptionProperty', 'supportsQuantizationEditMode', 'supportsColumnStoreIndex', 'supportsApplyEditsWithGlobalIds', 'supportsMultiScaleGeometry', 'supportsReturningQueryGeometry', 'enableNullGeometry', 'hasGeometryProperties', 'geometryProperties', 'advancedQueryCapabilities', 'advancedQueryAnalyticCapabilities', 'que

In [28]:
[f["name"] for f in layer_info["fields"]][:30]

['OBJECTID',
 'DFIRM_ID',
 'VERSION_ID',
 'FLD_AR_ID',
 'STUDY_TYP',
 'FLD_ZONE',
 'ZONE_SUBTY',
 'SFHA_TF',
 'STATIC_BFE',
 'V_DATUM',
 'DEPTH',
 'LEN_UNIT',
 'VELOCITY',
 'VEL_UNIT',
 'DUAL_ZONE',
 'SOURCE_CIT',
 'GFID',
 'esri_symbology',
 'GlobalID',
 'Shape__Area',
 'Shape__Length']

In [29]:
def fetch_arcgis_layer_geojson(
    layer_url: str,
    where: str = "1=1",
    out_fields: str = "*",
    bbox: tuple | None = None,       # (minx, miny, maxx, maxy) in WGS84 (EPSG:4326)
    chunk_size: int = 5000,
    sleep_s: float = 0.2             # small pause to be polite to the service
) -> gpd.GeoDataFrame:
    """
    Fetch features from an ArcGIS FeatureServer/MapServer layer using pagination.
    Returns a GeoDataFrame built from GeoJSON.
    """
    query_url = f"{layer_url}/query"

    params = {
        "f": "geojson",
        "where": where,
        "outFields": out_fields,
        "returnGeometry": "true",
        "outSR": 4326,                 # return geometry in WGS84 for easy mapping & joins
        "resultOffset": 0,
        "resultRecordCount": chunk_size
    }

    # Filter to avoid nationwide data
    if bbox is not None:
        minx, miny, maxx, maxy = bbox
        params.update({
            "geometry": f"{minx},{miny},{maxx},{maxy}",
            "geometryType": "esriGeometryEnvelope",
            "inSR": 4326,
            "spatialRel": "esriSpatialRelIntersects"
        })

    # Pagination loop
    all_features = []
    total_batches = 0

    while True:
        r = requests.get(query_url, params=params, timeout=120)
        r.raise_for_status()
        gj = r.json()

        batch = gj.get("features", [])
        if not batch:
            break

        all_features.extend(batch)
        total_batches += 1

        # Move to next page
        params["resultOffset"] += chunk_size

        # Stop conditions:
        # - If not exceeded transfer limit AND we got fewer than chunk_size, we're done
        exceeded = gj.get("exceededTransferLimit", False)
        if (not exceeded) and (len(batch) < chunk_size):
            break

        time.sleep(sleep_s)

    gdf = gpd.GeoDataFrame.from_features(all_features, crs="EPSG:4326")
    print(f"Pulled {len(gdf):,} features across {total_batches} request batches.")
    return gdf

In [30]:
# Keep only relevant fields
out_fields = "DFIRM_ID,FLD_ZONE,ZONE_SUBTY,SFHA_TF,STATIC_BFE,V_DATUM,GFID,GlobalID"

# Display High-Risk only
where = "SFHA_TF = 'T'"

# Pull South Florida only
county_bboxes = {
    "Miami-Dade":  (-80.95, 25.05, -80.05, 25.98),
    "Broward":     (-80.60, 25.95, -79.90, 26.35),
    "Palm Beach":  (-80.90, 26.35, -79.90, 26.95),
    "Monroe":      (-82.00, 24.30, -80.10, 25.20)
}

In [31]:
# Pull NFHL for each county and combine
all_nfhl = []

for county_name, bbox in county_bboxes.items():
    print(f"\n--- Downloading NFHL for {county_name} ---")
    gdf = fetch_arcgis_layer_geojson(
        layer_url=layer_url,
        where=where,
        out_fields=out_fields,
        bbox=bbox,
        chunk_size=5000
    )
    gdf["county_name"] = county_name
    all_nfhl.append(gdf)

nfhl_sf_raw = gpd.GeoDataFrame(pd.concat(all_nfhl, ignore_index=True), crs="EPSG:4326")
print("\nCombined shape:", nfhl_sf_raw.shape)


--- Downloading NFHL for Miami-Dade ---
Pulled 2,000 features across 1 request batches.

--- Downloading NFHL for Broward ---
Pulled 2,000 features across 1 request batches.

--- Downloading NFHL for Palm Beach ---
Pulled 2,000 features across 1 request batches.

--- Downloading NFHL for Monroe ---
Pulled 1,606 features across 1 request batches.

Combined shape: (7606, 10)


In [32]:
# Cleaning
# ----------------------------
nfhl_sf = nfhl_sf_raw.copy()

# Standardize key fields
nfhl_sf["FLD_ZONE"] = nfhl_sf["FLD_ZONE"].astype(str).str.upper().replace({"NONE": None})
nfhl_sf["ZONE_SUBTY"] = nfhl_sf["ZONE_SUBTY"].astype(str).str.upper().replace({"NONE": None})
nfhl_sf["SFHA_TF"] = nfhl_sf["SFHA_TF"].astype(str).str.upper()

# Numeric conversion
nfhl_sf["STATIC_BFE"] = pd.to_numeric(nfhl_sf["STATIC_BFE"], errors="coerce")

# Classify flood risk level
def classify_flood_risk(zone: str) -> str:
    if not zone or zone == "NAN":
        return "Unknown"
    zone = zone.upper()
    if zone.startswith(("VE", "V")):
        return "Coastal High Risk (V/VE)"
    if zone.startswith(("AE", "A")):
        return "Riverine High Risk (A/AE)"
    if zone.startswith("X"):
        return "Moderate/Low Risk (X)"
    return "Other"

nfhl_sf["flood_risk_level"] = nfhl_sf["FLD_ZONE"].apply(classify_flood_risk)

# Geometry cleanup: drop empties, fix invalid geometries (safe for polygons)
nfhl_sf = nfhl_sf[~nfhl_sf.geometry.is_empty & nfhl_sf.geometry.notnull()].copy()
nfhl_sf["geometry"] = nfhl_sf["geometry"].buffer(0)  # common fix for invalid polygons

# Quick checks
print("\nFlood risk distribution:\n", nfhl_sf["flood_risk_level"].value_counts(dropna=False))
print("\nGeometry types:\n", nfhl_sf.geometry.type.value_counts())


Flood risk distribution:
 flood_risk_level
Riverine High Risk (A/AE)    6539
Coastal High Risk (V/VE)     1067
Name: count, dtype: int64

Geometry types:
 Polygon         7542
MultiPolygon      64
Name: count, dtype: int64


In [34]:
nfhl_sf.head()

Unnamed: 0,geometry,DFIRM_ID,FLD_ZONE,ZONE_SUBTY,SFHA_TF,STATIC_BFE,V_DATUM,GFID,GlobalID,county_name,flood_risk_level
0,"POLYGON ((-80.45453 25.71259, -80.45452 25.712...",12086C,AH,,T,9.0,NGVD29,e1112337-2edd-4ba0-a5d7-91a124dab4d0,1c38ae3b-5266-4f40-9265-afc3cd6abf71,Miami-Dade,Riverine High Risk (A/AE)
1,"POLYGON ((-80.30065 25.62836, -80.30074 25.628...",12086C,AE,,T,11.0,NGVD29,e1112337-2edd-4ba0-a5d7-91a124dab4d0,919a72bf-b4af-433c-8f10-9877ddbcb0f0,Miami-Dade,Riverine High Risk (A/AE)
2,"POLYGON ((-80.36867 25.35887, -80.36868 25.358...",12086C,VE,,T,7.0,NGVD29,e1112337-2edd-4ba0-a5d7-91a124dab4d0,c2309f96-5603-447a-9060-285c6317d2b8,Miami-Dade,Coastal High Risk (V/VE)
3,"POLYGON ((-80.41872 25.63837, -80.41903 25.638...",12086C,AH,,T,9.0,NGVD29,e1112337-2edd-4ba0-a5d7-91a124dab4d0,84f58c41-10e0-4dc7-86ee-0887873897b3,Miami-Dade,Riverine High Risk (A/AE)
4,"POLYGON ((-80.38294 25.79423, -80.38294 25.794...",12086C,AH,,T,8.0,NGVD29,e1112337-2edd-4ba0-a5d7-91a124dab4d0,fdd735e0-73fa-4f1d-9b2e-3577c25c8e8f,Miami-Dade,Riverine High Risk (A/AE)


In [37]:
nfhl_sf.to_parquet("/data/ARCGISFloodHazardSouthFlorida_dataset CLEANED.parquet", index=False)
nfhl_sf.to_file("/data/ARCGISFloodHazardSouthFlorida_dataset CLEANED.geojson", driver="GeoJSON")