# Download FEMA floodplain data

In [None]:
############ modified from https://github.com/jkrohn5/Code-Snips/blob/main/downloadfloodhazard.py #################

import os
import requests
import json
import re
import io
import zipfile
import arcpy
import csv
import time
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
import urllib.request

# --- arcpy geoprocessing environment -------------------------------------
# Existing outputs may be overwritten, the parallel processing factor is
# set to "100%", and the output coordinate system is the USGS version of
# USA Contiguous Albers Equal Area Conic.
arcpy.env.overwriteOutput = True
arcpy.env.outputCoordinateSystem = arcpy.SpatialReference(
    "USA Contiguous Albers Equal Area Conic USGS"
)
arcpy.env.parallelProcessingFactor = "100%"

# --- working directory -----------------------------------------------------
# Placeholder: replace with the folder that should hold all notebook
# inputs and outputs before running.
data_path = "your_path_here"

# Every relative path used further down resolves against data_path.
os.chdir(data_path)

# Root URL of FEMA's state-level NFHL output.
base_url = 'https://hazards.fema.gov/nfhlv2/output/State/'

In [None]:
# Census state FIPS code -> two-letter state abbreviation, covering the
# 50 states plus the District of Columbia, in ascending FIPS order.
# Per the notes kept with this table, no state layer is available for
# Arizona (04), Colorado (08), Florida (12), Illinois (17),
# Mississippi (28), New Mexico (35) and Texas (48).
fips_to_abbreviation = dict([
    ('01', 'AL'), ('02', 'AK'), ('04', 'AZ'), ('05', 'AR'), ('06', 'CA'),
    ('08', 'CO'), ('09', 'CT'), ('10', 'DE'), ('11', 'DC'), ('12', 'FL'),
    ('13', 'GA'), ('15', 'HI'), ('16', 'ID'), ('17', 'IL'), ('18', 'IN'),
    ('19', 'IA'), ('20', 'KS'), ('21', 'KY'), ('22', 'LA'), ('23', 'ME'),
    ('24', 'MD'), ('25', 'MA'), ('26', 'MI'), ('27', 'MN'), ('28', 'MS'),
    ('29', 'MO'), ('30', 'MT'), ('31', 'NE'), ('32', 'NV'), ('33', 'NH'),
    ('34', 'NJ'), ('35', 'NM'), ('36', 'NY'), ('37', 'NC'), ('38', 'ND'),
    ('39', 'OH'), ('40', 'OK'), ('41', 'OR'), ('42', 'PA'), ('44', 'RI'),
    ('45', 'SC'), ('46', 'SD'), ('47', 'TN'), ('48', 'TX'), ('49', 'UT'),
    ('50', 'VT'), ('51', 'VA'), ('53', 'WA'), ('54', 'WV'), ('55', 'WI'),
    ('56', 'WY'),
])

# All FIPS codes, in the same order as the mapping above.
stfips = list(fips_to_abbreviation)

## Scrape HTML to get download links

In [None]:
# Fetch the NFHL search-result page and cache it on disk as nfhl.html so the
# parsing step can work from the local copy.
url = 'https://hazards.fema.gov/femaportal/NFHL/searchResult'
output_file = 'nfhl.html'

try:
    response = requests.get(url, timeout=120)  # Give up after 120 seconds
    response.raise_for_status()  # Raise for any non-success HTTP status

    # Write explicitly as UTF-8: the parsing step reopens this file with
    # encoding='utf-8', and the platform default encoding may differ
    # (e.g. cp1252 on Windows), which would garble or reject some characters.
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(response.text)

    print(f"Downloaded the file successfully: {output_file}")
except requests.exceptions.RequestException as e:
    # Best-effort: report the failure and let the notebook continue.
    print(f"An error occurred: {e}")


## Create dataframe of download links

In [None]:
# Parse the first HTML table of nfhl.html into a pandas DataFrame, clean the
# county and date columns, attach an absolute download URL and a state
# abbreviation to every row, then save the result as a CSV.

# Define file paths
input_file = 'nfhl.html'
output_file = 'parsed_nfhl_table.csv'

# Load the HTML file
with open(input_file, 'r', encoding='utf-8') as file:
    soup = BeautifulSoup(file, 'html.parser')

# Find the first table
table = soup.find('table')

# Column names come from the <th> cells: lower-cased, spaces -> underscores
headers = [header.text.strip().lower().replace(' ', '_') for header in table.find_all('th')]

# Extract table rows
rows = []
for row in table.find('tbody').find_all('tr'):
    cols = [col.text.strip() for col in row.find_all('td')]

    # Keep only the first line of the "county" cell (drops the repeated
    # name and extra whitespace that follow it)
    if len(cols) > 1:
        cols[1] = cols[1].split("\n")[0].strip()

    # Reformat the "update_date" cell as MM/DD/YYYY
    if len(cols) > 3:
        raw_date = cols[3]

        try:
            # Use the first line only, then drop the second-to-last token
            # (presumably the timezone name, which strptime cannot parse
            # reliably) before parsing.
            cleaned_date = raw_date.split('\n')[0].strip()
            split_date = cleaned_date.split()
            del split_date[-2]
            cleaned_date_without_tz = " ".join(split_date)

            parsed_date = datetime.strptime(cleaned_date_without_tz, "%a %b %d %H:%M:%S %Y")
            cols[3] = parsed_date.strftime("%m/%d/%Y")
        except (ValueError, IndexError):
            # ValueError: the text does not match the expected date layout.
            # IndexError: the cell is empty or has fewer than two tokens.
            # Either way, deliberately keep the original text.
            pass

    # Build the absolute download URL from the row's first link.
    link = row.find('a')
    href = link.get('href') if link is not None else None
    if not href:
        # A row without a link cannot be downloaded later; skip it instead
        # of aborting the whole parse.
        print(f"Skipping row without a download link: {cols}")
        continue
    download_url = "https://hazards.fema.gov/femaportal/NFHL/" + href.replace(' ', '%20')
    cols.append(download_url)

    rows.append(cols)

# Add "download_url" to headers
headers.append("download_url")

# Create DataFrame; the original "download" column is replaced by
# "download_url", so drop it
df = pd.DataFrame(rows, columns=headers)
df = df.drop('download', axis=1)

# Dictionary mapping full state names to abbreviations
state_abbreviations = {
    'ALABAMA': 'AL', 'ALASKA': 'AK', 'ARIZONA': 'AZ', 'ARKANSAS': 'AR', 'CALIFORNIA': 'CA',
    'COLORADO': 'CO', 'CONNECTICUT': 'CT', 'DISTRICT OF COLUMBIA': 'DC', 'DELAWARE': 'DE', 'FLORIDA': 'FL', 'GEORGIA': 'GA',
    'HAWAII': 'HI', 'IDAHO': 'ID', 'ILLINOIS': 'IL', 'INDIANA': 'IN', 'IOWA': 'IA',
    'KANSAS': 'KS', 'KENTUCKY': 'KY', 'LOUISIANA': 'LA', 'MAINE': 'ME', 'MARYLAND': 'MD',
    'MASSACHUSETTS': 'MA', 'MICHIGAN': 'MI', 'MINNESOTA': 'MN', 'MISSISSIPPI': 'MS', 'MISSOURI': 'MO',
    'MONTANA': 'MT', 'NEBRASKA': 'NE', 'NEVADA': 'NV', 'NEW HAMPSHIRE': 'NH', 'NEW JERSEY': 'NJ',
    'NEW MEXICO': 'NM', 'NEW YORK': 'NY', 'NORTH CAROLINA': 'NC', 'NORTH DAKOTA': 'ND', 'OHIO': 'OH',
    'OKLAHOMA': 'OK', 'OREGON': 'OR', 'PENNSYLVANIA': 'PA', 'RHODE ISLAND': 'RI', 'SOUTH CAROLINA': 'SC',
    'SOUTH DAKOTA': 'SD', 'TENNESSEE': 'TN', 'TEXAS': 'TX', 'UTAH': 'UT', 'VERMONT': 'VT',
    'VIRGINIA': 'VA', 'WASHINGTON': 'WA', 'WEST VIRGINIA': 'WV', 'WISCONSIN': 'WI', 'WYOMING': 'WY',
    'PUERTO RICO': 'PR', 'VIRGIN ISLANDS': 'VI', 'GUAM': 'GU', 'AMERICAN SAMOA': 'AS', 'NORTHERN MARIANA ISLANDS': 'MP'
}

# Replace the full state name with its abbreviation. The lookup is
# upper-cased so differently-cased names still match; names that are not
# in the dictionary are kept unchanged.
df['state_abb'] = df['state'].apply(lambda x: state_abbreviations.get(x.upper(), x))

# Save to a CSV file
df.to_csv(output_file, index=False)

print(f"DataFrame saved to {output_file}")

## Download data for every county

In [None]:
# Download every archive listed in parsed_nfhl_table.csv into nfhl_zipped/,
# naming each file <state_abb>_<item_id>_<date>.zip. Failures are printed and
# collected in the error_log DataFrame; the loop always moves on to the next
# row.
df = pd.read_csv('parsed_nfhl_table.csv')

# Location to save the files; create it up front, otherwise every download
# fails because the target folder does not exist.
location = "nfhl_zipped"
os.makedirs(location, exist_ok=True)

nrows = len(df)

# Failed downloads are collected as plain dicts and converted to a DataFrame
# after the loop. (DataFrame.append was removed in pandas 2.0, so calling it
# inside the except block would itself raise and abort the loop.)
error_columns = ['index', 'url', 'item_id', 'error_message']
error_records = []

# Loop through the rows and download the files
for index, row in df.iterrows():
    url = row['download_url']
    st_abb = row['state_abb']
    item_id = row['item_id'].replace('-NFHL', '')
    date = row['update_date'].replace('/', '')

    # Construct the file path to save the downloaded file
    file_path = os.path.join(location, f"{st_abb}_{item_id}_{date}.zip")
    print(f"({index+1}/{nrows}) ", row['size'], file_path)

    try:
        # Download the file
        urllib.request.urlretrieve(url, file_path)
    except Exception as e:
        # Best-effort: report the failure, record it, and continue
        print(f"Failed to download {url}: {e}")
        error_records.append({
            'index': index,
            'url': url,
            'item_id': item_id,
            'error_message': str(e)
        })

    # Short pause between requests
    time.sleep(0.5)

# DataFrame of error logs (empty, with the same columns, when nothing failed)
error_log = pd.DataFrame(error_records, columns=error_columns)

## Unzip the data

In [None]:
### Unzip the nfhl
# For each .zip in nfhl_zipped/, create a same-named folder under
# nfhl_unzipped/ and extract only the archive members whose names begin
# with the S_FLD_HAZ_AR prefix.
zipped_folder = "nfhl_zipped"
unzipped_folder = "nfhl_unzipped"
os.makedirs(unzipped_folder, exist_ok=True)

file_prefix = "S_FLD_HAZ_AR"

for archive_name in os.listdir(zipped_folder):
    # Anything that is not a ZIP archive is ignored
    if not archive_name.endswith(".zip"):
        continue

    archive_path = os.path.join(zipped_folder, archive_name)

    # Destination folder: the archive's file name without its extension
    archive_stem = os.path.splitext(archive_name)[0]
    target_dir = os.path.join(unzipped_folder, archive_stem)
    os.makedirs(target_dir, exist_ok=True)

    try:
        with zipfile.ZipFile(archive_path, 'r') as archive:
            # Members of interest are selected by name prefix
            wanted_members = [
                member for member in archive.namelist()
                if member.startswith(file_prefix)
            ]

            for member in wanted_members:
                archive.extract(member, target_dir)
                print(f"Extracted: {member} from {archive_name}")

        print(f"Successfully extracted files from: {archive_name}")
    except Exception as e:
        # A bad archive is reported and skipped; the remaining archives
        # are still processed
        print(f"Failed to extract {archive_name}: {e}")


## Create the necessary GDBs

In [None]:
# Make sure both output file geodatabases exist, then build the sorted list
# of state abbreviations that the next cell iterates over.
df = pd.read_csv('parsed_nfhl_table.csv')

# Set the input folder and the output geodatabase paths
input_folder = "nfhl_unzipped"
output_nfhl_gdb = "nfhl_dec_12_2024.gdb"
output_sfha_gdb = "sfha_dec_12_2024.gdb"

# Create each geodatabase only if it does not exist yet
for gdb_path in (output_nfhl_gdb, output_sfha_gdb):
    if arcpy.Exists(gdb_path):
        print(f"Geodatabase already exists: {gdb_path}")
        continue

    # Resolve the parent folder from the absolute path: for a bare name such
    # as "nfhl_dec_12_2024.gdb", os.path.dirname() returns an empty string,
    # which is not a usable output folder for CreateFileGDB.
    gdb_folder = os.path.dirname(os.path.abspath(gdb_path))
    arcpy.CreateFileGDB_management(gdb_folder, os.path.basename(gdb_path))
    print(f"Geodatabase created: {gdb_path}")

# States to process. Missing values are dropped first so that sorting does
# not fail on a mix of strings and NaN.
state_abb = sorted(df['state_abb'].dropna().unique())
state_abb

## Create NFHL and SFHA feature classes

In [None]:
# For each state: collect the S_FLD_HAZ_AR shapefiles from that state's
# unzipped folders, merge them into <ST>_nfhl in the NFHL geodatabase, select
# the features with SFHA_TF = 'T' into <ST>_sfha in the SFHA geodatabase, and
# repair the geometry of the result.
folder_list = os.listdir(input_folder)

for st in state_abb:
    print(f"Working on: {st}")
    state_layers = []

    # Unzipped folders are named <state_abb>_<item_id>_<date>, so the text
    # before the first underscore identifies the state.
    for folder_name in folder_list:
        if folder_name.split('_')[0] != st:
            continue

        shp_path = os.path.join(data_path, input_folder, folder_name, "S_FLD_HAZ_AR.shp")
        if os.path.exists(shp_path):
            state_layers.append(shp_path)
        else:
            print(f"File does not exists at: {shp_path}")

    # Merge cannot run on an empty input list; skip this state rather than
    # letting the error stop the remaining states from being processed.
    if not state_layers:
        print(f"No NFHL layers found for {st}; skipping")
        continue

    nfhl_fc = os.path.join(output_nfhl_gdb, st + "_nfhl")
    sfha_fc = os.path.join(output_sfha_gdb, st + "_sfha")

    print(f"Merging {len(state_layers)} layers of NFHL for {st}")
    arcpy.management.Merge(state_layers, nfhl_fc)

    print(f"Selecting and repairing the SFHA for {st}")
    arcpy.analysis.Select(
        in_features=nfhl_fc,
        out_feature_class=sfha_fc,
        where_clause="SFHA_TF = 'T'"
    )

    arcpy.management.RepairGeometry(sfha_fc)