# Configure

In [2]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Packages
import os
from os.path import join
from pathlib import Path

# geopandas decides whether to use PyGEOS when it is imported, so the
# USE_PYGEOS environment variable has to be set BEFORE the geopandas
# import below; setting it afterwards has no effect in this kernel.
os.environ["USE_PYGEOS"] = "0"

import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape
from pyproj import CRS

In [3]:
# Filepaths (could be replaced by config files or user input)
# The project root is the parent of the current working directory
# (the notebook is expected to be run from notebooks/)
ABS_DIR = os.path.abspath(Path.cwd().parents[0])

# Raw and interim data directories, both under <project>/data
FR, FI = (join(ABS_DIR, "data", stage) for stage in ("raw", "interim"))

# Raw-data subdirectories:
#   exposure - exposure data
#   vuln     - vulnerability data
#   ref      - administrative reference files
#   haz      - FEMA NFHL and depth grids
EXP_DIR_R, VULN_DIR_R, REF_DIR_R, HAZ_DIR_R = (
    join(FR, sub) for sub in ("exposure", "vuln", "ref", "haz")
)

# Create any of the raw-data subdirectories that do not exist yet
for raw_dir in (EXP_DIR_R, VULN_DIR_R, REF_DIR_R, HAZ_DIR_R):
    Path(raw_dir).mkdir(parents=True, exist_ok=True)

In [26]:
# Constants (could be replaced by config files or user input)
# County FIPS code used throughout the notebook
FIPS = "42101"

# "Chunk" size used when querying the FEMA API
CHUNK_FEMA = 1_000


# Exposure Data

## National Structure Inventory

In [7]:
# Get the URL
# (Could be specified in a config file)
url = "https://nsi.sec.usace.army.mil/nsiapi/structures"

# FIPS may be a single county code (a string such as '42101') or a
# collection of county codes. Looping over a bare string would iterate
# over its characters ('4', '2', ...) and query the API once per digit,
# so a lone string is wrapped in a list first.
fips_list = [FIPS] if isinstance(FIPS, str) else list(FIPS)

# Loop through counties,
# Get the data from the NSI API
# Store in dataframe
# Add to list
# Concat all the dfs

# List for NSI DFs
nsi_df_list = []

for fips in fips_list:
    # GET Request (requests builds the '?fips=<code>' query string)
    nsi_get = requests.get(url, params={'fips': fips})
    # Fail loudly on an HTTP error instead of trying to parse an
    # error response as structure data
    nsi_get.raise_for_status()

    # Temp data frame
    temp = pd.json_normalize(nsi_get.json()['features'])

    # Add to list
    nsi_df_list.append(temp)

# Concat
nsi = pd.concat(nsi_df_list, axis=0)

# TODO: Provide helpful log information
# Things like number of rows, whether the get request
# was successful, file size, if it was written successfully

# Write to file
nsi.to_parquet(join(EXP_DIR_R, 'nsi.pqt'))

# Hazard Data

In [14]:
# TODO: Create a script of helpful functions and add this
# Helper function for downloading zip files
# from https://stackoverflow.com/questions/9419162/
# download-returned-zip-file-from-url
def download_url(url, save_path, chunk_size=128, timeout=None):
    """Stream the content at ``url`` into the file ``save_path``.

    Parameters
    ----------
    url : str
        Address of the file to download.
    save_path : str or path-like
        Destination file; opened in binary write mode, so an existing
        file is overwritten.
    chunk_size : int, optional
        Number of bytes requested per chunk while streaming.
    timeout : float or tuple, optional
        Passed through to ``requests.get``. The default ``None`` waits
        indefinitely, which is what happens when no timeout is given.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. The check happens
        before the destination file is opened, so an error page is never
        saved under the name of the expected download.
    """
    # The context manager releases the connection even if writing fails
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(save_path, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=chunk_size):
                fd.write(chunk)

## National Flood Hazard Layer Data

In [6]:
# I went to FEMA Flood Map Service Center
# I chose Philadelphia County from the drop down menus
# I got the following link for the current county NFHL after
# starting & cancelling the download
# https://hazards.fema.gov/nfhlv2/output/County/420757_20230701.zip

# For reference, following the same steps for the
# GeoTIFFs for the Flood Risk Database gives this link
# (used for the depth grids, not for the NFHL):
# https://map1.msc.fema.gov/data/
# FRP/FRD_02040202_PA_GeoTIFFs_20160801
# .zip?LOC=ccad78e48360e7a0a5cf6848dfa4db11
url = "https://hazards.fema.gov/nfhlv2/output/County/420757_20230701.zip"

# Folder that receives the NFHL download (created when missing)
dst = Path(HAZ_DIR_R) / 'nfhl'
dst.mkdir(parents=True, exist_ok=True)
# Location of the zip file inside that folder
dst_path = join(dst, 'nfhl.zip')

# Fetch the NFHL archive
download_url(url, dst_path)

## Depth Grids

In [16]:
# I went to FEMA Flood Map Service Center
# I chose Philadelphia County from the drop down menus
# I got the following link for GeoTIFFs for the Flood Risk Database
# "https://map1.msc.fema.gov/data/FRP/FRD_02040202_PA_GeoTIFFs_20160801" +
# ".zip?LOC=ccad78e48360e7a0a5cf6848dfa4db11"
# This takes a while to download because it's a large file
# You can confirm the endpoint for this download by following the steps, 
# clicking download on the DL icon on the webpage, immediately
# cancelling the download, and checking your browser's download
# page to see what server the download happens from
# I did these steps on Google Chrome 114.0.5735.133

url = ("https://map1.msc.fema.gov/data/FRP/"
       "FRD_02040202_PA_GeoTIFFs_20160801.zip")

# Folder that holds the depth grids (created when missing)
dst = Path(HAZ_DIR_R) / 'dg'
dst.mkdir(parents=True, exist_ok=True)
# Location of the zip file inside that folder
dst_path = join(dst, 'dg.zip')

# Download depth grids
# In order to download in a reproducible way, you might have to
# set verify=False with the latest requests version
# Without this setting, I get an SSL error
# I do not feel comfortable with this, so I directly
# downloaded the file and uploaded it to the data directory.

# Therefore, I comment out the download_url line here

# download_url(url, dst_path)


# Vulnerability Data

In [17]:
# TODO: Create a script of helpful functions and add this
# Helper function for downloading zip files
# from https://stackoverflow.com/questions/9419162/
# download-returned-zip-file-from-url
def download_url(url, save_path, chunk_size=128, timeout=None):
    """Stream the content at ``url`` into the file ``save_path``.

    Parameters
    ----------
    url : str
        Address of the file to download.
    save_path : str or path-like
        Destination file; opened in binary write mode, so an existing
        file is overwritten.
    chunk_size : int, optional
        Number of bytes requested per chunk while streaming.
    timeout : float or tuple, optional
        Passed through to ``requests.get``. The default ``None`` waits
        indefinitely, which is what happens when no timeout is given.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. The check happens
        before the destination file is opened, so an error page is never
        saved under the name of the expected download.
    """
    # The context manager releases the connection even if writing fails
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(save_path, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=chunk_size):
                fd.write(chunk)

## Social Vulnerability

In [18]:
# NOAA SOVI
# Source archive and where it is stored locally
url = ('https://coast.noaa.gov/htdata/SocioEconomic/'
       'SoVI2010/SoVI_2010_PA.zip')
save_path = join(VULN_DIR_R, 'social', 'noaa.zip')

# Create the folder that will contain the file if it is missing
# TODO: There could be a useful helper function for this
Path(save_path).absolute().parent.mkdir(parents=True, exist_ok=True)

# Download and write to disk
download_url(url, save_path)

In [19]:
# CEJST
# Source: https://screeningtool.geoplatform.gov/en/downloads
url = ('https://static-data-screeningtool.geoplatform.gov/data-versions/'
       '1.0/data/score/downloadable/1.0-communities.csv')

# Local destination for the csv
save_path = join(VULN_DIR_R, 'social', 'cejst.csv')

# Create the folder that will contain the file if it is missing
# TODO: There could be a useful helper function for this
Path(save_path).absolute().parent.mkdir(parents=True, exist_ok=True)

# Download and write to disk
download_url(url, save_path)

In [20]:
# FHA LMI
# Source: https://www.hudexchange.info/programs/
# acs-low-mod-summary-data/
# acs-low-mod-summary-data-block-groups-places/

url = ('https://www.hudexchange.info/sites/onecpd/assets/File/'
       'ACS_2015_lowmod_blockgroup_all.xlsx')

# The file is only offered as xlsx; it can be read later with
# pd.read_excel using the openpyxl engine
save_path = join(VULN_DIR_R, 'social', 'lmi.xlsx')

# Create the folder that will contain the file if it is missing
# TODO: There could be a useful helper function for this
Path(save_path).absolute().parent.mkdir(parents=True, exist_ok=True)

# Download and write to disk
download_url(url, save_path)

# Administrative Reference Data

In [22]:
# TODO: Create a script of helpful functions and add this
# Helper function for downloading zip files
# from https://stackoverflow.com/questions/9419162/
# download-returned-zip-file-from-url
def download_url(url, save_path, chunk_size=128, timeout=None):
    """Stream the content at ``url`` into the file ``save_path``.

    Parameters
    ----------
    url : str
        Address of the file to download.
    save_path : str or path-like
        Destination file; opened in binary write mode, so an existing
        file is overwritten.
    chunk_size : int, optional
        Number of bytes requested per chunk while streaming.
    timeout : float or tuple, optional
        Passed through to ``requests.get``. The default ``None`` waits
        indefinitely, which is what happens when no timeout is given.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. The check happens
        before the destination file is opened, so an error page is never
        saved under the name of the expected download.
    """
    # The context manager releases the connection even if writing fails
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(save_path, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=chunk_size):
                fd.write(chunk)

## US Census TIGER

In [29]:
# Root of the TIGER 2022 download tree. It ends with '/', so the path
# pieces appended below must NOT start with one; otherwise the URLs
# contain a double slash ('TIGER2022//TRACT/...').
base_url = 'https://www2.census.gov/geo/tiger/TIGER2022/'

# U.S. wide data
# County boundaries and zip code tabulation areas
# TODO: These could be prespecified in a config file
COUNTY_URL = base_url + 'COUNTY/tl_2022_us_county.zip'

ZCTA_URL = base_url + 'ZCTA520/tl_2022_us_zcta520.zip'

# State-level data
# Get state fips from county code
# (the first two characters; assumes FIPS is a single county code string)
# Get tract, block group, block URLs from state fips
STATE_FIPS = FIPS[:2]

# Filename conventions for tract, block group, block
TRACT_URL = base_url + 'TRACT/tl_2022_' + STATE_FIPS + '_tract.zip'
BG_URL = base_url + 'BG/tl_2022_' + STATE_FIPS + '_bg.zip'
BLOCK_URL = (base_url + 'TABBLOCK20/tl_2022_' + STATE_FIPS
             + '_tabblock20.zip')

In [30]:
# Download every reference file, pairing each URL with its short name
ref_list = [COUNTY_URL, ZCTA_URL, TRACT_URL, BG_URL, BLOCK_URL]
ref_names = ['county', 'zcta', 'tract', 'bg', 'block']
for name, ref in zip(ref_names, ref_list):
    # The file is saved as <name>.zip in the reference directory
    save_path = join(REF_DIR_R, name + '.zip')
    # Download and write to disk
    download_url(ref, save_path)
    # Report progress
    print('Downloaded and wrote file: ' + name)

Downloaded and wrote file: county
Downloaded and wrote file: zcta
Downloaded and wrote file: tract
Downloaded and wrote file: bg
Downloaded and wrote file: block
