# Configure packages and filepaths

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import requests
import os
import math
from os.path import join
from pathlib import Path
import pandas as pd
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
from shapely.geometry import shape
from pyproj import CRS


In [3]:
# Filepaths

# The notebook is run from notebooks/, so the project root is the
# directory one level above the current working directory
ABS_DIR = os.path.abspath(Path.cwd().parents[0])

# Raw (FR), interim (FI) and processed (FP) data directories
FR, FI, FP = (join(ABS_DIR, 'data', stage)
              for stage in ('raw', 'interim', 'processed'))

# Raw-data subdirectories:
#   exposure, vulnerability (vuln), administrative reference (ref)
#   and hazard (haz, for FEMA NFHL and depth grids)
EXP_DIR_R, VULN_DIR_R, REF_DIR_R, HAZ_DIR_R = (
    join(FR, sub) for sub in ('exposure', 'vuln', 'ref', 'haz')
)

# Create any of the raw-data subdirectories that do not exist yet
for _raw_dir in (EXP_DIR_R, VULN_DIR_R, REF_DIR_R, HAZ_DIR_R):
    Path(_raw_dir).mkdir(parents=True, exist_ok=True)

In [4]:
# Constants (candidates for a config file or user input later on)

# County FIPS codes to download; kept as a list so that more
# counties can be added
FIPS = ["42101"]

# "Chunk" size used when paging through the FEMA API
CHUNK_FEMA = 1_000

# Exposure Data

## National Structure Inventory

In [6]:
# National Structure Inventory API endpoint
url = "https://nsi.sec.usace.army.mil/nsiapi/structures"

# For every county in FIPS:
#   request the structures from the NSI API,
#   flatten the returned features into a dataframe,
#   and collect the dataframe in a list.
# Afterwards all per-county dataframes are concatenated.

# List for NSI DFs
nsi_df_list = []

for fips in FIPS:
    # GET request; requests builds the "?fips=<code>" query string
    nsi_get = requests.get(url, params={'fips': fips})
    # Stop with a clear HTTPError on a 4xx/5xx response instead of
    # failing later with a JSON decode error or a missing 'features' key
    nsi_get.raise_for_status()

    # Temp data frame with one row per returned feature
    temp = pd.json_normalize(nsi_get.json()['features'])

    # Add to list
    nsi_df_list.append(temp)

# Concat
nsi = pd.concat(nsi_df_list, axis=0)

# Write to file
nsi.to_parquet(join(EXP_DIR_R, 'nsi.pqt'))

## Local Municipal Data

### Parcels/Assessments

In [31]:
# PWD parcels (Need BRT_ID and PIN for merge with OPA data)
# These will be used for linking assessment data from OPA
# with building footprints based on attribute merges

# Metadata link
# https://metadata.phila.gov/#home/datasetdetails/
# 5543864620583086178c4e7a/representationdetails/55438a829b989a05172d0cfa/

# Download the geojson
# https://opendata.arcgis.com/datasets/
# 84baed491de44f539889f2af178ad85c_0.geojson

url = ("https://opendata.arcgis.com/datasets/"
       + "84baed491de44f539889f2af178ad85c_0.geojson")

# GET request for pwd parcel data
pwd_get = requests.get(url)
# Stop with a clear HTTPError on a 4xx/5xx response instead of
# failing later while decoding or indexing the body
pwd_get.raise_for_status()

# Json object to access data and crs
get_json = pwd_get.json()

# Name of the coordinate reference system declared in the GeoJSON
crs = get_json['crs']['properties']['name']
# Convert the name to a pyproj CRS object
crs_geo = CRS.from_user_input(crs)

# Get features of request
temp = get_json['features']

# Normalize into dataframe (nested keys become dotted column names)
temp_df = pd.json_normalize(temp)

# Build a shapely geometry from each feature
temp_geo = [shape(i['geometry']) for i in temp]

# Get geodataframe from last two steps
pwd_geo = gpd.GeoDataFrame(temp_df,
                           crs=crs_geo,
                           geometry=temp_geo)

# Need to drop geometry.coordinates to write file
# Also can drop type, geometry.type
drop_col = ['type', 'geometry.type', 'geometry.coordinates']
pwd_geo_out = pwd_geo.drop(columns=drop_col)

# Write file
pwd_geo_out.to_file(join(EXP_DIR_R, 'pc_pwd.gpkg'),
                    driver='GPKG')

In [55]:
# OPA Parcels (These have all the assessments)
# only point based data

# Metadata link
# https://metadata.phila.gov/#home/datasetdetails/
# 5543865f20583086178c4ee5/representationdetails/
# 55d624fdad35c7e854cb21a4/?view_287_per_page=100&view_287_page=1

# API endpoint, .csv
# https://opendata-downloads.s3.amazonaws.com/opa_properties_public.csv
# Since this is just a point database and we're linking it to
# building footprints through pwd, I think downloading
# csv is simpler and more straightforward
url = ("https://opendata-downloads.s3.amazonaws.com/"
       + "opa_properties_public.csv")

# Pandas can read directly from the url
# NOTE(review): pandas may warn about mixed column types on this file;
# consider passing explicit dtypes if that matters downstream
opa = pd.read_csv(url)

# Write out file as csv
# (the frame read above is written as-is; no separate "_out" copy
# is created in this cell)
opa.to_csv(join(EXP_DIR_R, 'pc_opa.csv'))

  opa = pd.read_csv(url)


### Building Footprints

In [56]:
# Building footprint polygons with PARCEL_ID_ 
# which links to bld parcels

# Metadata link
# https://metadata.phila.gov/#home/datasetdetails/
# 5543864f20583086178c4ea5/representationdetails/595e8e85ac27025c82c53c7c/

# API endpoint, .geojson
# https://opendata.arcgis.com/datasets/
# ab9e89e1273f445bb265846c90b38a96_0.geojson

# This is geojson like the PWD parcels above so the code can be the same
# Makes sense to have a function that does this stuff, but
# for a single county case-study like this for the framework
# it could be more interpretable to maintain more readability
url = ("https://opendata.arcgis.com/datasets/"
       + "ab9e89e1273f445bb265846c90b38a96_0.geojson")

# GET request for bld footprint data
bld_get = requests.get(url)
# Stop with a clear HTTPError on a 4xx/5xx response instead of
# failing later while decoding or indexing the body
bld_get.raise_for_status()

# Json object to access data and crs
get_json = bld_get.json()

# Name of the coordinate reference system declared in the GeoJSON
crs = get_json['crs']['properties']['name']
# Convert the name to a pyproj CRS object
crs_geo = CRS.from_user_input(crs)

# Get features of request
temp = get_json['features']

# Normalize into dataframe (nested keys become dotted column names)
temp_df = pd.json_normalize(temp)

# Build a shapely geometry from each feature
temp_geo = [shape(i['geometry']) for i in temp]

# Get geodataframe from last two steps
bld_geo = gpd.GeoDataFrame(temp_df,
                           crs=crs_geo,
                           geometry=temp_geo)

# Need to drop geometry.coordinates to write file
# Also can drop type, geometry.type
drop_col = ['type', 'geometry.type', 'geometry.coordinates']
bld_geo_out = bld_geo.drop(columns=drop_col)

# Write file
bld_geo_out.to_file(join(EXP_DIR_R, 'bld_fp.gpkg'),
                    driver='GPKG')

## FEMA Data

### NFIP Policies

In [8]:
# Get the URL for querying policies
url = "https://www.fema.gov/api/open/v2/FimaNfipPolicies?$"
# Get the URL for # policies that meet request
# (asks for a single id plus the total record count)
check = url + "inlinecount=allpages&$top=1&$select=id&$"


# Loop through counties, 
# Get the data from the Pols API
# Store in dataframe
# Add to list
# Concat all the dfs

# List for Pols DFs
pol_df_list = []

# NFIP API usage adapts R code here: https://docs.ropensci.org/rfema/
# And follows OpenFEMA guide: 
# https://www.fema.gov/about/openfema/working-with-large-data-sets#app-a

# Ask for exactly CHUNK_FEMA records per page so the page size always
# matches the $skip step below instead of relying on the API default
top_str = "&$top=" + str(CHUNK_FEMA)

for fips in FIPS:
    # County endpoint (URL-encoded: countyCode eq '<fips>')
    c_end = "filter=countyCode%20eq%20%27" + fips + "%27"
    
    # First, get the total number of records
    records = requests.get(check + c_end)
    # Stop with a clear HTTPError on a 4xx/5xx response
    records.raise_for_status()
    n_rec = pd.json_normalize(records.json())['metadata.count'][0]
    
    # Get iterations needed (CHUNK_FEMA records per request)
    iterations = math.ceil(n_rec / CHUNK_FEMA)
    
    # Now, download CHUNK_FEMA records at a time and store in list
    # Loop through required iterations and keep appending policy 
    # data from the GET request to the pol_df_list
    for i in range(iterations):
        skip_str = "&$skip=" + str(i*CHUNK_FEMA)
    
        # GET Request
        pol_get = requests.get(url + c_end + top_str + skip_str)
        pol_get.raise_for_status()

        # Temp data frame
        temp = pd.json_normalize(pol_get.json()['FimaNfipPolicies'])

        # Add to list
        pol_df_list.append(temp)

# Concat
nfip_pol = pd.concat(pol_df_list, axis=0)

# Write to file
nfip_pol.to_parquet(join(EXP_DIR_R, 'nfip_pols.pqt'))

### NFIP Claims

In [12]:
# Get the URL for querying claims
url = "https://www.fema.gov/api/open/v2/FimaNfipClaims?$"
# Get the URL for # claims that meet request
# (asks for a single id plus the total record count)
check = url + "inlinecount=allpages&$top=1&$select=id&$"


# Loop through counties, 
# Get the data from the claims API
# Store in dataframe
# Add to list
# Concat all the dfs

# List for claims DFs
claim_df_list = []

# NFIP API usage adapts R code here: https://docs.ropensci.org/rfema/
# And follows OpenFEMA guide: 
# https://www.fema.gov/about/openfema/working-with-large-data-sets#app-a

# Ask for exactly CHUNK_FEMA records per page so the page size always
# matches the $skip step below instead of relying on the API default
top_str = "&$top=" + str(CHUNK_FEMA)

for fips in FIPS:
    # County endpoint (URL-encoded: countyCode eq '<fips>')
    c_end = "filter=countyCode%20eq%20%27" + fips + "%27"
    
    # First, get the total number of records
    records = requests.get(check + c_end)
    # Stop with a clear HTTPError on a 4xx/5xx response
    records.raise_for_status()
    n_rec = pd.json_normalize(records.json())['metadata.count'][0]
    
    # Get iterations needed (CHUNK_FEMA records per request)
    iterations = math.ceil(n_rec / CHUNK_FEMA)
    
    # Now, download CHUNK_FEMA records at a time and store in list
    # Loop through required iterations and keep appending claim 
    # data from the GET request to the claim_df_list
    for i in range(iterations):
        skip_str = "&$skip=" + str(i*CHUNK_FEMA)
    
        # GET Request
        claim_get = requests.get(url + c_end + top_str + skip_str)
        claim_get.raise_for_status()

        # Temp data frame
        temp = pd.json_normalize(claim_get.json()['FimaNfipClaims'])

        # Add to list
        claim_df_list.append(temp)

# Concat
nfip_claim = pd.concat(claim_df_list, axis=0)

# Write to file
nfip_claim.to_parquet(join(EXP_DIR_R, 'nfip_claims.pqt'))

## Federal Housing Finance Agency

In [5]:
# TODO: Create a script of helpful functions and add this
# (this helper is currently defined in several cells of the notebook)
# Helper function for downloading zip files
# from https://stackoverflow.com/questions/9419162/download-returned-zip-file-from-url
def download_url(url, save_path, chunk_size=128):
    """Stream the body of a GET request to ``url`` into ``save_path``.

    Parameters
    ----------
    url : str
        Address to download from.
    save_path : str or path-like
        File the response body is written to (opened in binary
        write mode, so an existing file is overwritten).
    chunk_size : int, default 128
        Number of bytes requested from the response per iteration.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. The check happens
        before ``save_path`` is opened, so an error page is never
        saved as if it were the requested file.
    """
    # The context manager releases the connection even if writing fails
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(save_path, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=chunk_size):
                fd.write(chunk)

In [6]:
# Housing price index, used for deflating
# market values to a standard 

# Source page:
# https://www.fhfa.gov/DataTools/Downloads/Pages/
# House-Price-Index-Datasets.aspx#qat

# We use the annual house price indices for
# Counties (Developmental Index; Not Seasonally Adjusted)
# https://www.fhfa.gov/DataTools/Downloads/Documents/HPI/
# HPI_AT_BDL_county.xlsx
url = ("https://www.fhfa.gov/DataTools/Downloads/Documents/HPI/"
       "HPI_AT_BDL_county.xlsx")

# Where the workbook is stored inside the raw exposure directory
dst_path = join(EXP_DIR_R, 'hpi_county.xlsx')

# Fetch the file
download_url(url=url, save_path=dst_path)

# Hazard Data

In [20]:
# TODO: Create a script of helpful functions and add this
# (this helper is currently defined in several cells of the notebook)
# Helper function for downloading zip files
# from https://stackoverflow.com/questions/9419162/download-returned-zip-file-from-url
def download_url(url, save_path, chunk_size=128):
    """Stream the body of a GET request to ``url`` into ``save_path``.

    Parameters
    ----------
    url : str
        Address to download from.
    save_path : str or path-like
        File the response body is written to (opened in binary
        write mode, so an existing file is overwritten).
    chunk_size : int, default 128
        Number of bytes requested from the response per iteration.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. The check happens
        before ``save_path`` is opened, so an error page is never
        saved as if it were the requested file.
    """
    # The context manager releases the connection even if writing fails
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(save_path, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=chunk_size):
                fd.write(chunk)

## NFHL Data

In [20]:
# How the NFHL link was found:
# I went to FEMA Flood Map Service Center
# I chose Philadelphia County from the drop down menus
# I got the following link for the current county NFHL after
# downloading & cancelling the download
# https://hazards.fema.gov/nfhlv2/output/County/420757_20230701.zip

# NOTE(review): the original notes appear to have had the two links
# swapped. The link below, found with the same steps, is the one for
# GeoTIFFs for the Flood Risk Database (it is not downloaded in this cell)
# https://map1.msc.fema.gov/data/FRP/FRD_02040202_PA_GeoTIFFs_20160801
# .zip?LOC=ccad78e48360e7a0a5cf6848dfa4db11
url = "https://hazards.fema.gov/nfhlv2/output/County/420757_20230701.zip"

# Directory that will hold the NFHL archive (created if missing)
dst = Path(HAZ_DIR_R) / 'nfhl'
dst.mkdir(parents=True, exist_ok=True)
# Full path of the downloaded archive
dst_path = str(dst / 'nfhl.zip')

# Fetch the NFHL archive
download_url(url=url, save_path=dst_path)

## Depth Grids

In [21]:
# How the depth grid link was found:
# I went to FEMA Flood Map Service Center
# I chose Philadelphia County from the drop down menus
# I got the following link for GeoTIFFs for the Flood Risk Database
# "https://map1.msc.fema.gov/data/FRP/FRD_02040202_PA_GeoTIFFs_20160801" +
# ".zip?LOC=ccad78e48360e7a0a5cf6848dfa4db11"
# The file is large, so the download takes a while
# To confirm the endpoint yourself: follow the steps above,
# click the DL icon on the webpage, cancel the download right away,
# and look at your browser's download page to see which server
# the download comes from
# I did these steps on Google Chrome 114.0.5735.133

url = ("https://map1.msc.fema.gov/data/FRP/FRD_02040202_PA_GeoTIFFs_20160801"
       ".zip?LOC=ccad78e48360e7a0a5cf6848dfa4db11")

# Directory that will hold the depth grid archive (created if missing)
dst = Path(HAZ_DIR_R) / 'dg'
dst.mkdir(parents=True, exist_ok=True)
# Full path of the downloaded archive
dst_path = str(dst / 'dg.zip')

# Fetch the depth grids
download_url(url=url, save_path=dst_path)

# Vulnerability Data

In [None]:
# TODO: Create a script of helpful functions and add this
# (this helper is currently defined in several cells of the notebook)
# Helper function for downloading zip files
# from https://stackoverflow.com/questions/9419162/download-returned-zip-file-from-url
def download_url(url, save_path, chunk_size=128):
    """Stream the body of a GET request to ``url`` into ``save_path``.

    Parameters
    ----------
    url : str
        Address to download from.
    save_path : str or path-like
        File the response body is written to (opened in binary
        write mode, so an existing file is overwritten).
    chunk_size : int, default 128
        Number of bytes requested from the response per iteration.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. The check happens
        before ``save_path`` is opened, so an error page is never
        saved as if it were the requested file.
    """
    # The context manager releases the connection even if writing fails
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(save_path, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=chunk_size):
                fd.write(chunk)

## Social Vulnerability

In [23]:
# NOAA SOVI
url = ('https://coast.noaa.gov/htdata/SocioEconomic/SoVI2010/'
       'SoVI_2010_PA.zip')
save_path = join(VULN_DIR_R, 'social', 'noaa.zip')
# Create the directory that will contain the file if it is missing
# TODO: There could be a useful helper function for this
os.makedirs(os.path.dirname(os.path.abspath(save_path)), exist_ok=True)

# Download and save
download_url(url=url, save_path=save_path)

In [25]:
# CEJST
# Data from https://screeningtool.geoplatform.gov/en/downloads
url = ('https://static-data-screeningtool.geoplatform.gov/data-versions/'
       '1.0/data/score/downloadable/1.0-communities.csv')

save_path = join(VULN_DIR_R, 'social', 'cejst.csv')

# Create the directory that will contain the file if it is missing
# TODO: There could be a useful helper function for this
os.makedirs(os.path.dirname(os.path.abspath(save_path)), exist_ok=True)

# Download and save
download_url(url=url, save_path=save_path)

In [24]:
# FHA LMI
# Data from https://www.hudexchange.info/programs/acs-low-mod-summary-data/
# acs-low-mod-summary-data-block-groups-places/

url = ('https://www.hudexchange.info/sites/onecpd/assets/File/'
       'ACS_2015_lowmod_blockgroup_all.xlsx')

# The source is an xlsx workbook;
# pd.read_excel can read it with the openpyxl engine
save_path = join(VULN_DIR_R, 'social', 'lmi.xlsx')

# Create the directory that will contain the file if it is missing
# TODO: There could be a useful helper function for this
os.makedirs(os.path.dirname(os.path.abspath(save_path)), exist_ok=True)

# Download and save
download_url(url=url, save_path=save_path)

# Administrative Reference Data

In [9]:
# TODO: It would be much better to have helper 
# functions for this in the future, especially if
# using generic reference data (like from TIGER)

In [8]:
# County boundary (city limits)
# Metadata link
# https://metadata.phila.gov/#home/datasetdetails/
# 5543868820583086178c4f89/representationdetails/55438ada9b989a05172d0d92/

# Download the geojson
# https://opendata.arcgis.com/datasets/
# 405ec3da942d4e20869d4e1449a2be48_0.geojson

url = ("https://opendata.arcgis.com/datasets/"
       + "405ec3da942d4e20869d4e1449a2be48_0.geojson")

# GET request for city limits
city_get = requests.get(url)
# Stop with a clear HTTPError on a 4xx/5xx response instead of
# failing later while decoding or indexing the body
city_get.raise_for_status()

# Json object to access data and crs
get_json = city_get.json()

# Name of the coordinate reference system declared in the GeoJSON
crs = get_json['crs']['properties']['name']
# Convert the name to a pyproj CRS object
crs_geo = CRS.from_user_input(crs)

# Get features of request
temp = get_json['features']

# Normalize into dataframe (nested keys become dotted column names)
temp_df = pd.json_normalize(temp)

# Build a shapely geometry from each feature
temp_geo = [shape(i['geometry']) for i in temp]

# Get geodataframe from last two steps
city_geo = gpd.GeoDataFrame(temp_df,
                            crs=crs_geo,
                            geometry=temp_geo)

# Need to drop geometry.coordinates to write file
# Also can drop type, geometry.type
drop_col = ['type', 'geometry.type', 'geometry.coordinates']
city_geo_out = city_geo.drop(columns=drop_col)

# Write file
city_geo_out.to_file(join(REF_DIR_R, 'city.gpkg'),
                     driver='GPKG')

In [10]:
# Census tracts, 2010
# Metadata link
# https://metadata.phila.gov/#home/datasetdetails/5543867720583086178c4f47/
# representationdetails/55438aca9b989a05172d0d7a/

# Download the geojson
# https://opendata.arcgis.com/datasets/
# 8bc0786524a4486bb3cf0f9862ad0fbf_0.geojson

url = ("https://opendata.arcgis.com/datasets/"
       + "8bc0786524a4486bb3cf0f9862ad0fbf_0.geojson")

# GET request for tracts
tract_get = requests.get(url)
# Stop with a clear HTTPError on a 4xx/5xx response instead of
# failing later while decoding or indexing the body
tract_get.raise_for_status()

# Json object to access data and crs
get_json = tract_get.json()

# Name of the coordinate reference system declared in the GeoJSON
crs = get_json['crs']['properties']['name']
# Convert the name to a pyproj CRS object
crs_geo = CRS.from_user_input(crs)

# Get features of request
temp = get_json['features']

# Normalize into dataframe (nested keys become dotted column names)
temp_df = pd.json_normalize(temp)

# Build a shapely geometry from each feature
temp_geo = [shape(i['geometry']) for i in temp]

# Get geodataframe from last two steps
tract_geo = gpd.GeoDataFrame(temp_df,
                             crs=crs_geo,
                             geometry=temp_geo)

# Need to drop geometry.coordinates to write file
# Also can drop type, geometry.type
drop_col = ['type', 'geometry.type', 'geometry.coordinates']
tract_geo_out = tract_geo.drop(columns=drop_col)

# Write file
tract_geo_out.to_file(join(REF_DIR_R, 'tracts.gpkg'),
                      driver='GPKG')

In [11]:
# Census block groups, 2010
# Metadata link
# https://metadata.phila.gov/#home/datasetdetails/
# 5543867720583086178c4f46/representationdetails/55438ac99b989a05172d0d79/

# Download the geojson
# https://opendata.arcgis.com/datasets/
# 2f982bada233478ea0100528227febce_0.geojson

url = ("https://opendata.arcgis.com/datasets/"
       + "2f982bada233478ea0100528227febce_0.geojson")

# GET request for block groups
block_get = requests.get(url)
# Stop with a clear HTTPError on a 4xx/5xx response instead of
# failing later while decoding or indexing the body
block_get.raise_for_status()

# Json object to access data and crs
get_json = block_get.json()

# Name of the coordinate reference system declared in the GeoJSON
crs = get_json['crs']['properties']['name']
# Convert the name to a pyproj CRS object
crs_geo = CRS.from_user_input(crs)

# Get features of request
temp = get_json['features']

# Normalize into dataframe (nested keys become dotted column names)
temp_df = pd.json_normalize(temp)

# Build a shapely geometry from each feature
temp_geo = [shape(i['geometry']) for i in temp]

# Get geodataframe from last two steps
block_geo = gpd.GeoDataFrame(temp_df,
                             crs=crs_geo,
                             geometry=temp_geo)

# Need to drop geometry.coordinates to write file
# Also can drop type, geometry.type
drop_col = ['type', 'geometry.type', 'geometry.coordinates']
block_geo_out = block_geo.drop(columns=drop_col)

# Write file
block_geo_out.to_file(join(REF_DIR_R, 'blocks.gpkg'),
                      driver='GPKG')

In [12]:
# Zip codes (Important: zip codes, not ZCTA)

# Metadata link
# https://metadata.phila.gov/#home/datasetdetails/
# 555f813af15fcb6c6ed44153/representationdetails/5589aa52b80410802d7e643b/

# Download the geojson
# https://opendata.arcgis.com/datasets/
# b54ec5210cee41c3a884c9086f7af1be_0.geojson

url = ("https://opendata.arcgis.com/datasets/"
       + "b54ec5210cee41c3a884c9086f7af1be_0.geojson")

# GET request for zips
zips_get = requests.get(url)
# Stop with a clear HTTPError on a 4xx/5xx response instead of
# failing later while decoding or indexing the body
zips_get.raise_for_status()

# Json object to access data and crs
get_json = zips_get.json()

# Name of the coordinate reference system declared in the GeoJSON
crs = get_json['crs']['properties']['name']
# Convert the name to a pyproj CRS object
crs_geo = CRS.from_user_input(crs)

# Get features of request
temp = get_json['features']

# Normalize into dataframe (nested keys become dotted column names)
temp_df = pd.json_normalize(temp)

# Build a shapely geometry from each feature
temp_geo = [shape(i['geometry']) for i in temp]

# Get geodataframe from last two steps
zips_geo = gpd.GeoDataFrame(temp_df,
                            crs=crs_geo,
                            geometry=temp_geo)

# Need to drop geometry.coordinates to write file
# Also can drop type, geometry.type
drop_col = ['type', 'geometry.type', 'geometry.coordinates']
zips_geo_out = zips_geo.drop(columns=drop_col)

# Write file
zips_geo_out.to_file(join(REF_DIR_R, 'zips.gpkg'),
                     driver='GPKG')