In [1]:
%load_ext autoreload
%autoreload 2

In [42]:
import requests
import os
import math
from os.path import join
from pathlib import Path
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon, shape

In [3]:
# Filepaths

# Get the absolute path to the precal_hazard directory
# Which is two directories above notebooks/exploration/
abs_dir = os.path.abspath(Path(os.getcwd()).parents[1])
# Get raw data directory
fr = join(abs_dir, 'data', 'raw')
# Get interim data directory
fi = join(abs_dir, 'data', 'interim')
# Get processed data directory
fp = join(abs_dir, 'data', 'processed')

# Get National Structure Inventory Data

In [4]:
# Call the NSI API by fips
# Camden, County NJ
fips_list = ['34007']

# Get the URL
url = "https://nsi.sec.usace.army.mil/nsiapi/structures"

# Loop through counties, 
# Get the data from the NSI API
# Store in dataframe
# Add to list
# Concat all the dfs

# List for NSI DFs
nsi_df_list = []

for fips in fips_list:
    # GET Request
    nsi_get = requests.get(url + '?fips=' + fips)
    
    # Temp data frame
    temp = pd.json_normalize(nsi_get.json()['features'])
    
    # Add to list
    nsi_df_list.append(temp)

# Concat
nsi = pd.concat(nsi_df_list, axis=0)

# Write to file
nsi.to_parquet(join(fr, 'exposure', 'nsi.pqt'))

In [9]:
nsi.head()

Unnamed: 0,type,geometry.type,geometry.coordinates,properties.fd_id,properties.bid,properties.occtype,properties.st_damcat,properties.bldgtype,properties.found_type,properties.cbfips,...,properties.val_vehic,properties.source,properties.med_yr_blt,properties.firmzone,properties.o65disable,properties.u65disable,properties.x,properties.y,properties.ground_elv,properties.ground_elv_m
0,Feature,Point,"[-75.027694, 39.836728]",549377745,87F6RXPC+MWV-7-10-8-9,COM3,COM,W,S,340076074012026,...,180000,X,1964,,0.25,0.04,-75.027694,39.836728,79.250601,24.155582
1,Feature,Point,"[-75.028788, 39.939514]",549416164,87F6WXQC+RF4-12-13-12-14,COM3,COM,S,S,340076032003015,...,0,X,1987,,0.25,0.04,-75.028788,39.939514,47.315855,14.421872
2,Feature,Point,"[-75.033884, 39.91555]",549971600,87F6WX88+6C9-3-3-2-3,RES3A,RES,W,B,340076062001005,...,54000,X,1939,,0.25,0.04,-75.033884,39.91555,15.376165,4.686655
3,Feature,Point,"[-75.084571, 39.955341]",550301356,87F6XW48+45M-4-4-4-4,RES1-2SWB,RES,W,B,340076011021001,...,27000,X,2003,,0.25,0.04,-75.084571,39.955341,18.458536,5.626162
4,Feature,Point,"[-74.999513, 39.870824]",550414706,87F7V2C2+85H-3-3-3-3,COM8,COM,M,S,340076035051009,...,0,X,1964,,0.25,0.04,-74.999513,39.870824,52.01705,15.854796


# Download NFIP Data

In [51]:
# pol Policies for Camden
# Call the pol API by fips
# Camden, County NJ
fips_list = ['34007']

# Get the URL for querying policies
url = "https://www.fema.gov/api/open/v1/FimaNfipPolicies?$"
# Get the URL for # policies that meet request
check = url + "inlinecount=allpages&$top=1&$select=id&$"


# Loop through counties, 
# Get the data from the Pols API
# Store in dataframe
# Add to list
# Concat all the dfs

# List for Pols DFs
pol_df_list = []

# NFIP API usage adapts R code here: https://docs.ropensci.org/rfema/
# And follows OpenFEMA guide: 
# https://www.fema.gov/about/openfema/working-with-large-data-sets#app-a

for fips in fips_list:
    # County endpoint
    c_end = "filter=countyCode%20eq%20%27" + fips + "%27"
    
    # First, get the total number of records
    records = requests.get(check + c_end)
    n_rec = pd.json_normalize(records.json())['metadata.count'][0]
    
    # Get iterations needed (1,000 record limit)
    iterations = math.ceil(n_rec / 1000)
    
    # Now, download 1,000 records at a time and store in list
    # Loop through required iterations and keep appending policy 
    # data from the GET request to the pol_df_list
    for i in range(iterations):
        skip_str = "&$skip=" + str(i*1000)
    
        # GET Request
        pol_get = requests.get(url + c_end + skip_str)

        # Temp data frame
        temp = pd.json_normalize(pol_get.json()['FimaNfipPolicies'])

        # Add to list
        pol_df_list.append(temp)

# Concat
nfip_pol = pd.concat(pol_df_list, axis=0)

# Write to file
nfip_pol.to_parquet(join(fr, 'exposure', 'nfip_pols.pqt'))

In [62]:
# NFIP Claims for Camden
# claim claimicies for Camden
# Call the claim API by fips
# Camden, County NJ
fips_list = ['34007']

# Get the URL for querying claimicies
url = "https://www.fema.gov/api/open/v1/FimaNfipClaims?$"
# Get the URL for # claimicies that meet request
check = url + "inlinecount=allpages&$top=1&$select=id&$"


# Loop through counties, 
# Get the data from the claims API
# Store in dataframe
# Add to list
# Concat all the dfs

# List for claims DFs
claim_df_list = []

# NFIP API usage adapts R code here: https://docs.ropensci.org/rfema/
# And follows OpenFEMA guide: 
# https://www.fema.gov/about/openfema/working-with-large-data-sets#app-a

for fips in fips_list:
    # County endpoint
    c_end = "filter=countyCode%20eq%20%27" + fips + "%27"
    
    # First, get the total number of records
    records = requests.get(check + c_end)
    n_rec = pd.json_normalize(records.json())['metadata.count'][0]
    
    # Get iterations needed (1,000 record limit)
    iterations = math.ceil(n_rec / 1000)
    
    # Now, download 1,000 records at a time and store in list
    # Loop through required iterations and keep appending claimicy 
    # data from the GET request to the claim_df_list
    for i in range(iterations):
        skip_str = "&$skip=" + str(i*1000)
    
        # GET Request
        claim_get = requests.get(url + c_end + skip_str)

        # Temp data frame
        temp = pd.json_normalize(claim_get.json()['FimaNfipClaims'])

        # Add to list
        claim_df_list.append(temp)

# Concat
nfip_claim = pd.concat(claim_df_list, axis=0)

# Write to file
nfip_claim.to_parquet(join(fr, 'exposure', 'nfip_claims.pqt'))

In [None]:
# TODO: Add all data sources currently in keller-lab/data
# Remove those data sources from that repo, makes sense
# to instead use project by project data model
# since we need the data accessible for reproducibility
# HMGP, IHP, PA
# Also Camden County NFHL

# Download Camden County Data

In [156]:
# Parcels
par_df_list = []

# Data is in epsg: 3424
EPSG = '3424'

# Store base URL
par_url = "https://services3.arcgis.com/JGF6qCAQFbROcocK/arcgis/rest/services/Parcel_Data_2021_Redacted/FeatureServer/1/query?outFields=*"
close_str = "&f=geojson"

# Filter on municipality
mun_str = "&where=MUNICIPALITY%3D%27GLOUCESTER+CITY%27"
# Record count, 2000 at a time
rec_str = "&resultRecordCount=2000"
# Update resultOffset by 2000 at a time
rec_n = 2000

# Get number of records
num_rec_url = "https://services3.arcgis.com/JGF6qCAQFbROcocK/arcgis/rest/services/Parcel_Data_2021_Redacted/FeatureServer/1/query?where=MUNICIPALITY%3D%27GLOUCESTER+CITY%27&returnCountOnly=true&f=json"
num_r = requests.get(num_rec_url).json()['count']

# Get iterations needed 
iterations = math.ceil(num_r / rec_n)

# Now, download 2,000 records at a time and store in list
# Loop through required iterations and keep appending claimicy 
# data from the GET request to the claim_df_list
for i in range(iterations):
    skip_str = "&resultOffset=" + str(i*rec_n)

    # GET Request
    par_get = requests.get(par_url + mun_str + skip_str + rec_str + close_str)

    # Temp data frame
    temp = par_get.json()['features']
    temp_df = pd.json_normalize(temp)
    temp_geo = [shape(i['geometry']) for i in temp]

    # Geodataframe with temp_df & temp_geo linked
    par_geo = gpd.GeoDataFrame(temp_df,
                               crs=EPSG,
                               geometry=temp_geo) 

    # Add to list
    par_df_list.append(par_geo)

# Concat
pars = pd.concat(par_df_list, axis=0)

# Get back to geodataframe
pars_geo = gpd.GeoDataFrame(pars,
                            crs=EPSG,
                            geometry=pars['geometry'])

# Drop type, id, geometry.type, geometry.coordinates
drop_col = ['type', 'id', 'geometry.type', 'geometry.coordinates']
pars_geo = pars_geo.drop(columns=drop_col)

# Write data to file
pars_geo.to_file(join(fr, 'exposure', 'pc.gpkg'),
                 driver='GPKG')

ValueError: Invalid field type <class 'list'>

In [214]:
# Tracts
# tract Codes
url = "https://services3.arcgis.com/JGF6qCAQFbROcocK/arcgis/rest/services/CensusTracts/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson"

# Data in epsg: 3424
EPSG = '3424'

# GET Request
tract_get = requests.get(url)

# No loop needed, just the one tracticipality
temp = tract_get.json()['features']
temp_df = pd.json_normalize(temp)
temp_geo = [shape(i['geometry']) for i in temp]

# Final df
tract_geo = gpd.GeoDataFrame(temp_df,
                           crs=EPSG,
                           geometry=temp_geo)

# Drop type, id, geometry.type, geometry.coordinates
drop_col = ['type', 'id', 'geometry.type', 'geometry.coordinates']
tract_geo = tract_geo.drop(columns=drop_col)

# Write data to file
tract_geo.to_file(join(fr, 'ref', 'tracts.gpkg'),
                driver='GPKG')

In [213]:
# Zip Codes
url = "https://services3.arcgis.com/JGF6qCAQFbROcocK/arcgis/rest/services/Zip_Codes/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson"

# Data in epsg: 3424
EPSG = '3424'

# GET Request
zip_get = requests.get(url)

# No loop needed, just the one zipicipality
temp = zip_get.json()['features']
temp_df = pd.json_normalize(temp)
temp_geo = [shape(i['geometry']) for i in temp]

# Final df
zip_geo = gpd.GeoDataFrame(temp_df,
                           crs=EPSG,
                           geometry=temp_geo)

# Drop type, id, geometry.type, geometry.coordinates
drop_col = ['type', 'id', 'geometry.type', 'geometry.coordinates']
zip_geo = zip_geo.drop(columns=drop_col)

# Write data to file
zip_geo.to_file(join(fr, 'ref', 'zipcodes.gpkg'),
                driver='GPKG')

In [212]:
# Municipalities for Camden (useful for clipping other data to GC)
url = "https://services3.arcgis.com/JGF6qCAQFbROcocK/arcgis/rest/services/CamdenCountyMunicipalLayer/FeatureServer/0/query?f=geojson&where=(NAMELSAD%20IN%20(%27Gloucester%20City%20city%27))&outFields=*"

# Data is in epsg: 26918
EPSG = '26918'

# GET Request
mun_get = requests.get(url)

# No loop needed, just the one municipality
temp = mun_get.json()['features']
temp_df = pd.json_normalize(temp)
temp_geo = [shape(i['geometry']) for i in temp]

# Final df
mun_geo = gpd.GeoDataFrame(temp_df,
                           crs=EPSG,
                           geometry=temp_geo)

# Drop type, id, geometry.type, geometry.coordinates
drop_col = ['type', 'id', 'geometry.type', 'geometry.coordinates']
mun_geo = mun_geo.drop(columns=drop_col)

# Reproject to 3424
OUT_EPSG = '3424'
mun_geo = mun_geo.to_crs(epsg=OUT_EPSG)

# Write data to file
mun_geo.to_file(join(fr, 'ref', 'city_clip.gpkg'),
                driver='GPKG')

In [208]:
# Land Uses
url = "https://services3.arcgis.com/JGF6qCAQFbROcocK/arcgis/rest/services/DVRPC_2010_Land_Use/FeatureServer/0/query?f=geojson&where=(Mun_Name%20IN%20(%27Gloucester%20City%27))&outFields=*"
# Data is in epsg: 3424
EPSG = '3424'

# GET Request
lu_get = requests.get(url)

# No loop needed because only ~100 records
# Temp data frame
temp = lu_get.json()['features']
temp_df = pd.json_normalize(temp)
temp_geo = [shape(i['geometry']) for i in temp]

# Final df
lu_geo = gpd.GeoDataFrame(temp_df,
                          crs=EPSG,
                          geometry=temp_geo)
   

# Drop type, id, geometry.type, geometry.coordinates
drop_col = ['type', 'id', 'geometry.type', 'geometry.coordinates']
lu_geo = lu_geo.drop(columns=drop_col)

# Write data to file
lu_geo.to_file(join(fr, 'exposure', 'landuse.gpkg'),
               driver='GPKG')

# Download Social Vulnerability Data

In [5]:
# NOAA SOVI
url = 'https://coast.noaa.gov/htdata/SocioEconomic/SoVI2010/SoVI_2010_NJ.zip'

In [6]:
# NJ env. burd communities
url = "https://services1.arcgis.com/QWdNfRs7lkPq4g4Q/ArcGIS/rest/services/Overburdened_Communities_2020_Hosted/FeatureServer/0/query?where=NAME%3D%27Gloucester+City%27&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&relationParam=&returnGeodetic=false&outFields=*&returnGeometry=true&returnCentroid=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&defaultSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pgeojson&token="

In [7]:
# CEJST (download later)