# Configure

In [1]:
# Reload imported modules (e.g., the util package used below) automatically
# before each cell executes, so edits to the helper code are picked up
# without restarting the kernel
%load_ext autoreload
%autoreload 2

In [8]:
# Packages
import requests
import os
from os.path import join
from pathlib import Path
import pandas as pd

from util.download import *
from util.files import *
from util.const import *
from util.unzip import *
from util.exp import *
from util.ddfs import *

In [3]:
# Geographic identifiers for this analysis: county FIPS, state FIPS,
# state abbreviation, and nation.
# Every value is a one-element list. The framework currently processes
# a single county, but keeping lists leaves room to expand to several
# counties in the future
# TODO - consider defining these in json (or another config format)
# instead of as input in code
fips_args = dict(
    FIPS=['42101'],
    STATEFIPS=['42'],
    STATEABBR=['PA'],
    NATION=['US'],
)

# Scalar shortcuts for the single county and nation used in the cells below
FIPS, NATION = (fips_args[key][0] for key in ('FIPS', 'NATION'))

# Download (and unzip) data

In [11]:
# download_raw() fetches the raw data from the sources we specified
# in config.yaml

# Entries in URL_WILDCARDS look like {FIPS}: placeholders in a URL that
# have to be replaced with the matching code before downloading.
# Dropping the first and last character of a wildcard (the braces)
# gives its key in fips_args, and we take the first value stored
# under that key. The resulting wildcard -> value mapping is what
# download_raw() needs

wcard_dict = {
    wildcard: fips_args[wildcard[1:-1]][0]
    for wildcard in URL_WILDCARDS
}
download_raw(DOWNLOAD, wcard_dict)

Downloaded from: https://nsi.sec.usace.army.mil/nsiapi/structures?fips=42101
Downloaded from: https://www2.census.gov/geo/tiger/TIGER2022/TRACT/tl_2022_42_tract.zip
Downloaded from: https://www2.census.gov/geo/tiger/TIGER2022/BG/tl_2022_42_bg.zip
Downloaded from: https://www2.census.gov/geo/tiger/TIGER2022/TABBLOCK20/tl_2022_42_tabblock20.zip
Downloaded from: https://static-data-screeningtool.geoplatform.gov/data-versions/1.0/data/score/downloadable/1.0-communities.csv
Downloaded from: https://svi.cdc.gov/Documents/Data/2020/csv/states/SVI_2020_US.csv
Downloaded from: https://www2.census.gov/geo/tiger/TIGER2022/COUNTY/tl_2022_us_county.zip
Downloaded from: https://www2.census.gov/geo/tiger/TIGER2022/ZCTA520/tl_2022_us_zcta520.zip


In [14]:
# We call unzip_raw from util.unzip
# It is called with no arguments; the output below lists
# one "Unzipped: <name>" line per archive it handled
# NOTE(review): presumably this unpacks the archives fetched by
# download_raw() above - confirm in util/unzip.py
unzip_raw()

Unzipped: noaa
Unzipped: 10027236
Unzipped: county
Unzipped: zcta
Unzipped: block
Unzipped: bg
Unzipped: tract
Unzipped: dg
Unzipped: nfhl


# Prepare data for ensemble

## Exposure

In [4]:
# Exposure for this case study = single family houses from the
# national structure inventory (NSI) with at most 2 stories, since
# those are the properties we can represent structural uncertainty
# in depth-damage relationships for

# Selection criteria handed to get_struct_subset: the RES1 codes of
# the 'occtype' variable and num_story <= 2. The filter string refers
# to occtype_list through the @ prefix, so the two must stay in sync
occtype_list = ['RES1-1SNB', 'RES1-2SNB', 'RES1-1SWB', 'RES1-2SWB']
sub_string = 'occtype.isin(@occtype_list) and num_story <= 2'

# Functions from exp.py: turn the raw nsi data for the county into
# a gdf, then keep only our properties of interest
nsi_gdf = get_nsi_geo(FIPS)
nsi_sub = get_struct_subset(
    nsi_gdf,
    filter=sub_string,
    occtype_list=occtype_list,
)

# To save some memory, only the single family houses are written out
EXP_OUT_FILEP = join(EXP_DIR_I, FIPS, 'nsi_sf.gpkg')
prepare_saving(EXP_OUT_FILEP)
nsi_sub.to_file(EXP_OUT_FILEP, driver='GPKG')

## Reference

In [21]:
# Reference data gets clipped to a file representing our study
# boundaries. Here that is the county of Philadelphia, so we pull
# its row out of the national county shapefile to use as the clip file
county_filep = join(REF_DIR_UZ, NATION, 'county', 'tl_2022_us_county.shp')
county_gdf = gpd.read_file(county_filep)
clip_gdf = county_gdf.loc[
    county_gdf[REF_ID_NAMES_DICT['county']] == FIPS
]

# clip_ref_files goes through all unzipped ref files,
# clips them to the clip file geometry, and writes them
clip_ref_files(clip_gdf, FIPS)

Saved Ref: block
Saved Ref: bg
Saved Ref: tract
Saved Ref: county
Saved Ref: zcta


## Physical vulnerability

In [29]:
# Uniform uncertainty placed around the HAZUS point estimate DDFs.
# .3 was used in Zarekarizi et al. 2020
# https://www.nature.com/articles/s41467-020-19188-9
# and we adopt the same value for this case study
UNIF_UNC = 0.3

# NACCS DDFs only need a call to process_naccs. HAZUS DDFs go through
# process_hazus, where we additionally specify how to define the
# uncertainty around the point estimates.
# The functions could be expanded so the user chooses which building
# types to consider; right now that is baked-in to the implementation
# in unsafe.
# Both functions write out all the data needed for estimating losses
# later on. They are kept as two separate calls because not all
# analyses will want to represent deep uncertainty in DDFs and will
# only call one of the process functions
process_naccs()
process_hazus(unif_unc=UNIF_UNC)

NACCS DDFs Processed
HAZUS DDFs processed


## Social vulnerability

In [35]:
# Process national social vulnerability data
# sovi_list tells the function which datasets we want processed;
# in this case study, we use cejst, svi, and lmi
# Note: LMI was downloaded manually (not by the download code above)
# because of the way it is hosted at its url, so it has to be in
# place before this cell runs

sovi_list = ['cejst', 'svi', 'lmi']
process_national_sovi(sovi_list, FIPS)



Processed cejst
Processed CDC SVI
Processed low-mod income


## Policy

In [32]:
# We need NFHL for the ensemble and visualizations
# NOTE(review): the flood zone linking step below reads
# POL_DIR_I/<FIPS>/fld_zones.gpkg - presumably written by this call; confirm
process_nfhl(FIPS)

Wrote NFHL for county


## Link flood zones and references to structures

In [6]:
# --- Link flood zones ---
# I checked for issues like overlapping flood zones putting an NSI
# structure in multiple polygons and did not find any. That's good,
# but chances are there will be counties where this happens, and we
# will need code to handle these consistently for other case studies
keep_cols = ['fld_zone', 'fld_ar_id', 'static_bfe']
nfhl_filep = join(POL_DIR_I, FIPS, 'fld_zones.gpkg')
nfhl = gpd.read_file(nfhl_filep)
get_spatial_var(nsi_sub, nfhl, 'fz', FIPS, keep_cols)

# --- Link references ---
# Spatial joins for structures within all the reference spatial files
# (besides county). Outputs a file of fd_id (these are unique
# structure ids) linked to all of the reference ids
get_ref_ids(nsi_sub, FIPS)

Wrote out: fz
Linked reference to NSI: tract_id
Linked reference to NSI: block_id
Linked reference to NSI: bg_id
Linked reference to NSI: zcta_id


## Hazard

In [14]:
# Sample the inundation grids and write out the
# fd_id/depths dataframe
# Requires nsi_sub from the Exposure cell; the returned dataframe
# is also kept in memory as depth_df
depth_df = get_inundations(nsi_sub, FIPS)

Store NSI coordinates in list
Read in 0_2 depth grid
Sampled depths from grid
Added depths to list

Read in 01 depth grid
Sampled depths from grid
Added depths to list

Read in 02 depth grid
Sampled depths from grid
Added depths to list

Read in 10 depth grid
Sampled depths from grid
Added depths to list

Wrote depth dataframe


# Generate ensemble

In [None]:
# Get a dataframe conducive for loss estimation
# This procedure is separate from preparing data for the ensemble,
# so it will just take the county code to load in and merge
# all the relevant data
full_df = get_base_df(FIPS)

# Generate SOWs (states of the world) based on this dataframe. The
# function gives users the option to specify what to treat as
# uncertain. It could be improved to give the user more
# customization on the "how" part
# We pass in tract_id to specify in this case study that
# we will draw from basement and stories distributions defined
# at the tract level
# We specify hazus & naccs for the ddfs we want losses estimated
# under
# We specify val, stories, basement, and ffe as the features
# we want to represent with uncertainty
generate_ensemble(full_df,
                  'tract_id',
                  ['hazus', 'naccs'],
                  ['val', 'stories', 'basement', 'ffe'])

# Estimate losses

In [None]:
# We estimate losses for the full ensemble. For now, when deep
# uncertainty is specified in the DDF (i.e., you
# want to get damages with HAZUS and NACCS), they are estimated on
# the same SOWs and that's what is returned. There is no synthesis of
# deep uncertainties in UNSAFE yet.

# We also want benchmark estimates without uncertainty 
# which we can do with the full_df specified above