In [4]:
# Enable IPython's autoreload extension in mode 2 so that imported
# modules are reloaded before each cell executes. This lets edits to
# the project's helper modules take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
import os
import json
from pathlib import Path
from os.path import join
os.environ["USE_PYGEOS"] = "0"
import geopandas as gpd
import pandas as pd
import numpy as np

from util.files import *
from util.const import *
from util.ddfs import *

In [6]:
# Hard-coded for now; the plan is for FIPS to arrive as an argument.
FIPS = '34007'
# The state abbreviation and nation are also hard-coded for now;
# eventually both should be derived from FIPS.
STATEABBR, NATION = 'NJ', 'US'

# The scenario and the DDF type are good candidates for arguments too.
# The main results use the 'Mid' scenario with the 'naccs' DDFs, while
# the sensitivity analysis needs the other scenarios and 'hazus'.
# A workflow manager such as snakemake would suit this well.
SCENARIO, DDF_TYPE = 'Mid', 'naccs'

In [7]:
# This notebook evaluates optimal elevation 
# per house across an ensemble
# Optimal elevation height for each house is returned based on
# the ensemble generated in benchmark_ensemble
# This is estimated w/o uncertainty (the benchmark estimates)
# as well

In [8]:
# The gist of this is to evaluate costs & benefits for each increment
# of elevation from 3 to 10 feet (I think it's up to 10 feet - will
# double check)
# There are values for costs from the FEMA report 
# that go in the config file
# This gets multiplied by sq ft of the building
# There is uncertainty in the inflation costs (relative to 2009 when
# the report was published) and the fixed costs (CLARA and
# NACCS reports provide us with a range)
# All of these values are in the config.yaml, and 
# the data for these estimates is in the resources/
# directory of the project repository
# For benefits, we'll do it "through" the DDF by moving along the
# x-axis of the DDF. We do this by changing the FFE, which we are
# treating under uncertainty. We are not treating the elevation
# as an uncertain amount for this case study. We need
# to estimate the losses with the new first-floor elevation adjusted
# height under uncertainty

# Load and Prepare Data

In [19]:
# Load the ensemble for the selected scenario (SCENARIO)
# There are only a few columns we need, so the frame is subset below
ens_filep = join(FO, 'ensemble_' + SCENARIO + '.pqt')
ens_df = pd.read_parquet(ens_filep)

# Also load the benchmark data
# There are only a few columns we need
bench_df = pd.read_parquet(join(FO, 'benchmark_loss.pqt'))

# Everything that we do here is based on the ensemble values
# That means we take the ffe variable in our ensemble df
# and adjust it by the heightening amount, re-estimate losses
# across all return periods, and re-estimate eal
# In fact, since depths are "fixed" in our case study
# we don't have to adjust the ffe variable, and can instead
# adjust the depth_ffe_* columns

# These are shared columns for subsetting
# We need found_type because it is used in
# elevation cost estimation
# We need sqft because it's a key variable for
# elevation cost estimation
# We need bldgtype for elevation cost estimation, too
sub_cols = ['fd_id', 'found_type', 'sqft', 'bldgtype']
# We need to add depth_ffe_* columns, one per return period
depth_ffe_cols = ['depth_ffe_' + x for x in RET_PERS]
sub_cols = sub_cols + depth_ffe_cols

# These are columns for the benchmark df
# We need fz_ddf for DDF estimation
bench_cols = ['fz_ddf'] + sub_cols

# We need to use DDF_TYPE argument to add either bld_types (naccs)
# or hazus_types to our ens_cols list
ddf_col = 'bld_types' if DDF_TYPE == 'naccs' else 'hazus_types'
# These are general columns for the ensemble df
# We need to keep track of SOW, and val_s is needed for
# estimating avoided losses in a particular SOW
ens_cols = ['val_s', 'sow_ind', ddf_col] + sub_cols

# Subset ens_df and bench_df on their column lists
# A bare .loc[list] indexes ROWS by label, so the column lists must go
# in the second position of .loc to select columns
ens_df = ens_df.loc[:, ens_cols]
bench_df = bench_df.loc[:, bench_cols]

In [None]:
# Benefit estimation relies on the depth-damage functions (DDFs)
# Every DDF input read below sits in the 'physical' subdirectory
_phys_dir = join(VULN_DIR_I, 'physical')

# Read the DDF tables
naccs_ddfs = pd.read_parquet(join(_phys_dir, 'naccs_ddfs.pqt'))
hazus_ddfs = pd.read_parquet(join(_phys_dir, 'hazus_ddfs.pqt'))
hazus_nounc = pd.read_parquet(join(_phys_dir, 'hazus_ddfs_nounc.pqt'))

# Read the helper dictionaries that are stored as JSON
with open(join(_phys_dir, 'hazus.json'), 'r') as fp:
    HAZUS_MAX_DICT = json.load(fp)

with open(join(_phys_dir, 'hazus_nounc.json'), 'r') as fp:
    HAZUS_MAX_NOUNC_DICT = json.load(fp)

with open(join(_phys_dir, 'naccs.json'), 'r') as fp:
    NACCS_MAX_DICT = json.load(fp)

# Keep the DDFs and helper dictionary that match the DDF_TYPE argument
if DDF_TYPE == 'naccs':
    benefit_ddfs = naccs_ddfs
    MAX_DICT = NACCS_MAX_DICT
else:
    benefit_ddfs = hazus_ddfs
    MAX_DICT = HAZUS_MAX_DICT

In [None]:
# From the config file take the inflation values, heightening values, 
# and fixed cost values. For heightening, we need to linearly interpolate
# for our structure specific heightening cost estimates. For the others, 
# we need to generate N_SOW length realizations. We can pre-populate
# a cost dataframe with this information and for each 
# foundation type, heightening combo, we will have
# the SOW specific cost estimate to apply to the structure. 
# The costs are applied against the expected annual losses
# to figure out the optimal heightening. We do not need to include
# discount rates at this step since these are uniformly applied
# in our case study since all the elevations are assumed to occur
# at the same time. Time-based elevations that account for changing
# cost estimates and discount rates are an extension of this work.

# Optimal Elevation Under Uncertainty

In [None]:
# To do this, we loop through each of the possible heightenings
# This effectively creates a new first floor elevation for each home
# and then we repeat the loss estimation from benchmark_ensemble
# for each structure
# In addition to this, we estimate the costs for each heightening
# This is done on a state of the world basis since it's informed
# by macroeconomic conditions. What this means is we generate
# a N_SOW length vector of the inflation value & the fixed cost
# value that is used for the elevation cost. The per sq. ft. costs
# are not changing across SOWs
# It seems like because DDFs are monotonically increasing with depth
# and costs are monotonically increasing with heightening (due to
# linear interpolation), you could use an approach like binary
# search on the heightening possibilities to save some search time
# but it won't take that long to loop through the 8 possible
# heightenings we're considering. Because the cost data is less than
# ideal, I don't want to introduce an approach like binary search that
# works only on the cost data we have (in reality, cost estimates
# may not be well suited to this approach)