In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
from pathlib import Path
import sys
import glob
from os.path import join
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import pandas as pd
import numpy as np
import rasterio 
import rasterio.plot
import rasterio.mask
import json
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import ticker
from matplotlib.lines import Line2D
from matplotlib.patches import Patch, Rectangle
import matplotlib.colors as mpc
from matplotlib import colormaps, cm
from matplotlib.collections import PatchCollection
from pandas.plotting import parallel_coordinates

from util.files import *
from util.const import *
from util.ddfs import *
from util.figures import *

In [None]:
# Study-area identifiers. These are hard-coded for now: the plan is
# to pass FIPS in as an argument and to derive the state abbreviation
# and nation from it.
FIPS = "34007"
STATEABBR, NATION = "NJ", "US"

# Scenario and ddf type are also candidates for arguments.
# The main results use the 'Mid' scenario with the 'naccs' ddfs;
# the sensitivity analysis will need the other scenarios and 'hazus'.
scenarios = ["Mid"]
ddf_types = ["naccs"]

# Load Data

In [None]:
# Geospatial reference layers for this FIPS: the clip, tract, and
# block group geopackages are read in that order
fips_ref_dir = join(REF_DIR_I, FIPS)
clip_geo, tract_geo, bg_geo = (
    gpd.read_file(join(fips_ref_dir, layer + ".gpkg"))
    for layer in ("clip", "tract", "bg")
)
# Structure layer (nsi_sf.gpkg) from the exposure directory
nsi_geo = gpd.read_file(join(EXP_DIR_I, FIPS, "nsi_sf.gpkg"))

# Social vulnerability layers, read in the order lmi, ovb, cejst, sovi
fips_social_dir = join(VULN_DIR_I, 'social', FIPS)
lmi, ovb, cejst, sovi = (
    gpd.read_file(join(fips_social_dir, name + '.gpkg'))
    for name in ('lmi', 'ovb', 'cejst', 'sovi')
)
# The indicators dataframe, used to subset for maps
c_ind = pd.read_parquet(join(fips_social_dir, 'c_indicators.pqt'))

In [None]:
# Inputs required by the figures below

# Objective evaluations for each policy
obj_filep = join(FO, 'pol_obj_vals.pqt')
objs = pd.read_parquet(obj_filep)

# Ids of the houses elevated under each policy
elev_ids_filep = join(FO, 'pol_elev_ids.json')
with open(elev_ids_filep, 'r') as fp:
    elev_ids = json.load(fp)

# Several plots use the expected value of a metric across SOWs, so
# for every scenario / ddf type we load the ensemble, attach the
# optimal elevation results, and average per structure (fd_id).
# These are the columns that get averaged:
agg_cols = ['rel_eal', 'npv_opt', 'pv_resid', 'elev_invst',
            'resid_rel_eal', 'elev_ft', 'val_s']

# Keyed by '<scenario>_<ddf type>'
ens_agg_dfs = {}

for scen in scenarios:
    ens_filep = join(FO, f'ensemble_{scen}.pqt')
    ens_df = pd.read_parquet(ens_filep)
    print(f'Load data: {scen}')

    for ddf_type in ddf_types:
        opt_elev_filename = f'ens_opt_elev_{ddf_type}_{scen}.pqt'
        opt_elev_df = pd.read_parquet(join(EXP_DIR_I, FIPS, opt_elev_filename))
        print(f'Load opt elev: {ddf_type}')

        # Attach the optimal elevation results to each ensemble row
        # (matched on fd_id and sow_ind; clashing column names from
        # the optimal elevation frame get an '_opt' suffix), then
        # derive the metrics used in the figures
        eal_col = f'{ddf_type}_eal'
        ens_df_m = (
            ens_df
            .merge(opt_elev_df,
                   on=['fd_id', 'sow_ind'],
                   suffixes=['', '_opt'])
            .assign(
                rel_eal=lambda d: d[eal_col] / d['val_s'],
                npv_opt=lambda d: d['pv_avoid'] - d['pv_cost'],
                elev_ft=lambda d: d['opt_elev'].astype(int),
            )
        )

        # Mean of each metric over the ensemble, one row per structure
        ens_agg = ens_df_m.groupby('fd_id')[agg_cols].mean().reset_index()

        # Join onto the structure geometries so the values can be mapped
        ens_agg_plot = nsi_geo.merge(ens_agg, how='inner', on='fd_id')

        ens_agg_dfs[f'{scen}_{ddf_type}'] = ens_agg_plot

        print('Store gdf of aggregated values\n')

In [None]:
# Keep only the areas that meet each indicator's threshold:
# sovi above .6, Lowmod_pct above .5, and every ovb_crit value
# other than 'Adjacent'
sovi = sovi.loc[sovi['sovi'].gt(.6)]
lmi = lmi.loc[lmi['Lowmod_pct'].gt(.5)]
ovb = ovb.loc[ovb['ovb_crit'].ne('Adjacent')]

# Main Figures

## Figure 2

In [None]:
# Figure 2 (written under FIG_DIR/Figure2)
# Community layers and their short names, in matching order
comm_list = [sovi, cejst, ovb, lmi]
comm_names = ['sovi', 'cejst', 'ovb', 'lmi']

# Colormap passed to the plot as risk_cmap
# Trying 'sunset' from Paul Tol
# https://cran.r-project.org/web/packages/khroma/
# vignettes/tol.html#diverging-data
cmap_eal_l = ['#FEDA8B', '#FDB366',
              '#F67E4B', '#DD3D2D', '#A50026']
cmap_eal = mpc.LinearSegmentedColormap.from_list("", cmap_eal_l)

# Display title for each community layer, keyed by its short name
title_dict = {
    'lmi': 'FHA Low-Moderate Income',
    'sovi': 'CDC Socially Vulnerable' ,
    'cejst': 'Justice40 Community',
    'ovb': 'NJ Overburdened'
}

# Helps to zoom in on places where at-risk homes are.
# The extent comes from the first configured scenario / ddf type
# instead of a hard-coded 'Mid_naccs' key, so the cell keeps working
# when `scenarios` or `ddf_types` change. Every figure produced by
# the loop below shares this one extent.
ref_scenario = scenarios[0] + '_' + ddf_types[0]
total_bounds = ens_agg_dfs[ref_scenario].geometry.total_bounds
# Padding added on every side, in the units of the layer's coordinates
buffer = .001
minx = total_bounds[0] - buffer
maxx = total_bounds[2] + buffer
miny = total_bounds[1] - buffer
maxy = total_bounds[3] + buffer
bounds = [minx, miny, maxx, maxy]

for scen in scenarios:
    for ddf_type in ddf_types:

        save_filename = join(FIG_DIR, 'Figure2',
                             scen + '_' + ddf_type + '.png')
        prepare_saving(save_filename)
        ens_plot = ens_agg_dfs[scen + '_' + ddf_type]
        
        plot_risk_disadv(ens_plot, comm_list, comm_names, title_dict,
                         tract_geo, save_filename, risk_cmap=cmap_eal,
                         bounds=bounds)

## Figure 3

In [None]:
# Figure 3 (written under FIG_DIR/Figure3)
# Budget passed to plot_alloc
budget = 3e6

# Budget in millions for the file name, e.g. 3e6 -> '3e6' and
# 1.5e6 -> '1.5e6'. Taking only the first character of str(budget)
# would give '1e6' for 1e6, 1.5e6 and 10e6 alike, and the figures
# would overwrite each other.
budget_label = f'{budget / 1e6:g}e6'

# Sorting criteria passed to plot_alloc
sort_cols = ['npv_opt',
             'sovi', 
             'avoid_rel_eal',
             'lmi',
             'rel_eal',
             'cejst']

# Title text for each sorting criterion
# NOTE: this rebinds the title_dict name used for Figure 2
title_dict = {
    'lmi': 'Criterion: Majority of Benefits in\nLow-Mod Income Block Groups',
    'npv_opt': 'Criterion: Highest to Lowest\nNet Benefit' ,
    'sovi': 'Criterion: Majority of Benefits in\nCDC Socially Vulnerable',
    'rel_eal': 'Criterion: Highest to Lowest\nInitial Risk Burden',
    'cejst': 'Criterion: Majority of Benefits in\nJustice40 Communities',
    'avoid_rel_eal': 'Criterion: Highest to Lowest\nReduction in Risk Burden'
}

for scen in scenarios:
    for ddf_type in ddf_types:
        scenario = scen + '_' + ddf_type
        save_filename = join(FIG_DIR, 'Figure3',
                             scenario + '_budget_' +
                             budget_label + '_.png')
        prepare_saving(save_filename)
        ens_plot = ens_agg_dfs[scenario]
        
        plot_alloc(ens_plot, elev_ids, scenario, budget, sort_cols,
                   title_dict, save_filename)

## Figure 4

In [None]:
# Figure 4: the costs of meeting objectives

# One color per sorting rule
color_dict = {
    'sovi': "#33BBEE",
    'lmi': "#0077BB",
    'cejst': "#009988",
    'avoid_rel_eal': "#EE3377",
    'rel_eal': "#CC3311",
    'npv_opt': "#EE7733",
}

# Column holding the investment equity objective
avoid_eq_col = 'avoid_eq1'

# Objective columns and their display names, in matching order
obj_cols = ['npv_plot', 'pv_resid_plot', avoid_eq_col, 'resid_eq']
obj_names = [
    'Net Benefit ($M)',
    'Residual Risk ($M)',
    'Inequity in Investment',
    'Inequity in Residual Risk',
]

# Objective columns passed to plot_objcst as min_obj_cols
min_obj_cols = ['resid_eq', 'pv_resid_plot', avoid_eq_col]

# The 'ovb' rule is left out of this comparison since it
# hasn't been used this way for allocation yet
objs_plot = objs.loc[objs['sort'] != 'ovb']

for scen in scenarios:
    for ddf_type in ddf_types:
        scenario = f'{scen}_{ddf_type}'
        save_filename = join(FIG_DIR, 'Figure4', f'{scenario}.png')
        prepare_saving(save_filename)

        plot_objcst(objs_plot, obj_cols, obj_names,
                    scenario, color_dict, min_obj_cols,
                    avoid_eq_col,
                    save_filename)


# Main Summary Stats

## Elevation project budgets

In [None]:
# Estimate the distribution of budgets for previous elevation
# projects, expressed in 2022 dollars
# Load in fema assistance/grants
hma_proj_filep = join(FR, 'pol', 'fema_assistance',
                      'HazardMitigationAssistanceProjects.csv')

hma_proj = pd.read_csv(hma_proj_filep)

# Subset from hma_proj.projectType.unique() for
# elevation of private structures
# The entries are a bit messy and more sophisticated
# data preparation could get a better estimate of the budgets
# This is a good enough first pass for getting an idea
# of the distribution of budgets for these projects
proj_types = ['202.1: Elevation of Private Structures - Riverine',
              '202.2: Elevation of Private Structures - Coastal',
              ]
# .copy() gives an independent frame, so the columns added below do
# not trigger SettingWithCopyWarning on a slice of hma_proj
elev_proj = hma_proj[hma_proj['projectType'].isin(proj_types)].copy()

# add a year column
# We'll use the dateApproved year for a best guess of 
# what year the cost estimates reflect
elev_proj['year'] = pd.to_datetime(elev_proj['dateApproved']).dt.year

# Today price = historic_price * (2022 index / historic_index) 
# Read in the construction deflator for single family homes
upper_infl_filep = join(ABS_DIR, 'resources',
                        'construction_deflator_new_sfh.xls')
upper_infl = pd.read_excel(upper_infl_filep, header=4)

# We will groupby on year for the average Laspeyres (Fixed) index
# Then we will create the 2022/historic index ratio for each year
upper_infl['year'] = upper_infl['Date'].dt.year
# Limit to the years from the earliest project year through 2022
# We are using 2022 dollars because we have the whole year of
# data for that
upper_infl_lim = upper_infl[(upper_infl['year'] >= elev_proj['year'].min()) &
                            (upper_infl['year'] <= 2022)]
upper_infl_gb = upper_infl_lim.groupby('year')['Laspeyres (Fixed)'].mean()
# Reset index, rename, and get our deflator for each year
cost_deflate = upper_infl_gb.reset_index()
cost_deflate.columns = ['year', 'laspeyres']
base_index = cost_deflate.loc[cost_deflate['year'] == 2022,
                              'laspeyres'].iloc[0]
cost_deflate['deflator'] = base_index / cost_deflate['laspeyres']

# Use the cost_deflate df to get a dict of year to deflator values
# Then deflate the project amount column in elev proj
# Years missing from the deflator table (e.g. approvals after 2022)
# map to NaN, so those projects get a NaN cost2022
cost_d_map = dict(zip(cost_deflate['year'], cost_deflate['deflator']))
elev_proj['defl'] = elev_proj['year'].map(cost_d_map)
elev_proj['cost2022'] = elev_proj['projectAmount']*elev_proj['defl']

# Upper percentiles of the 2022-dollar project costs, divided by 1e6
pct75 = str((elev_proj['cost2022'].quantile(.75)/1e6).round(3))
pct80 = str((elev_proj['cost2022'].quantile(.8)/1e6).round(3))
pct90 = str((elev_proj['cost2022'].quantile(.9)/1e6).round(3))
pct95 = str((elev_proj['cost2022'].quantile(.95)/1e6).round(3))

print('75th%ile of HMA project costs: ' + pct75)
print('80th%ile of HMA project costs: ' + pct80)
print('90th%ile of HMA project costs: ' + pct90)
print('95th%ile of HMA project costs: ' + pct95)

## Cost effectiveness summaries

In [None]:
# Compare the cost of reaching the same level of inequity in
# residual risk under risk burden sorting vs. the other criteria

# Rows for the risk burden sorting rules at the $1M budget;
# the first matching row supplies both the objective value and the cost
is_risk_burden_1m = (objs['sort'].isin(['rel_eal', 'avoid_rel_eal']) &
                     (objs['budget'] == 1e6))
low_resid_eq = objs.loc[is_risk_burden_1m, 'resid_eq'].iloc[0]
low_cost_resid_eq = objs.loc[is_risk_burden_1m, 'up_cost'].iloc[0]

# Commensurate cost of other policies at this level
# npv_opt sorting serves as the reference since the other rules
# all have the same costs & objective values at this point
is_npv_at_level = (objs['sort'].isin(['npv_opt']) &
                   (objs['resid_eq'] <= low_resid_eq))
same_resid_eq_min_cost = objs.loc[is_npv_at_level, 'up_cost'].min()

print('Risk burden sorting')
print(f'Objective value at $1M budget: {np.round(low_resid_eq, 3)!s}')
print(f'Cost at $1M budget: {np.round(low_cost_resid_eq, 3)!s}')
print('\nOther rules')
print('Lowest cost policy with same objective value: '
      f'{np.round(same_resid_eq_min_cost, 3)!s}')
print(f'Ratio of costs: {np.round(same_resid_eq_min_cost/low_cost_resid_eq)!s}')