In [None]:
# Enable IPython's autoreload extension in mode 2 so that imported
# modules (e.g. the `unsafe` package) are reloaded before each cell
# runs and edits to them are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [None]:
# Packages
import requests
import os
import json
from os.path import join
from pathlib import Path
import yaml
from yaml.loader import SafeLoader
from shapely.geometry import shape
import pandas as pd
import geopandas as gpd
import numpy as np

import unsafe.download as undown
import unsafe.files as unfile
import unsafe.unzip as ununzip
import unsafe.exp as unexp
import unsafe.ddfs as unddf
import unsafe.ensemble as unens

# Configure

In [None]:
# Geographic identifiers for the study area: county FIPS, state FIPS,
# state abbreviation and nation. Each value is a one-element list
# because the URL wildcard lookup in the download cell reads the
# first element of each entry.
fips_args = dict(
    FIPS=['34007'],
    STATEFIPS=['34'],
    STATEABBR=['NJ'],
    NATION=['US'],
)

# Scalar shortcuts: the first (and only) entry of each list
FIPS, NATION, STATEABBR, STATEFIPS = (
    fips_args[name][0]
    for name in ('FIPS', 'NATION', 'STATEABBR', 'STATEFIPS')
)

In [None]:
# Project root is two levels above the current working directory;
# the YAML config that drives the workflow lives in <root>/config
ABS_DIR = Path().absolute().parents[1]
CONFIG_FILEP = join(ABS_DIR, 'config', 'config.yaml')

# Parse the config with the safe loader
with open(CONFIG_FILEP) as config_file:
    CONFIG = yaml.load(config_file, Loader=SafeLoader)

# ---- Download settings ----
# Wildcards that get substituted into urls
URL_WILDCARDS = CONFIG['url_wildcards']
# File extensions for api endpoints
API_EXT = CONFIG['api_ext']

# ---- Exposure / reference settings ----
# CRS constant for the NSI
NSI_CRS = CONFIG['nsi_crs']
# Dictionary of ref_names
REF_NAMES_DICT = CONFIG['ref_names']
# Dictionary of ref_id_names
REF_ID_NAMES_DICT = CONFIG['ref_id_names']

# ---- Ensemble settings ----
# Coefficient of variation for structure values
COEF_VARIATION = CONFIG['coef_var']
# First floor elevation dictionary
FFE_DICT = CONFIG['ffe_dict']
# CRS of the clip boundary (gloucester city is used for clipping)
CLIP_CRS = CONFIG['clip_crs']
# Number of states of the world
N_SOW = CONFIG['sows']

# ---- Hazard model settings ----
# Hazard subdirectory of the scenario-based runs
HAZ_DIR_SUB = CONFIG['haz_dir_sub']
# Return period list
RET_PERS = CONFIG['RPs']
HAZ_FILEN = CONFIG['haz_filename']
# CRS for depth grids
HAZ_CRS = CONFIG['haz_crs']
# Hazard scenarios
SCENARIOS = CONFIG['scenarios']

# ---- Files to download ----
# Flatten the nested download spec (keys joined with '_') and transpose
DOWNLOAD = pd.json_normalize(CONFIG['download'], sep='_').T

In [None]:
# ---- Top-level data directories ----
# FR: raw data
# FE: external raw data - where our hazard data should be
# FI: interim (processed) data
# FO: results; "O" because results can be thought of as output
FR = join(ABS_DIR, "data", "raw")
FE = join(FR, "external")
FI = join(ABS_DIR, "data", "interim")
FO = join(ABS_DIR, "data", "results")

# ---- "Raw" data directories ----
# Exposure, vulnerability (vuln), administrative reference files
# and policy (pol is for NFHL)
EXP_DIR_R, VULN_DIR_R, REF_DIR_R, POL_DIR_R = (
    join(FR, sub) for sub in ("exp", "vuln", "ref", "pol")
)
# Haz is for depth grids and sits under the external directory
HAZ_DIR_R = join(FE, "haz")

# ---- Unzipped data directories ----
# Unzipped data is processed and then moved to the interim
# directory. The depth grids (haz) and ddfs (vuln) were unzipped
# into the "external"/ subdirectory.
UNZIP_DIR = join(FR, "unzipped")
HAZ_DIR_UZ, DDF_DIR_UZ = (
    join(UNZIP_DIR, "external", sub) for sub in ("haz", "vuln")
)
POL_DIR_UZ, REF_DIR_UZ, VULN_DIR_UZ = (
    join(UNZIP_DIR, sub) for sub in ("pol", "ref", "vuln")
)

# ---- "Interim" data directories ----
# Same layout as raw: haz is for depth grids, pol is for NFHL
EXP_DIR_I, VULN_DIR_I, REF_DIR_I, HAZ_DIR_I, POL_DIR_I = (
    join(FI, sub) for sub in ("exp", "vuln", "ref", "haz", "pol")
)

# Download and unzip data

In [None]:
# Build the wildcard -> value lookup for this study area. Stripping
# the first and last character of a wildcard gives its fips_args key.
wcard_dict = {}
for wildcard in URL_WILDCARDS:
    wcard_dict[wildcard] = fips_args[wildcard[1:-1]][0]

# Download the files listed in the config into the raw directory
undown.download_raw(DOWNLOAD, wcard_dict, FR, API_EXT)

In [None]:
# Unzip the downloaded raw data (FR) into the unzip directory (UNZIP_DIR)
ununzip.unzip_raw(FR, UNZIP_DIR)

# Prepare data for ensemble

## Clip file

In [None]:
# Load the boundary that was downloaded from the county's REST API server
clip_filep = join(REF_DIR_R, FIPS, 'clip.json')
with open(clip_filep) as clip_file:
    clip_data = json.load(clip_file)

clip_features = clip_data['features']

# Flatten the features into a table; this form is easier to turn
# into a geodataframe for clipping
clip_df = pd.json_normalize(clip_features)

# Build one shapely geometry per feature straight from the
# original json geometry
clip_geo = list(map(shape, (feat['geometry'] for feat in clip_features)))

# Attach the geometries as the geometry column, then drop the json
# bookkeeping columns (type, id, geometry.type, geometry.coordinates)
drop_col = ['type', 'id', 'geometry.type', 'geometry.coordinates']
clip_gdf = gpd.GeoDataFrame(
    clip_df, crs=CLIP_CRS, geometry=clip_geo
).drop(columns=drop_col)

# Save the clip boundary to the interim directory
clip_out_filep = join(FI, 'ref', FIPS, 'clip.gpkg')
unfile.prepare_saving(clip_out_filep)
clip_gdf.to_file(clip_out_filep, driver='GPKG')

## Exposure

In [None]:
# Load the NSI data for the county
nsi_gdf = unexp.get_nsi_geo(FIPS, NSI_CRS, EXP_DIR_R)

# Values passed into get_struct_subset: keep structures whose
# occtype is one of the RES1 types below and that have num_story <= 2.
# The query string refers to occtype_list by name, so keep that name.
occtype_list = ['RES1-1SNB', 'RES1-2SNB', 'RES1-1SWB', 'RES1-2SWB']
sub_string = 'occtype.isin(@occtype_list) and num_story <= 2'
nsi_filt = unexp.get_struct_subset(
    nsi_gdf,
    filter=sub_string,
    occtype_list=occtype_list,
)

# To save some memory in this case study, only the single family
# houses are written out
EXP_OUT_FILEP = join(EXP_DIR_I, FIPS, 'nsi_sf.gpkg')
unfile.prepare_saving(EXP_OUT_FILEP)

# Clip to the clip boundary (both are in the same CRS)
nsi_clip_out = gpd.clip(nsi_filt, clip_gdf)

# Keep structures with sqft <= the 99th percentile. This is an
# arbitrary cutoff: the max value after the steps above is 400858,
# which is way too large, and other large values are dropped as well.
# For the GC case study the cutoff is 2696.41999.
sqft_clip = nsi_clip_out['sqft'].quantile(.99)
within_cutoff = nsi_clip_out['sqft'] <= sqft_clip

# NOTE(review): the cutoff is applied only to the file written here;
# nsi_clip_out itself stays unfiltered and is what later cells use.
# Confirm that this is intended.
nsi_clip_out.loc[within_cutoff].to_file(EXP_OUT_FILEP, driver='GPKG')

## Reference

In [None]:
# Clip the reference data to the GC clip boundary (clip_gdf) built
# earlier. Inputs are taken from the unzipped reference directory
# and the interim reference directory is passed as the destination;
# REF_NAMES_DICT is the dictionary of ref_names from the config.
unexp.clip_ref_files(clip_gdf, FIPS,
                     REF_DIR_UZ, REF_DIR_I, REF_NAMES_DICT)



## Physical vulnerability

In [None]:
# Process the NACCS ddfs from the unzipped external vuln directory
# into the interim vuln directory
unddf.process_naccs(DDF_DIR_UZ, VULN_DIR_I)
# The HAZUS ddfs below are not used in this case study, but
# because of a pending issue with UNSAFE, we need to process these
unddf.process_hazus(DDF_DIR_UZ, VULN_DIR_I)


## Social vulnerability

### Prepare the social vulnerability data

In [None]:
# Process national social vulnerability data
# NOTE(review): the linking cell further down reads 'sovi.gpkg' from
# the interim social directory, but 'sovi' is not requested here -
# confirm that one of these datasets produces that file
sovi_list = ['cejst', 'svi']
unexp.process_national_sovi(sovi_list, FIPS,
                            VULN_DIR_R, REF_DIR_I, VULN_DIR_I)

In [None]:
# Process lmi & nj ovb at block group
# (not yet in unsafe)
# Both layers are written as GeoPackages to the interim social
# vulnerability directory for this county.
bg_filep = join(REF_DIR_I, FIPS, 'bg.gpkg')
bg_geo = gpd.read_file(bg_filep)

# ---- Process lmi ----
# Read data
lmi_filename = 'ACS_2015_lowmod_blockgroup_all.xlsx'
lmi_filep = join(VULN_DIR_R, 'social', NATION, lmi_filename)
lmi = pd.read_excel(lmi_filep, engine='openpyxl')
# Get GEOID for merge (last 12 characters is the bg id)
lmi['GEOID'] = lmi['GEOID'].str[-12:]

# Retain GEOID and Lowmod_pct, and attach the block group geometry.
# The inner merge keeps only block groups present in both tables.
keep_cols = ['GEOID', 'Lowmod_pct']
lmi_f = bg_geo[['GEOID', 'geometry']].merge(lmi[keep_cols],
                                            on='GEOID',
                                            how='inner')

# Write file. prepare_saving is called first, as at every other
# write site in this notebook, so the write does not depend on an
# earlier cell having created the output directory.
lmi_out_filep = join(VULN_DIR_I, 'social', FIPS, 'lmi.gpkg')
unfile.prepare_saving(lmi_out_filep)
lmi_f.to_file(lmi_out_filep, driver='GPKG')


# ---- Process NJ overburdened ----
# Read data
ovb_filep = join(VULN_DIR_UZ, 'social', STATEABBR,
                 'Govt_census_group_2022_EJ.gdb')
ovb = gpd.read_file(ovb_filep)

# Rename some columns
ovb = ovb.rename(columns={'OVERBURDENED_COMMUNITY_CRITERI': 'ovb_crit'})

# Keep a subset of columns
ovb_f = ovb[['GEOID', 'ovb_crit', 'geometry']]

# The data already is limited to overburdened categories

# Subset to our study area: reproject to the clip CRS, then clip
ovb_reproj = ovb_f.to_crs(clip_gdf.crs)
ovb_clipped = gpd.clip(ovb_reproj, clip_gdf)

# Write file (same directory preparation as above)
ovb_out_filep = join(VULN_DIR_I, 'social', FIPS, 'ovb.gpkg')
unfile.prepare_saving(ovb_out_filep)
ovb_clipped.to_file(ovb_out_filep, driver='GPKG')

### Link the social vulnerability data to structures

In [None]:
# Loop through the community boundary data
# Get links to the single family home data
# Store in single dataframe
# Write out
#
# The result is one boolean column per layer, indexed by fd_id:
# True when the structure lies within a qualifying polygon of
# that layer, False otherwise.

sovi_dir = join(VULN_DIR_I, 'social', FIPS)
# NOTE(review): 'sovi' is read below, but the national processing
# cell above only requests 'cejst' and 'svi' - confirm that
# sovi.gpkg is actually produced on a fresh run
filenames = ['lmi', 'sovi', 'ovb', 'cejst']

# Limit the NSI to our fd_id and geometry column
# (identical for every layer, so do it once)
keep_col_nsi = ['fd_id', 'geometry']
nsi_sub = nsi_clip_out[keep_col_nsi]

# Start the list with a frame holding only the fd_id index
# This makes sure every property is
# linked to the social vulnerability categories
sovi_df_list = [nsi_clip_out[['fd_id']].set_index('fd_id')]

for fn in filenames:
    # Read in each gpkg
    fp = join(sovi_dir, fn + '.gpkg')
    sovi_geo = gpd.read_file(fp)

    # Subset sovi_geo based on thresholds
    # For cejst this is already done
    # For lmi, sovi and ovb need to do the filter as follows
    if fn == 'lmi':
        # See https://www.hudoig.gov/reports-publications/
        # report/cdbg-dr-program-generally-
        # met-low-and-moderate-income-requirements
        # The statutory threshold is 50%, so retain those
        sovi_sub = sovi_geo[sovi_geo['Lowmod_pct'] > .5]
    elif fn == 'sovi':
        # Subset to threshold for FMA (from 2022 NOFO)
        sovi_sub = sovi_geo[sovi_geo['sovi'] > .6]
    elif fn == 'ovb':
        # Keep everything except the 'Adjacent' criterion
        sovi_sub = sovi_geo[sovi_geo['ovb_crit'] != 'Adjacent']
    else:
        sovi_sub = sovi_geo

    # Only need the geometry for sovi_sub
    sovi_sub = sovi_sub[['geometry']]

    # Reproj nsi_sub to the reference crs
    nsi_reproj = nsi_sub.to_crs(sovi_geo.crs)

    # Do a spatial join: keeps structures within a polygon
    nsi_sovi = gpd.sjoin(nsi_reproj, sovi_sub, predicate='within')

    # Add indicator column
    nsi_sovi[fn] = True

    # A structure inside overlapping polygons comes back once per
    # polygon. Keep a single row per fd_id so the index is unique;
    # the column-wise concat below fails on duplicated labels.
    sovi_flags = (nsi_sovi[['fd_id', fn]]
                  .drop_duplicates(subset='fd_id')
                  .set_index('fd_id'))

    # Append this to our sovi_df_list
    sovi_df_list.append(sovi_flags)

    # Helpful message
    print('Linked vulnerability to NSI: ' + fn)

# After the concat every linked entry is True and every unlinked
# entry is missing, so notna() gives the boolean indicator table
# directly (equivalent to filling the gaps with False)
sovi_df_f = pd.concat(sovi_df_list, axis=1).notna()
sovi_out_filepath = join(sovi_dir, 'c_indicators.pqt')
sovi_df_f.to_parquet(sovi_out_filepath)

## Policy

In [None]:
# We need NFHL for the ensemble and visualizations
# The unzipped policy directory and the interim policy directory are
# passed in; a later cell reads fld_zones.gpkg from the interim
# policy directory for this county.
unexp.process_nfhl(FIPS,
                   POL_DIR_UZ,
                   POL_DIR_I)

## Link flood zones and references to structures

In [None]:
# ---- Flood zones ----
# Read the processed NFHL flood zones and link the listed
# attributes to the structures under the 'fz' name
nfhl_filep = join(POL_DIR_I, FIPS, 'fld_zones.gpkg')
nfhl = gpd.read_file(nfhl_filep)
keep_cols = ['fld_zone', 'fld_ar_id', 'static_bfe']
unexp.get_spatial_var(
    nsi_clip_out, nfhl, 'fz', FIPS, EXP_DIR_I, keep_cols
)

# ---- References ----
# Link the structures to the reference ids named in the config
unexp.get_ref_ids(
    nsi_clip_out, FIPS, REF_ID_NAMES_DICT, REF_DIR_I, EXP_DIR_I
)

## Hazard

In [None]:
# The files unzipped into haz_dir_uz/{haz_dir_sub}_{scen}/haz_dir_sub
# We want to organize it a bit better: every .asc depth grid is
# moved to haz_dir_i/haz_dir_sub/{ret_per}_{scen}.asc, where ret_per
# is the last "_"-separated token of the file name before its
# first "."

# Loop through the per-scenario unzip directories
for scen in SCENARIOS:
    haz_from_dir = join(HAZ_DIR_UZ,
                        HAZ_DIR_SUB + "_" + scen,
                        HAZ_DIR_SUB)

    # List the grids up front: the loop moves files out of the
    # tree that is being searched
    pathlist = list(Path(haz_from_dir).glob('**/*.asc'))
    for path in pathlist:
        # Parse the return period from the file name only. Splitting
        # the full path on "." breaks as soon as any parent directory
        # contains a dot (e.g. a user name or a versioned folder).
        ret_per = path.name.split(".")[0].split("_")[-1]
        haz_to_file = join(HAZ_DIR_I, HAZ_DIR_SUB,
                           ret_per + "_" + scen + ".asc")

        unfile.prepare_saving(haz_to_file)
        path.rename(haz_to_file)

    print("Moved and renamed " + scen + " depth grids")


In [None]:
# Sample the inundation grids for the structures in nsi_clip_out.
# The grids are read from the organized interim hazard directory,
# for the configured return periods and scenarios. The resulting
# fd_id/depths dataframe is written out in the next cell.
depth_df = unexp.get_inundations(nsi_clip_out,
                                 HAZ_CRS, RET_PERS,
                                 join(HAZ_DIR_I, HAZ_DIR_SUB),
                                 HAZ_FILEN,
                                 scens=SCENARIOS)

In [None]:
# Save the table that links fd_id to depths. Its columns correspond
# to ret_per (i.e. 500, 100, 50, 10 in our case study).
nsi_depths_out = join(EXP_DIR_I, FIPS, 'nsi_depths.pqt')
unfile.prepare_saving(nsi_depths_out)

# Move the index into a regular column before writing
depth_out_df = depth_df.reset_index()
depth_out_df.to_parquet(nsi_depths_out)

# Generate ensemble

In [None]:
# Get a dataframe conducive for loss estimation
# This procedure is separate from preparing data for the ensemble
# so will just take the county code to load in and merge
# all the relevant data
base_df = unens.get_base_df(FIPS, EXP_DIR_I)

# Depth columns are named depth_{scenario}_{return period}.
# The full set does not change between scenarios, so build it once.
d_cols_all = {'depth_' + s + '_' + rp for rp in RET_PERS for s in SCENARIOS}

# Properties at risk are identified with the largest return period
# in the config (500 in this case study) rather than a hard-coded one
max_rp = max(RET_PERS, key=int)

# Get the ensemble
# To make this run faster, we subset
# base_df on properties at risk in each scenario
for scen in SCENARIOS:
    print('Scenario: ' + scen)
    # Subset to depths greater than 0 at the largest return period
    base_df_temp = base_df[base_df['depth_' + scen + '_' + max_rp] > 0]
    # Want to remove the depth columns for other scenarios
    d_cols_scen = {'depth_' + scen + '_' + rp for rp in RET_PERS}
    d_cols_drop = list(d_cols_all - d_cols_scen)
    base_df_temp = base_df_temp.drop(columns=d_cols_drop)

    # If depths in other return periods are 0, make them nan
    scen_cols = sorted(d_cols_scen)
    base_df_temp.loc[:, scen_cols] = (
        base_df_temp.loc[:, scen_cols].replace({0: np.nan})
    )

    # Want to remove scen reference from columns
    base_df_temp.columns = [x.replace('_' + scen, '')
                            for x in base_df_temp.columns]

    ens_df_losses = unens.generate_ensemble(nsi_clip_out,
                                            base_df_temp,
                                            ['naccs'],
                                            ['ffe', 'val_struct'],
                                            N_SOW,
                                            FFE_DICT,
                                            COEF_VARIATION,
                                            VULN_DIR_I)

    # Pull out the loss columns and rename them to loss_{return period}
    col_sub = [x for x in ens_df_losses.columns if 'loss' in x]
    loss_sub = ens_df_losses[col_sub]
    # Just get return period reference
    ret_per_ints = [int(x.split('_')[-1]) for x in loss_sub.columns]
    loss_sub.columns = ['loss_' + str(x) for x in ret_per_ints]
    rp_list = sorted(ret_per_ints)
    eals = unddf.get_eal(loss_sub, rp_list)

    # NOTE(review): this concat aligns on index labels; it assumes
    # eals lines up with ens_df_losses' index - confirm against
    # what get_eal returns
    ens_df_out = pd.concat([ens_df_losses, pd.Series(eals, name='eal')],
                           axis=1)

    # One results file per scenario
    out_file = 'ensemble_' + scen + '.pqt'
    ens_out_filep = join(FO, out_file)
    unfile.prepare_saving(ens_out_filep)
    ens_df_out.to_parquet(ens_out_filep)

    print('Wrote losses & eal for ' + scen)