# Configure

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from zipfile import ZipFile
import zipfile_deflate64
import os
from pathlib import Path
import sys
import glob
import shutil
from os.path import join

os.environ["USE_PYGEOS"] = "0"
import geopandas as gpd
import pandas as pd
import numpy as np
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
import rasterio.mask
from pyproj import CRS
import matplotlib.pyplot as plt

In [3]:
# Filepath configuration for the whole notebook

# The project root is one level above the current working
# directory (the notebook is run from notebooks/)
ABS_DIR = os.path.abspath(Path.cwd().parent)
# Raw and interim data roots live under <project>/data/
FR, FI = (join(ABS_DIR, 'data', stage) for stage in ('raw', 'interim'))

# Raw inputs: exposure, vulnerability (vuln), administrative
# reference files and hazard (FEMA NFHL and depth grids).
# These are expected to exist already, so only references are built
EXP_DIR_R = join(FR, 'exposure')
VULN_DIR_R = join(FR, 'vuln')
REF_DIR_R = join(FR, 'ref')
HAZ_DIR_R = join(FR, 'haz')

# Interim outputs mirror the raw layout
EXP_DIR_I = join(FI, 'exposure')
VULN_DIR_I = join(FI, 'vuln')
HAZ_DIR_I = join(FI, 'haz')
REF_DIR_I = join(FI, 'ref')

# Create every interim directory (and its parents) if missing
for interim_dir in (EXP_DIR_I, VULN_DIR_I, HAZ_DIR_I, REF_DIR_I):
    Path(interim_dir).mkdir(parents=True, exist_ok=True)

# County FIPS code used as the study-area reference
FIPS = '42101'

# Unzip Files

In [44]:
# Walk data/raw recursively; every .zip archive is extracted into
# the subdirectory of data/interim that mirrors its location in raw

for path in Path(FR).rglob("*.zip"):
    # Skip hidden archives (file names starting with '.')
    if path.name.startswith("."):
        continue

    # Directory of the archive, relative to the raw data root
    zip_root = path.relative_to(FR).parents[0]

    # Matching destination under interim/
    zip_to_path = join(FI, zip_root)

    # exist_ok=True makes this safe to re-run
    Path(zip_to_path).mkdir(parents=True, exist_ok=True)

    # Extract the full archive into the destination
    with ZipFile(path, "r") as zip_ref:
        zip_ref.extractall(zip_to_path)

    # Log the archive name without its extension
    archive_label = path.name.split('.')[0]
    print('Unzipped and moved to interim : ' + archive_label)

Unzipped and moved to interim : noaa
Unzipped and moved to interim : tract
Unzipped and moved to interim : block
Unzipped and moved to interim : bg
Unzipped and moved to interim : zcta
Unzipped and moved to interim : county
Unzipped and moved to interim : nfhl
Unzipped and moved to interim : dg


# Process NSI

In [104]:
# The NSI comes with all the data necessary for performing a standard 
# flood risk assessment. It is still useful to process the raw data.
# Here, we subset to residential properties with 1 to 2 stories
# and save as a geodataframe. These are the types of residences we have
# multiple depth-damage functions for and a literature base to draw 
# from to introduce uncertainty in these loss estimates

In [19]:
# Load the raw NSI parquet file with a fresh 0..n-1 index
nsi_filep = join(EXP_DIR_R, 'nsi.pqt')
nsi_full = pd.read_parquet(nsi_filep)
nsi_full = nsi_full.reset_index(drop=True)

# Build point geometries from the x/y property columns
# The NSI CRS is EPSG 4326
geometry = gpd.points_from_xy(x=nsi_full['properties.x'],
                              y=nsi_full['properties.y'])
nsi_gdf = gpd.GeoDataFrame(nsi_full,
                           geometry=geometry,
                           crs="EPSG:4326")

# These raw columns are not needed once the geometry column exists
drop_cols = ['type', 'geometry.type', 'geometry.coordinates']
nsi_gdf = nsi_gdf.drop(columns=drop_cols)

# Strip the "properties." prefix from the column names
col_updates = [name.replace("properties.", "")
               for name in nsi_gdf.columns]
nsi_gdf.columns = col_updates

In [20]:
# Residential occupancy types in the NSI:
# RES 1 - single family
# RES 2 - manufactured home
# RES 3 - multifamily (but could fit into a depth-damage function
# archetype depending on # stories)
# This case-study uses RES1 only: it is the only occtype with
# both hazus and naccs DDFs and has less ambiguous classification

# Keep structures whose occtype begins with 'RES1'
is_res1 = nsi_gdf['occtype'].str.slice(stop=4).eq('RES1')
nsi_res = nsi_gdf[is_res1]

# Flag buildings with more than 2 stories; these are excluded
res1_3s_ind = nsi_res['num_story'].gt(2)
# Final residential dataframe
res_f = nsi_res[~res1_3s_ind]

In [39]:
# Columns carried forward into the interim exposure file
cols = [
    'fd_id', 'occtype', 'found_type', 'cbfips',
    'ftprntsrc', 'found_ht', 'val_struct',
    'val_cont', 'source', 'firmzone', 'ground_elv_m',
    'geometry',
]
res_out = res_f[cols]

# Save the subset as a GeoPackage in interim/exposure/
EXP_OUT_FILEP = join(EXP_DIR_I, 'nsi_res.gpkg')
res_out.to_file(EXP_OUT_FILEP, driver='GPKG')

# Prepare depth-damage functions

In [4]:
# Load the raw NACCS depth-damage functions
# vuln/physical is a directory of files pre-supplied to users of
# this codebase. The NACCS values were extracted from a pdf by
# manual entry and laid out to mimic the way Hazus stores DDFs, so
# they could potentially be ingested into the Hazus database
naccs_filep = join(VULN_DIR_R, 'physical', 'naccs_ddfs.csv')
naccs = pd.read_csv(naccs_filep)


In [109]:
# Goal: tidy (long) format with numeric depth and damage values

# Open questions carried over from the original author:
# stick to RES1 (and keep the NSI processing above in line with
# that); occ type codes for pile foundations may need changing

# Drop the free-text columns, then melt so that each row pairs one
# depth column with the percent damage for an occupancy/damage category
dropcols = ['Description', 'Source']
idvars = ['Occupancy', 'DamageCategory']
naccs_wide = naccs.drop(columns=dropcols)
naccs_melt = naccs_wide.melt(id_vars=idvars,
                             var_name='depth_str',
                             value_name='pct_dam')

# Convert the depth labels to numbers:
# strip the 'ft' unit; a trailing 'm' marks a negative depth,
# everything else is a positive depth
naccs_melt['depth_str'] = naccs_melt['depth_str'].str.replace('ft', '')
is_neg = naccs_melt['depth_str'].str[-1] == 'm'
negdepth = naccs_melt.loc[is_neg, 'depth_str'].str[:-1].astype(float)*-1
posdepth = naccs_melt.loc[~is_neg, 'depth_str'].astype(float)

naccs_melt.loc[is_neg, 'depth_ft'] = negdepth
naccs_melt.loc[~is_neg, 'depth_ft'] = posdepth

# Percent damage -> relative damage (fraction)
naccs_melt['rel_dam'] = naccs_melt['pct_dam']/100

# Remove the intermediate columns and standardize the
# remaining column names (assigned by position)
dropcols = ['depth_str', 'pct_dam']
newcols = ['occtype', 'dam_cat', 'depth_ft', 'rel_dam']
naccs_melt = naccs_melt.drop(columns=dropcols)
naccs_melt.columns = newcols

# Write out to interim/vuln/physical/
vuln_out_dir = join(VULN_DIR_I, 'physical')
Path(vuln_out_dir).mkdir(parents=True, exist_ok=True)
vuln_out_filep = join(vuln_out_dir, 'naccs_ddfs.csv')
naccs_melt.to_csv(vuln_out_filep, index=False)


In [6]:
# HAZUS DDFs were obtained from:
# https://github.com/cran/hazus/tree/master/data
# The Hazus 5.1 technical manual was used to build a Riverine IDs
# spreadsheet tracking which riverine DDFs the current version of
# HAZUS recommends, to be cross referenced with the DDFs downloaded
# from the cran/hazus/data/ repository.
# The data was loaded in R (it is .rda) and converted to csv.

# These DDFs share their layout with the naccs data (the naccs
# data was made to conform to this layout as closely as possible)
hazus_filep = join(VULN_DIR_R, 'physical', 'haz_fl_dept.csv')
hazus_ddfs = pd.read_csv(hazus_filep)

In [7]:
# Damage functions used:
# With basement: FIA (MOD.), which covers one and two floors
# for A and V zones
# Without basement: USACE - IWR, which covers one and two floors
# with no flood zone specified
# 106: FIA (MOD.) 1S WB A zone
# 114: "" V zone
# 108: FIA (MOD.) 1S WB A zone
# 116: "" V zone
# NOTE(review): 108/116 are presumably the two-floor (2S) versions;
# the labels above are copied as originally written - confirm
# 129: USACE - IWR 1S NB
# 130: USACE - IWR 2S+ NB

# Pile and pier foundations matter for RES1 in general, but the
# original author believed none occur in the NSI for Philly
# (flagged by the author as still to be verified)

# Rows whose DmgFnId is one of the codes above and are RES1
dmg_ids = [106, 108, 114, 116, 129, 130]
in_dmg_ids = hazus_ddfs['DmgFnId'].isin(dmg_ids)
is_res1 = hazus_ddfs['Occupancy'] == 'RES1'
hazus_res = hazus_ddfs[in_dmg_ids & is_res1]

# Build a building-type label shaped like the suffix of the NSI
# occtype (e.g. 1SNB from RES1-1SNB) plus an A or V zone column
# Note: basement homes outside the SFHA will take the A zone

# Description is comma separated:
#  - the first word of piece 0 gives the number of stories
#    (the text says "floors" in some rows and "story" in others,
#    so only the first word is used)
#  - piece 1 says whether there is a basement
#  - the last piece carries the flood zone
desc = hazus_res['Description'].str.split(',')
s_type = (desc.str[0].str.split(' ').str[0]
          .str.replace('one', '1')
          .str.replace('two', '2'))
b_type = desc.str[1].str.strip()
has_basement = b_type == 'w/ basement'
occtype = np.where(has_basement,
                   s_type + 'SWB',
                   s_type + 'SNB')
fz = desc.str[-1].str.replace('Structure', '').str.strip()

# Keep only the depth columns (names containing 'ft'); this drops
# Source_Table, Occupy_Class, Cover_Class, Description, Source,
# DmgFnId, Occupancy and the written-out index column.
# Depth columns that are entirely NaN are removed too
depth_cols = [col for col in hazus_res.columns if 'ft' in col]
# fld_zone is the first character of the last Description piece
# NOTE(review): for rows without a flood zone in Description this
# is presumably not 'A'/'V' - verify against the csv
hazus_res = (hazus_res.loc[:, depth_cols]
             .dropna(axis=1, how='all')
             .assign(occtype=occtype,
                     fld_zone=fz.str[0]))

# Melt to long format with occtype and fld_zone as identifiers,
# following the naccs processing
idvars = ['occtype', 'fld_zone']
hazus_melt = hazus_res.melt(id_vars=idvars,
                            var_name='depth_str',
                            value_name='pct_dam')

# Convert the depth labels to numbers:
# strip the 'ft' unit; a trailing 'm' marks a negative depth,
# everything else is a positive depth
hazus_melt['depth_str'] = hazus_melt['depth_str'].str.replace('ft', '')
is_neg = hazus_melt['depth_str'].str[-1] == 'm'
negdepth = hazus_melt.loc[is_neg, 'depth_str'].str[:-1].astype(float)*-1
posdepth = hazus_melt.loc[~is_neg, 'depth_str'].astype(float)

hazus_melt.loc[is_neg, 'depth_ft'] = negdepth
hazus_melt.loc[~is_neg, 'depth_ft'] = posdepth

# Percent damage -> relative damage (fraction)
hazus_melt['rel_dam'] = hazus_melt['pct_dam']/100

# Remove the intermediate columns and standardize names
# (assigned by position). Only building types are present here,
# so the column is called bld_type instead of occtype
dropcols = ['depth_str', 'pct_dam']
newcols = ['bld_type', 'fld_zone', 'depth_ft', 'rel_dam']
hazus_melt = hazus_melt.drop(columns=dropcols)
hazus_melt.columns = newcols

# Write out to interim/vuln/physical/
vuln_out_dir = join(VULN_DIR_I, 'physical')
Path(vuln_out_dir).mkdir(parents=True, exist_ok=True)
vuln_out_filep = join(vuln_out_dir, 'hazus_ddfs.csv')
hazus_melt.to_csv(vuln_out_filep, index=False)

# Process Hazard

In [68]:
# Load the NFHL flood hazard areas
# The archive unzips to shapefiles rather than a gdb;
# the layer we want is S_FLD_HAZ_AR
nfhl_dir_i = join(HAZ_DIR_I, 'nfhl')
fld_haz_fp = join(nfhl_dir_i, 'S_FLD_HAZ_AR.shp')
nfhl = gpd.read_file(fld_haz_fp)

In [78]:
# Attributes needed downstream; ZONE_SUBTY is only kept long
# enough to relabel the 0.2 percent zones
keep_cols = ['FLD_ZONE', 'FLD_AR_ID', 'STATIC_BFE', 'ZONE_SUBTY',
             'geometry']
nfhl_f = nfhl.loc[:, keep_cols]

# Append '_500' to the flood zone of every 0.2 percent annual
# chance polygon (e.g. X -> X_500)
is_500yr = nfhl_f['ZONE_SUBTY'].eq('0.2 PCT ANNUAL CHANCE FLOOD HAZARD')
nfhl_f.loc[is_500yr, 'FLD_ZONE'] = nfhl_f.loc[is_500yr, 'FLD_ZONE'] + '_500'

# Lower-case all column names
nfhl_f.columns = nfhl_f.columns.str.lower()

# The subtype column has served its purpose
nfhl_f = nfhl_f.drop(columns=['zone_subty'])

# Save the processed flood zones as a GeoPackage
fld_zones_fp = join(HAZ_DIR_I, 'fld_zones.gpkg')
nfhl_f.to_file(fld_zones_fp, driver='GPKG')


In [79]:
# Optional cleanup: remove the unzipped nfhl directory to reduce
# the file storage burden
# TODO: Make this a setting in a config file you can toggle as a user
RM_NFHL = True
if RM_NFHL:
    # Directory holding the unzipped NFHL shapefiles
    nfhl_dir = join(HAZ_DIR_I, 'nfhl')

    # Best-effort removal: a failure is reported on stdout
    # and does not stop the notebook
    try:
        shutil.rmtree(nfhl_dir)
    except OSError as e:
        rm_err_msg = "Error: %s - %s." % (e.filename, e.strerror)
        print(rm_err_msg)
    

In [4]:
# The depth rasters need no processing. This optional step only
# removes extraneous files to reduce the size of the project:
# in the interim/haz/dg directory, keep entries whose names start
# with Depth or CstDpth and delete every other file
dg_dir = join(HAZ_DIR_I, 'dg')
keep_prefixes = ('Depth', 'CstDpth')
for path in Path(dg_dir).iterdir():
    # Path.unlink() raises on directories, which would abort the
    # cleanup midway; leave sub-directories untouched instead
    if path.is_dir():
        continue
    if not path.name.startswith(keep_prefixes):
        path.unlink()

# Process Reference Data

In [116]:
# Plan for this section: clip every reference layer to the
# county, save as .gpkg, and clear the ref/ directory when done

# File name prefixes for state-level and national TIGER files
base_state_fp = 'tl_2022_42_'
base_us_fp = 'tl_2022_us_'

# State-level layers: block groups, blocks, tracts
state_ref_l = [f'{base_state_fp}{suffix}'
               for suffix in ('bg.shp', 'tabblock20.shp', 'tract.shp')]

# National layers: zip code tabulation areas
us_ref_l = [f'{base_us_fp}{suffix}' for suffix in ('zcta520.shp',)]

# Every reference file to process, state files first
ref_l = [*state_ref_l, *us_ref_l]

In [117]:
# Read in county file
counties = gpd.read_file(join(REF_DIR_I, base_us_fp + 'county.shp'))

# Identify the county from the GEOID column
# If processing multiple counties in the future,
# this can change to .isin(FIPS) w/ FIPS as a list
# Or, more generally, a wrapper function could
# handle string or list input
# A single .loc call plus .copy() gives an independent frame, so
# adding the fips column below does not trigger pandas'
# SettingWithCopyWarning (the chained counties.loc[...][[...]] did)
counties_f = counties.loc[counties['GEOID'] == FIPS, ['geometry']].copy()
counties_f['fips'] = FIPS

# counties_f is the reference for clipping the other layers:
# a reference polygon is kept when it lies within a county polygon

# Clipped layers keyed by short layer name
# Start with counties_f, which also needs to be written out
ref_clip_l = {'counties': counties_f}

# Loop through the other layers and clip each to the county
for ref in ref_l:
    # Read in the ref file
    temp_ref = gpd.read_file(join(REF_DIR_I, ref))
    # Spatial join keeps ref polygons within the county(ies)
    temp_ref_j = gpd.sjoin(temp_ref, counties_f, predicate='within')
    # The key is the last '_'-separated token of the file name
    # without its extension, e.g. tl_2022_42_bg.shp -> bg
    ref_name = ref.split('.')[0].split('_')[-1]
    ref_clip_l[ref_name] = temp_ref_j
    # Helpful log message
    print('Clipped reference data: ' + ref_name)

# Delete everything currently in interim/ref (the unzipped
# shapefiles); the clipped layers are held in memory at this point
for path in Path(REF_DIR_I).iterdir():
    path.unlink()

# Save one gpkg per clipped layer into interim/ref
# Design note from the original author: reference files do not
# feel like processed data, so interim seems right. A temp
# directory for unzipped files, cleaned up after processing,
# would be neater than deleting from interim/ref here
for ref_name, ref in ref_clip_l.items():
    ref_out_fp = join(REF_DIR_I, ref_name + '.gpkg')
    # Write out ref data as gpkg
    ref.to_file(ref_out_fp, driver='GPKG')

Clipped reference data: bg
Clipped reference data: tabblock20
Clipped reference data: tract
Clipped reference data: zcta520


# Process social vulnerability data

In [23]:
# Load the NOAA SoVI layer from the unzipped interim directory
vuln_s_path = join(VULN_DIR_I, 'social')
sovi_filep = join(vuln_s_path, 'SoVI2010_PA')
sovi = gpd.read_file(filename=sovi_filep)

In [24]:
# County fips = state code followed by county code
fips = sovi['STATEFP10'] + sovi['COUNTYFP10']
sovi = sovi.assign(fips=fips)
# Keep only the rows belonging to the reference county
in_county = sovi['fips'] == FIPS
sovi_fips = sovi[in_county]

In [25]:
# Load the clipped county boundary written to interim/ref
fips_geo_fp = join(REF_DIR_I, 'counties.gpkg')
fips_geo = gpd.read_file(filename=fips_geo_fp)

In [26]:
# Visual check that boundaries of SOVI are snapped
# to the county boundary correctly - looks good
# fig, ax = plt.subplots(figsize=(10, 6))
# sovi_plot = sovi_fips.to_crs(fips_geo.crs)
# sovi_plot.plot(ax=ax, color='blue', alpha=.2)
# fips_geo.plot(ax=ax, color='red', alpha=.2)

In [33]:
# Reproject sovi_fips to county crs
sovi_f = sovi_fips.to_crs(fips_geo.crs)

# Flag "high" vulnerability based on the FMA grant designation:
# a score greater than .75 in the column 'SOVI0610PA'
# (missing scores compare as False)
sovi_f['FMA_High'] = sovi_f['SOVI0610PA'] > .75

# Keep relevant columns
# FMA_High is included so the flag computed above is actually
# written out; previously it was dropped by this selection
keep_cols = ['fips', 'SOVI0610PA', 'SoVI0610_1', 'SoVI0610_2',
             'GEOID10', 'FMA_High', 'geometry']
sovi_f = sovi_f.loc[:, keep_cols]

# Rename columns (assigned by position, same order as keep_cols)
rename_cols = ['fips', 'sovi', 'sovi_cat_1', 'sovi_cat_2',
               'tract_id', 'fma_high', 'geometry']
sovi_f.columns = rename_cols

# Write out the new SOVI in interim before any cleanup, so the
# unzipped source is still on disk if the write fails
sovi_out_path = join(vuln_s_path, 'sovi.gpkg')
sovi_f.to_file(sovi_out_path, driver='GPKG')

# Directory holding the unzipped SoVI files
sovi_dir = join(vuln_s_path, 'SoVI2010_PA')
# This is optional: delete the sovi directory to reduce
# the file storage burden
# TODO: Make this a setting in a config file you can toggle as a user
RM_SOVI = True
if RM_SOVI:
    # Best-effort removal: a failure is reported on stdout
    # and does not stop the notebook
    try:
        shutil.rmtree(sovi_dir)
    except OSError as e:
        print("Error: %s - %s." % (e.filename, e.strerror))

# (It really makes sense to have a temp/ directory for unzipped
# stuff so we can just delete it all)
    
# (It really makes sense to have a temp/ directory for unzipped
# stuff so we can just delete it all)

# Link NSI with Hazard Data

In [13]:
# Load the processed residential NSI structures from interim/exposure
nsi_res_fp = join(EXP_DIR_I, 'nsi_res.gpkg')
nsi = gpd.read_file(nsi_res_fp)

## Structures & Flood Zones

In [80]:
# The NSI firmzone column was all NaN when pulled from the API,
# so flood zones are linked here to know which properties are in
# or outside the SFHA (useful for summary stats).
# Down the line this also supports things like pre/post FIRM
# adjustments of FFE relative to BFE, probabilities of foundation
# types, etc.

# Load the processed flood zones
nfhl_filep = join(HAZ_DIR_I, 'fld_zones.gpkg')
fz = gpd.read_file(filename=nfhl_filep)

In [84]:
# Put the structures in the flood zone CRS
nsi_rep = nsi.to_crs(fz.crs)

# Point-in-polygon join; only the structure id and geometry are
# needed from the NSI side, all flood zone columns are retained
nsi_pts = nsi_rep[['fd_id', 'geometry']]
fz_m = gpd.sjoin(nsi_pts, fz, predicate='within')

# The original author checked for overlapping flood zones putting
# a structure in multiple polygons and found none here. Other
# counties may have them, and would need consistent handling

# Keep the structure id and the flood zone attributes
keep_cols = ['fd_id', 'fld_zone', 'fld_ar_id', 'static_bfe']
fz_m_out = fz_m[keep_cols]

# Save to interim/exposure as parquet
nsi_fz_filep = join(EXP_DIR_I, 'nsi_fz.pqt')
fz_m_out.to_parquet(nsi_fz_filep)

## Structures and Depth Grids

In [10]:
# Plan: link every depth grid to the structures by reprojecting
# the structures to the grid CRS, sampling the grid at each point,
# and writing an fd_id/depth dataframe to interim
dg_dir = join(HAZ_DIR_I, 'dg')
# Opened depth grids, keyed by '<flood source>_<return period>'
dg_dict = {}
for path in Path(dg_dir).rglob("*.tif"):
    # File names look like Depth_RPpct.tif (inland)
    # or CstDpth_RPpct.tif (coastal)
    filename = path.name

    # The inland and coastal naming conventions differ,
    # so each gets its own parsing branch
    if filename[:5] == 'Depth':
        fld_src = 'Inl'
        # Drop the leading "Depth" piece and re-join the rest
        # with '.', which turns 0_2 into 0.2 for the .2% event.
        # Then cut the trailing 'pct.tif' (7 characters)
        ret_p_temp = '.'.join(filename.split('_')[1:])
        ret_p = ret_p_temp[:-7]
    else:
        fld_src = 'Cst'
        # Take the text before 'pct', skip the first 7 characters
        # and turn 0_2 into 0.2; other values (01, 02, 10)
        # come through unchanged
        ret_p = filename.split('pct')[0][7:].replace('_', '.')

    # Open the raster and store the dataset handle; it is left
    # open because later cells sample from it
    dg_key = fld_src + '_' + ret_p
    dg_dict[dg_key] = rasterio.open(path)
    

In [14]:
# Use the first depth grid in the dict as the CRS reference
dg_temp = next(iter(dg_dict.values()))
dg_crs = dg_temp.crs

# Reproject the structures into that CRS
nsi_reproj = nsi.to_crs(dg_crs)

In [15]:
# Each grid is sampled at the coordinates of the reprojected
# structures (the fastest approach known to the original author)
coords = zip(nsi_reproj['geometry'].x, nsi_reproj['geometry'].y)
coord_list = list(coords)
print('Store NSI coordinates in list')

# One fd_id-indexed series of depths per grid is collected here
# and concatenated into a dataframe after the loop
depth_list = []
for key, dg in dg_dict.items():
    key_parts = key.split('_')
    fld_src = key_parts[0]
    rp = key_parts[1]
    print('Flooding source: ' + fld_src)
    print('Return period: ' + rp)
    # Sample the grid at every structure location and keep the
    # first value of each sample. The original author spot checked
    # the sampled values in qgis and they appeared to align
    sampled_depths = [band_vals[0] for band_vals in dg.sample(coord_list)]
    print('Sampled depth from grid:')

    # Series named after the grid key and indexed by fd_id
    depths = pd.Series(sampled_depths,
                       index=nsi_reproj['fd_id'],
                       name=key)
    depth_list.append(depths)
    print('Aligned depths with index of structures in series')

# One column per depth grid
depth_df = pd.concat(depth_list, axis=1)

Store NSI coordinates in list
Flooding source: Inl
Return period: 0.2
Sampled depth from grid:
Aligned depths with index of structures in series
Flooding source: Cst
Return period: 10
Sampled depth from grid:
Aligned depths with index of structures in series
Flooding source: Cst
Return period: 0.2
Sampled depth from grid:
Aligned depths with index of structures in series
Flooding source: Cst
Return period: 02
Sampled depth from grid:
Aligned depths with index of structures in series
Flooding source: Inl
Return period: 01
Sampled depth from grid:
Aligned depths with index of structures in series
Flooding source: Inl
Return period: 02
Sampled depth from grid:
Aligned depths with index of structures in series
Flooding source: Cst
Return period: 01
Sampled depth from grid:
Aligned depths with index of structures in series
Flooding source: Inl
Return period: 10
Sampled depth from grid:
Aligned depths with index of structures in series


In [16]:
# Treat nodata cells as zero depth
# NOTE(review): this uses the nodata value of the first grid only;
# assumes every depth grid shares it - confirm
nodata_val = dg_temp.nodata
depth_df[depth_df == nodata_val] = 0

# Keep structures whose depths sum to more than zero across grids,
# and multiply by 3.281 to convert to feet
has_exposure = depth_df.sum(axis=1) > 0
depth_df_f = depth_df[has_exposure]*3.281

In [17]:
# Save fd_id and depths to interim/exposure
# The index is reset so fd_id is written as a regular column,
# consistent with earlier file writing decisions
# Flood depths are relative to grade
nsi_depths_filep = join(EXP_DIR_I, 'nsi_depths.pqt')
depth_out = depth_df_f.reset_index()
depth_out.to_parquet(nsi_depths_filep)

# Link NSI with reference data

In [30]:
# Reload the processed residential NSI structures from interim/exposure
nsi_res_fp = join(EXP_DIR_I, 'nsi_res.gpkg')
nsi = gpd.read_file(nsi_res_fp)

In [31]:
# Want to know which block, block group, zip, tract structures
# are in. Loop through these, do spatial joins, and
# write the nsi_ref file to exposure

# These could all be defined in config files
refs = ['tabblock20', 'bg',  'tract', 'zcta520']
ref_id = ['GEOID20', 'GEOID', 'GEOID', 'GEOID20']
ref_names = ['block', 'bg', 'tract', 'zcta']
ref_df_list = []
# The logic here is to read in the ref file
# Subset just to the id column 
# Reproject NSI to the crs of the ref file
# Do a spatial join for point in polygon
# Then store an indexed dataframe of the ref id so that
# we can concat all of them together later on fd_id
for i, ref in enumerate(refs):
    ref_filep = join(REF_DIR_I, ref + '.gpkg')
    ref_geo = gpd.read_file(ref_filep)[[ref_id[i], 'geometry']]
    nsi_reproj = nsi.to_crs(ref_geo.crs)[['fd_id', 'geometry']]
    nsi_ref = gpd.sjoin(nsi_reproj, ref_geo, predicate='within')
    nsi_ref_f = nsi_ref.set_index('fd_id')[[ref_id[i]]]
    nsi_ref_f = nsi_ref_f.rename(columns={ref_id[i]: ref_names[i] + '_id'})
    ref_df_list.append(nsi_ref_f)

nsi_refs = pd.concat(ref_df_list, axis=1).reset_index()
# Write to file - in exposure
ref_filep = join(EXP_DIR_I, 'nsi_ref.pqt')
nsi_refs.to_parquet(ref_f