In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from zipfile import ZipFile
import os
from pathlib import Path
import sys
import glob
from os.path import join
import geopandas as gpd
import pandas as pd
import numpy as np
import rasterio 
from pyproj import CRS


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


# Move .zip Directories to Interim

In [3]:
# It could make sense to have a lib/ style directory
# like PLACES has for common functionality
# and this code block would be useful there for getting
# a fr() path

# Get the absolute path to the precal_hazard directory
# Which is two directories above notebooks/exploration/
abs_dir = os.path.abspath(Path(os.getcwd()).parents[1])
# Get raw data directory
fr = join(abs_dir, 'data', 'raw')
# Get interim data directory
fi = join(abs_dir, 'data', 'interim')
# Get processed data directory
fp = join(abs_dir, 'data', 'processed')

In [6]:
# For each .zip directory in fr
# Create needed subdirectories in interim/
# Unzip in the appropriate interim/ subdirectory

for path in Path(fr).rglob('*.zip'):
    # Avoid hidden files and files in directories
    if path.name[0] != '.':
        # Get root for the directory this .zip file is in
        zip_root = path.relative_to(fr).parents[0]

        # Get path to interim/zip_root
        zip_to_path = join(fi, zip_root)

        # Make directory, including parents
        # No need to check if directory exists bc
        # it is only created when this script is run
        Path(zip_to_path).mkdir(parents=True, exist_ok=True)

        # Unzip to zip_to_path
        with ZipFile(path, 'r') as zip_ref:
            zip_ref.extractall(zip_to_path)

# Clip Raw Data to Location Boundary

In [43]:
# Filepath to location boundary
boundary_filep = join(fi, 'hazard', '020402031007.shp')
# Read boundary
boundary = gpd.read_file(boundary_filep)

## NSI

In [69]:
# Read full NSI from all the counties
nsi_filep = join(fr, 'exposure', 'nsi.pqt')
# Read and reset index
nsi_full = pd.read_parquet(nsi_filep).reset_index(drop=True)

In [70]:
# Convert to geodataframe
geometry = gpd.points_from_xy(nsi_full['properties.x'],
                            nsi_full['properties.y'])
# The NSI CRS is EPSG 3426
nsi_gdf_f = gpd.GeoDataFrame(nsi_full, geometry=geometry,
                             crs="EPSG:4326")

In [71]:
# Project nsi_gdf_f coordinates to EPSG 4269 so that they
# match the boundary CRS
nsi_gdf_f = nsi_gdf_f.to_crs(boundary.crs)

# Use spatial join to get nsi locations within location boundary
nsi_gdf = gpd.sjoin(nsi_gdf_f, boundary[['geometry']])

In [72]:
# Drop the following columns
drop_cols = ['type', 'geometry.type', 'geometry.coordinates', 'index_right']
nsi_gdf = nsi_gdf.drop(columns=drop_cols)

# Remove "properties" from columns
col_updates = [x.replace("properties.", "") for x in nsi_gdf.columns]
nsi_gdf.columns = col_updates

In [74]:
# Write the NSI data to interim
int_exp_filep = join(fi, 'exposure')
Path(int_exp_filep).mkdir(parents=True, exist_ok=True)
nsi_gdf.to_file(join(int_exp_filep, 'nsi.gpkg'), driver='GPKG')

# Process Hazard Data

## Get input hazard data into spatially referenced format

In [12]:
haz_filedir = join(fi, 'hazard')
haz_dirs = glob.glob(join(haz_filedir, "output*"))

In [13]:
haz_dirs

['/jumbo/keller-lab/projects/icom/precal/precal_hazard/data/interim/hazard/output0.025',
 '/jumbo/keller-lab/projects/icom/precal/precal_hazard/data/interim/hazard/output0.0175',
 '/jumbo/keller-lab/projects/icom/precal/precal_hazard/data/interim/hazard/output0.1',
 '/jumbo/keller-lab/projects/icom/precal/precal_hazard/data/interim/hazard/output0.01',
 '/jumbo/keller-lab/projects/icom/precal/precal_hazard/data/interim/hazard/output0.06',
 '/jumbo/keller-lab/projects/icom/precal/precal_hazard/data/interim/hazard/output0.0275',
 '/jumbo/keller-lab/projects/icom/precal/precal_hazard/data/interim/hazard/output0.08',
 '/jumbo/keller-lab/projects/icom/precal/precal_hazard/data/interim/hazard/output0.02',
 '/jumbo/keller-lab/projects/icom/precal/precal_hazard/data/interim/hazard/output0.035',
 '/jumbo/keller-lab/projects/icom/precal/precal_hazard/data/interim/hazard/output0.045',
 '/jumbo/keller-lab/projects/icom/precal/precal_hazard/data/interim/hazard/output0.05',
 '/jumbo/keller-lab/projec

In [16]:
# metadata from .txt file
# ncols         2276
# nrows         1564
# xllcorner     -75.4159722219145
# yllcorner     40.0026388902255
# cellsize      9.2592593e-05
# NODATA_value  -9999

# Prepare directory for writing out
HAZ_OUT_DIR = join(fp, 'hazard', 'depths')
Path(HAZ_OUT_DIR).mkdir(parents=True, exist_ok=True)

# Constant for peak_flood depth.txt
DEPTH_FILEP = "peak_flood_depth.txt"

# Constants from metadata
EPSG = 4269
NODATA = -9999
NROWS = 1564
RES = 9.2592593e-05
XLL = -75.4159722219145
YLL = 40.0026388902255
# Get the CRS 
crs = CRS.from_user_input(EPSG)

# Calculate the y coordinate for the origin
# by adding the cell resolution * raster height (#rows)
# to the y lower left coordinate
# xll and yll mean x lower left and y lower left
YUL = YLL + RES*NROWS

# Get transform
trans = rasterio.transform.from_origin(XLL,
                                       YUL,
                                       RES, RES)


# Use "output*" wildcard in glob to find
# all subdirectories in interim/hazard/
# that have peak_flood_depth.txt files in them
# Use numpy to load text, then reshape the data
# Use rasterio to provide the CRS
# The datum is NAD83, EPSG: 4269
haz_filedir = join(fi, 'hazard')
haz_dirs = glob.glob(join(haz_filedir, "output*"))

# Loop through directories in haz_dirs
# Convert each peak_flood_depth.txt
# into a raster
# Use the wildcard component
# after "output" as the index
for hd in haz_dirs:
    haz_filep = join(hd, DEPTH_FILEP)
    # Suffix correspondes to parameter
    # values used to generate depths
    # Useful to keep this in the processing/writing
    # of files
    file_suf = hd.split("output")[1]

    # Load peak_flood_depth.txt
    fld_depths_in = np.loadtxt(haz_filep, skiprows=6)

    # Unique filename for each depth grid
    # Join haz_out_dir defined as a constant above
    # with peak_fld_depth, the file_suf, and .tif
    filename = 'peak_fld_depth_' + file_suf + '.tif'
    haz_out_filep = join(HAZ_OUT_DIR, filename)

    # Write raster 
    haz_r = rasterio.open(haz_out_filep, 'w', driver='GTiff',
                          height=fld_depths_in.shape[0],
                          width=fld_depths_in.shape[1],
                          count=1, dtype=str(fld_depths_in.dtype),
                          crs=crs, nodata=NODATA, transform=trans)

    haz_r.write(fld_depths_in, 1)
    haz_r.close()

## Link depths to structures

In [34]:
depth_filenames

['peak_fld_depth_0.0375.tif',
 'peak_fld_depth_0.0125.tif',
 'peak_fld_depth_0.035.tif',
 'peak_fld_depth_0.05.tif',
 'peak_fld_depth_0.02.tif',
 'peak_fld_depth_0.0275.tif',
 'peak_fld_depth_0.025.tif',
 'peak_fld_depth_0.0225.tif',
 'peak_fld_depth_0.0175.tif',
 'peak_fld_depth_0.0325.tif',
 'peak_fld_depth_0.03.tif',
 'peak_fld_depth_0.04.tif',
 'peak_fld_depth_0.1.tif',
 'peak_fld_depth_0.07.tif',
 'peak_fld_depth_0.045.tif',
 'peak_fld_depth_0.09.tif',
 'peak_fld_depth_0.08.tif',
 'peak_fld_depth_0.01.tif',
 'peak_fld_depth_0.06.tif',
 'peak_fld_depth_0.015.tif']

In [35]:
# Read in NSI data
INT_EXP_FILEP = join(fi, 'exposure')
nsi_gdf = gpd.read_file(join(INT_EXP_FILEP, 'nsi.gpkg'))

# Get coordinate list
coord_list = [(x, y) for x, y in
              zip(nsi_gdf['geometry'].x,
                  nsi_gdf['geometry'].y)]

# List of depth series
depth_list = []

# For each depth raster, link up unique property
# coordinates with the corresponding depth values
# Write out file of coord/id index & depth_suf columns
depth_filenames = os.listdir(HAZ_OUT_DIR)

for d_fn in depth_filenames:
    # Filepath and load
    d_grid_fp = join(HAZ_OUT_DIR, d_fn)
    # Open the depth raster in read mode
    d_grid = rasterio.open(d_grid_fp)

    # Get the suffix
    # First, get the pre .tif str component
    filepre = d_fn.split('.tif')[0]
    # Then get last element splitting on "_"
    d_suf = filepre.split('_')[-1]

    # Sample points from the raster based on nsi coordinates
    # Get sampled values from pixels
    sampled_depths = [x[0] for x in d_grid.sample(coord_list)]

    # Store as series with name
    # Index by fd_id
    # depth_d_suf
    depth_series = pd.Series(sampled_depths,
                             index=nsi_gdf['fd_id'],
                             name='depth_' + d_suf)

    # Convert depth to ft
    depth_series = depth_series * 3.281

    # Store in list
    depth_list.append(depth_series)
    
# Concat into dataframe
depths = pd.concat(depth_list, axis=1)

# Write data frame to file
# Exposure/depths links depths to properties
EXP_OUT_DIR = join(fp, 'exposure')
Path(EXP_OUT_DIR).mkdir(parents=True, exist_ok=True)
DEPTHS_OUT_FILEP = join(EXP_OUT_DIR, 'depths.pqt')
# fd_id is index, so set index=True
depths.to_parquet(DEPTHS_OUT_FILEP,
                  index=True)

## Subset to residential structures and write out

In [38]:
# Get residential structures
nsi_res = nsi_gdf.loc[nsi_gdf['st_damcat'] == 'RES']

# TODO: Need to update occtype variable to OPEN or ENC
# when pile or pier found_type exists, but not
# relevant for this first case study so avoiding the code

# Write out to processed/exposure/
EXP_OUT_FILEP = join(EXP_OUT_DIR, 'nsi_res.gpkg')
nsi_res.to_file(EXP_OUT_FILEP, driver='GPKG')

# Process depth damage functions

In [14]:
# Filepath to NACCS depth damage functions
vul_dir = join(fr, 'vulnerability')
# Read ddfs
naccs = pd.read_csv(join(vul_dir, 'naccs_ddfs.csv'))

In [40]:
# Need to write file in tidy format

# Drop Description and Source columns
# Melt on occupancy damage category
# Each depth is associated with a percent damage
dropcols = ['Description', 'Source']
idvars = ['Occupancy', 'DamageCategory']
naccs_melt = naccs.drop(columns=dropcols).melt(id_vars=idvars,
                                               var_name='depth_str',
                                               value_name='pctdam')

# Need to convert depth_ft into a number
# Replace ft with empty character
# If string ends with m, make negative number
# Else, make positive number
naccs_melt['depth_str'] = naccs_melt['depth_str'].str.replace('ft', '')
negdepth = naccs_melt.loc[naccs_melt['depth_str'].str[-1] == 
                          'm']['depth_str'].str[:-1].astype(float)*-1
posdepth = naccs_melt.loc[naccs_melt['depth_str'].str[-1] != 
                          'm']['depth_str'].astype(float)

naccs_melt.loc[naccs_melt['depth_str'].str[-1] == 'm',
               'depth_ft'] = negdepth
naccs_melt.loc[naccs_melt['depth_str'].str[-1] != 'm',
               'depth_ft'] = posdepth

# Divide pctdam by 100
naccs_melt['reldam'] = naccs_melt['pctdam']/100

# Delete depth_str and pctdam and standardize
# column names
dropcols = ['depth_str', 'pctdam']
newcols = ['occtype', 'damcat', 'depth_ft', 'reldam']
naccs_melt = naccs_melt.drop(columns=dropcols)
naccs_melt.columns = newcols

# Write out to processed/vulnerability/
vuln_out_dir = join(fp, 'vulnerability')
Path(vuln_out_dir).mkdir(parents=True, exist_ok=True)
vuln_out_filep = join(vuln_out_dir, 'naccs_ddfs.csv')
naccs_melt.to_csv(vuln_out_filep, index=False)