In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import json
import os

# geopandas chooses its geometry backend when it is first imported, so
# USE_PYGEOS must be set before the geopandas import for it to have
# any effect
os.environ['USE_PYGEOS'] = '0'

import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.geometry import shape
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
import rasterio.mask
from pyproj import CRS

# Project helpers and constants
# NOTE(review): names used below such as join, prepare_saving,
# REF_DIR_R, EXP_DIR_R, EXP_DIR_I, FI, CLIP_CRS and NSI_CRS are
# presumably provided by these star imports - confirm
from util.files import *
from util.const import *

In [3]:
# FIPS code of the study area
# (hard-coded for now; meant to be passed in as an argument later)
FIPS = "34007"

# State abbreviation and nation
# (hard-coded for now; meant to be derived from FIPS later)
STATEABBR = "NJ"
NATION = "US"

# Process - everything ends up at county level and clipped

## Process clip data

In [4]:
# For our case study, we are going to focus on Gloucester City, NJ
# Our config.yaml loads in a county indexed clip file
# so that we can restrict all our data to the GC boundaries

In [5]:
# Load the boundary data that was downloaded from the
# county's REST API server
clip_filep = join(REF_DIR_R, FIPS, 'clip.json')
with open(clip_filep) as clip_file:
    clip_data = json.load(clip_file)

In [6]:
# Flatten the json features into a table with pandas; this is the
# easiest starting point for a geodataframe used for clipping
clip_features = clip_data['features']
clip_df = pd.json_normalize(clip_features)

# Build one shapely geometry per feature, taken straight from
# the original json object
clip_geo = list(map(shape, (feat['geometry'] for feat in clip_features)))

# Columns to remove once the geometry column exists
drop_col = ['type', 'id', 'geometry.type', 'geometry.coordinates']

# Attach the geometries and CRS to the table, then drop the
# columns listed above
clip_gdf = gpd.GeoDataFrame(
    clip_df,
    geometry=clip_geo,
    crs=CLIP_CRS,
).drop(columns=drop_col)

# Save the clip boundary to interim
clip_out_filep = join(FI, 'ref', FIPS, 'clip.gpkg')
prepare_saving(clip_out_filep)
clip_gdf.to_file(clip_out_filep, driver='GPKG')

## Process NSI

In [4]:
# The NSI comes with all the data necessary for performing a standard 
# flood risk assessment. It is still useful to process the raw data.
# Here, we subset to residential properties with 1 to 2 stories
# and save as a geodataframe. These are the types of residences we have
# multiple depth-damage functions for and a literature base to draw 
# from to introduce uncertainty in these loss estimates

In [7]:
# Load the raw NSI table, then replace its index with a fresh one
# TODO - this will be json...
nsi_filep = join(EXP_DIR_R, FIPS, 'nsi.pqt')
nsi_full = pd.read_parquet(nsi_filep)
nsi_full = nsi_full.reset_index(drop=True)

In [8]:
# Turn the NSI table into a geodataframe; this is needed for the
# spatial joins performed later
# Point geometries come from the x/y properties
geometry = gpd.points_from_xy(x=nsi_full['properties.x'],
                             y=nsi_full['properties.y'])

# Columns to remove once the geometry column exists
drop_cols = ['type', 'geometry.type', 'geometry.coordinates']

# The NSI CRS is EPSG 4326
nsi_gdf = gpd.GeoDataFrame(
    nsi_full,
    crs=NSI_CRS,
    geometry=geometry,
).drop(columns=drop_cols)

# Strip "properties." from the column names
col_updates = [name.replace("properties.", "") for name in nsi_gdf.columns]
nsi_gdf.columns = col_updates

In [9]:
# Residential occupancy types in the NSI:
#   RES 1 - single family
#   RES 2 - manufactured home
#   RES 3 - multifamily (but could fit into a depth-damage function
#           archetype depending on # stories)
# This case study uses RES1 only: it is the only occtype with both
# hazus and naccs DDFs and has less ambiguous classification

# Keep structures whose occtype starts with 'RES1'
is_res1 = nsi_gdf['occtype'].str.slice(stop=4).eq('RES1')
nsi_res = nsi_gdf.loc[is_res1]

# Flag buildings with more than 2 stories; the case study
# does not use them
res1_3s_ind = nsi_res['num_story'].gt(2)

# Final residential dataframe: everything that was not flagged
res_f = nsi_res.loc[~res1_3s_ind]

In [10]:
# Subset to the columns carried forward into the interim file
cols = ['fd_id', 'occtype', 'found_type', 'cbfips',
        'ftprntsrc', 'found_ht', 'val_struct',
        'val_cont', 'source', 'firmzone', 'ground_elv_m',
        'geometry']

res_out = res_f.loc[:, cols]

# Clip to our clip boundary
# gpd.clip requires both layers to share a CRS. The two are expected
# to match already, but that depends on constants defined outside this
# notebook, so reproject a copy of the boundary if they differ.
# clip_gdf itself is left untouched so this cell stays re-runnable.
clip_mask = clip_gdf
if (res_out.crs is not None
        and clip_gdf.crs is not None
        and clip_gdf.crs != res_out.crs):
    clip_mask = clip_gdf.to_crs(res_out.crs)

nsi_clip_out = gpd.clip(res_out, clip_mask)

In [16]:
# Save the clipped single family homes (sf) under
# interim/exposure/FIPS/
EXP_OUT_FILEP = join(EXP_DIR_I, FIPS, 'nsi_sf.gpkg')
prepare_saving(EXP_OUT_FILEP)
nsi_clip_out.to_file(filename=EXP_OUT_FILEP,
                     driver='GPKG')

## Process Depth-Damage Functions

In [None]:
# VULN_DIR_UZ/physical/US/haz_fl_dept.csv & naccs_ddfs.csv

# I want to adapt the code in nsi_unc/workflows/notebooks/loss_ensemble
# and write out the HAZUS & NACCS dicts (the params)
# I want to modify it for every half foot, not rounding to the
# full foot - that will probably bug people
# I also want to update the process_data.ipynb code
# For both of these, code can be greatly condensed -- with some
# if statements -- to be cleaner

## Process Reference Data

In [None]:
# glob for shp files in REF_DIR_UZ - we need to get these all at
# our county level 

## Process Social Vulnerability Data

In [None]:
# need to process these all at the county level
# sovi is in VULN_DIR_UZ/social/{STATEABBR}/SoVI2010_{STATEABBR}/...
# can glob for .shp (no .shp.xml)

# Link - everything to structures

## Link NSI with Flood Zones

## Link NSI with Reference Data

## Link NSI with Social Vulnerability Data