In [1]:
# Load IPython's autoreload extension and switch it to mode 2 so that
# edits to imported modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [2]:
from zipfile import ZipFile
import zipfile_deflate64
import os
from pathlib import Path
import sys
import glob
from os.path import join
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import pandas as pd
import numpy as np
import rasterio 
from rasterio.warp import calculate_default_transform, reproject, Resampling
import rasterio.mask
from pyproj import CRS
import matplotlib.pyplot as plt

Matplotlib is building the font cache; this may take a moment.


# Configure files and other info

In [3]:
# NOTE: a shared lib/-style module (like the one PLACES uses for
# common functionality) would be a natural home for this path setup,
# so that every notebook resolves the raw data path the same way

# --- Top-level data directories ---

# Absolute path to the project directory, taken to be the parent
# of the current working directory (i.e. one level above notebooks/)
ABS_DIR = os.path.abspath(Path.cwd().parents[0])
# Raw, interim and processed data all sit under <project>/data
FR, FI, FP = (join(ABS_DIR, 'data', stage)
              for stage in ('raw', 'interim', 'processed'))

# --- Raw inputs ---

# Exposure, vulnerability (vuln), administrative reference (ref) and
# hazard (haz: FEMA NFHL and depth grids) directories.
# These are only referenced here, never created
EXP_DIR_R, VULN_DIR_R, REF_DIR_R, HAZ_DIR_R = (
    join(FR, subdir) for subdir in ('exposure', 'vuln', 'ref', 'haz'))

# --- Interim outputs ---

# Exposure, vulnerability (vuln) and hazard directories
EXP_DIR_I, VULN_DIR_I, HAZ_DIR_I = (
    join(FI, subdir) for subdir in ('exposure', 'vuln', 'haz'))

# Create the interim directories (and any missing parents);
# exist_ok makes this a no-op when they are already there
for interim_dir in (EXP_DIR_I, VULN_DIR_I, HAZ_DIR_I):
    Path(interim_dir).mkdir(parents=True, exist_ok=True)

# Unzip and move files to interim

In [4]:
# Walk every .zip archive found (recursively) under the raw data
# directory and extract it into the matching subdirectory of
# interim/, creating that subdirectory on the way

for zip_path in Path(FR).rglob('*.zip'):
    # Skip hidden archives (filenames starting with '.')
    if zip_path.name.startswith('.'):
        continue

    # Directory holding the archive, expressed relative to raw/
    zip_root = zip_path.relative_to(FR).parent

    # Mirror that location under interim/
    zip_to_path = join(FI, zip_root)

    # Create the target directory, including parents;
    # exist_ok=True keeps this safe when the cell is re-run
    Path(zip_to_path).mkdir(parents=True, exist_ok=True)

    # Extract the whole archive into the target directory
    with ZipFile(zip_path) as archive:
        archive.extractall(path=zip_to_path)


# Reproject and clip spatial data to location boundary

In [5]:
# Path to the city limits file that serves as the clip boundary
boundary_filep = os.path.join(REF_DIR_R, 'city.gpkg')
# Load the boundary into a GeoDataFrame
boundary = gpd.read_file(boundary_filep)

## NSI

In [6]:
# Path to the raw NSI parquet file
nsi_filep = os.path.join(EXP_DIR_R, 'nsi.pqt')
# Load the table...
nsi_full = pd.read_parquet(nsi_filep)
# ...and replace its index with a fresh default one,
# discarding the old index values
nsi_full = nsi_full.reset_index(drop=True)

In [7]:
# Build point geometries from the NSI x/y coordinate columns
geometry = gpd.points_from_xy(x=nsi_full['properties.x'],
                              y=nsi_full['properties.y'])
# Attach the points to the table as a GeoDataFrame;
# the NSI coordinates are in EPSG 4326
nsi_gdf_f = gpd.GeoDataFrame(
    nsi_full,
    geometry=geometry,
    crs="EPSG:4326",
)

In [10]:
# Bring the NSI points into the boundary's CRS so that
# the two layers can be joined spatially
nsi_gdf_f = nsi_gdf_f.to_crs(crs=boundary.crs)

# Inner spatial join against the boundary geometry keeps only the
# NSI locations that fall within the location boundary.
# Note: for this case study the join does not remove any properties,
# but it can with a different boundary file, which is a feature
# that should be available to future users
nsi_gdf = gpd.sjoin(
    nsi_gdf_f,
    boundary[['geometry']],
    how='inner',
)

In [13]:
# Columns to discard; 'index_right' is the bookkeeping column
# added by the spatial join
drop_cols = ['type', 'geometry.type', 'geometry.coordinates', 'index_right']
nsi_gdf = nsi_gdf.drop(labels=drop_cols, axis='columns')

# Strip the "properties." text from every column name
col_updates = [
    col_name.replace("properties.", "")
    for col_name in nsi_gdf.columns
]
nsi_gdf.columns = col_updates

In [17]:
# Save the clipped NSI layer to the interim exposure directory
# as a GeoPackage
nsi_out_filep = join(EXP_DIR_I, 'nsi.gpkg')
nsi_gdf.to_file(nsi_out_filep, driver='GPKG')

## Depth Grids

In [27]:
# There are depth and coastal depth grids
# We want each one reprojected and clipped to the area boundary
# In general, I think it makes more sense to extract the depths from
# the raster in its original CRS, but for plotting purposes it can be
# useful to have all data in a standardized CRS. I think this should match
# up with the CRS of the boundary, but there could be better CRS to choose

# List of depth grid filenames
dg_names = ['CstDpth0_2pct.tif', 'CstDpth01pct.tif',
            'CstDpth02pct.tif', 'CstDpth10pct.tif',
            'Depth_0_2pct.tif', 'Depth_01pct.tif',
            'Depth_02pct.tif', 'Depth_10pct.tif']

# List of out filenames; entry k is the output name for dg_names[k]
dg_names_out = ['cst_depth500.tif', 'cst_depth100.tif',
                'cst_depth50.tif', 'cst_depth10.tif',
                'in_depth500.tif', 'in_depth100.tif',
                'in_depth50.tif', 'in_depth10.tif']
# The two lists are paired by position, so a length mismatch would
# silently drop or mislabel grids; fail loudly instead
assert len(dg_names) == len(dg_names_out), \
    'dg_names and dg_names_out must have the same length'

# Interim directory
dg_in_dir = join(HAZ_DIR_I, 'dg')
# Out directory (same as in, but it's nice to have
# different name references for logic in the script)
dg_out_dir = join(HAZ_DIR_I, 'dg')
Path(dg_out_dir).mkdir(parents=True, exist_ok=True)
# Temp directory (for reprojected)
dg_tmp_dir = join(HAZ_DIR_I, 'tmp')
Path(dg_tmp_dir).mkdir(parents=True, exist_ok=True)

# Store crs
DST_CRS = boundary.crs

# Loop through each, reproject, write in interim
# TODO: I expect you always have to reproject, but
# it's better practice to check if the CRS are equal
for dg_in, dg_out in zip(dg_names, dg_names_out):
    print('Beginning proccessing: ' + dg_in)

    # In, tmp, out filepaths
    dg_in_filep = join(dg_in_dir, dg_in)
    # Reprojected files go in tmp, so replace . with _r.
    dg_tmp_filep = join(dg_tmp_dir, dg_in.replace('.', '_r.'))
    # In/out names are paired by zip() above
    dg_out_filep = join(dg_out_dir, dg_out)

    # reprojecting grid to match boundary crs
    # Following rasterio references
    # https://rasterio.readthedocs.io/en/stable/topics/reproject.html
    with rasterio.open(dg_in_filep) as src:
        # Transform and dimensions of the grid in the destination CRS
        transform, width, height = calculate_default_transform(
            src.crs, DST_CRS, src.width, src.height, *src.bounds)
        # Start from the source metadata and override the georeferencing
        kwargs = src.meta.copy()
        kwargs.update({
            'crs': DST_CRS,
            'transform': transform,
            'width': width,
            'height': height
        })

        with rasterio.open(dg_tmp_filep, 'w', **kwargs) as dst:
            # Band numbers start at 1. The index has its own name so it
            # cannot clobber any variable of the outer loop
            for band_idx in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, band_idx),
                    destination=rasterio.band(dst, band_idx),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=DST_CRS,
                    resampling=Resampling.nearest)

    # Clip depth grid to boundary using mask
    # https://rasterio.readthedocs.io/en/stable/
    # topics/masking-by-shapefile.html
    with rasterio.open(dg_tmp_filep) as src:
        out_image, out_transform = rasterio.mask.mask(src,
                                                      boundary['geometry'],
                                                      crop=True)
        # Copy so the update below only touches our own dict
        out_meta = src.meta.copy()
    # out_image is indexed (band, row, col), hence shape[1] / shape[2]
    out_meta.update({"driver": "GTiff",
                     "height": out_image.shape[1],
                     "width": out_image.shape[2],
                     "transform": out_transform})

    with rasterio.open(dg_out_filep, "w", **out_meta) as dest:
        dest.write(out_image)

    # TODO: makes sense to include a log for checking bottlenecks
    # Can write out how long it took to do the reprojection & clipping
    # For now, since running in jupyter, just printing out
    print(dg_in + ' reprojected and clipped')

Beginning proccessing: CstDpth0_2pct.tif
CstDpth0_2pct.tif reprojected and clipped
Beginning proccessing: CstDpth01pct.tif
CstDpth01pct.tif reprojected and clipped
Beginning proccessing: CstDpth02pct.tif
CstDpth02pct.tif reprojected and clipped
Beginning proccessing: CstDpth10pct.tif
CstDpth10pct.tif reprojected and clipped
Beginning proccessing: Depth_0_2pct.tif
Depth_0_2pct.tif reprojected and clipped
Beginning proccessing: Depth_01pct.tif
Depth_01pct.tif reprojected and clipped
Beginning proccessing: Depth_02pct.tif
Depth_02pct.tif reprojected and clipped
Beginning proccessing: Depth_10pct.tif
Depth_10pct.tif reprojected and clipped


In [None]:
# TODO: Compare each original raster with its reprojected and clipped
# version to verify what the processing above changed
# TODO: Double check that the reprojection and clipping steps are necessary

## Flood Zones

## SOVI

# Link ref data to tabular data if needed

## BGs to LMI

## NFIP claims/pols to tracts

# Parcel processing

## Clean and filter

## Join with building footprints

## Merge with hazard and social vuln data

# Prepare depth-damage functions