In [5]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('style/advocate.mplstyle')
%matplotlib inline
# Notebook-wide pandas display settings: floats are rendered with thousands
# separators and two decimals, and up to 100 rows / 100 columns are shown.
pd.options.display.float_format = '{:,.2f}'.format
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100

## 1-Processing

The steps needed to process the Road Home and Census files and generate CSV and GeoJSON files for analysis. The notebook currently combines the needed files, keeping Road Home data only for blocks whose minimum flood depth was greater than 3 feet. Neighborhood files (currently based on the 2010 neighborhood boundaries) are created by aggregating blocks.

TO-DO: Add income data

In [8]:
# Road Home grant data supplied by the Louisiana Division of Administration,
# already aggregated to 2000-vintage Census blocks.
rh = pd.read_csv('raw data/PRR 8_22_22.csv')

# 2000 Census block boundaries, reprojected to EPSG:3452 right after loading.
blocks = gpd.read_file('shapefiles/blocks/tl_2010_22_tabblock00.shp').to_crs('epsg:3452')

# Point layer for the neighborhood join: same attributes as `blocks`, but each
# polygon is replaced by its centroid. `blocks` itself keeps its polygons.
blocks_centroids = blocks.copy()
blocks_centroids['geometry'] = blocks['geometry'].centroid

# Flood-depth cutoff (in feet) used to decide which blocks keep their Road Home data.
FLOOD_DEPTH_THRESHOLD_FT = 3

# Flood statistics per Census block. Only the block id and the four summary
# columns are kept; the id is renamed to GEOID to match the other tables and the
# truncated shapefile field 'flood_medi' is renamed to 'flood_median'.
flood_layer = gpd.read_file('shapefiles/flood_blocks/flood_blocks.shp')
flood_blocks = (
    flood_layer[['BLKIDFP00', 'flood_mean', 'flood_medi', 'flood_min', 'flood_max']]
    .rename(columns={'BLKIDFP00': 'GEOID', 'flood_medi': 'flood_median'})
)

# GEOIDs of blocks whose `flood_min` is strictly greater than the threshold.
# NOTE(review): `flood_min` is presumably the minimum flood depth observed within
# the block, so this keeps blocks that were flooded above the threshold throughout
# -- confirm against the flood shapefile's documentation.
# The variable name keeps its "GT3" suffix because later steps refer to it.
flood_blocks_GT3 = flood_blocks.loc[
    flood_blocks['flood_min'] > FLOOD_DEPTH_THRESHOLD_FT, 'GEOID'
].to_list()

# Household tenure by race of householder, by block, from the 2000 Census.
# The first line of the file is skipped, so column names come from the second line.
households = pd.read_csv('raw data/2000-La-TenureByHouseholderRaceByBlock.csv', skiprows=[0])

# The block GEOID is the part of the 'Geography' identifier that follows 'US'.
households['GEOID'] = households['Geography'].apply(lambda geography: geography.split('US')[1])

# Census column -> short analysis name. Only these columns (plus GEOID) are kept.
household_columns = {
    'Total': 'households_2000_total',
    'Total!!Owner occupied': 'households_2000_owner_occupied',
    'Total!!Owner occupied!!Householder who is Black or African American alone': 'households_2000_owner_occupied_black',
}
households = households[['GEOID', *household_columns]].rename(columns=household_columns)

# Neighborhood Statistical Areas provided by the City of New Orleans.
# Caveat: neighborhood boundaries follow Census tract boundaries, and this file
# uses the 2010 tracts. The Data Center (which generates the boundaries) has been
# asked whether anything changed between the 2000 and 2010 vintages and, if so,
# for a copy of the 2000 vintage. Boundary changes are typically very minor, but
# may have some impact on the overall analysis.
neighborhoods = gpd.read_file(
    'shapefiles/neighborhoods/Neighborhood_Statistical_Areas.shp'
).to_crs('epsg:3452')

# Attach a neighborhood label to each block by spatially joining the block
# centroids to the neighborhood polygons (sjoin defaults are used).
neighborhood_labels = neighborhoods[['GNOCDC_LAB', 'geometry']]
blocks_neighborhoods = gpd.sjoin(blocks_centroids, neighborhood_labels)

# Table P004 from Census 2000 Summary File 1:
# Hispanic or Latino, and Not Hispanic or Latino by Race, by block.
# The first line of the file is skipped, so column names come from the second line.
blocks_race = pd.read_csv('raw data/2000-La-HispanicRaceByBlock.csv', skiprows=[0])

# Long Census column names -> short labels used in the rest of the notebook.
race_labels = {
    'Total!!Hispanic or Latino': 'Hispanic',
    'Total!!Not Hispanic or Latino!!Population of one race!!White alone': 'White',
    'Total!!Not Hispanic or Latino!!Population of one race!!Black or African American alone': 'Black',
    'Total!!Not Hispanic or Latino!!Population of one race!!American Indian and Alaska Native alone': 'Indigenous',
    'Total!!Not Hispanic or Latino!!Population of one race!!Asian alone': 'Asian',
    'Total!!Not Hispanic or Latino!!Population of one race!!Native Hawaiian and Other Pacific Islander alone': 'NHOPI',
    'Total!!Not Hispanic or Latino!!Population of one race!!Some other race alone': 'Other',
    'Total!!Not Hispanic or Latino!!Population of two or more races': 'Multiracial',
}

# Columns retained after renaming; 'Geography' is kept so the GEOID can be derived from it.
race_columns_kept = [
    'Total', 'White', 'Black', 'Hispanic', 'Indigenous',
    'Asian', 'NHOPI', 'Other', 'Multiracial', 'Geography',
]
blocks_race = blocks_race.rename(columns=race_labels)[race_columns_kept]

# Give every table a common 'GEOID' join key.
# Census race table: the GEOID is the part of 'Geography' that follows 'US'.
blocks_race['GEOID'] = blocks_race['Geography'].apply(lambda geography: geography.split('US')[1])

# Road Home table: the block id is converted to a string.
# NOTE(review): this assumes 'Census Block' was parsed as an integer or string;
# if it was read as float the keys would end in '.0' and not match -- confirm.
rh['GEOID'] = rh['Census Block'].astype(str)

# Block layers: the GEOID is a copy of the shapefile's 'BLKIDFP00' field.
for block_layer in (blocks_neighborhoods, blocks):
    block_layer['GEOID'] = block_layer['BLKIDFP00']

# Build the block-level table by joining everything on GEOID.
# Road Home rows are restricted to the blocks listed in `flood_blocks_GT3`
# (more than 3 feet of flooding) before the join.
# NOTE: every join is an outer join, so blocks without any Road Home grants
# are still present in the result.
rh_flooded = rh[rh['GEOID'].isin(flood_blocks_GT3)]
neighborhood_lookup = blocks_neighborhoods[['GNOCDC_LAB', 'GEOID']]
race_merge_columns = [
    'Total', 'White', 'Black', 'Hispanic', 'Indigenous',
    'Asian', 'NHOPI', 'Other', 'Multiracial', 'GEOID',
]

rh_blocks = blocks.merge(rh_flooded, on='GEOID', how='outer')
rh_blocks = rh_blocks.merge(neighborhood_lookup, on='GEOID', how='outer')
rh_blocks = rh_blocks.merge(blocks_race[race_merge_columns], on='GEOID', how='outer')
rh_blocks = rh_blocks.merge(households, on='GEOID', how='outer')

# Rename the neighborhood label and the Road Home columns to short names.
#
# Resulting fields:
#   compensation_sum            - sum of compensation grants
#   additional_compensation_sum - sum of additional compensation grants
#   elevation_sum               - sum of elevation grants
#   mitigation_sum              - sum of mitigation grants
#   all_grants_sum              - sum of compensation, additional compensation,
#                                 elevation and mitigation grants
#   all_grants_count            - count of properties that received grants
#   value_sum                   - sum of pre-storm value of properties
#   damage_sum                  - sum of damage assessment
#   insurance_sum               - sum of gross insurance payments
#
# NOTE(review): 'Sum of Total ACG Amunt' is kept exactly as written; it
# presumably matches a misspelled header in the raw CSV -- verify before "fixing".
rh_column_names = {
    'GNOCDC_LAB': 'Neighborhood',
    'Sum of Total CG Amount': 'compensation_sum',
    'Sum of Total ACG Amunt': 'additional_compensation_sum',
    'Sum of Total Elevation Amount': 'elevation_sum',
    'Sum of Total IMM Amount': 'mitigation_sum',
    'Sum of TOTAL_CLOSING_AMOUNT': 'all_grants_sum',
    'Count of TOTAL_CLOSING_AMOUNT2': 'all_grants_count',
    'Sum of Current PSV': 'value_sum',
    'Sum of Current Damage Assessment': 'damage_sum',
    'Sum of Gross Insurance Duplication of Benefits': 'insurance_sum',
}
rh_blocks = rh_blocks.rename(columns=rh_column_names)

# Columns carried into the neighborhood-level table: the grouping key followed
# by the Road Home totals, the population counts and the household counts.
grant_sum_fields = [
    'compensation_sum', 'additional_compensation_sum', 'elevation_sum',
    'mitigation_sum', 'all_grants_sum', 'all_grants_count',
    'value_sum', 'damage_sum', 'insurance_sum',
]
population_fields = [
    'Total', 'White', 'Black', 'Hispanic', 'Indigenous',
    'Asian', 'NHOPI', 'Other', 'Multiracial',
]
household_fields = [
    'households_2000_total',
    'households_2000_owner_occupied',
    'households_2000_owner_occupied_black',
]
neighborhood_fields = ['Neighborhood'] + grant_sum_fields + population_fields + household_fields

# Sum the block-level values within each neighborhood.
rh_neighborhoods = rh_blocks[neighborhood_fields].groupby('Neighborhood').sum()

# Attach the neighborhood polygons, matching on the neighborhood label (index on both sides).
neighborhood_shapes = neighborhoods[['GNOCDC_LAB', 'geometry']].set_index('GNOCDC_LAB')
rh_neighborhoods = neighborhood_shapes.merge(rh_neighborhoods, left_index=True, right_index=True)

# Function to generate useful fields for various geographies
#
# Field descriptions
# grant_insurance_sum: Sum of payments from Compensation Grants, Additional Compensation Grants and Insurance
# grant_insurance_mean: Mean payment per property
# uncovered_sum: Sum of damage assessments not covered by grants or insurance
# uncovered_mean: Mean amount not covered per property
# uncovered_percent: Percent of damage not covered
# rh_household_pct: Properties that received grants (all_grants_count) as a share of owner-occupied households in the 2000 Census
# households_2000_owner_occupied_pct: Percentage of households that are owner-occupied
# households_2000_owner_occupied_black_pct: Percentage of households that are owner-occupied and have a Black householder

def _ratio_or_nan(numerator, denominator):
    """Element-wise ``numerator / denominator`` with NaN wherever the denominator is 0."""
    # Series.where keeps values where the condition holds and puts NaN elsewhere,
    # so a zero denominator yields NaN instead of +/-inf.
    return numerator / denominator.where(denominator != 0)


def helper_fields(df):
    """Add derived mean / share columns to a block- or neighborhood-level table.

    The columns are added to ``df`` in place and the same object is returned.

    Required input columns: damage_sum, all_grants_count, compensation_sum,
    additional_compensation_sum, insurance_sum, White, Total,
    households_2000_total, households_2000_owner_occupied,
    households_2000_owner_occupied_black.

    Columns added (shares are fractions, not multiplied by 100):
        damage_mean: damage_sum per granted property (all_grants_count).
        grant_insurance_sum: compensation + additional compensation + insurance.
        grant_insurance_mean: grant_insurance_sum per granted property.
        uncovered_sum: damage_sum minus grant_insurance_sum.
        uncovered_mean: uncovered_sum per granted property.
        uncovered_percent: uncovered_sum as a share of damage_sum.
        white_percent: White as a share of Total.
        rh_household_pct: all_grants_count as a share of owner-occupied households.
        households_2000_owner_occupied_pct: owner-occupied share of all households.
        households_2000_owner_occupied_black_pct: owner-occupied households with a
            Black householder as a share of all households.

    Any ratio whose denominator is 0 (for example a block with no grants or no
    owner-occupied households) is NaN rather than +/-inf, so that infinite values
    do not distort later statistics or the exported files.
    """
    grant_count = df['all_grants_count']

    df['damage_mean'] = _ratio_or_nan(df['damage_sum'], grant_count)
    df['grant_insurance_sum'] = (
        df['compensation_sum'] + df['additional_compensation_sum'] + df['insurance_sum']
    )
    df['grant_insurance_mean'] = _ratio_or_nan(df['grant_insurance_sum'], grant_count)
    df['uncovered_sum'] = df['damage_sum'] - df['grant_insurance_sum']
    df['uncovered_mean'] = _ratio_or_nan(df['uncovered_sum'], grant_count)
    df['uncovered_percent'] = _ratio_or_nan(df['uncovered_sum'], df['damage_sum'])
    df['white_percent'] = _ratio_or_nan(df['White'], df['Total'])
    df['rh_household_pct'] = _ratio_or_nan(grant_count, df['households_2000_owner_occupied'])
    df['households_2000_owner_occupied_pct'] = _ratio_or_nan(
        df['households_2000_owner_occupied'], df['households_2000_total']
    )
    df['households_2000_owner_occupied_black_pct'] = _ratio_or_nan(
        df['households_2000_owner_occupied_black'], df['households_2000_total']
    )

    return df

# Add the derived mean / share columns to both the block-level and the
# neighborhood-level tables.
rh_blocks = helper_fields(rh_blocks)
rh_neighborhoods = helper_fields(rh_neighborhoods)

# Keep only New Orleans blocks, i.e. rows whose county code 'COUNTYFP00' is '071'.
in_new_orleans = rh_blocks['COUNTYFP00'] == '071'
rh_blocks_nola = rh_blocks.loc[in_new_orleans]

In [9]:
# Write the processed tables to 'processed data/' (the directory must already exist).
# Each table is saved twice: as a CSV and as a GeoJSON file with the same stem.
rh_blocks_nola.to_csv('processed data/rh_blocks_nola_flood_GT3.csv')
rh_neighborhoods.to_csv('processed data/rh_neighborhoods_flood_GT3.csv')

# The driver is passed explicitly so the output format does not depend on
# geopandas inferring it from the '.geojson' extension (version-dependent).
rh_neighborhoods.to_file('processed data/rh_neighborhoods_flood_GT3.geojson', driver='GeoJSON')
rh_blocks_nola.to_file('processed data/rh_blocks_nola_flood_GT3.geojson', driver='GeoJSON')