In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('style/advocate.mplstyle')
%matplotlib inline
pd.set_option('display.float_format', lambda x: f'{x:,.2f}')
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)



# Block group income analysis

Compare uncovered damage vs. income level/poverty level (we would use 2000 Census data – “what did it look like before you got the grant”, a pre-flood view) GeoIDs for the blocks are all 2000.
Assigned: Sophie
Needs:
This nonprofit has already matched tracts to neighborhoods (https://www.datacenterresearch.org/); Jeff will pop data into the drive
What ACS time period should we use?
Timing: Days (let’s check in on our Monday meeting 9/19)
Status: In progress


### Load Flooding data
- rh_block_groups_nola_flood_GT3
- These files have racial/ethnic demographic information and the Road Home data. Note that as with the neighborhoods file, the demographic information is the aggregate of all the blocks in the block group but the RH data only aggregates blocks that had more than 3 feet of flooding.

In [5]:
df_flood = pd.read_csv('processed data/rh_block_groups_nola_flood_GT3.csv', index_col=0)

In [6]:
len(df_flood)

485

In [18]:
df_flood.columns

Index(['BKGPIDFP00', 'geometry', 'GEOID', 'compensation_sum',
       'additional_compensation_sum', 'elevation_sum', 'mitigation_sum',
       'all_grants_sum', 'all_grants_count', 'value_sum', 'damage_sum',
       'insurance_sum', 'Total', 'White', 'Black', 'Hispanic', 'Indigenous',
       'Asian', 'NHOPI', 'Other', 'Multiracial', 'households_2000_total',
       'households_2000_owner_occupied',
       'households_2000_owner_occupied_black', 'damage_mean',
       'grant_insurance_sum', 'grant_insurance_mean', 'uncovered_sum',
       'uncovered_mean', 'uncovered_percent', 'white_percent',
       'rh_household_pct', 'households_2000_owner_occupied_pct',
       'households_2000_owner_occupied_black_pct'],
      dtype='object')

### Load Census block group data from IPUMS
Compare uncovered damage vs. income level/poverty level (we would use 2000 Census data – “what did it look like before you got the grant”, a pre-flood view) GeoIDs for the blocks are all 2000.



In [8]:
df_census = pd.read_csv('raw data/nhgis0008_csv/nhgis0008_ds152_2000_blck_grp.csv')

In [9]:
len(df_census) # should be 3509

3509

### Limit to just census block groups in Orleans parish

In [11]:
df_census.columns

Index(['GISJOIN', 'YEAR', 'STATE', 'STATEA', 'COUNTY', 'COUNTYA', 'CTY_SUBA',
       'PLACEA', 'TRACTA', 'BLCK_GRPA', 'AIANHHA', 'URBRURALA', 'NAME',
       'HF5001', 'HF5002', 'HF5003', 'HF5004', 'HF5005', 'HF5006', 'HF5007',
       'HF5008', 'HF5009', 'HF5010', 'HF5011', 'HF5012', 'HF5013', 'HF5014',
       'HF5015', 'HF5016', 'HF6001', 'HHE001', 'HHE002', 'HHS001', 'HHS002'],
      dtype='object')

In [14]:
df_census.COUNTY.value_counts()[:10]

Orleans             485
Jefferson           357
East Baton Rouge    306
Caddo               210
Calcasieu           133
Lafayette           131
Ouachita            126
St Tammany          101
Rapides              96
Terrebonne           80
Name: COUNTY, dtype: int64

In [15]:
df_orleans = df_census[df_census.COUNTY == "Orleans"]

In [16]:
len(df_orleans)

485

In [92]:
df_orleans.BLCK_GRPA.value_counts()

1    176
2    158
3     95
4     42
5      9
6      4
7      1
Name: BLCK_GRPA, dtype: int64

In [17]:
df_orleans.head()

Unnamed: 0,GISJOIN,YEAR,STATE,STATEA,COUNTY,COUNTYA,CTY_SUBA,PLACEA,TRACTA,BLCK_GRPA,AIANHHA,URBRURALA,NAME,HF5001,HF5002,HF5003,HF5004,HF5005,HF5006,HF5007,HF5008,HF5009,HF5010,HF5011,HF5012,HF5013,HF5014,HF5015,HF5016,HF6001,HHE001,HHE002,HHS001,HHS002
1931,G22007100001001,2000,Louisiana,22,Orleans,71,,,100,1,,,Block Group 1,43,17,18,13,18,42,50,3,36,9,27,36,16,8,9,6,37269,294,679,58,293
1932,G22007100001002,2000,Louisiana,22,Orleans,71,,,100,2,,,Block Group 2,21,54,41,23,28,19,25,22,42,42,24,3,13,0,7,12,35278,86,782,43,333
1933,G22007100001003,2000,Louisiana,22,Orleans,71,,,100,3,,,Block Group 3,28,15,16,35,14,3,35,38,3,43,46,43,13,15,0,6,44185,37,531,28,325
1934,G22007100002001,2000,Louisiana,22,Orleans,71,,,200,1,,,Block Group 1,80,29,34,53,37,18,9,6,11,0,22,17,0,0,8,0,21979,372,381,130,194
1935,G22007100002002,2000,Louisiana,22,Orleans,71,,,200,2,,,Block Group 2,80,18,28,21,25,0,4,5,0,8,8,4,0,0,0,0,15298,331,379,76,125


### Convert GISJOIN to GEOID
- Remove G
- Remove zero in third/4th digit  
- Remove 6th/8th digit (0)   
- 12 digits is block group code

In [21]:
len('G22007100001001')

15

In [20]:
len("220710033042")

12

In [31]:
df_orleans.GISJOIN 

1931    G22007100001001
1932    G22007100001002
1933    G22007100001003
1934    G22007100002001
1935    G22007100002002
             ...       
2411    G22007100133012
2412    G22007100133013
2413    G22007100133014
2414    G22007100133021
2415    G22007100133022
Name: GISJOIN, Length: 485, dtype: object

In [41]:
df_orleans["GEOID"] = df_orleans.GISJOIN.str[1:3] + df_orleans.GISJOIN.str[4:7] + df_orleans.GISJOIN.str[8:] 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_orleans["GEOID"] = df_orleans.GISJOIN.str[1:3] + df_orleans.GISJOIN.str[4:7] + df_orleans.GISJOIN.str[8:]


### Join two datasets

In [43]:
len(df_orleans.GEOID.unique()) ## Good, 485 unique rows

485

In [49]:
df_flood['GEOID'] = df_flood.GEOID.astype("str") # convert to string

In [51]:
df_merged = df_flood.merge(df_orleans, left_on="GEOID", right_on="GEOID")

In [52]:
len(df_merged)

485

### Rename columns

In [54]:
df_merged.columns

Index(['BKGPIDFP00', 'geometry', 'GEOID', 'compensation_sum',
       'additional_compensation_sum', 'elevation_sum', 'mitigation_sum',
       'all_grants_sum', 'all_grants_count', 'value_sum', 'damage_sum',
       'insurance_sum', 'Total', 'White', 'Black', 'Hispanic', 'Indigenous',
       'Asian', 'NHOPI', 'Other', 'Multiracial', 'households_2000_total',
       'households_2000_owner_occupied',
       'households_2000_owner_occupied_black', 'damage_mean',
       'grant_insurance_sum', 'grant_insurance_mean', 'uncovered_sum',
       'uncovered_mean', 'uncovered_percent', 'white_percent',
       'rh_household_pct', 'households_2000_owner_occupied_pct',
       'households_2000_owner_occupied_black_pct', 'GISJOIN', 'YEAR', 'STATE',
       'STATEA', 'COUNTY', 'COUNTYA', 'CTY_SUBA', 'PLACEA', 'TRACTA',
       'BLCK_GRPA', 'AIANHHA', 'URBRURALA', 'NAME', 'HF5001', 'HF5002',
       'HF5003', 'HF5004', 'HF5005', 'HF5006', 'HF5007', 'HF5008', 'HF5009',
       'HF5010', 'HF5011', 'HF5012', 'HF50

In [56]:
df_merged.rename(columns={"HF5001":"lt_10000",
"HF5002":"10000_1499",
"HF5003":"15000_19999",
"HF5004":"20000_24,999",
"HF5005":"25000_29999",
"HF5006":"30000_34999",
"HF5007":"35000_39999",
"HF5008":"40000_44999",
"HF5009":"45000_49999",
"HF5010":"50000_59999",
"HF5011":"60000_74999",
"HF5012":"75000_99999",
"HF5013":"100000_124999",
"HF5014":"125000_149999",
"HF5015":"150000_199999",
"HF5016":"gte_200000",
"HF6001":"median_income",
"HHE001":"below_poverty_level",
"HHE002":"at_or_below_poverty_level"}, inplace=True)

In [53]:
## Save joined data!
df_merged.to_csv('processed data/rh_blocks_nola_flood_GT3_w_income_poverty.csv')