# breadboard for census community resilience estimates
## 2023 estimates
## https://www.census.gov/programs-surveys/community-resilience-estimates/data/datasets.html

FIPS format (11 digits): SSCCCTTTTTT

SS = 2-digit state code
CCC = 3-digit county code
TTTTTT = 6-digit (census) tract code


__GEO_ID__ A geographic identifier which contains information on the type of geography and applicable FIPS codes  
__STATE__ State FIPS code  
__COUNTY__ County FIPS code*  
__TRACT__ Tract FIPS code*  
__NAME__ Geographic Area Name  
__GEO_LEVEL__ Geographic level*  
__WATER_TRACT__ Flag that denotes tracts composed completely of water, where no populations
reside*  
__POPUNI__ Total population (excludes adult correctional/juvenile facilities and college dorms)  
__PRED0_E__ Estimated number of individuals with zero components of social vulnerability  
__PRED0_M__ Estimated margin of error for individuals with zero components of social
vulnerability  
__PRED0_PE__ Rate of individuals with zero components of social vulnerability  
__PRED0_PM__ Rate margin of error for individuals with zero components of social vulnerability  
__PRED12_E__ Estimated number of individuals with one-two components of social vulnerability  
__PRED12_M__ Estimated margin of error for individuals with one-two components of social
vulnerability  
__PRED12_PE__ Rate of individuals with one-two components of social vulnerability  
__PRED12_PM__ Rate margin of error for individuals with one-two components of social
vulnerability  
__PRED3_E__ Estimated number of individuals with three plus components of social vulnerability  
__PRED3_M__ Estimated margin of error for individuals with three plus components of social
vulnerability  
__PRED3_PE__ Rate of individuals with three plus components of social vulnerability  
__PRED3_PM__ Rate margin of error for individuals with three plus components of social
vulnerability  


Columns to keep from the source file:
- GEO_ID
- GEO_LEVEL
- WATER_TRACT
- POPUNI
- PRED12_PE
- PRED3_PE


In [None]:
# import libraries
# NOTE: global_vars should be edited to include local paths and credentials before use.
# If global_vars.py is created in the root dir remove the ignore/ prefix in the import statement below.
import ignore.global_vars as gv
import db_tools as dbt
import pandas as pd



In [None]:
# NOTE: Set to True if a full rebuild is required, set to False to skip table builds.
# Search for 'REBUILD' to see which sections are affected.
REBUILD = False

In [2]:
# Load the community resilience estimates CSV from the path configured in global_vars.
# The file is decoded as latin-1.
df = pd.read_csv(gv.DATA_PATHS["census_resilience"], encoding='latin-1')
# Preview a handful of random rows; the fixed seed keeps the preview identical
# across Restart & Run All instead of changing on every execution.
df.sample(5, random_state=42)

Unnamed: 0,GEO_ID,STATE,COUNTY,TRACT,NAME,GEO_LEVEL,WATER_TRACT,POPUNI,PRED0_E,PRED0_M,PRED0_PE,PRED0_PM,PRED12_E,PRED12_M,PRED12_PE,PRED12_PM,PRED3_E,PRED3_M,PRED3_PE,PRED3_PM
21617,1400000US13051011004,13,51,11004,"Census Tract 110.04, Chatham County, Georgia",Tract,,3695,1523,467,41.22,12.64,1760,469,47.63,12.69,412,355,11.15,9.61
16899,1400000US12017451603,12,17,451603,"Census Tract 4516.03, Citrus County, Florida",Tract,,2244,457,229,20.37,10.2,1251,267,55.75,11.9,536,249,23.89,11.1
84509,0500000US02275,2,275,0,"Wrangell City and Borough, Alaska",County,,2064,507,173,24.56,8.38,1015,203,49.18,9.84,542,180,26.26,8.72
4827,1400000US06013355111,6,13,355111,"Census Tract 3551.11, Contra Costa County, Cal...",Tract,,6686,1784,734,26.68,10.98,3461,818,51.76,12.23,1441,663,21.55,9.92
1868,1400000US04013071516,4,13,71516,"Census Tract 715.16, Maricopa County, Arizona",Tract,,4761,715,633,15.02,13.3,2392,712,50.24,14.95,1654,642,34.74,13.48


In [3]:
# Show the frame's dimensions, then its per-column dtype / non-null summary.
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
print(df.shape)
df.info()

(87611, 20)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 87611 entries, 0 to 87610
Data columns (total 20 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   GEO_ID       87611 non-null  object 
 1   STATE        87611 non-null  int64  
 2   COUNTY       87611 non-null  int64  
 3   TRACT        87611 non-null  int64  
 4   NAME         87611 non-null  object 
 5   GEO_LEVEL    87611 non-null  object 
 6   WATER_TRACT  320 non-null    float64
 7   POPUNI       87611 non-null  int64  
 8   PRED0_E      87611 non-null  int64  
 9   PRED0_M      87611 non-null  int64  
 10  PRED0_PE     87611 non-null  float64
 11  PRED0_PM     87611 non-null  float64
 12  PRED12_E     87611 non-null  int64  
 13  PRED12_M     87611 non-null  int64  
 14  PRED12_PE    87611 non-null  float64
 15  PRED12_PM    87611 non-null  float64
 16  PRED3_E      87611 non-null  int64  
 17  PRED3_M      87611 non-null  int64  
 18  PRED3_PE     87611 non-null  float

In [4]:
# Render TRACT as text and summarise how many characters each value has,
# to see whether the field can be used as-is when converting to FIPS codes.
df = df.assign(TRACT=df['TRACT'].astype(str))
df['TRACT'].str.len().describe()

count    87611.000000
mean         4.972298
std          1.172546
min          1.000000
25%          4.000000
50%          5.000000
75%          6.000000
max          6.000000
Name: TRACT, dtype: float64

In [5]:
# List the distinct geographic levels present in the file, in order of appearance.
pd.unique(df['GEO_LEVEL'])

array(['Tract', 'County', 'State', 'US'], dtype=object)

In [6]:
# Check whether the GEO_LEVEL data is hierarchical by counting the rows at each level.
us_count, state_count, county_count, tract_count = (
    df['GEO_LEVEL'].eq(level).sum()
    for level in ('US', 'State', 'County', 'Tract')
)
# One count per line: US, State, County, Tract.
print(us_count, state_count, county_count, tract_count, sep='\n')

1
51
3144
84415


In [7]:
# Confirm that rows above the tract level carry no usable tract code:
# every TRACT value in these rows should be a single character, so the
# describe() summary below should report a maximum length of 1.
df_not_tract = (
    df.loc[df['GEO_LEVEL'].ne('Tract')]
    .assign(TRACT=lambda frame: frame['TRACT'].astype(str))
    .reset_index(drop=True)
)
df_not_tract['TRACT'].str.len().describe()

count    3196.0
mean        1.0
std         0.0
min         1.0
25%         1.0
50%         1.0
75%         1.0
max         1.0
Name: TRACT, dtype: float64

In [8]:
# Preview the first few county-level rows.
df.loc[df['GEO_LEVEL'].eq('County')].head()

Unnamed: 0,GEO_ID,STATE,COUNTY,TRACT,NAME,GEO_LEVEL,WATER_TRACT,POPUNI,PRED0_E,PRED0_M,PRED0_PE,PRED0_PM,PRED12_E,PRED12_M,PRED12_PE,PRED12_PM,PRED3_E,PRED3_M,PRED3_PE,PRED3_PM
84415,0500000US01001,1,1,0,"Autauga County, Alabama",County,,60183,23534,2790,39.1,4.64,24255,2851,40.3,4.74,12394,2306,20.59,3.83
84416,0500000US01003,1,3,0,"Baldwin County, Alabama",County,,251949,91386,7887,36.27,3.13,107840,8199,42.8,3.25,52723,6888,20.93,2.73
84417,0500000US01005,1,5,0,"Barbour County, Alabama",County,,22085,4988,1080,22.59,4.89,9758,1204,44.18,5.45,7339,1123,33.23,5.08
84418,0500000US01007,1,7,0,"Bibb County, Alabama",County,,20223,6880,1051,34.02,5.2,8050,1109,39.81,5.48,5293,967,26.17,4.78
84419,0500000US01009,1,9,0,"Blount County, Alabama",County,,59729,18973,2245,31.77,3.76,26090,2367,43.68,3.96,14666,2036,24.55,3.41


In [9]:
# The TRACT field is missing its leading zeros, so instead of concatenating
# fields, keep only the county-level rows and take the last five characters
# of GEO_ID as the county FIPS code, stored in a new column County_fips.
df = (
    df.loc[df['GEO_LEVEL'] == 'County']
    .reset_index(drop=True)
    .assign(County_fips=lambda frame: frame['GEO_ID'].str[-5:])
)
df.head()


Unnamed: 0,GEO_ID,STATE,COUNTY,TRACT,NAME,GEO_LEVEL,WATER_TRACT,POPUNI,PRED0_E,PRED0_M,...,PRED0_PM,PRED12_E,PRED12_M,PRED12_PE,PRED12_PM,PRED3_E,PRED3_M,PRED3_PE,PRED3_PM,County_fips
0,0500000US01001,1,1,0,"Autauga County, Alabama",County,,60183,23534,2790,...,4.64,24255,2851,40.3,4.74,12394,2306,20.59,3.83,1001
1,0500000US01003,1,3,0,"Baldwin County, Alabama",County,,251949,91386,7887,...,3.13,107840,8199,42.8,3.25,52723,6888,20.93,2.73,1003
2,0500000US01005,1,5,0,"Barbour County, Alabama",County,,22085,4988,1080,...,4.89,9758,1204,44.18,5.45,7339,1123,33.23,5.08,1005
3,0500000US01007,1,7,0,"Bibb County, Alabama",County,,20223,6880,1051,...,5.2,8050,1109,39.81,5.48,5293,967,26.17,4.78,1007
4,0500000US01009,1,9,0,"Blount County, Alabama",County,,59729,18973,2245,...,3.76,26090,2367,43.68,3.96,14666,2036,24.55,3.41,1009


In [10]:
# Reduce the frame to the keep list, dropping every other column:
#   County_fips, GEO_ID, GEO_LEVEL, WATER_TRACT, POPUNI, PRED12_PE, PRED3_PE
df = df.loc[:, [
    'County_fips',
    'GEO_ID',
    'GEO_LEVEL',
    'WATER_TRACT',
    'POPUNI',
    'PRED12_PE',
    'PRED3_PE',
]]
df.head()

Unnamed: 0,County_fips,GEO_ID,GEO_LEVEL,WATER_TRACT,POPUNI,PRED12_PE,PRED3_PE
0,1001,0500000US01001,County,,60183,40.3,20.59
1,1003,0500000US01003,County,,251949,42.8,20.93
2,1005,0500000US01005,County,,22085,44.18,33.23
3,1007,0500000US01007,County,,20223,39.81,26.17
4,1009,0500000US01009,County,,59729,43.68,24.55


In [11]:
# Check memory usage and column dtypes of the reduced frame.
# info() prints its report directly, so it is called without print()/display.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3144 entries, 0 to 3143
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   County_fips  3144 non-null   object 
 1   GEO_ID       3144 non-null   object 
 2   GEO_LEVEL    3144 non-null   object 
 3   WATER_TRACT  0 non-null      float64
 4   POPUNI       3144 non-null   int64  
 5   PRED12_PE    3144 non-null   float64
 6   PRED3_PE     3144 non-null   float64
dtypes: float64(3), int64(1), object(3)
memory usage: 172.1+ KB


In [None]:
# Load the frame into the 'census_resilience' table, but only when a rebuild
# was requested through the REBUILD flag; otherwise just report the skip.
# if_exists='replace' is passed through to dbt.load_data
# (presumably it overwrites an existing table; confirm in db_tools).
if not REBUILD:
    print("rebuild skipped")
else:
    dbt.load_data(df, 'census_resilience', if_exists='replace')

Created SQLAlchemy engine for disaster_db


Data loaded successfully into census_resilience
