# Demographic and economic data #2

In [1]:
# %load nb-imports.py
import geopandas as gpd
import pandas as pd
import numpy as np

import cawc
from cawc import datasets as ds

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
PATH = cawc.PATH_DATA / 'sec-2016-1'

data = pd.read_csv(PATH / 'ACS_16_5YR_DP03.csv', skiprows=[1])
data

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,HC01_VC11,HC02_VC11,HC03_VC11,HC04_VC11,HC01_VC12,HC02_VC12,HC03_VC12,...,HC03_VC178,HC04_VC178,HC01_VC179,HC02_VC179,HC03_VC179,HC04_VC179,HC01_VC180,HC02_VC180,HC03_VC180,HC04_VC180
0,1400000US06001400100,6001400100,"Census Tract 4001, Alameda County, California",1643,168,1643,,,,4.6,...,2.2,2.4,,,2.5,2.3,,,8.3,5.5
1,1400000US06001400200,6001400200,"Census Tract 4002, Alameda County, California",1270,95,1270,,,,3.2,...,9.2,7.6,,,0.8,1.3,,,13.6,6.8
2,1400000US06001400300,6001400300,"Census Tract 4003, Alameda County, California",3402,352,3402,,,,6.0,...,12.8,11.9,,,3.5,4.8,,,16.5,6.6
3,1400000US06001400400,6001400400,"Census Tract 4004, Alameda County, California",2678,228,2678,,,,4.3,...,5.9,5.2,,,0.7,1.1,,,14.8,6.2
4,1400000US06001400500,6001400500,"Census Tract 4005, Alameda County, California",2545,249,2545,,,,3.2,...,4.1,6.2,,,4.4,4.5,,,14.0,5.7
5,1400000US06001400600,6001400600,"Census Tract 4006, Alameda County, California",1116,113,1116,,,,9.3,...,5.7,5.8,,,4.5,5.4,,,20.9,11.0
6,1400000US06001400700,6001400700,"Census Tract 4007, Alameda County, California",2712,273,2712,,,,8.4,...,14.6,7.9,,,14.5,9.6,,,24.1,6.0
7,1400000US06001400800,6001400800,"Census Tract 4008, Alameda County, California",2290,253,2290,,,,8.9,...,18.0,9.7,,,5.5,5.5,,,17.7,6.4
8,1400000US06001400900,6001400900,"Census Tract 4009, Alameda County, California",1435,171,1435,,,,4.3,...,13.9,12.0,,,10.5,8.5,,,25.0,10.2
9,1400000US06001401000,6001401000,"Census Tract 4010, Alameda County, California",3954,441,3954,,,,8.4,...,22.0,13.4,,,22.1,10.8,,,25.6,8.5


As in the previous `demo` notebook, after manually checking the correspondence between the column labels containing the desired observables, we select a table from the overall data.

It's interesting to note that the column label indices seem to be consistent regardless of the selections and modifications done interactively on the FactFinder portal (even if the corresponding column is not present in the exported CSV file).

In [3]:
COLUMNS = {
    'GEO.id2': 'geoid',
    'HC01_VC85': 'mean_household_income',
    'HC01_VC86': 'median_household_income',
}

def get_data_subset(df, col_map):
    cols_to_keep = list(col_map.keys())
    print(cols_to_keep)
    return df[cols_to_keep].rename(columns=col_map)

In [4]:
data.pipe(get_data_subset, COLUMNS)

['GEO.id2', 'HC01_VC85', 'HC01_VC86']


Unnamed: 0,geoid,mean_household_income,median_household_income
0,6001400100,177417.0,247448.0
1,6001400200,153125.0,188486.0
2,6001400300,85313.0,135449.0
3,6001400400,99539.0,127642.0
4,6001400500,83650.0,92682.0
5,6001400600,61597.0,91276.0
6,6001400700,46424.0,70191.0
7,6001400800,61111.0,77066.0
8,6001400900,55691.0,83432.0
9,6001401000,51073.0,68001.0


In [5]:
(ds.DEMO_ECON().set_index('geoid')
 .join(ds.DEMO_ECON_2().set_index('geoid'))
)

[dataset `sea2016-A`]: getting from loader...
[dataset `sea2016-A`]: processing...
[dataset `sea2016-B`]: getting from loader...
[dataset `sea2016-B`]: processing...


Unnamed: 0_level_0,label,rate_unemployed,rate_underpovlimit_families,rate_no_health_insurance_unemployed,mean_household_income,median_household_income
geoid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
06001400100,"Census Tract 4001, Alameda County, California",4.6,2.5,13.9,177417.0,247448.0
06001400200,"Census Tract 4002, Alameda County, California",3.2,0.8,7.2,153125.0,188486.0
06001400300,"Census Tract 4003, Alameda County, California",6.0,3.5,6.6,85313.0,135449.0
06001400400,"Census Tract 4004, Alameda County, California",4.3,0.7,7.3,99539.0,127642.0
06001400500,"Census Tract 4005, Alameda County, California",3.2,4.4,16.2,83650.0,92682.0
06001400600,"Census Tract 4006, Alameda County, California",9.3,4.5,14.2,61597.0,91276.0
06001400700,"Census Tract 4007, Alameda County, California",8.4,14.5,15.3,46424.0,70191.0
06001400800,"Census Tract 4008, Alameda County, California",8.9,5.5,18.8,61111.0,77066.0
06001400900,"Census Tract 4009, Alameda County, California",4.3,10.5,4.2,55691.0,83432.0
06001401000,"Census Tract 4010, Alameda County, California",8.4,22.1,17.5,51073.0,68001.0


In [6]:
ds.DEMO_ECON_3()

[dataset `sea2016`]: getting from loader...
[dataset `sea2016-A`]: getting from loader...
[dataset `sea2016-A`]: processing...
[dataset `sea2016-B`]: getting from loader...
[dataset `sea2016-B`]: processing...
[dataset `sea2016`]: processing...


Unnamed: 0,geoid,label,rate_unemployed,rate_underpovlimit_families,rate_no_health_insurance_unemployed,mean_household_income,median_household_income
0,06001400100,"Census Tract 4001, Alameda County, California",4.6,2.5,13.9,177417.0,247448.0
1,06001400200,"Census Tract 4002, Alameda County, California",3.2,0.8,7.2,153125.0,188486.0
2,06001400300,"Census Tract 4003, Alameda County, California",6.0,3.5,6.6,85313.0,135449.0
3,06001400400,"Census Tract 4004, Alameda County, California",4.3,0.7,7.3,99539.0,127642.0
4,06001400500,"Census Tract 4005, Alameda County, California",3.2,4.4,16.2,83650.0,92682.0
5,06001400600,"Census Tract 4006, Alameda County, California",9.3,4.5,14.2,61597.0,91276.0
6,06001400700,"Census Tract 4007, Alameda County, California",8.4,14.5,15.3,46424.0,70191.0
7,06001400800,"Census Tract 4008, Alameda County, California",8.9,5.5,18.8,61111.0,77066.0
8,06001400900,"Census Tract 4009, Alameda County, California",4.3,10.5,4.2,55691.0,83432.0
9,06001401000,"Census Tract 4010, Alameda County, California",8.4,22.1,17.5,51073.0,68001.0
