In [1]:
import geopandas as gpd
import pandas as pd
import pygris
import censusdis.data as ced
from censusdis import states

from data.constants import LOCAL_CRS, WORLD_CRS, COMM_AREA_TABLE

In [2]:
# Output locations for the two GeoJSON layers written in the "Pipeline out" cell.
comm_file_out = "../data/raw/communities.geojson"
tract_file_out = "../data/raw/tracts.geojson"

# Pipeline in

(none)

# Census Tracts

(For Uber stops, which are anonymized to the tract or community area level)

In [3]:
# Fetch the 2020 cartographic-boundary tracts for Cook County, IL and keep only
# the identifier and geometry. The identifier is exposed as a numeric
# 'geoid10' column, and the frame is reprojected to WORLD_CRS.
tracts = (
    pygris.tracts(state='IL', county='cook', cb=True, year=2020, cache=False)
    [['GEOID', 'geometry']]
    .rename(columns={'GEOID': 'geoid10'})
    .to_crs(WORLD_CRS)
    .assign(
        geoid10=lambda gdf: pd.to_numeric(gdf['geoid10']),
        # Centroids are computed in LOCAL_CRS (presumably a projected CRS --
        # confirm in data.constants), then converted back to WORLD_CRS and
        # stored as WKT text.
        centroid=lambda gdf: (gdf['geometry']
                              .to_crs(LOCAL_CRS)
                              .centroid
                              .to_crs(WORLD_CRS)
                              .to_wkt()),
    )
)

Using FIPS code '17' for input 'IL'
Using FIPS code '031' for input 'cook'


## Tract Demographics

In [4]:
# ACS 5-year estimates for every Cook County tract, joined onto `tracts`.
DATASET = "acs/acs5"
YEAR = 2020
# Census API variable code -> column name used in the output.
VARIABLES = {"NAME": "CENSUS_ROW",
             "B01003_001E": "TOTAL_POPULATION",
             "B19013_001E": "MEDIAN_HOUSEHOLD_INCOME"}

demographics = ced.download(
    DATASET,
    YEAR,
    VARIABLES.keys(),
    state=states.IL,
    county="031", # cook county
    tract="*",
    # The geometry itself is filtered away below. NOTE(review): presumably the
    # 'GEOID' column used below is supplied by the geometry columns -- confirm
    # before turning these flags off.
    with_geometry=True,
    with_geometry_columns=True
)
demographics = (demographics
        .rename(columns=VARIABLES)
        .dropna(subset=['GEOID'])
        # Explicit 64-bit dtype: 11-digit tract GEOIDs do not fit in 32 bits,
        # and plain `int` is platform-dependent. This also matches the int64
        # join key on `tracts`.
        .assign(geoid10 = lambda x: x['GEOID'].astype('int64'))
        .filter(['geoid10'] + list(VARIABLES.values())))

# Columns this cell adds to `tracts`. They are dropped first (if present) so
# that re-running the cell does not create suffixed duplicate columns.
demographic_cols = [col for col in VARIABLES.values() if col != 'CENSUS_ROW']

tracts = (tracts
          .drop(columns=demographic_cols, errors='ignore')
          # CENSUS_ROW is only a human-readable label; it is not carried into
          # the output. `validate` fails loudly if a GEOID appears more than
          # once in `demographics`, instead of silently duplicating tract rows.
          .merge(demographics.drop(columns=['CENSUS_ROW']),
                 how='left', on='geoid10', validate='many_to_one'))


# Chicago Community Areas

In [5]:
# Read the community-area layer, keep the area number and geometry, and drop
# exact duplicate rows. A WKT 'centroid' column is then attached.
comm_points = (
    gpd.read_file(COMM_AREA_TABLE)
    .filter(['area_num_1', 'geometry'])
    .drop_duplicates()
    .assign(
        # Centroid is taken in LOCAL_CRS and reported in WORLD_CRS as WKT text.
        centroid=lambda gdf: (gdf['geometry']
                              .to_crs(LOCAL_CRS)
                              .centroid
                              .to_crs(WORLD_CRS)
                              .to_wkt())
    )
)

# Pipeline out

In [6]:
# Write both layers to the paths set in the configuration cell
# (community areas first, then tracts).
for layer, destination in ((comm_points, comm_file_out),
                           (tracts, tract_file_out)):
    layer.to_file(destination, index=False)