In [1]:
import os

from census import Census
from us import states

import pandas as pd
from tqdm import tqdm

# Register tqdm's pandas integration so `progress_apply` is available below.
tqdm.pandas()

# Census API client; the key is taken from the environment, not hard-coded.
c = Census(os.environ['US_CENSUS_API_KEY'])

# Load the place list and store PLACEFP as text, left-padded with zeros to
# five characters so it can be matched against the API's `place` codes.
places_df = pd.read_csv('cook-county-places.csv')
places_df['PLACEFP'] = places_df['PLACEFP'].map(str).str.zfill(5)

In [2]:
# B25001_001E: the ACS 5-year estimate requested for every place.
# NOTE(review): the Census Bureau publishes table B25001 as "Housing Units"
# (households are table B11001) — confirm this is the intended variable.
def get_census_stats(placefp, year=2016):
    """Query the ACS 5-year API for one Illinois place.

    Args:
        placefp: Place FIPS code forwarded to the API. This notebook passes
            the zero-padded 5-character strings from `places_df.PLACEFP`.
        year: ACS 5-year vintage to query. Defaults to 2016, the value that
            was previously hard-coded, so existing single-argument calls
            (including `progress_apply`) behave as before.

    Returns:
        The value returned by `c.acs5.state_place` for the `NAME` and
        `B25001_001E` fields; the calling cell reads its first element.
    """
    fields = ('NAME', 'B25001_001E')
    return c.acs5.state_place(fields, states.IL.fips, placefp, year=year)

# One API request per place, with a progress bar. The raw responses get their
# own name so `census_places_df` only ever refers to the finished DataFrame.
census_responses = places_df.PLACEFP.progress_apply(get_census_stats)

# Take the first element of every response and build the frame in a single
# constructor call. This replaces the per-row `.apply(pd.Series)` expansion,
# which creates one Series object per place; passing the index keeps the row
# labels identical to those of `places_df`.
census_places_df = pd.DataFrame(
    [rows[0] for rows in census_responses],
    index=census_responses.index,
)

# Strip the state suffix. `regex=False` makes this a literal replacement
# regardless of the installed pandas version's default for `regex`.
census_places_df['NAME'] = census_places_df['NAME'].str.replace(
    ', Illinois', '', regex=False
)

display(census_places_df.head())

100%|██████████| 146/146 [00:11<00:00, 13.61it/s]


Unnamed: 0,NAME,B25001_001E,state,place
0,Lansing village,12182.0,17,42028
1,Merrionette Park village,912.0,17,48554
2,Niles village,11900.0,17,53000
3,Northbrook village,13436.0,17,53481
4,North Riverside village,2778.0,17,54144


In [3]:
# Pair each local place with its census row: `PLACEFP` on the left is matched
# against `place` on the right. With the default join type only places found
# in both frames are kept.
merged_places_df = pd.merge(
    places_df,
    census_places_df,
    left_on='PLACEFP',
    right_on='place',
)

# Both inputs carry a NAME column, so the merge suffixes them; `NAME_x` is the
# one from `places_df` and is renamed back to plain NAME.
household_columns = ['STATEFP', 'PLACEFP', 'NAME_x', 'NAMELSAD', 'B25001_001E']
households_df = (
    merged_places_df
    .loc[:, household_columns]
    .rename(columns={'NAME_x': 'NAME'})
)

In [4]:
purchases_df = pd.read_excel('taxpurchases.xlsx')

# Town labels present in the workbook (empty cells removed) and the census
# place names converted to upper case for comparison against them.
purchase_towns = purchases_df['Towns'].dropna()
place_names_upper = households_df['NAME'].str.upper()

# Display the places whose upper-cased name has no match among the towns.
has_no_purchase_row = ~place_names_upper.isin(purchase_towns)
place_names_upper[has_no_purchase_row]

25                CRETE
26            FRANKFORT
28           ROMEOVILLE
30      UNIVERSITY PARK
31            WOODRIDGE
32           HOMER GLEN
34               ITASCA
39          BENSENVILLE
47            DEERFIELD
48            DEER PARK
50          EAST DUNDEE
53                 GOLF
65            WOOD DALE
105             CHICAGO
109           OAK BROOK
138       HIGHLAND PARK
139          LONG GROVE
140          RIVERWOODS
143            LOCKPORT
144    FRANKFORT SQUARE
145         WILLOWBROOK
Name: NAME, dtype: object

In [6]:
# Only the first 14 workbook columns take part in the join.
# NOTE(review): presumably `Towns` plus the yearly columns 2004-2016 — confirm
# against the spreadsheet layout.
purchase_columns = purchases_df.iloc[:, :14]

# Work on a new frame (households_df itself is left unchanged) that carries an
# upper-cased `Towns` key derived from NAME.
households_keyed = households_df.assign(Towns=households_df['NAME'].str.upper())

# Default (inner) join on the shared `Towns` column.
output_df = pd.merge(purchase_columns, households_keyed, on='Towns')

In [15]:
# Integer column labels 2004 through 2016 inclusive.
year_columns = list(range(2004, 2017))

# Reshape so each row is a year and each column a place name; missing values
# are written out as 0.
final_df = (
    output_df[['NAME'] + year_columns]
    .set_index('NAME')
    .T
    .fillna(0)
)

final_df.to_csv('final-annual.csv')