In [2]:
import pandas as pd
import os

from util.census_api import CensusApi

api_key = os.getenv('CENSUS_KEY','no variable found')
print(f'API Key: {api_key}')

county_ids = [53033,53035,53053,53061]
state_id = 53

API Key: 1b373fe94cdee2c96c20b3c1f14ed6b1d6b92562


In [3]:
c = CensusApi(api_key)

In [4]:
race_cols = ['hispanic','white_nh','black_nh','aian_nh','asian_pac_nh','other_nh']

In [36]:
# load 1990 data from ipums
df90 = pd.read_csv('data/nhgis0017_ds120_1990_tract.csv')


df90['hispanic'] = df90[['ET2006','ET2007','ET2008','ET2009','ET2010']].sum(axis=1)

df90 = df90.rename(columns = {
    'ET1001':'total_population',
    'ET2001':'white_nh',
    'ET2002':'black_nh',
    'ET2003':'aian_nh',
    'ET2004':'asian_pac_nh',
    'ET2005':'other_nh'
})

df90 = df90[['GISJOIN','total_population'] + race_cols]

# check totals
print(f"total population: {int(df90['total_population'].sum()):,}")
print(f"race col total: {int(df90[race_cols].sum().sum()):,}")
print("If these two numbers match, then the race variables were summed correctly.")

total population: 4,866,692
race col total: 4,866,692
If these two numbers match, then the race variables were summed correctly.


In [37]:
df90.loc[df90['GISJOIN'] == 'G5300330024901']

Unnamed: 0,GISJOIN,total_population,hispanic,white_nh,black_nh,aian_nh,asian_pac_nh,other_nh
431,G5300330024901,5208,109,4271,73,13,738,4


In [38]:
xwalk90 = (
    pd.read_csv('xwalks/nhgis_tr1990_tr2010_53.csv')
)

In [39]:
xwalk90.loc[xwalk90['tr1990gj'] == 'G5300330024901']

Unnamed: 0,tr1990gj,tr1990ge,tr2010gj,tr2010ge,parea,wt_pop,wt_adult,wt_fam,wt_hh,wt_hu,wt_ownhu,wt_renthu
846,G5300330024901,53033020000.0,G5300330024901,53033024901,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [40]:
xwalk90 = xwalk90.merge(df90, left_on='tr1990gj', right_on='GISJOIN', how='left')

In [41]:
xwalk90.loc[xwalk90['tr1990gj'] == 'G5300330024901']

Unnamed: 0,tr1990gj,tr1990ge,tr2010gj,tr2010ge,parea,wt_pop,wt_adult,wt_fam,wt_hh,wt_hu,wt_ownhu,wt_renthu,GISJOIN,total_population,hispanic,white_nh,black_nh,aian_nh,asian_pac_nh,other_nh
846,G5300330024901,53033020000.0,G5300330024901,53033024901,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,G5300330024901,5208.0,109.0,4271.0,73.0,13.0,738.0,4.0


In [42]:
for col in race_cols + ['total_population']:
    xwalk90[col] = xwalk90[col] * xwalk90['wt_pop']

In [46]:
xwalk90 = xwalk90[race_cols + ['total_population', 'tr2010ge']].groupby('tr2010ge').sum()

In [48]:
xwalk90 = xwalk90.round(0)

In [50]:
xwalk90['total_population'].sum()

np.float64(4866698.0)

In [51]:
xwalk90[race_cols].sum().sum()

np.float64(4866683.0)

In [7]:
# 2000 decennial data
variables_dict = {
    'total_population': ['P001001'],
    'hispanic':['P004002'],
    'white_nh':['P004005'],
    'black_nh':['P004006'],
    'aian_nh':['P004007'],
    'asian_pac_nh':['P004008','P004009'],
    'other_nh':['P004010','P004011']
}

d00 = c.get_dec_data(variables_dict, 2000,'tract','sf1', county_ids, state_id)

# check totals
print(f"total population: {int(d00['total_population'].sum()):,}")
print(f"race col total: {int(d00[race_cols].sum().sum()):,}")
print("If these two numbers match, then the race variables were summed correctly.")

total population: 3,275,847
race col total: 3,275,847
If these two numbers match, then the race variables were summed correctly.


In [6]:
# 2010 decennial data
variables_dict = {
    'total_population': ['P001001'],
    'hispanic':['P009002'],
    'white_nh':['P009005'],
    'black_nh':['P009006'],
    'aian_nh':['P009007'],
    'asian_pac_nh':['P009008','P009009'],
    'other_nh':['P009010','P009011']
}

df10 = c.get_dec_data(variables_dict, 2010,'tract','sf1', county_ids, state_id)

# check totals
print(f"total population: {int(df10['total_population'].sum()):,}")
print(f"race col total: {int(df10[race_cols].sum().sum()):,}")
print("If these two numbers match, then the race variables were summed correctly.")

total population: 3,690,942
race col total: 3,690,942
If these two numbers match, then the race variables were summed correctly.


In [4]:
# 2020 decennial data
variables_dict = {
    'total_population': ['P1_001N'],
    'hispanic':['P9_002N'],
    'white_nh':['P9_005N'],
    'black_nh':['P9_006N'],
    'aian_nh':['P9_007N'],
    'asian_pac_nh':['P9_008N','P9_009N'],
    'other_nh':['P9_010N','P9_011N']
}

df20 = c.get_dec_data(variables_dict, 2020,'tract','dhc', county_ids, state_id)

# check totals
print(f"total population: {int(df20['total_population'].sum()):,}")
print(f"race col total: {int(df20[race_cols].sum().sum()):,}")
print("If these two numbers match, then the race variables were summed correctly.")

total population: 4,294,373
race col total: 4,294,373
If these two numbers match, then the race variables were summed correctly.
