In [None]:
import pandas as pd
import os

from util.census_api import CensusApi

api_key = os.getenv('CENSUS_KEY','no variable found')
print(f'API Key: {api_key}')

county_ids = [53033,53035,53053,53061]
state_id = 53

API Key: 1b373fe94cdee2c96c20b3c1f14ed6b1d6b92562


In [2]:
c = CensusApi(api_key)

In [3]:
race_cols = ['hispanic','white_nh','black_nh','aian_nh','asian_pac_nh','other_nh']

In [None]:
# load 1990 data from ipums
df90 = pd.read_csv('data/nhgis0017_ds120_1990_tract.csv')
df90['county_id'] = (df90['STATEA'].astype(str) + df90['COUNTYA'].astype(str).str.zfill(3)).astype(int)


df90['hispanic'] = df90[['ET2006','ET2007','ET2008','ET2009','ET2010']].sum(axis=1)

df90 = df90.rename(columns = {
    'ET1001':'total_population',
    'ET2001':'white_nh',
    'ET2002':'black_nh',
    'ET2003':'aian_nh',
    'ET2004':'asian_pac_nh',
    'ET2005':'other_nh'
})

df90 = df90[['GISJOIN','county_id','total_population'] + race_cols]
df90 = df90.loc[df90['county_id'].isin(county_ids)].copy()

# check totals
print(f"total population: {int(df90['total_population'].sum()):,}")
print(f"race col total: {int(df90[race_cols].sum().sum()):,}")
print("If these two numbers match, then the race variables were summed correctly.")

total population: 2,748,895
race col total: 2,748,895
If these two numbers match, then the race variables were summed correctly.


In [None]:
xwalk90 = pd.read_csv('xwalks/nhgis_tr1990_tr2010_53.csv',dtype={'tr1990ge':'int64'})

ValueError: Integer column has NA values in column 1

In [23]:
df90.dtypes

GISJOIN             object
county_id            int64
total_population     int64
hispanic             int64
white_nh             int64
black_nh             int64
aian_nh              int64
asian_pac_nh         int64
other_nh             int64
dtype: object

In [25]:
xwalk90.loc[xwalk90['tr1990gj']=='G5300090980799']

Unnamed: 0,tr1990gj,tr1990ge,tr2010gj,tr2010ge,parea,wt_pop,wt_adult,wt_fam,wt_hh,wt_hu,wt_ownhu,wt_renthu
140,G5300090980799,53009980000.0,G5300090000700,53009000700,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [21]:
xwalk90.merge(df90, left_on='tr1990gj', right_on='GISJOIN', how='left')

Unnamed: 0,tr1990gj,tr1990ge,tr2010gj,tr2010ge,parea,wt_pop,wt_adult,wt_fam,wt_hh,wt_hu,...,wt_renthu,GISJOIN,county_id,total_population,hispanic,white_nh,black_nh,aian_nh,asian_pac_nh,other_nh
0,G41005509501,410559501.0,G5300390950200,53039950200,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,,,,,,,,,
1,G53000109501,530019501.0,G5300010950100,53001950100,0.989438,0.999851,0.999798,0.999730,0.999813,0.999684,...,1.000000,,,,,,,,,
2,G53000109501,530019501.0,G5300010950200,53001950200,0.010562,0.000149,0.000202,0.000270,0.000187,0.000316,...,0.000000,,,,,,,,,
3,G53000109502,530019502.0,G5300010950200,53001950200,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,,,,,,,,,
4,G53000109503,530019503.0,G5300010950300,53001950300,0.999393,0.960932,0.964623,0.964789,0.964549,0.963924,...,0.891967,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2438,,,G5300490990100,53049990100,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,,,,,,,,,
2439,,,G5300550990100,53055990100,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,,,,,,,,,
2440,,,G5300570990100,53057990100,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,,,,,,,,,
2441,,,G5300610990002,53061990002,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,,,,,,,,,


In [7]:
# 2000 decennial data
variables_dict = {
    'total_population': ['P001001'],
    'hispanic':['P004002'],
    'white_nh':['P004005'],
    'black_nh':['P004006'],
    'aian_nh':['P004007'],
    'asian_pac_nh':['P004008','P004009'],
    'other_nh':['P004010','P004011']
}

d00 = c.get_dec_data(variables_dict, 2000,'tract','sf1', county_ids, state_id)

# check totals
print(f"total population: {int(d00['total_population'].sum()):,}")
print(f"race col total: {int(d00[race_cols].sum().sum()):,}")
print("If these two numbers match, then the race variables were summed correctly.")

total population: 3,275,847
race col total: 3,275,847
If these two numbers match, then the race variables were summed correctly.


In [6]:
# 2010 decennial data
variables_dict = {
    'total_population': ['P001001'],
    'hispanic':['P009002'],
    'white_nh':['P009005'],
    'black_nh':['P009006'],
    'aian_nh':['P009007'],
    'asian_pac_nh':['P009008','P009009'],
    'other_nh':['P009010','P009011']
}

df10 = c.get_dec_data(variables_dict, 2010,'tract','sf1', county_ids, state_id)

# check totals
print(f"total population: {int(df10['total_population'].sum()):,}")
print(f"race col total: {int(df10[race_cols].sum().sum()):,}")
print("If these two numbers match, then the race variables were summed correctly.")

total population: 3,690,942
race col total: 3,690,942
If these two numbers match, then the race variables were summed correctly.


In [4]:
# 2020 decennial data
variables_dict = {
    'total_population': ['P1_001N'],
    'hispanic':['P9_002N'],
    'white_nh':['P9_005N'],
    'black_nh':['P9_006N'],
    'aian_nh':['P9_007N'],
    'asian_pac_nh':['P9_008N','P9_009N'],
    'other_nh':['P9_010N','P9_011N']
}

df20 = c.get_dec_data(variables_dict, 2020,'tract','dhc', county_ids, state_id)

# check totals
print(f"total population: {int(df20['total_population'].sum()):,}")
print(f"race col total: {int(df20[race_cols].sum().sum()):,}")
print("If these two numbers match, then the race variables were summed correctly.")

total population: 4,294,373
race col total: 4,294,373
If these two numbers match, then the race variables were summed correctly.
