# Integrating Census API

https://www.census.gov/data/developers/guidance/api-user-guide.html

What Data are Available in the Census Data API?
Determine the dataset that holds the information for which you are searching. Then decide what information you want from this dataset. The Census Bureau has already made available many datasets and is adding more almost monthly for querying with the API tool. Currently, the most popular aggregate datasets to query for variables using the API are as follows:

American Community Survey (ACS) 
* 1 Year (2015, 2014, 2013, 2012, 2011), 
* 3 Year (2013, 2012), 
* 5 Year (2005-2009 and 2010-2014), 
* American Community Survey Supplemental (2014 -2015)

Economic Indicators Time Series
* Decennial Census (SF1/SF3) 2010, 2000, 1990
* Economic Census 2012, 2007, 2002
* County Business Patterns (CBP) and Nonemployer Statistics 2008-2014
* Population Estimates and Projections 2015, 2014, 2013, 2012
* Business Dynamics Statistics 1976-2014

# Census Data Tables Have Certain IDs attached to them:
https://www.census.gov/data/developers/guidance/api-user-guide.Query_Components.html

https://www2.census.gov/programs-surveys/acs/summary_file/2016/documentation/tech_docs/2016_SummaryFile_Tech_Doc.pdf

https://www.census.gov/programs-surveys/acs/technical-documentation/summary-file-documentation.html

See the link to see how you can access data about different categories of people.

In [1]:
from census import Census
from us import states
import pandas as pd
import time
import math

import sys
sys.path.append('../datasets/')
from censusdata import CensusData

from IPython.display import display

%load_ext autoreload
%autoreload 2


In [24]:
pd.set_option('display.max_columns', None) # This ensures we can view all the columns

# Specify state and county to download (select one)
loc_name, state_codes, county_codes = "balt_city", list([states.MD.fips]), list([510]) # Baltimore
# loc_name, state_codes, county_codes = "greater_balt", states.MD.fips, list([510, 5]) # Baltimore City and County
# loc_name, state_codes, county_codes = "maryland", states.MD.fips, None # All of Maryland
#loc_name, state_codes, county_codes = "delmarva", [states.MD.fips, states.DE.fips, states.VA.fips], None # All of DelMarVa

# CENSUS API Stuff
CENSUS_API = 'fe55211c8b3f0350fcb040c07321a129a3d6e266' # Your API key here
c = Census(CENSUS_API) # Initialize census class with API key

# Generate codes for census variables of interest
var_ids = []
var_ids.extend(["B19001_0{:02d}E".format(x) for x in range(2, 18)]) # Household income over 12 months
var_ids.extend(["B19037_0{:02d}E".format(x) for x in range(1, 70)]) # Household income over 12 months by age

# Local Storage Parameters
LOCAL_DATA_DIR = './data/'
ATTR_FILE_END = '_census_data.csv'
attr_outfile = LOCAL_DATA_DIR + loc_name + ATTR_FILE_END

In [50]:
def build_tract_fips(record):
    fips_code = record['state'] + record['county'] + record['tract']
    return str(fips_code)

def census_tracts_to_dataframe(var_list, state_codes, county_codes):
    CALL_LIM = 50 # Can only request 50 records at a time
    fips_codes = []
    all_records = []
    
    census_df = []
    census_dict = {}
    num_chunks = int(math.ceil(1.0 * len(var_list) / CALL_LIM))
    
    print(num_chunks)
    for state_id in state_codes:
        print("State: {0}".format(state_id))
        
        for county_id in county_codes:
            print("County: {0}".format(county_id))
            
            for chunk_num in range(0, num_chunks):
                census_data = []
                
                chunk_start = chunk_num * CALL_LIM
                chunk_end = (chunk_num + 1) * CALL_LIM
                if chunk_end > len(var_list): 
                    chunk_end = len(var_list)

                print("Chunk start: {0}".format(chunk_start))
                print("Chunk end: {0}".format(chunk_end))
                    
                var_sublist = var_list[chunk_start:chunk_end]

                # get the variables sublisted by np.array_split
#                 var_sublists = np.array_split(var_list, num_chunks)
#                 var_sublist = var_sublists[chunk_num].tolist()
                
#                 c.sf1.get('NAME', geo={'for': 'tract:*',
#                        'in': 'state:{} county:170'.format(states.AK.fips)})

                print('state:{0} county{1}'.format(state_id, county_id))
                census_data = c.acs.get(var_sublist, 
                                        {'for': 'tract:*', 
                                         'in': 'state:{0}'.format(state_id, county_id)})
                
                print("Got {0} records.".format(len(census_data)))
                
                for idx, record in enumerate(census_data):

                    # Build fips codes
                    fips_code = build_tract_fips(record)
                    #census_data[idx]["fips"] = fips_code
                    
                    # Eliminate original code components
                    key_list = ['state', 'county', 'tract']
                    for key in key_list:
                        if key in record: 
                            del record[key]
                    
                    if fips_code in census_dict:
                        census_dict[fips_code].update(record)
                    else:
                        census_dict[fips_code] = record

    census_df = pd.DataFrame(census_dict)
    census_df = census_df.transpose()
    census_df.index.name = "fips"
    
    return census_df

def build_bg_fips(record):
    fips_code = record['state'] + record['county'] + record['tract'] + record['block group']
    return str(fips_code)

def build_tract_fips(record):
    fips_code = record['state'] + record['county'] + record['tract']
    return str(fips_code)
def census_to_dataframe(var_list, state_code, county_codes):
    fips_codes = []
    all_records = []
    
    for county in county_codes: 
        print(county)
        print(state_code)
        census_data = c.acs.get(var_list, 
                                {'for': 'block group:*', 
                                 'in': 'state:{0} county:{1}'.format(state_code[0], county)})
        
        for idx, record in enumerate(census_data):
            # Build fips codes
            fips_code = build_bg_fips(record)
            census_data[idx]["fips"] = fips_code

            # Eliminate original code components
            key_list = ['state', 'county', 'tract', 'block group']
            for key in key_list:
                if key in census_data[idx]: 
                    del census_data[idx][key]
        
        all_records.extend(census_data)
        
    census_df = pd.DataFrame(all_records)
    census_df = census_df.set_index("fips")
                
    return census_df

In [51]:
print(var_ids, state_codes, county_codes)
print(type(var_ids))
print(type(var_ids[0]))
print(state_codes)
print(county_codes)

['B19001_002E', 'B19001_003E', 'B19001_004E', 'B19001_005E', 'B19001_006E', 'B19001_007E', 'B19001_008E', 'B19001_009E', 'B19001_010E', 'B19001_011E', 'B19001_012E', 'B19001_013E', 'B19001_014E', 'B19001_015E', 'B19001_016E', 'B19001_017E', 'B19037_001E', 'B19037_002E', 'B19037_003E', 'B19037_004E', 'B19037_005E', 'B19037_006E', 'B19037_007E', 'B19037_008E', 'B19037_009E', 'B19037_010E', 'B19037_011E', 'B19037_012E', 'B19037_013E', 'B19037_014E', 'B19037_015E', 'B19037_016E', 'B19037_017E', 'B19037_018E', 'B19037_019E', 'B19037_020E', 'B19037_021E', 'B19037_022E', 'B19037_023E', 'B19037_024E', 'B19037_025E', 'B19037_026E', 'B19037_027E', 'B19037_028E', 'B19037_029E', 'B19037_030E', 'B19037_031E', 'B19037_032E', 'B19037_033E', 'B19037_034E', 'B19037_035E', 'B19037_036E', 'B19037_037E', 'B19037_038E', 'B19037_039E', 'B19037_040E', 'B19037_041E', 'B19037_042E', 'B19037_043E', 'B19037_044E', 'B19037_045E', 'B19037_046E', 'B19037_047E', 'B19037_048E', 'B19037_049E', 'B19037_050E', 'B19037_0

In [52]:
# This segment of code will get household income estimates for each block group in Baltimore city
census_df = census_tracts_to_dataframe(var_ids, state_codes, county_codes)
census_df.to_csv(attr_outfile) # Write to csv

2
State: 24
County: 510
Chunk start: 0
Chunk end: 50
state:24 county510
Got 1406 records.
Chunk start: 50
Chunk end: 85
state:24 county510
Got 1406 records.


In [73]:
from IPython.display import display

In [75]:
display(census_df.head())
print(census_df.shape)

Unnamed: 0_level_0,B19001_002E,B19001_003E,B19001_004E,B19001_005E,B19001_006E,B19001_007E,B19001_008E,B19001_009E,B19001_010E,B19001_011E,B19001_012E,B19001_013E,B19001_014E,B19001_015E,B19001_016E,B19001_017E,B19037_001E,B19037_002E,B19037_003E,B19037_004E,B19037_005E,B19037_006E,B19037_007E,B19037_008E,B19037_009E,B19037_010E,B19037_011E,B19037_012E,B19037_013E,B19037_014E,B19037_015E,B19037_016E,B19037_017E,B19037_018E,B19037_019E,B19037_020E,B19037_021E,B19037_022E,B19037_023E,B19037_024E,B19037_025E,B19037_026E,B19037_027E,B19037_028E,B19037_029E,B19037_030E,B19037_031E,B19037_032E,B19037_033E,B19037_034E,B19037_035E,B19037_036E,B19037_037E,B19037_038E,B19037_039E,B19037_040E,B19037_041E,B19037_042E,B19037_043E,B19037_044E,B19037_045E,B19037_046E,B19037_047E,B19037_048E,B19037_049E,B19037_050E,B19037_051E,B19037_052E,B19037_053E,B19037_054E,B19037_055E,B19037_056E,B19037_057E,B19037_058E,B19037_059E,B19037_060E,B19037_061E,B19037_062E,B19037_063E,B19037_064E,B19037_065E,B19037_066E,B19037_067E,B19037_068E,B19037_069E
fips,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1
24510010100,40.0,18.0,42.0,17.0,27.0,51.0,47.0,36.0,39.0,69.0,109.0,223.0,214.0,112.0,253.0,165.0,1462.0,34.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,5.0,4.0,0.0,12.0,822.0,25.0,0.0,34.0,0.0,0.0,0.0,17.0,7.0,14.0,27.0,52.0,123.0,149.0,89.0,202.0,83.0,305.0,11.0,0.0,0.0,0.0,0.0,22.0,0.0,20.0,0.0,34.0,31.0,65.0,36.0,8.0,27.0,51.0,301.0,0.0,18.0,8.0,17.0,27.0,29.0,30.0,9.0,25.0,8.0,26.0,26.0,24.0,11.0,24.0,19.0
24510010200,61.0,23.0,0.0,55.0,41.0,0.0,31.0,110.0,44.0,92.0,74.0,246.0,260.0,214.0,130.0,60.0,1441.0,38.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,20.0,0.0,0.0,11.0,0.0,0.0,935.0,9.0,10.0,0.0,18.0,9.0,0.0,31.0,0.0,11.0,49.0,42.0,182.0,223.0,203.0,104.0,44.0,323.0,28.0,0.0,0.0,12.0,32.0,0.0,0.0,87.0,11.0,12.0,12.0,64.0,37.0,0.0,12.0,16.0,145.0,24.0,13.0,0.0,25.0,0.0,0.0,0.0,23.0,22.0,24.0,0.0,0.0,0.0,0.0,14.0,0.0
24510010300,63.0,23.0,15.0,15.0,32.0,29.0,25.0,0.0,31.0,36.0,53.0,186.0,72.0,205.0,165.0,178.0,1128.0,40.0,15.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,11.0,0.0,5.0,0.0,0.0,789.0,30.0,0.0,0.0,0.0,10.0,17.0,0.0,0.0,17.0,8.0,47.0,147.0,63.0,164.0,156.0,130.0,207.0,9.0,12.0,0.0,0.0,11.0,12.0,0.0,0.0,14.0,28.0,0.0,19.0,9.0,36.0,9.0,48.0,92.0,9.0,11.0,12.0,15.0,11.0,0.0,25.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0
24510010400,51.0,25.0,8.0,36.0,35.0,39.0,15.0,40.0,24.0,123.0,147.0,179.0,152.0,162.0,154.0,231.0,1421.0,38.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,14.0,14.0,0.0,0.0,0.0,0.0,923.0,20.0,0.0,0.0,0.0,22.0,31.0,0.0,0.0,24.0,100.0,133.0,114.0,118.0,121.0,136.0,104.0,276.0,0.0,0.0,8.0,0.0,0.0,8.0,9.0,23.0,0.0,6.0,0.0,44.0,34.0,23.0,9.0,112.0,184.0,31.0,25.0,0.0,36.0,13.0,0.0,6.0,17.0,0.0,7.0,0.0,7.0,0.0,18.0,9.0,15.0
24510010500,27.0,28.0,30.0,12.0,8.0,12.0,25.0,18.0,65.0,28.0,57.0,133.0,104.0,26.0,129.0,65.0,767.0,21.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,6.0,5.0,0.0,3.0,0.0,0.0,484.0,17.0,17.0,0.0,7.0,5.0,4.0,17.0,13.0,46.0,10.0,26.0,92.0,75.0,17.0,101.0,37.0,162.0,4.0,0.0,7.0,5.0,0.0,0.0,0.0,0.0,19.0,7.0,20.0,31.0,24.0,6.0,22.0,17.0,100.0,4.0,9.0,23.0,0.0,3.0,8.0,8.0,5.0,0.0,8.0,5.0,5.0,5.0,0.0,6.0,11.0


(200, 85)


# Use Census Class Module I built

In [63]:
year = 2013
CENSUS_API = 'fe55211c8b3f0350fcb040c07321a129a3d6e266' # Your API key here
censusloader = CensusData(year, CENSUS_API)

If setting year, make sure that the census api has data for that year!


In [64]:
# Generate codes for census variables of interest
var_ids = []
var_ids.extend(["B19001_0{:02d}E".format(x) for x in range(2, 18)]) # Household income over 12 months
var_ids.extend(["B19037_0{:02d}E".format(x) for x in range(1, 70)]) # Household income over 12 months by age

# Regions for variables of interest - state, county, zipcode, etc.
loc_name, state_codes, county_codes = "california", list([states.CA.fips]), None # All of CA
# loc_name, state_codes, county_codes = "balt_city", list([states.MD.fips]), list([510]) # Baltimore
# loc_name, state_codes, county_codes = "balt_city", list([states.MD.fips]), list([510]) # Baltimore
print(loc_name, state_codes, county_codes)
# print(type(state))

california ['06'] None


In [65]:
# print(c.acs.get(['B19001_002E']))
censusloader.census_tracts_to_dataframe(var_ids, state_codes, county_codes)

State: 06
state:06 county:None
Got 8057 records.
state:06 county:None
Got 8057 records.
state:06 county:None
Got 8057 records.


In [None]:
# Local Storage Parameters
LOCAL_DATA_DIR = './data/census/'
ATTR_FILE_END = '_census_data.csv'
outfile = os.path.join(LOCAL_DATA_DIR, 'ca' + ATTR_FILE_END)
censusloader.savedata(outfile)