In [None]:
import os
import requests
import pandas as pd
from ipywidgets import interact
from itables import init_notebook_mode
from itables import show


def get_census_api_key():
    """
    Read the Census API key from "census_api_key.txt" in the current working directory.

    Go here to get a census api key: https://api.census.gov/data/key_signup.html
    Then put it in a file named "census_api_key.txt" in this directory.
    The working directory is not guaranteed to be the notebook's directory in
    jupyter; hardcode the path if you need to.

    Returns:
        str: the first line of the key file with leading and trailing
        whitespace removed.

    Raises:
        FileNotFoundError: if "census_api_key.txt" is not in the working directory.
    """
    key_path = os.path.join(os.getcwd(), "census_api_key.txt")
    try:
        with open(key_path, "r") as keyfile:
            # Strip both ends: a stray leading space or trailing newline would
            # otherwise end up inside every request URL built from this key.
            census_api_key = keyfile.readline().strip()
    except FileNotFoundError as err:
        # Re-raise the same exception type, but tell the user how to fix it.
        raise FileNotFoundError(
            f"Census API key file not found: {key_path}. "
            "Request a key at https://api.census.gov/data/key_signup.html "
            "and save it in that file."
        ) from err
    return census_api_key

census_api_key = get_census_api_key()

# Download the catalog of datasets published by the Census API.
top_url = "https://api.census.gov/data"
top_response = requests.get(top_url)
# Surface HTTP errors (4xx/5xx) directly instead of letting .json() fail with
# a confusing decode error on an error page.
top_response.raise_for_status()
census_metadata = top_response.json()
num_entries = len(census_metadata['dataset'])
# The row labels of this frame are the "table index" values that the
# functions in the cells below look datasets up by.
census_dataset_df = pd.DataFrame(census_metadata['dataset'])

# itables: display DataFrames as interactive tables from here on
init_notebook_mode(all_interactive=True)
print(f"Ready to browse {num_entries} tables")

The following cells make iterative calls to the Census API to get specific kinds of information. If the site is having a slow day, these calls can take a minute or more. The interactivity won't seem to work right in these cases - it will send a request for table "41" before you've finished typing "412". Be patient, or try another day when the API is more responsive.

Let's do a keyword search over both titles and descriptions to start to find what's interesting to us.

In [None]:
def keyword_search(keyword, showtable):
    """
    Find datasets whose title or description contains ``keyword``.

    The match is case-insensitive. ``keyword`` is handed to pandas'
    ``str.contains``, which interprets it as a regular expression by default.
    A missing title or description counts as "no match" for that column.

    Args:
        keyword: text (or regular-expression pattern) to look for.
        showtable: if true, display the matches with itables' ``show`` and
            return whatever ``show`` returns; otherwise return the matches.

    Returns:
        The "title" and "description" columns of the matching rows of
        ``census_dataset_df`` when ``showtable`` is false; the result of
        ``show`` otherwise.
    """
    # na=False turns missing values into False so the masks are plain booleans
    title_matches = census_dataset_df['title'].str.contains(keyword, case=False, na=False)
    descr_matches = census_dataset_df['description'].str.contains(keyword, case=False, na=False)
    result = census_dataset_df.loc[title_matches | descr_matches, ["title", "description"]]
    if showtable:
        return show(result, dom="tpilr", classes="wrap", style="width:75%")
    return result
# The interactive prompt that appears lets you change the keyword
interact(
    keyword_search,
    keyword="Employment",
    showtable=True,
)

What variables are available to explore in these tables? Use the indices from the table above to specify which tables to further explore.

In [None]:
def get_variables(table_index, showtable):
    """
    Fetch the variables available for one dataset.

    Looks up the dataset's "c_variablesLink" in ``census_dataset_df``, requests
    that URL, and builds a DataFrame from the "variables" entry of the JSON
    response. The frame is transposed, so (assuming "variables" maps each
    variable name to a dict of attributes) there is one row per variable.

    Args:
        table_index: row label of the dataset in ``census_dataset_df``; any
            value ``int()`` accepts (the interactive prompt passes a string).
        showtable: if true, display the frame with itables' ``show`` and return
            whatever ``show`` returns; otherwise return the frame.

    Returns:
        The variables DataFrame when ``showtable`` is false; the result of
        ``show`` otherwise.

    Raises:
        ValueError: if ``table_index`` cannot be converted to an integer.
        KeyError: if no dataset has that index, or the response has no
            "variables" entry.
        requests.HTTPError: if the API answers with an error status.
    """
    ind = int(table_index)
    vlink = census_dataset_df.loc[ind, "c_variablesLink"]
    response = requests.get(vlink)
    # Report HTTP errors directly rather than failing later while parsing JSON
    response.raise_for_status()
    variables = response.json()['variables']
    df = pd.DataFrame(variables).transpose()
    if showtable:
        return show(df,  dom="tpilr", classes="wrap", style="width:75%")
    return df
# Passing the index as a string gives a text prompt; an integer would give a slider
interact(
    get_variables,
    table_index="412",
    showtable=True,
)

# Each change of the table index re-queries the API for that table's variables,
# so on a slow API day the new results will take a while to appear

What geographical variables are available to query in the tables of interest? Use the table indices from the keyword search above.

In [None]:
def get_geography(table_index, showtable):
    """
    Fetch the geography levels that can be queried for one dataset.

    Looks up the dataset's "c_geographyLink" in ``census_dataset_df``, requests
    that URL, and builds a DataFrame from the "fips" entry of the JSON response.

    Args:
        table_index: row label of the dataset in ``census_dataset_df``; any
            value ``int()`` accepts (the interactive prompt passes a string).
        showtable: if true, display the frame with itables' ``show`` and return
            whatever ``show`` returns; otherwise return the frame.

    Returns:
        The geography DataFrame when ``showtable`` is false; the result of
        ``show`` otherwise.

    Raises:
        ValueError: if ``table_index`` cannot be converted to an integer.
        KeyError: if no dataset has that index, or the response has no "fips"
            entry.
        requests.HTTPError: if the API answers with an error status.
    """
    ind = int(table_index)
    glink = census_dataset_df.loc[ind, "c_geographyLink"]
    response = requests.get(glink)
    # Report HTTP errors directly rather than failing later while parsing JSON
    response.raise_for_status()
    geo = response.json()['fips']
    df = pd.DataFrame(geo)
    if showtable:
        return show(df, dom="tpilr", classes="wrap", style="width:75%")
    return df
    
# Text prompt for the table index, pre-filled with table 1177
interact(
    get_geography,
    table_index="1177",
    showtable=True,
)

Use the next function to show example queries for your table and columns of interest. You will have to fill in "YOUR_PREDICATE_VALUES" - for instance, the zipcodes or years of interest to you.

In [None]:
def create_example_query(table_index, variable, predicate):
    """
    Build an example request URL for one table, variable and geography predicate.

    The table's base URL is the "accessURL" of the first entry in its
    "distribution" list in ``census_dataset_df``. Spaces are removed from
    ``predicate`` before it is placed in the URL. The placeholder
    YOUR_PREDICATE_VALUES is left in the URL for the caller to replace.

    NOTE: the returned request string contains ``census_api_key`` - avoid
    sharing notebook output that shows it.

    Args:
        table_index: row label of the table in ``census_dataset_df``; any value
            ``int()`` accepts.
        variable: name of the variable to put in the ``get=`` parameter.
        predicate: name of the geography to put in the ``for=`` parameter.

    Returns:
        dict with "table_url" (the base URL) and "request" (the example URL).
    """
    row = int(table_index)
    distributions = census_dataset_df["distribution"][row]
    base_url = distributions[0]["accessURL"]
    compact_predicate = predicate.replace(" ", "")
    example = {"table_url": base_url}
    example["request"] = f"{base_url}?get={variable}&for={compact_predicate}:YOUR_PREDICATE_VALUES&key={census_api_key}"
    return example
    
# Index of the table to build example queries for
table_index = 412
# Limit the returns by geography? (YOUR_PREDICATE_VALUES will be zipcodes, state names, or the like)
geo = True
# Limit the returns by time? (YOUR_PREDICATE_VALUES will be date ranges)
# Time-based predicates still need to be implemented and tested; only geography is available for now
time = True

# Pick new values in the drop-downs below to create new example queries.
# Copy and paste the request url into your code,
# or use the queries class to create and check the request url.
interact(
    create_example_query,
    table_index=str(table_index),
    variable=get_variables(str(table_index), showtable=False).index,
    predicate=get_geography(str(table_index), showtable=False).name,
)


In [None]:
from queries import CensusQuery
# To start from an example query instead: create_example_query returns a dict,
# so take its "request" entry before filling in the predicate values.
# request = create_example_query(412, "PAYQTR1_N", "zip code")["request"]
# request = request.replace("YOUR_PREDICATE_VALUES", "10036")

cq = CensusQuery()
cq.set_census_api_key()
# Use https, matching the catalog URL used earlier in this notebook
# (presumably the API key is sent with the request - confirm in queries.py).
cq.set_census_database_url("https://api.census.gov/data/2012/zbp")
cq.set_census_variable("PAYQTR1")
cq.set_census_predicate("ZIPCODE:10036,10038")
df = cq.get_census_dataframe()

# check out queries.py to see how to align this census dataframe with your own data