In [None]:
import os
import requests
import pandas as pd
from ipywidgets import interact


def get_census_api_key():
    """
    Read the Census API key from "census_api_key.txt" in the current working directory.

    Go here to get a census api key: https://api.census.gov/data/key_signup.html
    Then put it on the first line of a file named "census_api_key.txt" in this directory.
    The file is looked up relative to os.getcwd(), which is not guaranteed to be the
    notebook's directory in jupyter; hardcode the path if you need to.

    Returns:
        str: The first line of the key file with surrounding whitespace removed.

    Raises:
        FileNotFoundError: If "census_api_key.txt" does not exist in the working directory.
        ValueError: If the first line of the key file is blank.
    """
    key_path = os.path.join(os.getcwd(), "census_api_key.txt")
    with open(key_path, "r") as keyfile:
        # strip() rather than rstrip(): stray leading whitespace would otherwise
        # be kept as part of the key.
        census_api_key = keyfile.readline().strip()
    if not census_api_key:
        # Fail here with a clear message instead of carrying an empty key around.
        raise ValueError(
            f"No API key found on the first line of {key_path}; "
            "get one at https://api.census.gov/data/key_signup.html"
        )
    return census_api_key

# Load the API key once; create_example_query appends it to the query URLs it builds.
census_api_key = get_census_api_key()

# The API root returns a JSON catalog; its 'dataset' entry is loaded as one row per table.
top_url = "https://api.census.gov/data"
# A timeout keeps a stalled connection from hanging the kernel, and raise_for_status()
# surfaces HTTP errors directly instead of as a confusing JSON-decoding failure.
catalog_response = requests.get(top_url, timeout=60)
catalog_response.raise_for_status()
census_metadata = catalog_response.json()
num_entries = len(census_metadata['dataset'])
census_dataset_df = pd.DataFrame(census_metadata['dataset'])

print(f"Ready to browse {num_entries} tables")

Let's start with a keyword search over both table titles and descriptions to find the datasets that interest us.

In [None]:
# from IPython.display import display

# Column width: None never truncates cell text. Use a smaller number for a narrower table,
# but note that the description will not wrap
pd.options.display.max_colwidth = None
# Row limit for displayed frames: 10 here, or None to see all matching rows
pd.options.display.max_rows = 10
def keyword_search(keyword):
    """
    Display the census tables whose title or description contains `keyword`.

    The match is case-insensitive. `keyword` is passed to pandas' `str.contains`,
    which interprets it as a regular expression, so characters such as "(" or "+"
    must be escaped to be matched literally.

    Args:
        keyword (str): Text (or regex pattern) to look for.

    Returns:
        The result of `display(...)` on the "title" and "description" columns of the
        matching rows. The rows keep their index labels from `census_dataset_df`.
    """
    # case=False makes the search case-insensitive; na=False counts rows with a
    # missing title/description as non-matching, so both masks are plain booleans.
    title_matches = census_dataset_df['title'].str.contains(keyword, case=False, na=False)
    descr_matches = census_dataset_df['description'].str.contains(keyword, case=False, na=False)
    result = census_dataset_df[title_matches | descr_matches]
    return display(result[["title", "description"]])

# Build an interactive control for keyword_search, seeded with the keyword 'Employment'.
interact(keyword_search, keyword='Employment')

What variables are available to explore in these tables? Use the row indices (the leftmost column) from the search results above to specify which tables to explore further.

In [None]:
# Show up to 25 rows of any displayed frame from here on.
pd.options.display.max_rows = 25
def get_variables(table_index):
    """
    Fetch the variables defined for one census table.

    Args:
        table_index (int or str): Row label of the table in `census_dataset_df`;
            converted with `int()`.

    Returns:
        pandas.DataFrame: The "variables" entry of the JSON document at the table's
        "c_variablesLink" URL, loaded into a frame and transposed
        (presumably one row per variable name — confirm against the API response).

    Raises:
        ValueError: If `table_index` cannot be converted to an integer.
        KeyError: If no table with that row label exists.
        requests.HTTPError: If the variables endpoint answers with an error status.
    """
    ind = int(table_index)
    variables_url = census_dataset_df.loc[ind, "c_variablesLink"]
    # Bound the wait and surface HTTP errors instead of failing later on JSON parsing.
    response = requests.get(variables_url, timeout=60)
    response.raise_for_status()
    # Named `variables` rather than `vars` so the builtin vars() is not shadowed.
    variables = response.json()['variables']
    return pd.DataFrame(variables).transpose()

# Build an interactive control for get_variables, seeded with table index "1177".
interact(get_variables, table_index="1177")



What geographical variables are available to query in the tables of interest? Use the table indices (the leftmost column) from the keyword search results above.

In [None]:
def get_geography(table_index):
    """
    Fetch the geography definitions available for one census table.

    Args:
        table_index (int or str): Row label of the table in `census_dataset_df`;
            converted with `int()`.

    Returns:
        pandas.DataFrame: The "fips" entry of the JSON document at the table's
        "c_geographyLink" URL, loaded into a frame and transposed.

    Raises:
        ValueError: If `table_index` cannot be converted to an integer.
        KeyError: If no table with that row label exists.
        requests.HTTPError: If the geography endpoint answers with an error status.
    """
    ind = int(table_index)
    geography_url = census_dataset_df.loc[ind, "c_geographyLink"]
    # Bound the wait and surface HTTP errors instead of failing later on JSON parsing.
    response = requests.get(geography_url, timeout=60)
    response.raise_for_status()
    geo = response.json()['fips']
    return pd.DataFrame(geo).transpose()

# Build an interactive control for get_geography, seeded with table index "1177".
interact(get_geography, table_index="1177")

Use the next function to show example queries for your table and columns of interest. You will have to fill in "YOUR_PREDICATE_VALUES" - for instance, the zipcodes or years of interest to you.

In [None]:
def create_example_query(table_index, variable, predicate):
    """
    Build an example query URL for one table, variable and predicate.

    The returned URL contains the literal placeholder "YOUR_PREDICATE_VALUES",
    which has to be replaced with real values before the URL is requested.
    Note that the URL also embeds `census_api_key`, so take care when sharing it.

    Args:
        table_index: Row label of the table in `census_dataset_df`; converted with `int()`.
        variable: Name placed in the `get=` query parameter.
        predicate: Name placed in the `for=` query parameter; spaces are removed first.

    Returns:
        str: The query URL.
    """
    row_label = int(table_index)
    # The base URL is the "accessURL" of the table's first "distribution" entry.
    base_url = census_dataset_df["distribution"][row_label][0]["accessURL"]
    compact_predicate = predicate.replace(" ", "")
    return f"{base_url}?get={variable}&for={compact_predicate}:YOUR_PREDICATE_VALUES&key={census_api_key}"
    
# Table to build example queries for (row label in census_dataset_df).
table_index = 412
# Limit the returns by geography? (YOUR_PREDICATE_VALUES will be zipcodes or state names or the like)
# NOTE: `geo` is not read by any code visible in this notebook section.
geo = True
# Limit the returns by time? (YOUR_PREDICATE_VALUES will be date ranges)
# NOTE: `time` is not read by any code visible in this notebook section.
time = True

# Variable choices come from the index of get_variables(); predicate choices come
# from the "name" field of get_geography() (transposed back so it is a column).
interact(
    create_example_query,
    table_index=str(table_index),
    variable=get_variables(str(table_index)).index,
    predicate=get_geography(str(table_index)).T.name,
)


In [None]:
# Build a concrete query: table 412, variable PAYQTR1_N, predicate value 10036.
request = create_example_query(412, "PAYQTR1_N", "zip code")
request = request.replace("YOUR_PREDICATE_VALUES", "10036")

# Bound the wait and surface HTTP errors instead of failing on JSON parsing.
response = requests.get(request, timeout=60)
response.raise_for_status()
data = response.json()

# The API answers with a list of rows whose first row holds the column names,
# so use that row as the header instead of leaving it in the data.
pd.DataFrame(data[1:], columns=data[0])