## Script to pull census data.

This notebook includes a simple script that pulls the requested variables from the 5-year American Community Survey (ACS5) at either the census tract or congressional district level.  The user must request a Census API key and enter it on the line that sets `my_api_key`.  The user should then list the ACS5 variable names on the line that sets `variables_of_interest`, separated by commas.  The user may optionally list friendlier column names on the line that sets `custom_names`, also separated by commas.  Each entry in both lists should be surrounded by quotes (single or double quotes are both fine).  Then run the cells of this notebook.

This is just a simple application of the library here: https://github.com/datamade/census

If you are missing the required libraries, put these in a new cell and run it:

* !pip install census
* !pip install us
* !pip install tqdm

In [1]:
from census import Census
from us import states
import requests
import pandas as pd
from tqdm import tqdm_notebook
from IPython.display import clear_output

# --- API key configuration ---------------------------------------------------
# Paste your Census API key between the quotes below, or put it on the first
# line of a file named 'my_key.txt' in the working directory (the file wins).
my_api_key = ''# Put your api key here

import os
if os.path.isfile('my_key.txt'):
    with open('my_key.txt') as key_file:
        # Only the first line is used.  strip() removes the trailing newline so
        # it is not sent as part of the key; an empty file yields '' instead of
        # raising IndexError.
        my_api_key = key_file.readline().strip()

try:
    census_query_object = Census(my_api_key)
except Exception:
    # Narrower than a bare `except:` so Ctrl-C / kernel interrupts still work.
    print("Did you enter an api key surrounded by quotes?")
else:
    # Building the client does not prove a key was supplied, so check directly.
    if not my_api_key:
        print("Did you enter an api key surrounded by quotes?")

#https://api.census.gov/data/2016/acs/acs5/variables/B07008_006E.json

# Default ACS5 variable codes to request, and optional friendlier column names.
variables_of_interest = ('NAME', 'B02PR_0034E', 'B07008_006E', 'B08301_011E')
custom_names = None


def grab_by_tracts(variables_list, file_name = "tracts_data.csv", save=True, names=None):
    """Gather the requested variables from the 5-year American Community
    Survey at the census tract level.

    Note, this will take longer than the district data, perhaps a couple
    minutes, because one request is made per state.

    Parameters
    ----------
    variables_list : iterable of str
        ACS5 variable codes to request (converted to a tuple for the query).
    file_name : str
        Path the result is written to with ``DataFrame.to_csv`` when ``save``
        is true.
    save : bool
        Whether to write the result to ``file_name``.
    names : list or tuple of str, optional
        Replacement labels for the leading columns.  They are applied
        positionally, in reverse order, to the first ``len(names)`` columns.
        NOTE(review): this relies on the column order of the DataFrame built
        from the API records -- confirm the labels line up before trusting
        them.

    Returns
    -------
    pandas.DataFrame
        One row per returned record, plus a 'State Name' column holding the
        state abbreviation.
    """
    census_query_object = Census(my_api_key)
    requested = tuple(variables_list)

    stats = []
    for state in tqdm_notebook(states.STATES):
        # extend() grows the list in place instead of copying it on every pass.
        stats.extend(census_query_object.acs5.state_county_tract(
            requested, state.fips, Census.ALL, Census.ALL))
    df = pd.DataFrame(stats)

    # Match on the FIPS code itself.  FIPS numbers are not contiguous, so
    # indexing the state list by ``int(fips) - 1`` can pick the wrong state.
    fips_to_abbr = {int(s.fips): s.abbr
                    for s in states.STATES_AND_TERRITORIES if s.fips}
    df['State Name'] = df['state'].apply(lambda fips: fips_to_abbr.get(int(fips)))

    if names and isinstance(names, (list, tuple)):
        df.columns = list(names)[::-1] + list(df.columns.values[len(names):])

    if save:
        df.to_csv(file_name)

    clear_output()
    display(df.head())
    return df

def grab_by_districts(variables_list, file_name = "district_data.csv", save=True):
    """Gather the requested variables from the 5-year American Community
    Survey at the congressional district level.

    Parameters
    ----------
    variables_list : iterable of str
        ACS5 variable codes to request (converted to a tuple for the query).
    file_name : str
        Path the result is written to with ``DataFrame.to_csv`` when ``save``
        is true.
    save : bool
        Whether to write the result to ``file_name``.

    Returns
    -------
    pandas.DataFrame
        One row per returned record, plus a 'State Name' column holding the
        state abbreviation.
    """
    census_query_object = Census(my_api_key)
    requested = tuple(variables_list)

    stats = []
    for state in tqdm_notebook(states.STATES, leave=False):
        # extend() grows the list in place instead of copying it on every pass.
        stats.extend(census_query_object.acs5.state_district(
            requested, state.fips, Census.ALL))
    df = pd.DataFrame(stats)

    # Match on the FIPS code itself.  FIPS numbers are not contiguous, so
    # indexing the state list by ``int(fips) - 1`` can pick the wrong state.
    fips_to_abbr = {int(s.fips): s.abbr
                    for s in states.STATES_AND_TERRITORIES if s.fips}
    df['State Name'] = df['state'].apply(lambda fips: fips_to_abbr.get(int(fips)))

    if save:
        df.to_csv(file_name)
    clear_output()
    display(df.head())
    return df
    


In [8]:
"""Run this cell to save district data."""

"""You may optionally indicate new variables and names by removing the hashmarks in front of the next two lines"""
# ACS5 variable codes requested for the district-level pull.
variables_of_interest = (
    'NAME',
    'B07008_030E',
    'B07008_006E',
    'B08406_009E',
    'B01003_001E',
)

# Fetch the district table (saved and previewed by the function) and keep it as `df`.
df = grab_by_districts(variables_of_interest)

Unnamed: 0,B01003_001E,B07008_006E,B07008_030E,B08406_009E,NAME,congressional district,state,State Name
0,699597.0,41667.0,713.0,,"Congressional District 1 (115th Congress), Ala...",1,1,AL
1,683013.0,40388.0,535.0,,"Congressional District 2 (115th Congress), Ala...",2,1,AL
2,700551.0,39263.0,697.0,,"Congressional District 3 (115th Congress), Ala...",3,1,AL
3,683607.0,44565.0,308.0,,"Congressional District 4 (115th Congress), Ala...",4,1,AL
4,704796.0,37956.0,746.0,,"Congressional District 5 (115th Congress), Ala...",5,1,AL


In [6]:
names = ['population', 'total_widows', 'moved_widows', 'bus_riders']
# ACS5 variable codes, listed in the same order as `names`.
acs_codes = ['B01003_001E', 'B07008_006E', 'B07008_030E', 'B08406_009E']

# Rename by column label rather than by position, so the result does not
# depend on the order in which pandas laid out the columns.  Labels that are
# absent are ignored, so re-running the cell is harmless.
df = df.rename(columns=dict(zip(acs_codes, names)))

# The target is a .csv file (the one grab_by_districts already wrote), so
# write CSV; to_pickle would leave a binary pickle behind a .csv name.
df.to_csv("district_data.csv")

In [3]:
"""Run this cell to save census tract data.  Note, this will take longer then the district data, perhaps a couple minutes."""

"""You may optionally indicate new variables and names by removing the hashmarks in front of the next two lines"""
# variables_of_interest = ('NAME', 'B12002_065E', 'B12002_158E', 'B08006_009E', 'B00001_001E')
# custom_names = ['widows_male', 'widows_female', 'bus_riders', 'population']

# Fetch the tract table, passing the optional column labels, and keep it as `df`.
df = grab_by_tracts(
    variables_of_interest,
    names=custom_names,
)

Unnamed: 0,population,bus_riders,total_widows,moved_widows,name,county,state,tract,State Name
0,2010.0,82.0,0.0,0.0,"Census Tract 201, Autauga County, Alabama",1,1,20100,AL
1,2196.0,97.0,8.0,0.0,"Census Tract 202, Autauga County, Alabama",1,1,20200,AL
2,3136.0,176.0,0.0,0.0,"Census Tract 203, Autauga County, Alabama",1,1,20300,AL
3,4563.0,365.0,0.0,0.0,"Census Tract 204, Autauga County, Alabama",1,1,20400,AL
4,10529.0,498.0,0.0,0.0,"Census Tract 205, Autauga County, Alabama",1,1,20500,AL
