In [1]:
import requests
import pandas as pd
import numpy as np

# Census/ACS API to Pandas Template

### First, choose your survey of interest. The Census Bureau collects data in many areas through many types of surveys; choose the survey you are interested in here:
### https://www.census.gov/data/developers/data-sets.html

In [2]:
def get_acs_api_data(base, arr, geography='state:*', api_key=None, timeout=30):
    """
    Use the ACS api to get data based on the desired variables.

    No API key is stored in the source. The key is taken from the
    ``api_key`` argument or, failing that, from the ``CENSUS_API_KEY``
    environment variable; when neither is set the request is sent without a
    key (the Census Data API user guide describes keyless access as allowed
    for low query volumes).

    Args:
        base (string): base survey url
        arr (list): a list of variables to get the data for (from the ACS api page)
        geography (string): value of the ``for=`` clause, inserted verbatim into
            the query string, so it may carry an ``&in=`` clause as well
            (e.g. ``'county:*&in=state:01'``). Defaults to every state.
        api_key (string): Census API key; defaults to the ``CENSUS_API_KEY``
            environment variable
        timeout (float): seconds to wait for the server before giving up
    Returns:
        data (json): the returned data in JSON form, or an empty list when the
            server answers with an empty body (e.g. HTTP 204)
    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status
    """
    import os  # local import keeps this cell self-contained

    api_string = ','.join(map(str, arr))
    # The query string is assembled by hand so that ',', ':' and '*' are sent
    # exactly as written rather than percent-encoded.
    url = f'{base}?get={api_string}&for={geography}'

    if api_key is None:
        api_key = os.environ.get('CENSUS_API_KEY')
    if api_key:
        url += f'&key={api_key}'

    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors directly instead of failing later while parsing an
    # error page as JSON.
    response.raise_for_status()
    if response.status_code == 204 or not response.content:
        # Nothing to parse; an empty list is falsy for callers that check
        # the result's truthiness.
        return []
    return response.json()

def api_data_to_dataframe(data, geo_cols=('state',)):
    """
    Transform JSON data into Pandas dataframe, setting the GEOID to be the unique geography identifier.

    The first element of ``data`` is used as the header row and the remaining
    elements as records. GEOID is built by concatenating the geography
    columns named in ``geo_cols`` in order, so the geography level can be
    chosen by the caller instead of by editing this function
    (e.g. ``geo_cols=('state', 'county')`` for counties).

    Args:
        data (json): the returned data from the ACS api; must contain at
            least the header row
        geo_cols (str or sequence of str): geography column(s) whose values
            are concatenated to form GEOID. Defaults to ``('state',)``.
    Returns:
        df: dataframe with the ACS data, indexed by GEOID; the geography
            columns themselves are left in place
    Raises:
        IndexError: if ``data`` is empty (there is no header row)
        ValueError: if ``geo_cols`` is empty
        KeyError: if a column named in ``geo_cols`` is not in the header row
    """
    df = pd.DataFrame(data[1:], columns=data[0])

    # Accept a single column name as well as a sequence of names.
    geo_cols = [geo_cols] if isinstance(geo_cols, str) else list(geo_cols)
    if not geo_cols:
        raise ValueError('geo_cols must name at least one geography column')
    missing = [col for col in geo_cols if col not in df.columns]
    if missing:
        raise KeyError(f'geography column(s) not in API response: {missing}')

    # Concatenate the geography codes left to right (state, then county, ...).
    geoid = df[geo_cols[0]]
    for col in geo_cols[1:]:
        geoid = geoid + df[col]

    df['GEOID'] = geoid
    return df.set_index('GEOID')

def clean_data(base, arr):
    """
    Fetch ACS data and return it as a dataframe limited to the requested variables.

    Convenience wrapper: calls get_acs_api_data, converts a non-empty
    response with api_data_to_dataframe, and then drops every column that
    was not asked for.
    Args:
        base (string): base survey url
        arr (list): a list of variables to get the data for (from the ACS api page)
    Returns:
        df: dataframe with the ACS data (an empty dataframe when the api
            returned nothing)
    """
    raw = get_acs_api_data(base, arr)

    # Only build the frame when the api actually returned something.
    frame = api_data_to_dataframe(raw) if raw else pd.DataFrame()

    # Requested variables plus GEOID are kept; anything else the api added is dropped.
    wanted = np.append(arr, ['GEOID'])
    unwanted = [name for name in frame if name not in wanted]
    frame.drop(columns=unwanted, inplace=True)
    return frame

def convert_columns_to_int(df, arr):
    """
    Convert the ACS columns to integers, instead of strings.

    Columns without missing values are cast with ``astype(int)``. A column
    that contains missing values (None/NaN) cannot be cast that way, so it is
    converted to pandas' nullable ``Int64`` dtype instead, which keeps the
    gaps as ``<NA>`` rather than raising.

    The conversion is applied to ``df`` itself; the same object is returned.
    Args:
        df: ACS data frame
        arr (list): the names of the columns in ``df`` to convert
    Returns:
        df: ACS data frame
    Raises:
        KeyError: if a name in ``arr`` is not a column of ``df``
        ValueError: if a value cannot be parsed as a number
    """
    for name in arr:
        column = df[name]
        if column.isna().any():
            # Missing values present: go through to_numeric and use the
            # nullable integer dtype so the gaps survive the conversion.
            df[name] = pd.to_numeric(column).astype('Int64')
        else:
            df[name] = column.astype(int)
    return df

## Next, use the API base URL to find variable codes
### Variable searches can be done either by variable or by group of variables (browsing by group is easier).

#### Example: 2019 5-year ACS
https://api.census.gov/data/2019/acs/acs5/groups.html

https://api.census.gov/data/2019/acs/acs5/variables.html

As always, be careful to check whether a variable counts population (people) or households.

In [7]:
# Map each ACS variable code (not group code) to the column name used in the
# pandas dataframe. The order here is the order the variables are requested in.
dic = {
    'B25040_004E': 'Total_Electricity',
    'B25040_006E': 'Total_Coal',
    'B25040_007E': 'Total_Wood',
    'B25040_002E': 'Total_Gas',
    'B25040_005E': 'Total_Oil',
}

# Example: 2019 5-year ACS
base = "https://api.census.gov/data/2019/acs/acs5"

acs_codes = list(dic)
column_labels = [dic[code] for code in acs_codes]

# NAME is requested alongside the variable codes; the codes are then renamed
# to their readable labels before the values are converted from strings.
df = clean_data(base, np.append('NAME', acs_codes))
df = convert_columns_to_int(df.rename(columns=dic), column_labels)

In [8]:
# Show the renamed, integer-converted frame (indexed by GEOID) as this cell's output.
df

Unnamed: 0_level_0,NAME,Total_Electricity,Total_Coal,Total_Wood,Total_Gas,Total_Oil
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,Alabama,1219270,288,17884,508701,2694
2,Alaska,32455,1077,14055,122179,74656
4,Arizona,1559585,378,50068,841729,2241
5,Arkansas,587036,47,40991,442718,1072
6,California,3470566,1486,198102,8364730,31508
8,Colorado,491442,914,37721,1485834,2088
10,Delaware,122146,307,3525,154829,41550
11,District of Columbia,122592,13,16,148954,3747
9,Connecticut,227068,1222,25021,487610,556309
12,Florida,7133334,724,11652,356263,10558
