In [2]:
import requests
import pandas as pd
import os
from dotenv import load_dotenv

Load Extra Data

In [3]:
#state-division mappings
divst = pd.read_csv("divst.csv")
divst['GESTFIPS'] = divst['GESTFIPS'].astype(str).str.zfill(2)

Helper Function Library

In [4]:
#creates state+county for county  GEOID
def create_county_id(df):
    df['County_GEOID'] = (
        df['GESTFIPS'].astype(str).str.zfill(2) +
        df['GTCO'].astype(str).str.zfill(3)
    )
    return df

In [12]:
def load_data(base_url, vars_to_get, CENSUS_API_KEY):
    """Loads data for the Disability supplement for a specified year. Returns a df. 
       - set base_url (str) as url19, url21, or url12 depending on the year.
       - set vars_to_get (list) as vars12_19 for 2012 or 2019 or vars21 for 2021
    """
    params = {
        "get": ",".join(vars_to_get),
        # all states; change to "state:01" etc. if you want a specific state
        "for": "state:*",
        "key": CENSUS_API_KEY
    }

    response = requests.get(base_url, params=params)
    response.raise_for_status()

    data = response.json()

    # First row is header, remaining rows are data
    header = data[0]
    rows = data[1:]

    df = pd.DataFrame(rows, columns=header)
    return df

In [13]:
#calculates weighted counts of persons with disability 
def add_disability_weights(
    df,
    disability_cols,
    weight_col="PWCMPWGT",
    suffix="_w"
):
    """
    For each column in disability_cols:
      - Create a new weighted column col+suffix
      - New col = weight where original value == 1, else 0
    Keeps the original disability columns unchanged.
    """
    # Make weights numeric
    df[weight_col] = pd.to_numeric(df[weight_col], errors="coerce")
    
    # Treat missing weights as 1 so crude (unweighted) rows keep contribution
    weights = df[weight_col].fillna(1)
    
    # Ensure disability columns are integer-like
    df[disability_cols] = df[disability_cols].astype(int)
    
    # Create the new weighted columns
    weighted_col_names = [col + suffix for col in disability_cols]
    
    df[weighted_col_names] = (
        (df[disability_cols] == 1)   # True where value==1, False otherwise
        .mul(weights, axis=0)        # True→1*weight, False→0*weight
        .round()
        .astype("Int64")
    )
    
    return df


In [None]:
def clean_data(df, year, divst, save = False): 
    """clean data downloaded using load_data function, saves if indicated, and returns a df. 
    df: dataframe to clean e.x. df12, df19, df21
    year: integer abbrv. year of data. (12, 19, 21)
    divst: datafile for division/state mappings **INFO: can be loaded in with existing dataset (see top of notebook
    "Load Extra Data")or generated from a dataframe e.g. df12 output from load_data (see last section of notebook "Create 
    Division Mapping")
    save: boolean indicator whether to save a csv locally
    """
    columns_to_str = ['GESTFIPS', 'GEREG', "GTCO"]

    disability_cols = [
    "PRDISFLG",
    "PEDISREM",
    "PEDISEAR",
    "PEDISPHY",
    "PEDISEYE",
    "PEDISDRS",
    "PEDISOUT",
    ]
    
    # convert the specified columns to str type
    df[columns_to_str] = df[columns_to_str].astype(str)

    #clean state.county codes
    df['GESTFIPS'] = df['GESTFIPS'].str.zfill(2)
    df['GTCO'] = df['GTCO'].str.zfill(3)

    #add division
    df = df.merge(divst, on="GESTFIPS")

    #add combined county
    df = create_county_id(df)

    #add weighted disability "count" columns
    df = add_disability_weights(df, disability_cols)

    if save: 
        df.to_csv(f"CPSdata{year}w-API.csv", index=False)

    return df

Begin pulling data

In [6]:
#setup to begin data pull

load_dotenv()
CENSUS_API_KEY = os.getenv("CENSUS_API_KEY")

url12 = "https://api.census.gov/data/2012/cps/disability/may"
url19 = "https://api.census.gov/data/2019/cps/disability/jul"
url21 = "https://api.census.gov/data/2021/cps/disability/jul"

vars12_19 = [
    "GESTFIPS",
    "GEREG",
    "GTCO",
    "PRDISFLG",   # Disability flag
    "PEDISREM",   # Cognitive difficulty
    "PEDISEAR",   # Hearing difficulty
    "PEDISPHY",   # Mobility difficulty
    "PEDISEYE",   # Vision difficulty
    "PEDISDRS",   # Self-care difficulty
    "PEDISOUT",   # Independent living difficulty
    "HEFAMINC",   # Household total family income (past 12 months)
    "PEEDUCA",    # Education level
    "PEMLR",      # Employment status
    "PRUNTYPE",   # Unemployment reason
    "PESD197",    # Medicaid status
    "PESD198",    # Medicare status
    "PTDTRACE",   # Race
    "PESEX",      # Sex
    "PRTAGE",      # Age
    "PESD1",    # How disability affects ability to work
    "PESD191",  # Receive assistance - workers’ compensation
    "PESD1910", # Receive assistance - none
    "PESD192",  # Receive assistance - SSDI
    "PESD193",  # Receive assistance - Supplemental Security Income
    "PESD194",  # Receive assistance - Veterans compensation
    "PESD195",  # Receive assistance - disability
    "PESD196",  # Receive assistance - other disability programs
    "PESD3",    # Ever leave a job because of disability
    "PESD41",   # Barrier: Lack of education or training
    "PESD42",   # Barrier: Lack of job counseling
    "PESD43",   # Barrier: Lack of transportation
    "PESD44",   # Barrier: Loss of government assistance
    "PESD45",   # Barrier: Need for special features
    "PESD46",   # Barrier: Employer or coworker attitudes
    "PESD47",   # Barrier: Difficulty with disability
    "PESD5",    # Barrier removed, work?
    "PESD6A",   # Used Vocational Rehabilitation Centers
    "PESD6B",   # Used One Stop Career Centers
    "PESD6C",   # Used the Ticket to Work Program
    "PESD6D",   # Used Assistive Technology Act Program
    "PESD6E",   # Used Center for Independent Living for Individuals w/ Disabilities
    "PESD6F",   # Used the Client Assistance Program
    "PESD6G",   # Used any other employment assistance program
    "PESD7A",   # Helpfulness: Vocational Rehabilitation Center
    "PESD7B",   # Helpfulness: One Stop Career Centers
    "PESD7C",   # Helpfulness: Ticket to Work Program
    "PESD7D",   # Helpfulness: Assistive Tech Act Program
    "PESD7E",   # Helpfulness: Center for Independent Living
    "PESD7F",   # Helpfulness: Client Assistance Program
    "PWCMPWGT"
]

vars21 = [
    "GESTFIPS",
    "GEDIV",
    "GEREG",
    "GTCO",
    "PRDISFLG",   # Disability flag
    "PEDISREM",   # Cognitive difficulty
    "PEDISEAR",   # Hearing difficulty
    "PEDISPHY",   # Mobility difficulty
    "PEDISEYE",   # Vision difficulty
    "PEDISDRS",   # Self-care difficulty
    "PEDISOUT",   # Independent living difficulty
    "HEFAMINC",   # Household total family income (past 12 months)
    "PEEDUCA",    # Education level
    "PEMLR",      # Employment status
    "PRUNEDUR",   # Unemployment duration
    "PRUNTYPE",   # Unemployment reason
    "PESD197",    # Medicaid status
    "PESD198",    # Medicare status
    "PTDTRACE",   # Race
    "PESEX",      # Sex
    "PRTAGE",      # Age
    "PESD1",    # How disability affects ability to work
    "PESD191",  # Receive assistance - workers’ compensation
    "PESD1910", # Receive assistance - none
    "PESD192",  # Receive assistance - SSDI
    "PESD193",  # Receive assistance - Supplemental Security Income
    "PESD194",  # Receive assistance - Veterans compensation
    "PESD195",  # Receive assistance - disability
    "PESD196",  # Receive assistance - other disability programs
    "PESD3",    # Ever leave a job because of disability
    "PESD41",   # Barrier: Lack of education or training
    "PESD42",   # Barrier: Lack of job counseling
    "PESD43",   # Barrier: Lack of transportation
    "PESD44",   # Barrier: Loss of government assistance
    "PESD45",   # Barrier: Need for special features
    "PESD46",   # Barrier: Employer or coworker attitudes
    "PESD47",   # Barrier: Difficulty with disability
    "PESD5",    # Barrier removed, work?
    "PESD6A",   # Used Vocational Rehabilitation Centers
    "PESD6B",   # Used One Stop Career Centers
    "PESD6C",   # Used the Ticket to Work Program
    "PESD6D",   # Used Assistive Technology Act Program
    "PESD6E",   # Used Center for Independent Living for Individuals w/ Disabilities
    "PESD6F",   # Used the Client Assistance Program
    "PESD6G",   # Used any other employment assistance program
    "PESD7A",   # Helpfulness: Vocational Rehabilitation Center
    "PESD7B",   # Helpfulness: One Stop Career Centers
    "PESD7C",   # Helpfulness: Ticket to Work Program
    "PESD7E",   # Helpfulness: Center for Independent Living
    "PWCMPWGT"   #weight
]

In [8]:
df12 = load_data(url12, vars12_19, CENSUS_API_KEY)
#df19 = load_data(url19, vars12_19, CENSUS_API_KEY)
#df21 = load_data(url21, vars21, CENSUS_API_KEY)

In [10]:
df12_clean = clean_data(df12, 12, divst)
#df19_clean = clean_data(df19, 19, divst)
#df21_clean = clean_data(df21, 21, divst)

In [11]:
df12_clean.head()

Unnamed: 0,GESTFIPS,GEREG,GTCO,PRDISFLG,PEDISREM,PEDISEAR,PEDISPHY,PEDISEYE,PEDISDRS,PEDISOUT,...,state,GEDIV,County_GEOID,PRDISFLG_w,PEDISREM_w,PEDISEAR_w,PEDISPHY_w,PEDISEYE_w,PEDISDRS_w,PEDISOUT_w
0,1,3,0,2,2,2,2,2,2,2,...,1,6,1000,0,0,0,0,0,0,0
1,1,3,0,2,2,2,2,2,2,2,...,1,6,1000,0,0,0,0,0,0,0
2,1,3,73,2,2,2,2,2,2,2,...,1,6,1073,0,0,0,0,0,0,0
3,1,3,73,2,2,2,2,2,2,2,...,1,6,1073,0,0,0,0,0,0,0
4,1,3,73,2,2,2,2,2,2,2,...,1,6,1073,0,0,0,0,0,0,0


Create Division Mappings

In [None]:
divst = df21[['GESTFIPS', 'GEDIV']].drop_duplicates().reset_index()[['GESTFIPS', 'GEDIV']]
#divst.to_csv("divst.csv", index=False)

In [None]:
#testing division merge
div12 = df12.merge(divst, on="GESTFIPS")
div12.head()


Unnamed: 0,GESTFIPS,GEREG,GTCO,PRDISFLG,PEDISREM,PEDISEAR,PEDISPHY,PEDISEYE,PEDISDRS,PEDISOUT,...,PESD6G,PESD7A,PESD7B,PESD7C,PESD7D,PESD7E,PESD7F,state,GEDIV_x,GEDIV_y
0,1,3,0,2,2,2,2,2,2,2,...,-1,-1,-1,-1,-1,-1,-1,1,6,6
1,1,3,0,2,2,2,2,2,2,2,...,-1,-1,-1,-1,-1,-1,-1,1,6,6
2,1,3,73,2,2,2,2,2,2,2,...,-1,-1,-1,-1,-1,-1,-1,1,6,6
3,1,3,73,2,2,2,2,2,2,2,...,-1,-1,-1,-1,-1,-1,-1,1,6,6
4,1,3,73,2,2,2,2,2,2,2,...,-1,-1,-1,-1,-1,-1,-1,1,6,6
