In [14]:
import pandas as pd
import altair as alt
import numpy as np
import vega_datasets
from vega_datasets import data

In [15]:
# Format data
#
# Each frame below is filtered and then given new or converted columns, so
# every filter result is materialised with .copy(). Assigning a column into a
# bare filtered slice is what triggers pandas' SettingWithCopyWarning.

# ACS data: remove Puerto Rico, create dataset of 2023 data
acs_data = pd.read_csv("../data/acs_data.csv")
acs_data = acs_data[acs_data['state'] != "Puerto Rico"].copy()
# Taken before the cast below, so acs_2023 keeps 'year' exactly as read from the CSV.
acs_2023 = acs_data[acs_data['year'] == 2023].copy()
# From here on acs_data['year'] holds strings: a comparison against the
# integer 2014 no longer matches any row, compare against '2014' instead.
acs_data['year'] = acs_data['year'].astype(str)

# by age datasets: remove Puerto Rico
by_age = pd.read_csv("../data/by_age.csv")
by_age = by_age[by_age['state'] != "Puerto Rico"].copy()

by_age_count = pd.read_csv("../data/by_age_count.csv")
by_age_count = by_age_count[by_age_count['state'] != "Puerto Rico"].copy()

by_age_count['number'] = pd.to_numeric(by_age_count['number'])

# Group age groups to adults vs. children: the two under-18 brackets are
# labelled as children, every other bracket as adults.
child_brackets = ['< 6', '6 to 17']
by_age_count['group'] = np.where(
    by_age_count['age'].isin(child_brackets),
    'Children (<18)', 'Adults (18-64)')


In [16]:
# Main plot: Choropleth of proportion of low-income population in public health insurance in each state

# Bring in and merge state datasets
states = alt.topo_feature(data.us_10m.url, 'states')
states_df = pd.read_csv("../data/states.csv").set_index('state')

# Join on the 2023 rows only. The lookup below is keyed on state 'id', so it
# needs exactly one row per state; joining every survey year left several rows
# per id (and drew one circle per state-year in the points layer).
joined = pd.merge(acs_2023, states_df, left_on='state', right_on='state_name')

# Define variables of interest (formerly a longer list, now just one)
variable_list = ['public_under138_perc']

# Plot the percentage of low-income population with public HI by state.
# The colour field is supplied by the row repeat at the bottom of the cell.
plot = alt.Chart(states).mark_geoshape(
    stroke='lightgrey', strokeWidth=.5
).encode(
    color=alt.Color(
        alt.repeat('row'), type='quantitative',
        legend=alt.Legend(title=['% of low-income population',
                                 'in public health insurance'],
                          titleAnchor='middle')
    ).scale(scheme='magma')
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(joined, 'id', variable_list)
).properties(
    width=700, height=400
).project(type='albersUsa')

# State outlines in black
outline = alt.Chart(states).mark_geoshape(
    stroke='black', fillOpacity=0
).project(type='albersUsa').properties(
    width=700, height=400
)

# Points for the non-expansion states by longitude and latitude
points = alt.Chart(joined).mark_circle(size=250, strokeWidth=5).encode(
    longitude='longitude:Q',
    latitude='latitude:Q',
    # Non-expansion ("Not Adopted") states get yellow borders and red dots;
    # every other state gets an empty colour value for both channels.
    stroke=alt.condition(
        alt.datum.expansion == "Not Adopted",
        alt.value('yellow'), alt.value('')
    ),
    color=alt.condition(
        alt.datum.expansion == "Not Adopted",
        alt.value('red'), alt.value('')
    ))

# Layer the three charts and repeat once per variable of interest
alt.layer(plot, outline, points).repeat(
    row=variable_list
).resolve_scale(
    color='independent'
).properties(
    title={
          "text": ["10 years later, low-income residents in states that did not expand",
                    "Medicaid have lower rates of public health insurance enrollment"]
        }
).configure_title(anchor='middle')

In [20]:
# Scatterplot of % of population in Medicaid vs. % of population in public health insurance

# Shared legends for the bubble size and the expansion colouring
population_legend = alt.Legend(title="State Population")
expansion_legend = alt.Legend(title='Expansion Status')

# main scatterplot: one bubble per state, sized by population
chart = alt.Chart(acs_2023).mark_point().encode(
    x=alt.X('medicaid_perc:Q', title='% of state population in Medicaid'),
    y=alt.Y('public_perc:Q',
            scale=alt.Scale(domain=[15, 55]),
            title=['% of state population in any public health insurance']),
    size=alt.Size("total:Q", legend=population_legend),
    color=alt.Color("expansion:N",
                    legend=expansion_legend,
                    scale=alt.Scale(range=['blue', 'red'])),
)

# Regression line of public coverage on Medicaid coverage
reg = (
    alt.Chart(acs_2023)
    .transform_regression('medicaid_perc', 'public_perc')
    .mark_line(color='black')
    .encode(x='medicaid_perc:Q', y='public_perc:Q')
)

# R-squared calculation: squared Pearson correlation between the two columns
correlations = acs_2023[['medicaid_perc', 'public_perc']].corr()
rsquared = correlations.loc['medicaid_perc', 'public_perc'] ** 2

# R-squared label. params=True collapses the data to the regression
# parameters; the text mark is then placed at fixed data coordinates.
rsq = (
    alt.Chart(acs_2023)
    .transform_regression('medicaid_perc', 'public_perc', params=True)
    .mark_text(align='left')
    .encode(
        x=alt.datum(18),
        y=alt.datum(22),
        text=alt.value(f"r²: {rsquared:.3f}"),
        size=alt.value(20),
    )
)

# Footnote for data source, anchored below the bottom-left corner of the plot
footnote = alt.Chart(acs_2023).mark_text(
    text=["2023 American Community Survey data"],
    color="#000000", x=0, y="height", dy=60, align="left")

final = chart + reg + footnote + rsq

final.properties(
    title={
          "text": ["State-level Medicaid Enrollment vs.", "Public Health Insurance Enrollment"]
        }
).configure_title(anchor='middle')

In [21]:
# Scatterplot of % of population below vs. above 138% of FPL in any public health insurance

# main scatterplot: one faint bubble per state, sized by population
chart = alt.Chart(acs_2023).mark_point().encode(
        alt.X('public_under138_perc:Q',
              scale=alt.Scale(domain=[40, 90])).title(
            ['% of state low-income population', 'in any public health insurance']),
        alt.Y('public_above138_perc:Q',
              scale=alt.Scale(domain=[10, 50])).title([
            '% of state high-income population', 'in any public health insurance']),
        size=alt.Size("total:Q", legend=alt.Legend(title="State Population")),
        color=alt.Color("expansion:N",
                        legend=alt.Legend(title='Expansion Status')).scale(
            range=['blue', 'red']),
        opacity=alt.value(0.25)
    )

# Footnotes for low- vs. high-income definitions and data source
footnote = alt.Chart(acs_2023).mark_text(
    text=["Low-income population is defined as below 138% of Federal Poverty Level (FPL),",
    "high-income population is defined as above 138% of FPL.",
    "2023 American Community Survey data"],
    color="#000000", x=0, y="height", dy=60, align="left")


def _average_marker(status, marker_color, label, dx, dy):
    """Return a filled square at the group's mean position plus its text label.

    status       -- value of the 'expansion' column selecting the group
    marker_color -- fill colour of the square
    label        -- list of text lines drawn next to the square
    dx, dy       -- pixel offsets of the label relative to the square

    The means are taken over acs_2023 so the squares summarise the same
    2023 observations that the scatterplot and the footnote refer to.
    """
    group_rows = acs_2023[acs_2023['expansion'] == status]
    square = alt.Chart(group_rows).mark_point(
        color=marker_color, filled=True, shape='square', size=200).encode(
        x='mean(public_under138_perc):Q', y='mean(public_above138_perc):Q')
    caption = alt.Chart(group_rows).mark_text(
        dy=dy, dx=dx, align='center').encode(
        x='mean(public_under138_perc):Q',
        y='mean(public_above138_perc):Q',
        text=alt.value(label))
    return square + caption


# Squares to mark the average for expansion and non-expansion states
nonexp = _average_marker("Not Adopted", 'red',
                         ['Non-Expansion', 'State Average'], dx=-50, dy=0)
exp = _average_marker("Adopted", 'blue',
                      ['Expansion', 'State Average'], dx=10, dy=20)

final = chart + footnote + exp + nonexp

final.properties(
        title={
          "text": ["State-level Public Health Insurance Enrollment by Income"]
        }
).configure_title(anchor='middle')

In [28]:
# Bar charts comparing expansion vs. non-expansion in each age group
# (2023 rows only; one facet column per age group, one bar per expansion status)

# NOTE(review): this sort list spells the second bracket '6-17', while the
# by_age_count grouping uses '6 to 17' -- confirm which label by_age carries.
age_order = ['< 6', '6-17', '18-24', '25-34', '35-44', '45-54', '55-64']

by_age_2023 = by_age.loc[by_age['year'] == 2023]

age_facet = alt.Column(
    'age:O',
    sort=age_order,
    header=alt.Header(title='Age Group', titleOrient='bottom', labelOrient='bottom'),
)

alt.Chart(by_age_2023).mark_bar().encode(
    column=age_facet,
    # Mean share across states, formatted as a percentage
    y=alt.Y('mean(percentage):Q',
            axis=alt.Axis(format='%'),
            title='% of age group enrolled in Medicaid'),
    # The legend already identifies the bars, so the x labels are hidden
    x=alt.X('expansion:O', axis=alt.Axis(labels=False), title=''),
    color=alt.Color('expansion:N',
                    legend=alt.Legend(title='Expansion Status'),
                    scale=alt.Scale(range=['blue', 'red'])),
).properties(
    title={
          "text": ["Medicaid enrollment by expansion status and age group"]
        }
).configure_title(
    anchor='middle'
).configure_axis(
    grid=False
).configure_view(
    strokeWidth=1
)


In [30]:
# Stacked bar chart of % of Medicaid caseload that are adults vs. children in each state

# Chat GPT: I couldn't figure out how to put borders on the non-expansion states only
    # asked how to create a condition to put boxes on a certain set of stacked bars
    # in an Altair stacked bar chart
    # It returned basically exactly this code (except with dummy variables)
non_expansion = ['AL', 'FL', 'GA', 'KS', 'MS', 'SC',
                 'TN', 'TX', 'WI', 'WY']

# OR together one equality test per abbreviation. Every term must test
# state_abbrev: the list holds abbreviations, so a term on another field
# would never match and that state would silently lose its border.
non_expansion_condition = alt.datum.state_abbrev == non_expansion[0]
for state_abbrev in non_expansion[1:]:
    non_expansion_condition = non_expansion_condition | (alt.datum.state_abbrev == state_abbrev)
# End of Chat GPT

# Chat GPT: I also couldn't figure out how to order by proportion of children
# Named medicaid_2023 rather than `data` so the vega_datasets `data` import
# used by the map cell is not overwritten when this cell runs.
medicaid_2023 = by_age_count[by_age_count['year'] == 2023]

# Total caseload per state
total_sum = medicaid_2023.groupby('state')['number'].sum().reset_index(name='total_sum')

# Caseload per state restricted to the children group
children_sum = (
    medicaid_2023[medicaid_2023['group'] == 'Children (<18)']
    .groupby('state')['number'].sum()
    .reset_index(name='children_sum')
)

# Left merge keeps every state; a state with no children rows gets 0
merged_counts = pd.merge(total_sum, children_sum, on='state', how='left').fillna(0)

merged_counts['proportion_children'] = merged_counts['children_sum'] / merged_counts['total_sum']

# Attach the per-state proportion to every age-group row for sorting
data_counts = pd.merge(medicaid_2023, merged_counts, on='state', how='left').fillna(0)
# End of Chat GPT

# Stacked bar chart
group = alt.Chart(data_counts).mark_bar().encode(
    # proportion_children is constant within a state, so op="max" recovers it
    # exactly; without an explicit op the sort aggregate for a stacked chart
    # would depend on how many age-group rows each state has.
    x=alt.X('state_abbrev', sort=alt.EncodingSortField(
        field="proportion_children", op="max", order="descending")).title('State'),
    y=alt.Y('sum(number):Q').stack("normalize").title(
        '% of Medicaid population in each age group'),
    color=alt.Color('group:N',
        legend=alt.Legend(title='Age Group')).scale(scheme='purplegreen'),
    # Borders are red for non-expansion states
    stroke=alt.condition(
        non_expansion_condition,
        alt.value('red'),
        alt.value('transparent')
    ), strokeWidth=alt.value(3),
    fillOpacity=alt.value(1)
)

alt.layer(group).properties(title={
          "text": ['State-level Medicaid caseloads by age group']
}).configure_title(anchor='middle')

In [32]:
# Line charts of changes over time in % enrolled in Medicaid, labeled by expansion status

# Drop the 2014 rows once and share the result across every layer.
# 'year' is compared as a string: acs_data['year'] is cast to str when the
# data is formatted, so a comparison against the integer 2014 keeps every row.
acs_excl_2014 = acs_data[acs_data['year'].astype(str) != '2014']

# Points for Medicaid enrollment in each year/state combination
points = alt.Chart(acs_excl_2014).mark_point(
    strokeWidth=.5).encode(
    alt.X('year:T', axis=alt.Axis(format="%Y")),
    y=alt.Y('medicaid_perc:Q').title(['% of state population enrolled in Medicaid']),
    color=alt.Color('expansion:N', legend=alt.Legend(title='Expansion Status')),
    opacity=alt.value(0.25)
)

# Lines for Medicaid enrollment in each state across years
# (detail='state' draws one line per state rather than one per colour group)
lines = alt.Chart(acs_excl_2014).mark_line(
    strokeWidth=.5).encode(
    alt.X('year:T', axis=alt.Axis(format="%Y")),
    alt.Y('medicaid_perc:Q'),
    color=alt.Color('expansion:N', legend=alt.Legend(title='Expansion Status')),
    opacity=alt.value(0.25),
    detail='state'
)

# Lines for averages by expansion status across years; this layer pins the
# colour mapping (Adopted -> blue, Not Adopted -> red)
averages = alt.Chart(acs_excl_2014).mark_line(
    strokeWidth=5).encode(
    alt.X('year:T', axis=alt.Axis(format="%Y")),
    alt.Y('mean(medicaid_perc):Q'),
    color=alt.Color('expansion:N', legend=alt.Legend(title='Expansion Status'),
                   scale=alt.Scale(domain=['Adopted', 'Not Adopted'],
                                   range=['blue', 'red']))
)

# Labels for the expansion and non-expansion average lines, placed at a fixed
# pixel x position and a hand-picked y value next to each average line
exp = alt.Chart(acs_excl_2014).mark_text(
    align='center').encode(
    x=alt.value(350), y=alt.datum(16),
    text=alt.value(['Expansion', 'State Average']))

nonexp = alt.Chart(acs_excl_2014).mark_text(
    align='center').encode(
    x=alt.value(350), y=alt.datum(13),
    text=alt.value(['Non-Expansion', 'State Average']))

alt.layer(points, averages, lines, exp, nonexp).properties(title={
          "text": ['Medicaid enrollment over time by expansion status'],
          'subtitle': ['State-level Medicaid enrollment is largely stable over time',
                      ]
})

In [33]:
# Line charts of changes over time in % enrolled in any public insurance under 138%, labeled by expansion status

# Drop the 2014 rows once and share the result across every layer.
# 'year' is compared as a string: acs_data['year'] is cast to str when the
# data is formatted, so a comparison against the integer 2014 keeps every row.
acs_excl_2014 = acs_data[acs_data['year'].astype(str) != '2014']

# Points for public health insurance enrollment in each year/state combination
points = alt.Chart(acs_excl_2014).mark_point(
    strokeWidth=.5).encode(
    alt.X('year:T', axis=alt.Axis(format="%Y")),
    y=alt.Y('public_under138_perc:Q', scale=alt.Scale(domain=[30, 90])).title(
        ['% of state low-income population', 'in public health insurance']),
    color=alt.Color('expansion:N', legend=alt.Legend(title='Expansion Status')),
    opacity=alt.value(0.25)
)

# Lines for public health insurance enrollment in each state across years
# (detail='state' draws one line per state rather than one per colour group)
lines = alt.Chart(acs_excl_2014).mark_line(
    strokeWidth=.5).encode(
    alt.X('year:T', axis=alt.Axis(format="%Y")),
    alt.Y('public_under138_perc:Q'),
    color=alt.Color('expansion:N', legend=alt.Legend(title='Expansion Status')),
    opacity=alt.value(0.25),
    detail='state'
)

# Lines for averages by expansion status across years; this layer pins the
# colour mapping (Adopted -> blue, Not Adopted -> red)
averages = alt.Chart(acs_excl_2014).mark_line(strokeWidth=5).encode(
    alt.X('year:T', axis=alt.Axis(format="%Y")),
    alt.Y('mean(public_under138_perc):Q'),
    color=alt.Color('expansion:N', legend=alt.Legend(title='Expansion Status'),
                   scale=alt.Scale(domain=['Adopted', 'Not Adopted'],
                                   range=['blue', 'red']))
)

# Labels for the expansion and non-expansion average lines, placed at a fixed
# pixel x position and a hand-picked y value next to each average line
exp = alt.Chart(acs_excl_2014).mark_text(
    align='center').encode(
    x=alt.value(350), y=alt.datum(72),
    text=alt.value(['Expansion', 'State Average']))

nonexp = alt.Chart(acs_excl_2014).mark_text(
    align='center').encode(
    x=alt.value(350), y=alt.datum(61),
    text=alt.value(['Non-Expansion', 'State Average']))

alt.layer(points, averages, lines, exp, nonexp).properties(title={
          "text": ['Low-income population public health insurance',
                  ' enrollment over time by expansion status'],
          'subtitle': ['Public health insurance enrollment is largely stable over time']
})

In [27]:
# Percentage in public coverage under 138% by expansion status and year
# (one narrow facet per survey year, one bar per expansion status)

# Horizontal legend under the facets, title centred
status_legend = alt.Legend(
    title='Expansion Status',
    orient='bottom',
    direction='horizontal',
    titleAnchor='middle',
)

alt.Chart(acs_data).mark_bar().encode(
    # Bars are identified by colour, so x labels and ticks are switched off
    x=alt.X('expansion:O',
            axis=alt.Axis(labels=False, ticks=False),
            title=''),
    y=alt.Y('mean(public_under138_perc):Q',
            title=['% of low-income population', 'in any public health insurance']),
    color=alt.Color('expansion:N',
                    legend=status_legend,
                    scale=alt.Scale(range=['blue', 'red'])),
    column=alt.Column('year:N', header=alt.Header(titleOrient='bottom')),
).properties(
    title={
          "text": ["Low-income public health coverage by expansion status across years"]
        },
    width=50,
    height=300,
).configure_title(
    anchor='middle'
).configure_axis(
    grid=False
).configure_view(
    strokeWidth=1
)