In [1]:
import pandas as pd
import altair as alt
import numpy as np
import vega_datasets
from vega_datasets import data

In [2]:
# Load the ACS extract and drop the Puerto Rico rows.
acs_data = pd.read_csv("../data/acs_data.csv")

# .copy() makes this an independent frame, so the column assignments made on
# it afterwards do not write into a filtered slice (SettingWithCopyWarning).
acs_data = acs_data[acs_data['state'] != "Puerto Rico"].copy()

# 2023 snapshot. It is taken before the cast below, so `year` is still
# compared in the dtype read from the CSV (matched here against the int 2023).
acs_2023 = acs_data[acs_data['year'] == 2023].copy()

# From here on `acs_data['year']` holds strings; any later filter on it must
# compare against string values (e.g. "2014"), not integers.
acs_data['year'] = acs_data['year'].astype(str)

# Per sex and age group: <sex>_<age>_medicaid_perc = <sex>_<age>_medicaid / <sex>_<age>
for sex in ('male', 'female'):
    for age in ('u6', '617', '1824', '2534', '3544', '4554', '5564'):
        group = f'{sex}_{age}'
        acs_data[f'{group}_medicaid_perc'] = acs_data[f'{group}_medicaid'] / acs_data[group]

# Per race and age group: <race>_<age>_perc = <race>_<age>_with / <race>_<age>
# (note these columns spell the youngest group "under6", not "u6").
for race in ('black', 'white'):
    for age in ('under6', '617', '1824', '2534', '3544', '4554', '5564'):
        group = f'{race}_{age}'
        acs_data[f'{group}_perc'] = acs_data[f'{group}_with'] / acs_data[group]

# Age-group tables, each read from CSV and stripped of the Puerto Rico rows.
by_age = pd.read_csv("../data/by_age.csv").loc[
    lambda frame: frame['state'] != "Puerto Rico"
]

by_age_count = pd.read_csv("../data/by_age_count.csv").loc[
    lambda frame: frame['state'] != "Puerto Rico"
]


In [3]:
# 1. Scatterplot (2023): share of population in Medicaid vs. share in any public
#    health insurance; point size encodes `total`, color encodes `expansion`.
(
    alt.Chart(acs_2023)
    .mark_point(color='purple')
    .encode(
        x=alt.X('medicaid_perc:Q', title='Percentage of population enrolled in Medicaid'),
        y=alt.Y('public_perc:Q', title='Percentage of population enrolled in any public health insurance'),
        size=alt.Size('total:Q'),
        color=alt.Color('expansion:N').legend(title='Expansion Status'),
    )
    .properties(title='Medicaid enrollment vs. Public health insurance enrollment')
)

In [4]:
# 2. Scatterplot (2023): public health insurance share for the population below
#    138% of FPL vs. the population above it; size = `total`, color = `expansion`.
(
    alt.Chart(acs_2023)
    .mark_point(color='purple')
    .encode(
        x=alt.X('public_under138_perc:Q', title='Percentage of below 138% of FPL in any public health insurance'),
        y=alt.Y('public_above138_perc:Q', title='Percentage of above 138% of FPL in any public health insurance'),
        size=alt.Size('total:Q'),
        color=alt.Color('expansion:N').legend(title='Expansion Status'),
    )
    .properties(title='Public health insurance enrollment: population above 138% of Federal Poverty Level (FPL) vs. population below 138% of FPL')
)

In [5]:
# 3. Choropleth of proportion of population in Medicaid in each state (2023 snapshot)
# TODO: add a marker of the expansion states?
states = alt.topo_feature(data.us_10m.url, 'states')

# Census state reference table. `names` relabels the four pipe-separated
# columns positionally; the frame is indexed by the `state` column.
states_df = pd.read_csv('https://www2.census.gov/geo/docs/reference/state.txt',
                        sep="|",  dtype='str',  header=0,
                        names=['state_fips', 'state', 'state_name', 'StateENS'],
                        usecols=['state_fips', 'state_name', 'state']).set_index('state')

# Attach FIPS codes by matching on the state name. A plain index-on-index
# `.join` would align acs_data's integer row index with states_df's `state`
# index, match nothing, and never produce the `id` key the lookup needs.
# Assumes acs_data['state'] holds full state names (it is filtered against
# "Puerto Rico" when loaded) -- TODO confirm against the CSV.
# One year only (acs_2023) so each state contributes exactly one lookup row.
joined = acs_2023.merge(
    states_df.reset_index(drop=True),   # drop the `state` index to avoid a name clash
    left_on='state',
    right_on='state_name',
    how='inner',                        # unmatched rows are dropped, so the int cast below is safe
)
# FIPS is read as text ('01'); the map features are looked up on numeric ids
# -- TODO confirm the us_10m `id` values are plain integers.
joined['id'] = joined['state_fips'].astype(int)

plot = alt.Chart(states).mark_geoshape(stroke='lightgrey',
    strokeWidth=.5).encode(
    color='medicaid_perc:Q'
).transform_lookup(
    lookup='id',
    # Only the key and the plotted value are embedded in the chart spec.
    from_=alt.LookupData(joined[['id', 'medicaid_perc']], 'id', ['medicaid_perc'])
).properties(
    width=700, height=400
).project(type='albersUsa'
)

# Unfilled black state borders drawn over the colored layer.
outline = alt.Chart(states).mark_geoshape(
    stroke='black', fillOpacity=0).project(
    type='albersUsa'
).properties(
    width=700, height=400
)

alt.layer(plot,outline).properties(
    title='Proportion of population in Medicaid by state'
)

In [6]:
# 3. Proportion of Medicaid by state: bar chart
# Going to go with the choropleth instead, I think
# Mean of medicaid_perc per state across every year present in acs_data.
df_aggregated = acs_data.groupby('state')['medicaid_perc'].mean().reset_index()

base = alt.Chart(df_aggregated)
# Channel classes now match the channel they are passed to (x -> alt.X, y -> alt.Y).
chart = base.mark_bar().encode(
    x=alt.X("state:N").title('State'),
    y=alt.Y('medicaid_perc:Q').title("Percentage of population enrolled in Medicaid"),
    )
# Dashed horizontal rule at 0.5. It is defined but not layered onto the
# displayed chart; use `chart + line` to show it.
line = base.mark_rule(strokeDash=[2, 2]).encode(
    y=alt.datum(.5) )

chart.properties(title = 'Proportion of population in Medicaid by state')

In [7]:
# 4. Mean public-coverage share of the population under 138% of FPL, one bar per
#    expansion status, faceted into one column per year.
(
    alt.Chart(acs_data)
    .mark_bar()
    .encode(
        x=alt.X('expansion:O', title=''),
        y=alt.Y(
            'mean(public_under138_perc):Q',
            title='Percentage of population underneath 138% of FPL enrolled in public health insurance',
        ),
        color=alt.Color('expansion:N').legend(title='Expansion Status'),
        column=alt.Column('year:N'),
    )
    .properties(title='Percentage of low-income individuals in public coverage by expansion status across years')
)

In [8]:
# Mean Medicaid share per expansion status, faceted into one column per year.
(
    alt.Chart(acs_data)
    .mark_bar()
    .encode(
        x=alt.X('expansion:O', title=''),
        y=alt.Y('mean(medicaid_perc):Q', title='Percentage of population enrolled in Medicaid'),
        color=alt.Color('expansion:N').legend(title='Expansion Status'),
        column=alt.Column('year:N'),
    )
    .properties(title='Percentage of population in Medicaid by expansion status across years')
)

In [9]:
# 5. Small multiples (2023 rows of by_age): expansion vs. non-expansion mean
#    percentage, one column per age group and one row per gender.
(
    alt.Chart(by_age.loc[by_age['year'] == 2023])
    .mark_bar()
    .encode(
        x=alt.X('expansion:O', title=''),
        y=alt.Y('mean(percentage):Q', title='Percentage of age group enrolled in Medicaid'),
        column=alt.Column(
            'age:N',
            # Explicit order so the age groups run youngest to oldest.
            sort=['u6', '617', '1824', '2534', '3544', '4554', '5564'],
            header=alt.Header(title=None, labelOrient='bottom'),
        ),
        color=alt.Color('expansion:N').legend(title='Expansion Status'),
        row=alt.Row('gender:N'),
    )
    .properties(title='Percentage of population in Medicaid by age group and gender')
)


In [10]:
# 6. Normalized stacked bars (2023 rows of by_age_count): age-group composition
#    of `number` per state, one row of charts per gender.
# Open questions: group into children vs. adults? Highlight the expansion states somehow?
(
    alt.Chart(by_age_count.loc[by_age_count['year'] == 2023])
    .mark_bar()
    .encode(
        x=alt.X('state'),
        y=alt.Y(
            'sum(number)',
            stack='normalize',
            title='Percentage of Medicaid enrollment in each age group',
        ),
        color=alt.Color('age:N').legend(title='Age Group'),
        row=alt.Row('gender:N'),
    )
    .properties(title='Percentage of Medicaid population by age group and gender')
)

In [11]:
# 7. Line charts of changes over time in % enrolled in Medicaid, colored by expansion status

# `acs_data['year']` is cast to str when the data is loaded, so comparing it
# with the integer 2014 is True for every row and filters nothing. Compare as
# strings so the 2014 rows are actually excluded.
acs_excl_2014 = acs_data[acs_data['year'].astype(str) != '2014']

# Encodings shared by the point layer and the line layer.
medicaid_trend_base = alt.Chart(acs_excl_2014).encode(
    x=alt.X('year:T', axis=alt.Axis(format="%Y")),
    y=alt.Y('medicaid_perc:Q').title('Percentage in Medicaid'),
    color=alt.Color('expansion:N', legend=alt.Legend(title = 'Expansion Status')),
)

# `detail='state'` gives each state its own line. The title is set on the
# layered chart; a trailing `.properties(...)` after `a + b` would bind to `b` only.
(
    medicaid_trend_base.mark_point()
    + medicaid_trend_base.mark_line().encode(detail='state')
).properties(title = 'State-level trajectories of percentage of population in Medicaid population over time')

In [12]:
# 8. Line charts of changes over time in % enrolled in any public insurance under 138%,
#    colored by expansion status

# `acs_data['year']` is cast to str when the data is loaded, so comparing it
# with the integer 2014 is True for every row and filters nothing. Compare as
# strings so the 2014 rows are actually excluded.
acs_excl_2014 = acs_data[acs_data['year'].astype(str) != '2014']

# Encodings shared by the point layer and the line layer.
public_under138_trend_base = alt.Chart(acs_excl_2014).encode(
    x=alt.X('year:T', axis=alt.Axis(format="%Y")),
    y=alt.Y('public_under138_perc:Q').title('Percentage of population under 138% of FPL in public health insurance'),
    color=alt.Color('expansion:N', legend=alt.Legend(title = 'Expansion Status')),
)

# `detail='state'` gives each state its own line. The title is set on the
# layered chart; a trailing `.properties(...)` after `a + b` would bind to `b` only.
(
    public_under138_trend_base.mark_point()
    + public_under138_trend_base.mark_line().encode(detail='state')
).properties(title = 'State-level trajectories of percentage of population under 138% of FPL in public health insurance over time')