In [1]:
import pandas as pd
import numpy as np 
import os
import altair as alt
import tabula 
import html5lib

In [2]:
# Turn off pandas' chained-assignment check for the rest of the session, so
# assigning columns on sliced frames does not emit SettingWithCopyWarning.
pd.set_option('mode.chained_assignment', None)

In [3]:
def _with_insured_row(section, status):
    """Tag one block of the insurance table and append a derived 'Insured' row.

    Parameters
    ----------
    section : pandas.DataFrame
        Rows of the insurance-type table belonging to one disability status.
        Must contain a row whose 'Disability Status/ Insurance Type' value is
        'Uninsured'.
    status : str
        Label written to the new `disability_status` column.

    Returns
    -------
    pandas.DataFrame
        A copy of `section` with `disability_status` set, plus one extra row
        labelled 'Insured' whose second column is 100 minus the 'Uninsured'
        value. Every other column of that row is copied from the 'Uninsured'
        row.

    Raises
    ------
    IndexError
        If `section` has no 'Uninsured' row.
    """
    # Work on an explicit copy so adding a column never touches (or warns
    # about) the parent table the slice was taken from.
    section = section.copy()
    section['disability_status'] = status

    uninsured_vals = section[
        section['Disability Status/ Insurance Type'] == 'Uninsured'
    ].values[0]
    # Position 1 is assumed to be the percentage column -- TODO confirm
    # against the source tables.
    insured_row = ['Insured', 100 - float(uninsured_vals[1]), *uninsured_vals[2:]]
    insured_row = pd.DataFrame([insured_row], columns = section.columns)
    return pd.concat([section, insured_row])


insurance_dfs = []
type_dfs = []
# Sorted so the row order of the combined frames does not depend on the
# arbitrary order in which os.listdir returns directory entries.
files = sorted(f for f in os.listdir("../data") if f.endswith('html'))

for file in files:
    # Parse each page once; the last table is split by insurance type and the
    # second-to-last by disability type.
    tables = pd.read_html(f"../data/{file}")
    by_insurance = tables[-1]
    by_disability = tables[-2]

    # Rows 1-7 are treated as the disabled block and rows 9+ as the
    # not-disabled block (rows 0 and 8 are skipped).
    disabled = _with_insured_row(by_insurance.iloc[1:8, :], 'disabled')
    not_disabled = _with_insured_row(by_insurance.iloc[9:, :], 'not_disabled')

    # The year is the first four characters of the last '_'-separated piece
    # of the file name.
    year = int(file.split("_")[-1][:4])

    by_insurance = pd.concat([not_disabled, disabled])
    by_insurance['year'] = year
    by_disability['year'] = year

    insurance_dfs.append(by_insurance)
    type_dfs.append(by_disability)


df = pd.concat(insurance_dfs).reset_index(drop = True)
disability_df = pd.concat(type_dfs).reset_index(drop = True)

In [4]:
# Add `pct_uninsured` (one minus 'Percent' expressed as a 0-1 fraction) and
# shorten the 'Disability Type' column name to `type`.
disability_df = disability_df.assign(
    pct_uninsured = lambda frame: 1.0 - frame['Percent'].astype(float) / 100.0
).rename(
    columns = {'Disability Type': 'type'}
)

In [5]:
# Add `percent_new` ('Percent' rescaled to a 0-1 fraction) and shorten the
# 'Disability Status/ Insurance Type' column name to `type`.
df = df.assign(
    percent_new = lambda frame: frame['Percent'].astype(float) / 100.0
).rename(
    columns = {'Disability Status/ Insurance Type': 'type'}
)

In [6]:
# One frame per coverage category, restricted to rows tagged 'disabled'.
_disabled_rows = df[df.disability_status == 'disabled']

insured = _disabled_rows[_disabled_rows.type == 'Insured']
uninsured = _disabled_rows[_disabled_rows.type == 'Uninsured']
employer = _disabled_rows[_disabled_rows.type == 'Employer/Union']
purchased = _disabled_rows[_disabled_rows.type == 'Purchased']
medicare = _disabled_rows[_disabled_rows.type == 'Medicare']
medicaid = _disabled_rows[_disabled_rows.type == 'Medicaid']

In [7]:
# `disabled`: rows tagged 'disabled', excluding the 'Insured' and 'Uninsured'
# rows so only the remaining coverage types are left.
_is_disabled = df.disability_status == 'disabled'
_is_coverage_total = df.type.isin(['Insured', 'Uninsured'])
disabled = df[_is_disabled & ~_is_coverage_total]

# `all_uninsured`: the 'Uninsured' rows for every disability status.
all_uninsured = df[df.type == 'Uninsured']

In [8]:
# Visualization 1: uninsured rate over time, one line per disability status,
# with vertical rules marking 2010 and 2014 and a hover read-out of the values.

# NOTE: `highlight`, `color_condition` and `text_condition` are defined here
# but not referenced by any chart in this cell.
highlight = alt.selection_single(on = 'mouseover', fields = ['type'], nearest = True)
color_condition = alt.condition(highlight, alt.Color('type:N'), alt.value('gray'))

# Hover selection keyed on `year`; empty = 'none' means nothing is selected
# until the pointer is over the chart.
nearest_val = alt.selection_single(on = 'mouseover', nearest = True, fields = ['year'], empty = 'none')
opacity_condition = alt.condition(nearest_val, alt.value(1.0), alt.value(0.0))
text_condition = alt.condition(nearest_val, alt.Text('percent_new:Q'), alt.value(0.0))

# Single-row frames that anchor the two vertical rules.
aca_df = pd.DataFrame({'year': [2010]})
aca_effect_df = pd.DataFrame({'year': [2014]})

uninsured_rates = alt.Chart(all_uninsured).mark_line().encode(
    x = alt.X('year:O', axis = alt.Axis(title = 'Year')), 
    y = alt.Y('percent_new:Q', axis = alt.Axis(format = '%', title = 'Percent Uninsured')), 
    # Nominal, matching `rate_points` below: the two layers share one colour
    # scale, and the status labels are unordered categories.
    color = alt.Color('disability_status:N')
)

aca_passage = alt.Chart(aca_df).mark_rule(
    color = 'tomato', 
    size = 2.5
).encode(
    x = alt.X('year:O')
)

# The annotation texts carry no x/y encoding; they are placed with the fixed
# pixel offsets dx/dy only. lineBreak is the literal two-character sequence
# backslash-n, which is why the text strings are raw strings.
aca_passage_text = alt.Chart(aca_df).mark_text(
    lineBreak = r'\n',
    text = r"2010: Passage of \nthe Affordable Care Act",
    align = 'left',
    fontSize = 10, 
    dy = 135, 
    dx = -95
)

aca_effect = alt.Chart(aca_effect_df).mark_rule(
    color = 'tomato', 
    size = 2.5
).encode(
    x = alt.X('year:O')
)

aca_effect_text = alt.Chart(aca_effect_df).mark_text(
    lineBreak = r'\n',
    text = r'2014: Most ACA \nProvisions go into Effect', 
    align = 'left',
    fontSize = 10, 
    dy = 135, 
    dx = 20
)

# Points are fully transparent except for the hovered year.
rate_points = alt.Chart(all_uninsured).add_selection(
    nearest_val
).mark_point(
    filled = True, 
    size = 50
).encode(
    x = alt.X('year:O'), 
    y = alt.Y('percent_new:Q'), 
    opacity = opacity_condition, 
    color = alt.Color('disability_status:N')
)

# Value labels, filtered down to the hovered year.
rate_labels = alt.Chart(all_uninsured).mark_text(
    fontSize = 12, 
    dx = 7, 
    dy = 10, 
).encode(
    x = alt.X('year:O'), 
    y = alt.Y('percent_new:Q'), 
    text = alt.Text('percent_new:Q', format = '.1%')
).transform_filter(nearest_val)

c1 = alt.layer(uninsured_rates, aca_passage, aca_effect, aca_passage_text, aca_effect_text, rate_points, rate_labels)

c1

In [9]:
# Visualization 2: share of each coverage type among rows tagged 'disabled',
# one line per `type`, with a hover read-out. Reuses the `nearest_val`
# selection and `opacity_condition` defined in an earlier cell.

insured_lines = alt.Chart(disabled).mark_line().encode(
    x = alt.X('year:O'),
    y = alt.Y('percent_new:Q'), 
    color = alt.Color('type:N')
)

insured_points = alt.Chart(disabled).add_selection(
    nearest_val
).mark_point(
    filled = True, 
    size = 50
).encode(
    x = alt.X('year:O'), 
    y = alt.Y('percent_new:Q'), 
    opacity = opacity_condition, 
    # Colour by coverage type so each hover point matches its line. Every row
    # of `disabled` has the same disability_status, so colouring by that field
    # gave all points one colour and added a stray category to the colour
    # scale shared with `insured_lines`.
    color = alt.Color('type:N')
)

# Value labels, filtered down to the hovered year.
insured_labels = alt.Chart(disabled).mark_text(
    fontSize = 12, 
    dx = 7, 
    dy = 10, 
).encode(
    x = alt.X('year:O'), 
    y = alt.Y('percent_new:Q'), 
    text = alt.Text('percent_new:Q', format = '.1%')
).transform_filter(nearest_val)

c2 = alt.layer(insured_points, insured_lines, insured_labels)
c2

In [10]:
# Uninsured rate by disability type: a bold 'Any Disability' line over thin
# per-type lines, with vertical rules and notes at 2010 and 2014.
# Note: this cell rebinds `c1` and the selection/condition names used earlier.

highlight = alt.selection_single(on = 'mouseover', fields = ['type'], nearest = True)
color_condition = alt.condition(highlight, alt.Color('type:N'), alt.value('gray'))

nearest_val = alt.selection_single(on = 'mouseover', nearest = True, fields = ['year'], empty = 'none')
opacity_condition = alt.condition(nearest_val, alt.value(1.0), alt.value(0.0))
text_condition = alt.condition(nearest_val, alt.Text('pct_uninsured:Q'), alt.value(0.0))

aca_df = pd.DataFrame({'year': [2010]})
aca_effect_df = pd.DataFrame({'year': [2014]})


def _type_rate_line(type_name, stroke_color):
    """Thin line of pct_uninsured by year for one `type`, in a fixed colour."""
    only_this_type = alt.datum.type == type_name
    return alt.Chart(disability_df).transform_filter(
        only_this_type
    ).mark_line().encode(
        x = alt.X('year:O'),
        y = alt.Y('pct_uninsured:Q'),
        color = alt.value(stroke_color)
    )


def _year_rule(year_frame):
    """Vertical tomato-coloured rule at the year held in `year_frame`."""
    return alt.Chart(year_frame).mark_rule(
        color = 'tomato',
        size = 2.5
    ).encode(
        x = alt.X('year:O')
    )


def _year_note(year_frame, label, x_offset):
    """Two-line text note placed by pixel offsets only (no x/y encoding)."""
    return alt.Chart(year_frame).mark_text(
        lineBreak = r'\n',
        text = label,
        align = 'left',
        fontSize = 10,
        dy = 135,
        dx = x_offset
    )


# Headline series: the only layer with axis titles and a data-driven colour.
uninsured_rates = alt.Chart(disability_df).transform_filter(
    alt.datum.type == 'Any Disability'
).mark_line(
    strokeWidth = 5.5
).encode(
    x = alt.X('year:O', axis = alt.Axis(title = 'Year')),
    y = alt.Y('pct_uninsured:Q', axis = alt.Axis(format = '%', title = 'Percent Uninsured')),
    color = alt.Color('type:N')
)

# Background series, one per disability type.
cognitive_rate = _type_rate_line('Cognitive', 'lightblue')
visual_rate = _type_rate_line('Visual', 'lightgrey')
hearing_rate = _type_rate_line('Hearing', 'lightgrey')
ambulatory_rate = _type_rate_line('Ambulatory', 'lightgrey')
independent_living_rate = _type_rate_line('Independent Living', 'lightgray')

aca_passage = _year_rule(aca_df)
aca_passage_text = _year_note(aca_df, r"2010: Passage of \nthe Affordable Care Act", -95)

aca_effect = _year_rule(aca_effect_df)
aca_effect_text = _year_note(aca_effect_df, r'2014: Most ACA \nProvisions go into Effect', 20)

# Hand-placed label at a fixed data coordinate.
cognitive_label = alt.Chart({'values':[{'x': 2009, 'y': 0.165}]}).mark_text(
    text = 'Cognitive Disabilities',
    angle = 10,
    fontSize = 8.5
).encode(
    x = 'x:O',
    y = 'y:Q'
)

_layers_in_draw_order = [
    cognitive_rate, visual_rate, hearing_rate,
    ambulatory_rate, independent_living_rate, uninsured_rates,
    aca_passage, aca_effect, aca_passage_text, aca_effect_text,
    cognitive_label,
]

c1 = alt.layer(*_layers_in_draw_order).properties(
    width = 600,
    height = 400
)

c1

In [98]:
# Caption text for the info box under the final chart.
# Lines are separated by the literal two-character sequence backslash-n (not a
# real newline), to be used as the text mark's `lineBreak` string. Building the
# caption with join keeps real newlines and source indentation out of the
# rendered text, which the previous triple-quoted literal embedded in every line.
info_box_str = r'\n'.join([
    'The Affordable Care Act, also known as ObamaCare, was passed by Congress in 2010. Among other provisions, the',
    'ACA made it illegal for insurance companies to deny coverage based on pre-existing conditions. Before this, individuals',
    'with disabilities or other chronic health conditions could be denied coverage. Likewise, insurance providers may not',
    'charge higher premiums on account of disabilities under new ACA rules. The ACA has significantly expanded insurance',
    'options for Americans with disabilities, which will hopefully improve their access to low-cost high-quality care from',
    "providers that are informed about disabilities and sensitive to disabled Americans' unique care needs.",
    # The leading break marker leaves one blank line before the link.
    r'\nSee more at: https://tinyurl.com/ACADisability',
])

In [108]:
# Final figure: uninsured rate by disability type with ACA milestones, stacked
# above an explanatory text box.

# Single-row frames that anchor the vertical rules at 2010 and 2014.
# The former 'val' column was dropped: it was filled from `val_2010` /
# `val_2014`, which are not defined anywhere in the notebook (NameError on a
# fresh kernel), and no chart below reads it -- the rules encode `year` only.
aca_df = pd.DataFrame({'year': [2010]})
aca_effect_df = pd.DataFrame({'year': [2014]})

# color scheme for types plot: every per-type line is drawn in the same colour
# and told apart by legend shape instead.
type_colors = ['lightblue', 'lightblue', 'lightblue', 'lightblue']

title = alt.TitleParams('Percent of Disabled Americans Without Insurance', 
                        subtitle = 'The percent of disabled people without insurance has decreased by nearly 50% since the passage of the Affordable Care Act',
                        anchor = 'start',
                        dx = 45, 
                        fontSize = 22, 
                        subtitleFontSize = 14)

# Headline series: 'Any Disability', thick line, carries the chart title and
# the first legend.
all_lines = alt.Chart(disability_df, title = title).transform_filter(
    alt.datum.type == 'Any Disability'
).mark_line(
    strokeWidth = 5
).encode(
    x = alt.X('year:O', axis = alt.Axis(title = 'Year', labelFontSize = 12, titleFontSize = 15)), 
    y = alt.Y('pct_uninsured:Q', 
              axis = alt.Axis(format = '%', title = 'Percent Uninsured', labelFontSize = 12, titleFontSize = 15)), 
    color = alt.Color('type:N', scale = alt.Scale(range = ['#712ac7']), 
                      legend = alt.Legend(title = 'Type of Disability', titleFontSize = 15, 
                                          labelFontSize = 12, labelFontWeight = 'bold', 
                                          symbolStrokeWidth = 10, symbolSize = 200,
                                          orient = "none", legendX = 815, legendY = 0))
)

# Background series: every type except the four excluded below. The colour
# legend is suppressed; the shape legend is positioned manually under the
# headline legend.
type_lines = alt.Chart(disability_df).transform_filter(
    (alt.datum.type != 'Any Disability') & (alt.datum.type != 'No Disability') & 
    (alt.datum.type != 'Self-Care') & (alt.datum.type != 'Independent Living')
).mark_line(
    strokeWidth = 1.5
).encode(
    x = alt.X('year:O', axis = alt.Axis(title = 'Year')), 
    y = alt.Y('pct_uninsured:Q', axis = alt.Axis(format = '%', title = 'Percent Uninsured')), 
    color = alt.Color('type:N', scale = alt.Scale(range = type_colors), legend = None), 
    
    shape = alt.Shape('type:N', scale = alt.Scale(range = ['cross', 'circle','triangle-right', 'diamond']), 
                       legend = alt.Legend(title = '', labelFontSize = 12, labelOffset = 11,
                                           symbolFillColor = 'lightblue', symbolSize = 150,
                                           symbolStrokeColor = 'lightblue', orient = "none", 
                                           legendX = 819, legendY = 44)
                     )
)

aca_passage = alt.Chart(aca_df).mark_rule( 
    color = "#4254f5",
    size = 2.5
).encode(
    x = alt.X('year:O')
)

# Annotation texts have no x/y encoding; they are placed with the fixed pixel
# offsets dx/dy only. lineBreak is the literal sequence backslash-n.
aca_passage_text = alt.Chart(aca_df).mark_text(
    lineBreak = r'\n',
    text = r"2010: Passage of \nthe Affordable Care Act",
    align = 'left',
    fontSize = 13.5, 
    fontWeight = 'bold',
    dy = -40,
    dx = -225
)

aca_effect = alt.Chart(aca_effect_df).mark_rule(
    color = "#4254f5",
    size = 2.5
).encode(
    x = alt.X('year:O')
)

aca_effect_text = alt.Chart(aca_effect_df).mark_text(
    lineBreak = r'\n',
    text = r'2014: Most ACA \nProvisions Go Into Effect', 
    align = 'left',
    fontSize = 13.5, 
    fontWeight = 'bold',
    dy = 95,
    dx = 45
)

# Independent colour scales let `all_lines` and `type_lines` keep their own
# colour ranges instead of merging into one.
layers = alt.layer(
    type_lines, all_lines, aca_passage, aca_effect, aca_passage_text, aca_effect_text
).resolve_scale(
    color = 'independent'
).properties(
    width = 800, 
    height = 500
)

# Info box: a rectangle and a text mark drawn from a single empty datum and
# positioned in pixel values rather than data coordinates.
box = alt.Chart({'values':[{}]}
).mark_rect(
    stroke = 'lightblue',
    strokeWidth = 2.5,
    color = '#f2f3f5'
).encode(
    x = alt.value(0),
    x2 = alt.value(800),
    y = alt.value(0),
    y2 = alt.value(155))

text = alt.Chart({'values':[{}]}
).mark_text(
    align = "left", 
    baseline = "top",
    fontSize = 14.5, 
    lineBreak = r"\n"
).encode(
    x = alt.value(10),  
    y = alt.value(10), 
    text = alt.value(info_box_str)
)

text_box = alt.layer(box, text)

alt.vconcat(layers, text_box, padding = 0).configure_point(size = 90)