<h1>SI 649 Midterm Project</h1>
<b>Haley Johnson</b><br>
<p>Code to create interactive visualization</p>

In [2]:
import pandas as pd
import numpy as np 
import os
import altair as alt

In [3]:
# Turn off pandas' chained-assignment (SettingWithCopy) warning for this session.
pd.set_option("mode.chained_assignment", None)

In [4]:
# Load the raw CSV from the sibling data/ directory (relative path).
df = pd.read_csv(filepath_or_buffer = "../data/preventitive_care.csv")

<h2>Manipulate Data Frame</h2>

In [5]:
# Drop the footnote and URL metadata columns.
# errors='ignore' keeps this cell safe to re-run: `df` is reassigned here, so on a
# second execution the columns are already gone and a plain drop would raise KeyError.
df = df.drop(columns = ['FootnoteText', 'Url', 'FootnoteType'], errors = 'ignore')

In [6]:
# Keep only the 2020 'Yes' responses in the 'Prevention & Screenings' category.
# .copy() makes the result an independent frame, so the column assignments below
# write to it directly instead of to a slice of `df`.
preventitive_care = df[
    (df.Category == 'Prevention & Screenings')
    & (df.Year == 2020.0)
    & (df.Response == 'Yes')
].copy()

# Divide the estimate and its confidence bounds by 100 so they can be rendered with
# '%' format strings in the charts.
preventitive_care['Data_Value'] = preventitive_care['Data_Value'] / 100
preventitive_care['High_Confidence_Limit'] = preventitive_care['High_Confidence_Limit'] / 100
preventitive_care['Low_Confidence_Limit'] = preventitive_care['Low_Confidence_Limit'] / 100

# Distance from the estimate to the upper confidence bound.
preventitive_care['margin_of_error'] = preventitive_care['High_Confidence_Limit'] - preventitive_care['Data_Value']

In [7]:
# Short display labels for each full survey question; used on the chart axes.
# Any question not listed here keeps its original text.
SHORT_QUESTION_LABELS = {
    'Mammogram in the past 2 years among females 50 to 74 years of age': 'Mammograms',
    'Up-to-date cervical cancer screening among females 21 to 65 years of age': 'Cervical Cancer Screening',
    # Label spelling corrected from 'Colorectcal'.
    'Up-to-date colorectal cancer screening among adults 50 to 75 years of age': 'Colorectal Cancer Screening',
    'Routine check-up in the past year among adults 18 years of age or older': 'Routine Check Up',
    'Visited a dentist in the past year among adults 18 years of age or older': 'Dentist Visit',
    'Had a flu vaccine in the past 12 months among adults 18 years of age or older': 'Flu Vaccine',
}

# A single dict-based replace does all six whole-value substitutions in one pass.
preventitive_care['short_question'] = preventitive_care['Question'].replace(SHORT_QUESTION_LABELS)

<h2>Visualization</h2>

In [50]:
# Left-to-right column order for the data table drawn under the main chart.
sorting = ['Any Disability', 'Cognitive Disability', 'Hearing Disability', 'Mobility Disability',
           'Vision Disability', 'No Disability']


# Title and subtitle for the top chart (spelling of "Preventive" / "prevalence" corrected).
main_title = alt.TitleParams("Many Disabled People Aren't Up To Date on Important Preventive Care", 
                         subtitle = 'Population-adjusted prevalence of preventive care',
                         anchor = 'start',
                         dx = 45, 
                         fontSize = 22, 
                         subtitleFontSize = 16, 
                         subtitlePadding = 6)

# Dropdown that picks which Stratification1 group the top chart shows;
# it starts on 'Any Disability'.
strats = list(preventitive_care.Stratification1.unique())
strats_dropdown = alt.binding_select(options = strats, name = "Ability Type: ")
strats_select = alt.selection_single(fields = ['Stratification1'], init = {'Stratification1' : 'Any Disability'}, bind = strats_dropdown, empty = 'none')

# Mouseover selection of the nearest point; the data table below uses it to
# colour its cells.
coords = alt.selection_single(encodings = ['x', 'y'], on = 'mouseover', nearest = True, empty = 'none')


# Horizontal rule per care type spanning Low_Confidence_Limit..High_Confidence_Limit,
# filtered to the group chosen in the dropdown. This layer carries the axis styling.
lines = alt.Chart(preventitive_care, title = main_title).add_selection(
    strats_select
).mark_rule(
    size = 6, 
    color = '#4676f0'
).encode(
    alt.Y('short_question:N', axis = alt.Axis(title = 'Care Type', labelFontSize = 11.5, titleFontSize = 13.5)), 
    alt.X('High_Confidence_Limit:Q', scale = alt.Scale(domain = [0.35, 0.87], zero = False), 
          axis = alt.Axis(title = 'Percent Up To Date', format = '.0%', labelFontSize = 11.5, titleFontSize = 13.5)), 
    alt.X2('Low_Confidence_Limit:Q'),
    tooltip = [alt.Tooltip('Data_Value:Q', title = 'Percent Up To Date', format = '.2%'), 
               alt.Tooltip('margin_of_error:Q', title = 'Margin of Error', format = '.2%')]
).transform_filter(strats_select)

# Point at Data_Value on top of each rule. No axis styling is set here: the Y axis is
# shared with `lines` in the layer below, and a second labelFontSize (previously 100)
# conflicts with the 11.5 configured on that layer.
mean = alt.Chart(preventitive_care).add_selection(
    coords
).mark_point(
    filled = True,
    size = 125, 
    color = '#4676f0', 
    opacity = 1.0
).encode(
    y = alt.Y('short_question:N'), 
    x = alt.X('Data_Value:Q'), 
    tooltip = [alt.Tooltip('Data_Value:Q', title = 'Percent Up To Date', format = '.2%'), 
               alt.Tooltip('margin_of_error:Q', title = 'Margin of Error', format = '.2%')]
).transform_filter(strats_select)

# Rules and points layered into one 750x300 interactive chart.
top = alt.layer(lines, mean
).properties(
    width = 750, 
    height = 300
).interactive()


# Data table, background layer: one rect per (care type, group) cell, coloured by
# whether the cell matches the `coords` mouseover selection.
text_base = alt.Chart(preventitive_care).mark_rect(
    color = '#fff'
).encode(
    y = alt.Y('short_question:N', axis = alt.Axis(title = 'Care Type', labelFontSize = 10.5, titleFontSize = 13)),
    x = alt.X('Stratification1:N', sort = sorting, axis = alt.Axis(title = '', labelFontSize = 10.5)), 
    color = alt.condition(coords, alt.value('#7396f0'), alt.value('#f7f9fc'))
)

# Data table, text layer: the Data_Value of each cell formatted as a percentage.
text_marks = alt.Chart(preventitive_care).mark_text(
    baseline = 'middle'
).encode(
    y = alt.Y('short_question:N'), 
    x = alt.X('Stratification1:N', sort = sorting),
    text = alt.Text('Data_Value:Q', format = ".1%"),
)

bottom_title = alt.TitleParams('Data Table:', anchor = 'start', fontSize = 14, dx = 162, dy = -2)
bottom = alt.layer(text_base, text_marks).properties(width = 750, title = bottom_title)

# Chart on top, table underneath; keep X-axis labels horizontal.
alt.vconcat(top, bottom).configure_axisX(labelAngle = 0)



<h2>Other Version</h2>

In [14]:
# Columns averaged within each (disability type, question) group.
vals = ['Data_Value', 'High_Confidence_Limit', 'Low_Confidence_Limit']

# Exclude the 'Any Disability' rows, average `vals` per group, flatten the group keys
# back into columns, then add the distance from the mean estimate to the mean upper bound.
mean_by_type = (
    preventitive_care
    .loc[lambda frame: frame.Stratification1 != 'Any Disability']
    .groupby(['Stratification1', 'Question', 'short_question'])[vals]
    .mean()
    .reset_index()
    .assign(margin_of_error = lambda frame: frame['High_Confidence_Limit'] - frame['Data_Value'])
)

In [29]:
# Two-level flag used for colouring: 'No Disability' stays as is, every other
# Stratification1 value becomes 'Disabled'. One vectorised np.where over the whole
# column replaces the per-row apply, which called np.where on a single scalar each time.
mean_by_type['disabled_binary'] = np.where(
    mean_by_type['Stratification1'] == 'No Disability',
    'No Disability',
    'Disabled',
)


In [45]:
# Options for the care-type dropdown, and the display order of the disability types.
questions = list(mean_by_type.Question.unique())
sorting = ['Cognitive Disability', 'Hearing Disability', 'Mobility Disability', 
           'Vision Disability', 'No Disability']

# Mouseover selection of the nearest point; the data table in the next cell uses it
# to colour its cells.
coords = alt.selection_single(encodings = ['x', 'y'], on = 'mouseover', nearest = True, empty = 'none')


# Chart title (spelling of "Preventive" corrected).
title = alt.TitleParams("Many Disabled People Aren't Up To Date on Important Preventive Care", 
                        anchor = 'start', fontSize = 18, dx = 105)

# Dropdown-bound selection on Question, starting on the first question.
# clear is the boolean False (disable clearing); it was previously the string 'false',
# which is not a boolean.
# NOTE(review): 'dragover' is an unusual trigger; presumably chosen so ordinary clicks
# on marks do not change the dropdown-driven selection - confirm.
select_care_type = alt.selection_single(
    fields = ['Question'], 
    init = {'Question' : questions[0]}, 
    bind = alt.binding_select(options = questions, name = 'Select Preventive Care Type: '), 
    on = 'dragover', 
    clear = False
)

# Horizontal rule per disability type spanning Low_Confidence_Limit..High_Confidence_Limit
# for the selected question, coloured by disabled_binary.
lines = alt.Chart(mean_by_type, title = title).mark_rule(
    size = 6
).encode(
    alt.Y('Stratification1:N', sort = sorting,
          axis = alt.Axis(title = 'Type of Disability', titleFontSize = 13, labelFontSize = 10.5)
         ), 
    alt.X('High_Confidence_Limit:Q', 
          axis = alt.Axis(title = 'Percent Up To Date', titleFontSize = 13, labelFontSize = 11, format = '.0%')
          , 
          scale = alt.Scale(domain = [0.30, 0.9], zero = False)
         ),
    alt.X2('Low_Confidence_Limit:Q'),
    color = alt.Color('disabled_binary:N', legend = alt.Legend(title = 'Disability Status', 
                                                              titleFontSize = 15, labelFontSize = 12)
                     ),
    tooltip = [alt.Tooltip('Data_Value:Q', title = 'Percent Up To Date', format = '.1%'), 
               alt.Tooltip('margin_of_error:Q', title = 'Margin of Error', format = '.1%')]
).transform_filter(select_care_type)


# Point at Data_Value on each rule; this layer registers both selections.
mean = alt.Chart(mean_by_type).add_selection(select_care_type, coords).mark_point(
    filled = True,
    size = 125,  
    opacity = 1.0
).encode(
    y = alt.Y('Stratification1:N', sort = sorting), 
    x = alt.X('Data_Value:Q'), 
    color = alt.Color('disabled_binary:N'),
    tooltip = [alt.Tooltip('Data_Value:Q', title = 'Percent Up To Date', format = '.1%'), 
               alt.Tooltip('margin_of_error:Q', title = 'Margin of Error', format = '.1%')]
).transform_filter(select_care_type)


# NOTE(review): `zero` is not part of the layered chart below, and the
# 'Data_Value_Disabled_Mean' column is not created in the cells shown here.
# Add that column before layering this rule, or delete the definition.
zero = alt.Chart(mean_by_type).mark_rule(
    color = 'lightblue', 
    size = 2.5
).encode(
    x = alt.X('Data_Value_Disabled_Mean'),
    tooltip = alt.Tooltip('Data_Value_Disabled_Mean:Q', title = 'Average For Disabled People', format = '.0%')
).transform_filter(select_care_type)


# Rules and points layered into one 650x400 chart.
c = alt.layer(lines, mean
).properties(
    width = 650, 
    height = 400
)

In [46]:
# Data table, background layer: one rect per (care type, disability type) cell,
# coloured by whether it matches the `coords` mouseover selection.
text_base = (
    alt.Chart(mean_by_type)
    .mark_rect(color = '#fff')
    .encode(
        alt.Y(
            'short_question:N',
            axis = alt.Axis(title = 'Care Type', labelFontSize = 10.5, titleFontSize = 13),
        ),
        alt.X(
            'Stratification1:N',
            sort = sorting,
            axis = alt.Axis(title = '', labelFontSize = 10.5),
        ),
        color = alt.condition(coords, alt.value('#7396f0'), alt.value('#f7f9fc')),
    )
)

# Data table, text layer: each cell's Data_Value printed as a percentage.
text_marks = (
    alt.Chart(mean_by_type)
    .mark_text(baseline = 'middle')
    .encode(
        alt.Y('short_question:N'),
        alt.X('Stratification1:N', sort = sorting),
        alt.Text('Data_Value:Q', format = ".1%"),
    )
)

# Stack the text over the rects and title the table.
bottom_title = alt.TitleParams('Data Table:', anchor = 'start', fontSize = 14, dx = 162, dy = -2)
bottom = (text_base + text_marks).properties(width = 650, title = bottom_title)


In [47]:
# Chart on top, data table underneath (`&` is Altair's vertical-concat operator);
# keep X-axis labels horizontal.
(c & bottom).configure_axisX(labelAngle = 0)