<h1>SI 649 Midterm Project</h1>
<b>Haley Johnson</b><br>
<p>Code to create interactive visualization</p>

In [158]:
import pandas as pd
import numpy as np 
import os
import altair as alt

In [159]:
# Turn off pandas' chained-assignment check (SettingWithCopyWarning) for the whole notebook.
pd.set_option('mode.chained_assignment', None)

In [160]:
# Read the preventive-care CSV into `df`; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer = "../data/preventitive_care.csv")

<h2>Manipulate Data Frame</h2>

In [161]:
# Drop the footnote and URL columns.
# errors = 'ignore' keeps this cell safe to re-run: without it, a second run
# raises KeyError because `df` no longer contains these columns.
df = df.drop(columns = ['FootnoteText', 'Url', 'FootnoteType'], errors = 'ignore')

In [207]:
# Keep only the 2020 'Yes' responses in the 'Prevention & Screenings' category,
# then derive the plotting columns.
#
# .assign() returns a new frame, so nothing is written into a slice of `df`
# and the result does not depend on the chained-assignment warning being
# silenced elsewhere in the notebook.
preventitive_care = (
    df[(df.Category == 'Prevention & Screenings') & (df.Year == 2020.0) & (df.Response == 'Yes')]
    .assign(
        # Divide by 100 so the charts' '%' number formats display these values correctly.
        Data_Value = lambda d: d.Data_Value / 100,
        High_Confidence_Limit = lambda d: d.High_Confidence_Limit / 100,
        Low_Confidence_Limit = lambda d: d.Low_Confidence_Limit / 100,
    )
    # Computed from the already-rescaled columns: upper limit minus the point estimate.
    .assign(margin_of_error = lambda d: d.High_Confidence_Limit - d.Data_Value)
)

In [208]:
# Map each full survey question to the short label used for chart titles and
# axis labels. Questions that are not listed keep their original wording.
# (The colorectal label was previously misspelled 'Colorectcal'.)
preventitive_care['short_question'] = preventitive_care['Question'].replace({
    'Mammogram in the past 2 years among females 50 to 74 years of age': 'Mammograms',
    'Up-to-date cervical cancer screening among females 21 to 65 years of age': 'Cervical Cancer Screening',
    'Up-to-date colorectal cancer screening among adults 50 to 75 years of age': 'Colorectal Cancer Screening',
    'Routine check-up in the past year among adults 18 years of age or older': 'Routine Check Up',
    'Visited a dentist in the past year among adults 18 years of age or older': 'Dentist Visit',
    'Had a flu vaccine in the past 12 months among adults 18 years of age or older': 'Flu Vaccine',
})

In [209]:
# Mean Data_Value per question over the aggregate 'Any Disability' rows.
# This is the reference value every other stratification is compared against.
disabled_mean = (
    preventitive_care[preventitive_care.Stratification1 == 'Any Disability']
    .groupby('Question')['Data_Value']
    .mean()
    .reset_index()
)

# Per-stratification means of the plotted columns, for every stratification
# other than the 'Any Disability' aggregate.
vals = ['Data_Value', 'High_Confidence_Limit', 'Low_Confidence_Limit', 'margin_of_error']
mean_by_type = (
    preventitive_care[preventitive_care.Stratification1 != 'Any Disability']
    .groupby(['Stratification1', 'Question', 'short_question'])[vals]
    .mean()
    .reset_index()
)

# Attach the reference mean to every row; the suffix renames the incoming
# Data_Value column to Data_Value_Disabled_Mean.
mean_by_type = mean_by_type.merge(disabled_mean, on = 'Question', suffixes = ('', '_Disabled_Mean'))

# Signed gap between each group's value and the 'Any Disability' mean.
# The previous expression subtracted the reference mean from itself and was
# therefore always 0.
mean_by_type['annotation_indicator'] = mean_by_type['Data_Value'] - mean_by_type['Data_Value_Disabled_Mean']

<h2>Visualization</h2>

In [210]:
# Short question labels; one sub-chart is later built per label.
questions = mean_by_type.short_question.unique().tolist()
zero_df = pd.DataFrame({'x': [0]})
# Display order of the disability categories on the y axis.
sorting = ['Cognitive Disability', 'Hearing Disability', 'Mobility Disability', 'Vision Disability', 'No Disability']

# Confidence interval: a horizontal rule from the low to the high limit.
lines = (
    alt.Chart(mean_by_type)
    .mark_rule(size = 3)
    .encode(
        y = alt.Y(
            'Stratification1:N',
            sort = sorting,
            axis = alt.Axis(title = 'Type of Disability', titleFontSize = 13, labelFontSize = 10.5),
        ),
        x = alt.X('High_Confidence_Limit:Q'),
        x2 = alt.X2('Low_Confidence_Limit:Q'),
        color = alt.Color(
            'Stratification1:N',
            legend = alt.Legend(title = 'Disability Status', titleFontSize = 15, labelFontSize = 12),
        ),
        tooltip = [
            alt.Tooltip('Data_Value:Q', title = 'Percent Up To Date', format = '.1%'),
            alt.Tooltip('margin_of_error:Q', title = 'Margin of Error', format = '.1%'),
        ],
    )
)

# Point estimate: a filled dot at Data_Value for each disability category.
mean = (
    alt.Chart(mean_by_type)
    .mark_point(filled = True, size = 50)
    .encode(
        alt.Y('Stratification1:N', sort = sorting),
        alt.X('Data_Value:Q'),
        alt.Color('Stratification1:N'),
    )
)

# Vertical reference rule at the 'Any Disability' mean.
zero = (
    alt.Chart(mean_by_type)
    .mark_rule(color = 'lightblue', size = 2.5)
    .encode(
        alt.X('Data_Value_Disabled_Mean'),
        tooltip = alt.Tooltip('Data_Value_Disabled_Mean:Q', title = 'Average For Disabled People', format = '.0%'),
    )
)

# Fixed text label positioned at the reference rule's x value.
mean_annotation_text = (
    alt.Chart(mean_by_type)
    .mark_text(
        text = 'Average For Disabled People',
        align = 'left',
        fontSize = 9.5,
        dx = 9,
        dy = 10,
    )
    .encode(alt.X('Data_Value_Disabled_Mean:Q'))
)


In [211]:
# For every short question label, narrow each base layer to that question's
# rows. Each entry is (label, points, interval rule, reference rule, annotation).
sub_charts_components = []

for question in questions:
    # `question` is a short_question value, so every layer must filter on the
    # short_question field. The annotation layer previously compared it with
    # the full-length Question text, which never matched, so the label was
    # never drawn.
    matches_question = alt.datum.short_question == question
    sub_chart_mean = mean.transform_filter(matches_question)
    sub_chart_line = lines.transform_filter(matches_question)
    sub_chart_zero = zero.transform_filter(matches_question)
    sub_chart_annotation = mean_annotation_text.transform_filter(matches_question)
    sub_charts_components.append((question, sub_chart_mean, sub_chart_line, sub_chart_zero, sub_chart_annotation))

In [212]:
# Hovering highlights the nearest disability category by colour; everything
# outside the selection is drawn in gray.
selection_1 = alt.selection_single(on = 'mouseover', nearest = True, encodings = ['color'])
color_condition = alt.condition(selection_1, alt.Color("Stratification1:N"), alt.value('gray'))


# Hand-picked x-axis range for each sub-chart, in the same order as
# sub_charts_components.
domains = [[0.35, 0.55], [0.6, 0.9], [0.6, 0.9], [0.7, 0.9], [0.6, 0.9], [0.4, 0.8]]
sub_charts = []

for i, (q, m, l, z, a) in enumerate(sub_charts_components):
    # Only the sub-charts at positions 4 and 5 carry an x-axis title.
    title = 'Percent Up To Date' if i in (4, 5) else ''
    domain = domains[i]

    # The interval rule owns the selection and the customised x scale/axis.
    l = l.add_selection(selection_1).encode(
        x = alt.X(
            'High_Confidence_Limit:Q',
            scale = alt.Scale(domain = domain, zero = False, bins = alt.ScaleBins(step = 0.02)),
            axis = alt.Axis(title = title, titleFontSize = 13, labelFontSize = 11, format = '.0%'),
        ),
        color = color_condition,
    )
    m = m.encode(color = color_condition)

    sub_chart = alt.layer(m, l, z, a).properties(title = q)
    sub_charts.append(sub_chart)

In [218]:
# Arrange the six sub-charts in three rows of two.
r1 = alt.hconcat(sub_charts[0], sub_charts[1], spacing = 35)
r2 = alt.hconcat(sub_charts[2], sub_charts[3], spacing = 35)
r3 = alt.hconcat(sub_charts[4], sub_charts[5], spacing = 35)


# Stack the rows under a shared headline. The displayed title and subtitle
# previously contained misspellings ('Preventitive', 'Populaton', 'prevelance').
top = alt.vconcat(r1, r2, r3).properties(
    title = alt.TitleParams("Many Disabled People Aren't Up To Date on Important Preventive Care",
    subtitle = 'Population adjusted prevalence of care',
    fontSize = 26, subtitleFontSize = 16))

In [221]:
# Hover selection keyed on a cell's x/y position; the nearest cell is picked.
coords = alt.selection_single(on = 'mouseover', nearest = True, encodings = ['x', 'y'])

# Grid of rectangles (care type x disability type). The selected cell is
# coloured by its care type; all others are light gray.
base = (
    alt.Chart(mean_by_type)
    .add_selection(coords)
    .mark_rect(color = '#fff')
    .encode(
        alt.Y('short_question:N'),
        alt.X('Stratification1:N'),
        color = alt.condition(coords, alt.Color('short_question:N'), alt.value('lightgray')),
    )
)

# The value for each cell, printed as a whole-number percentage.
text = (
    alt.Chart(mean_by_type)
    .mark_text(baseline = 'middle')
    .encode(
        alt.Y('short_question:N'),
        alt.X('Stratification1:N'),
        alt.Text('Data_Value:Q', format=".0%"),
    )
)

bottom = alt.layer(base, text).properties(width = 800)

In [222]:
# Display the sub-chart grid (`top`) stacked above the data table (`bottom`).
# Left as the cell's last expression so the notebook renders the chart.
alt.vconcat(top, bottom)


In [285]:
# Column order for the data table, with the aggregate 'Any Disability' first.
sorting = ['Any Disability', 'Cognitive Disability', 'Hearing Disability', 'Mobility Disability',
           'Vision Disability', 'No Disability']


# Headline for the chart. The displayed strings previously contained
# misspellings ('Preventitive', 'prevelance', 'preventitive').
main_title = alt.TitleParams("Many Disabled People Aren't Up To Date on Important Preventive Care",
                         subtitle = 'Population-adjusted prevalence of preventive care',
                         anchor = 'start',
                         dx = 45,
                         fontSize = 22,
                         subtitleFontSize = 14,
                         subtitlePadding = 6)

# Dropdown that picks which stratification the top chart shows; it starts on
# 'Any Disability'.
strats = list(preventitive_care.Stratification1.unique())
strats_dropdown = alt.binding_select(options = strats, name = "Disability Type: ")
strats_select = alt.selection_single(fields = ['Stratification1'], init = {'Stratification1' : 'Any Disability'}, bind = strats_dropdown, empty = 'none')

# Hover selection on the points; it also drives the highlighted cell in the
# data table below.
coords = alt.selection_single(encodings = ['x', 'y'], on = 'mouseover', nearest = True, empty = 'none')


# Confidence interval for each care type, filtered to the selected stratification.
# The axis titles were previously swapped: the care-type (y) axis was titled
# 'Percent Up To Date' and the percentage (x) axis 'Care Type'.
l = alt.Chart(preventitive_care, title = main_title).add_selection(
    strats_select
).mark_rule(
    size = 6,
    color = '#4676f0'
).encode(
    alt.Y('short_question:N', axis = alt.Axis(title = 'Care Type', labelFontSize = 11.5, titleFontSize = 13.5)),
    alt.X('High_Confidence_Limit:Q', scale = alt.Scale(domain = [0.35, 0.85], zero = False),
          axis = alt.Axis(title = 'Percent Up To Date', format = '.0%', labelFontSize = 11.5, titleFontSize = 13.5)),
    alt.X2('Low_Confidence_Limit:Q'),
    tooltip = [alt.Tooltip('Data_Value:Q', title = 'Percent Up To Date', format = '.2%'),
               alt.Tooltip('margin_of_error:Q', title = 'Margin of Error', format = '.2%')]
).transform_filter(strats_select)

# Point estimate for each care type, filtered the same way.
# This layer shares its y axis with the rule layer above, so its label size is
# kept at 11.5 to agree with it (it was 100, a conflicting value).
m = alt.Chart(preventitive_care).add_selection(
    coords
).mark_point(
    filled = True,
    size = 125,
    color = '#4676f0'
).encode(
    y = alt.Y('short_question:N', axis = alt.Axis(labelFontSize = 11.5)),
    x = alt.X('Data_Value:Q'),
    tooltip = [alt.Tooltip('Data_Value:Q', title = 'Percent Up To Date', format = '.2%'),
               alt.Tooltip('margin_of_error:Q', title = 'Margin of Error', format = '.2%')]
).transform_filter(strats_select)

top = alt.layer(l, m
).properties(
    width = 600,
    height = 300
).interactive()


# Data table: one cell per (care type, stratification). The cell picked by the
# hover selection is highlighted; the rest are light gray.
base = alt.Chart(preventitive_care).mark_rect(
    color = '#fff'
).encode(
    y = alt.Y('short_question:N', axis = alt.Axis(title = 'Care Type', labelFontSize = 10.5, titleFontSize = 13)),
    x = alt.X('Stratification1:N', sort = sorting, axis = alt.Axis(title = '', labelFontSize = 10.5)),
    color = alt.condition(coords, alt.value('#7396f0'), alt.value('#efefef'))
)

# The value printed inside each table cell.
text = alt.Chart(preventitive_care).mark_text(
    baseline = 'middle'
).encode(
    y = alt.Y('short_question:N'),
    x = alt.X('Stratification1:N', sort = sorting),
    text = alt.Text('Data_Value:Q', format = ".1%"),
)

bottom_title = alt.TitleParams('Data Table:', anchor = 'start', fontSize = 13, dx = 162)
bottom = alt.layer(base, text).properties(width = 600, title = bottom_title)

# Left as the cell's last expression so the notebook renders the chart.
alt.vconcat(top, bottom).configure_axisX(labelAngle = 0)

