In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    read_loinc_df,
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    read_full_demographics_df,
    get_visualization_subtitle,
    apply_theme,
)
from web import for_website

alt.data_transformers.disable_max_rows(); # Lift Altair's default 5000-row limit on chart data

In [None]:
# Data release identifier; passed to get_visualization_subtitle() for every chart subtitle below.
DATA_RELEASE = "2020-06-25"

In [None]:
# Load the demographics table through the project helper imported from utils_1_1.
df = read_full_demographics_df()
# The trailing ";" suppresses the cell's rich output, so this preview is not displayed.
df.head();

In [None]:
# The codes -99 and -999 stand for missing values; map both to NaN in a single call
df = df.replace([-99, -999], np.nan)

In [None]:
# Some race categories may have zero patients after summing across all sites.
# Select the count column *before* aggregating so that text columns are never summed
# (pandas >= 2.0 no longer drops them silently from groupby().sum()).
zero_patients_by_race_df = (df.groupby("race")["num_patients_all"].sum() == 0).to_frame()
# The column is already boolean, so it can be used directly as the row mask.
races_with_zero_patients = zero_patients_by_race_df.loc[zero_patients_by_race_df["num_patients_all"]].index.tolist()
df = df.loc[~df["race"].isin(races_with_zero_patients)]
df.head();

In [None]:
# Human-readable mappings
# Column name -> display title (used for axis titles, chart titles and dropdown names).
HR_COLNAME_MAP = {
    "sex": "Sex",
    "race": "Race",
    "age_group": "Age Group",
    "country": "Country"
}

# Severity code -> display label.
HR_SEVERITY_MAP = {
    "all": "All",
    "ever_severe": "Ever Severe",
    "never_severe": "Never Severe"
}
# Raw sex code -> display label.
HR_SEX_MAP = {
    "all": "All",
    "male": "Male",
    "female": "Female",
    "other": "Other",
}
# Raw age-group code -> display label. Insertion order matters: it becomes
# AGE_GROUP_VALUES, which is used as sort order and as a colour-scale domain.
HR_AGE_GROUP_MAP = {
    "all": "All",
    "00to02": "0 - 2",
    "03to05": "3 - 5",
    "06to11": "6 - 11",
    "12to17": "12 - 17",
    "18to25": "18 - 25",
    "26to49": "26 - 49",
    "50to69": "50 - 69",
    "70to79": "70 - 79",
    "80plus": "80 +",
    "other": "Other",
}
# Raw race code -> display label.
HR_RACE_MAP = {
    'all': 'All',
    'american_indian': 'American Indian or Alaska Native',
    'asian': 'Asian',
    'black': 'Black',
    'hawaiian_pacific_islander': 'Native Hawaiian or Other Pacific Islander',
    'hispanic_latino': 'Hispanic or Latino',
    'white': 'White',
    'other': 'Other',
}
# Count column name -> severity label (applied to the "variable" column after melting).
HR_SEVERITY_COLNAME_MAP = {
    "num_patients_all": "All",
    "num_patients_ever_severe": "Ever Severe",
    "num_patients_never_severe": "Never Severe"
}

# Display labels in dict order; used as dropdown options and facet/axis sort orders.
SEVERITY_VALUES = list(HR_SEVERITY_MAP.values())
SEX_VALUES = list(HR_SEX_MAP.values())
AGE_GROUP_VALUES = list(HR_AGE_GROUP_MAP.values())
RACE_VALUES = list(HR_RACE_MAP.values())
# Countries are taken from the data, in order of first appearance in df.
COUNTRY_VALUES = df["country"].unique().tolist()

In [None]:
# Swap the raw category codes for their display labels, one column at a time
for column, label_map in (
    ("sex", HR_SEX_MAP),
    ("race", HR_RACE_MAP),
    ("age_group", HR_AGE_GROUP_MAP),
):
    df[column] = df[column].replace(label_map)

In [None]:
# Never-severe patients are the total minus the ever-severe ones
df["num_patients_never_severe"] = df["num_patients_all"].sub(df["num_patients_ever_severe"])

In [None]:
# Total the numeric columns for every sex x race x age-group combination (countries collapsed).
# numeric_only=True makes the pandas 1.x behaviour explicit: text columns are left out of the
# sum instead of being dropped implicitly (1.x) or concatenated as strings (>= 2.0).
overall_df = df.groupby(["sex", "race", "age_group"]).sum(numeric_only=True).reset_index()
overall_df.head();

In [None]:
# Same totals as overall_df, but kept per country; "color" is part of the key so it survives
# the aggregation. numeric_only=True keeps remaining text columns out of the sum on every
# pandas version.
country_df = df.groupby(["country", "color", "sex", "race", "age_group"]).sum(numeric_only=True).reset_index()
country_df.head();

In [None]:
# Country -> colour lookup built from the per-country rows (a later row for the same country wins)
country_color_map = dict(zip(country_df["country"], country_df["color"]))

In [None]:
def compute_ci(df):
    """Add the ever-severe proportion and its 95% confidence interval to ``df``.

    The interval is the normal-approximation interval
    ``p +/- 1.96 * sqrt(p * (1 - p) / n)`` with
    ``p = num_patients_ever_severe / num_patients_all`` and ``n = num_patients_all``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``num_patients_ever_severe`` and ``num_patients_all``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in (it is modified in place), with these columns
        added or overwritten: ``p``, ``n``, ``ci_95_lower``, ``ci_95_upper``, ``np``,
        ``n1minusp``, ``p_x100``, ``ci_95_lower_x100``, ``ci_95_upper_x100``.
        Rows whose ``n`` is zero or missing get NaN interval bounds.
    """
    df["p"] = df["num_patients_ever_severe"] / df["num_patients_all"]
    df["n"] = df["num_patients_all"]

    # Vectorised half-width of the interval. Division by n == 0 and square roots of invalid
    # values simply produce NaN here, so no per-row exception handling is needed; errstate
    # only silences the corresponding NumPy warnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        half_width = 1.96 * np.sqrt((df["p"] * (1 - df["p"])) / df["n"])

    df["ci_95_lower"] = df["p"] - half_width
    df["ci_95_upper"] = df["p"] + half_width

    # n*p and n*(1-p): expected counts of severe / non-severe patients.
    df["np"] = df["n"] * df["p"]
    df["n1minusp"] = df["n"] * (1 - df["p"])

    # Percentage versions, clipped to the valid range [0, 100].
    for source, target in (
        ("p", "p_x100"),
        ("ci_95_lower", "ci_95_lower_x100"),
        ("ci_95_upper", "ci_95_upper_x100"),
    ):
        df[target] = (df[source] * 100).clip(lower=0, upper=100)

    return df

In [None]:
# Attach p, n and the 95% CI columns to the raw, overall and per-country frames (in that order)
df, overall_df, country_df = [compute_ci(frame) for frame in (df, overall_df, country_df)]

In [None]:
# Preview of the aggregated frame; the trailing ";" suppresses the cell output.
overall_df.head();

# Percentage Severe by Race and Sex, points with error bars, age group dropdown

In [None]:
# Top: % severe per sex (x) as points with 95% CI error bars, one column per race.
# Bottom: patient counts per sex, coloured by country, in matching race columns.
# An age-group dropdown filters both parts.
chart = alt.Chart(overall_df)

column_width = 105

# Dropdown bound to a single-value selection on "age_group"; starts on "All".
age_dropdown = alt.binding_select(options=AGE_GROUP_VALUES)
age_selection = alt.selection_single(fields=["age_group"], bind=age_dropdown, name="Age Group", init={"age_group": "All"})

# Fixed country -> colour pairing taken from country_color_map.
country_color_scale = alt.Scale(domain=list(country_color_map.keys()), range=list(country_color_map.values())) 

filtered_chart = chart.transform_filter(
    age_selection
)

# Tooltip fields shared by all layers of this chart.
tooltip = [
    alt.Tooltip("race", title="Race"),
    alt.Tooltip("sex", title="Sex"),
    alt.Tooltip("age_group", title="Age Group"),
    alt.Tooltip("ci_95_lower_x100", title="Percentage Severe, 95% CI Lower"),
    alt.Tooltip("p_x100", title="Percentage Severe"),
    alt.Tooltip("ci_95_upper_x100", title="Percentage Severe, 95% CI Upper"),
    alt.Tooltip("num_patients_all", title="Number of Patients"),
    alt.Tooltip("num_patients_ever_severe", title="Number of Patients Ever Severe"),
    alt.Tooltip("num_patients_never_severe", title="Number of Patients Never Severe"),
]

# Despite its name, "top_bar" is the point layer (one point per sex, shape encodes sex).
top_bar = filtered_chart.mark_point(filled=True, size=90, color="gray").encode(
    x=alt.X("sex:N", axis=alt.Axis(title=None, labels=False, ticks=False)),
    shape=alt.Shape("sex:N", legend=alt.Legend(title="Sex", orient="bottom")),
    y=alt.Y("p_x100:Q", axis=alt.Axis(title="% Severe")),
    tooltip=tooltip
).properties(width=column_width,height=300)


# Error bars spanning the clipped lower/upper CI percentages.
top_errorbar = filtered_chart.mark_errorbar().encode(
    x=alt.X("sex:N", axis=alt.Axis(title="Sex")),
    y=alt.Y("ci_95_upper_x100:Q", title=""), 
    y2=alt.Y2("ci_95_lower_x100:Q", title=""),
    tooltip=tooltip
)

# Create a bottom bar plot to show country counts
bottom_bar = alt.Chart(country_df).transform_filter(
    age_selection
).mark_bar().encode(
    x=alt.X("sex:N", axis=alt.Axis(title="Sex")),
    color=alt.Color("country:N", legend=alt.Legend(title="Country", orient="bottom"), scale=country_color_scale),
    y=alt.Y("n:Q", axis=alt.Axis(title="# of patients")),
    tooltip=tooltip + [
        alt.Tooltip("country", title="Country"),
    ]
).properties(width=column_width,height=150)

# Facet both parts by race, using the same column order (RACE_VALUES).
top_chart = alt.layer(top_bar, top_errorbar).facet(
    column=alt.Column(
        "race:N",
        sort=RACE_VALUES,
        header=alt.Header(title=None)
    ),
)

# The bottom facet hides its column labels; the top facet already shows them.
bottom_chart = bottom_bar.facet(
    column=alt.Column(
        "race:N",
        sort=RACE_VALUES,
        header=alt.Header(title=None, labels=False)
    ),
)



top_and_bottom_chart = alt.vconcat(top_chart, bottom_chart, spacing=10).properties(title={
    "text": ["Percentage Severe by Race and Sex"], 
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
})

# The selection is added once, on the outermost chart, so both parts share it.
themed_top_and_bottom_chart = apply_theme(top_and_bottom_chart).configure_mark(
    color='orange'
).add_selection(
    age_selection
)

themed_top_and_bottom_chart

# Percentage Severe by Race and Sex, bars with error bars, age group dropdown

In [None]:
# Same layout as the previous chart, but the % severe layer is drawn as silver bars
# instead of points. An age-group dropdown filters both the top and bottom parts.
chart = alt.Chart(overall_df)

column_width = 105

# Dropdown bound to a single-value selection on "age_group"; starts on "All".
age_dropdown = alt.binding_select(options=AGE_GROUP_VALUES)
age_selection = alt.selection_single(fields=["age_group"], bind=age_dropdown, name="Age Group", init={"age_group": "All"})

# Fixed country -> colour pairing taken from country_color_map.
country_color_scale = alt.Scale(domain=list(country_color_map.keys()), range=list(country_color_map.values())) 

filtered_chart = chart.transform_filter(
    age_selection
)

# Tooltip fields shared by all layers of this chart.
tooltip = [
    alt.Tooltip("race", title="Race"),
    alt.Tooltip("sex", title="Sex"),
    alt.Tooltip("age_group", title="Age Group"),
    alt.Tooltip("ci_95_lower_x100", title="Percentage Severe, 95% CI Lower"),
    alt.Tooltip("p_x100", title="Percentage Severe"),
    alt.Tooltip("ci_95_upper_x100", title="Percentage Severe, 95% CI Upper"),
    alt.Tooltip("num_patients_all", title="Number of Patients"),
    alt.Tooltip("num_patients_ever_severe", title="Number of Patients Ever Severe"),
    alt.Tooltip("num_patients_never_severe", title="Number of Patients Never Severe"),
]

# % severe per sex as a bar.
top_bar = filtered_chart.mark_bar(color="silver").encode(
    x=alt.X("sex:N", axis=alt.Axis(title=None, ticks=False, labels=False)),
    y=alt.Y("p_x100:Q", axis=alt.Axis(title="% Severe")),
    tooltip=tooltip
).properties(width=column_width,height=300)


# Error bars spanning the clipped lower/upper CI percentages.
top_errorbar = filtered_chart.mark_errorbar().encode(
    x=alt.X("sex:N", axis=alt.Axis(title="Sex")),
    y=alt.Y("ci_95_upper_x100:Q", title=""), 
    y2=alt.Y2("ci_95_lower_x100:Q", title=""),
    tooltip=tooltip
)

# Create a bottom bar plot to show country counts
bottom_bar = alt.Chart(country_df).transform_filter(
    age_selection
).mark_bar().encode(
    x=alt.X("sex:N", axis=alt.Axis(title="Sex")),
    color=alt.Color("country:N", legend=alt.Legend(title="Country", orient="bottom"), scale=country_color_scale),
    y=alt.Y("n:Q", axis=alt.Axis(title="# of patients")),
    tooltip=tooltip + [
        alt.Tooltip("country", title="Country"),
    ]
).properties(width=column_width,height=150)

# Facet both parts by race, using the same column order (RACE_VALUES).
top_chart = alt.layer(top_bar, top_errorbar).facet(
    column=alt.Column(
        "race:N",
        sort=RACE_VALUES,
        header=alt.Header(title=None)
    ),
)

# The bottom facet hides its column labels; the top facet already shows them.
bottom_chart = bottom_bar.facet(
    column=alt.Column(
        "race:N",
        sort=RACE_VALUES,
        header=alt.Header(title=None, labels=False)
    ),
)



top_and_bottom_chart = alt.vconcat(top_chart, bottom_chart, spacing=10).properties(title={
    "text": ["Percentage Severe by Race and Sex"], 
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
})

# The selection is added once, on the outermost chart, so both parts share it.
themed_top_and_bottom_chart = apply_theme(top_and_bottom_chart).configure_mark(
    color='orange'
).add_selection(
    age_selection
)

themed_top_and_bottom_chart

In [None]:
def create_plot_var1_colfacet_var2_x_var3_dropdown(var1, var2, var3, column_width=100, tick_size=15):
    """Build a "% severe" chart with three levels of stratification.

    * variable 1: column facet
    * variable 2: x-axis nominal field within each column
    * variable 3: dropdown selection to filter the data

    The top part shows the 95% CI as a silver bar with the point estimate as a black tick
    (data: ``overall_df``); the bottom part shows patient counts coloured by country
    (data: ``country_df``). Both parts are filtered by the same dropdown.

    Parameters
    ----------
    var1, var2, var3 : dict
        Plot-variable descriptions with the keys ``"colname"`` (a key of ``HR_COLNAME_MAP``),
        ``"values"`` (ordered display values) and ``"dtype"`` (Altair type code such as ``"N"``).
        ``var1["values"]`` is the facet sort order and ``var3["values"]`` the dropdown options.
        The dicts are only read; they are not modified.
    column_width : int
        Width of each facet column.
    tick_size : int
        Size used for both the CI bar and the point-estimate tick.

    Returns
    -------
    The themed, vertically concatenated Altair chart with the dropdown selection attached.

    Notes
    -----
    Reads the notebook globals ``overall_df``, ``country_df``, ``country_color_map``,
    ``HR_COLNAME_MAP`` and ``DATA_RELEASE``.
    """
    # Resolve display titles locally rather than writing a "title" key into the caller's
    # dicts, which are shared between calls.
    var1_title = HR_COLNAME_MAP[var1["colname"]]
    var2_title = HR_COLNAME_MAP[var2["colname"]]
    var3_title = HR_COLNAME_MAP[var3["colname"]]

    chart = alt.Chart(overall_df)

    # Dropdown bound to a single-value selection on var3; starts on "All".
    var3_dropdown = alt.binding_select(options=var3["values"])
    var3_selection = alt.selection_single(fields=[var3["colname"]], bind=var3_dropdown, name=var3_title, init={var3["colname"]: "All"})

    country_color_scale = alt.Scale(domain=list(country_color_map.keys()), range=list(country_color_map.values()))

    filtered_chart = chart.transform_filter(
        var3_selection
    )

    # The tooltip always lists all three demographic fields, whatever their role in the plot.
    tooltip = [
        alt.Tooltip("race", title="Race"),
        alt.Tooltip("sex", title="Sex"),
        alt.Tooltip("age_group", title="Age Group"),
        alt.Tooltip("ci_95_lower_x100", title="Percentage Severe, 95% CI Lower"),
        alt.Tooltip("p_x100", title="Percentage Severe"),
        alt.Tooltip("ci_95_upper_x100", title="Percentage Severe, 95% CI Upper"),
        alt.Tooltip("num_patients_all", title="Number of Patients"),
        alt.Tooltip("num_patients_ever_severe", title="Number of Patients Ever Severe"),
        alt.Tooltip("num_patients_never_severe", title="Number of Patients Never Severe"),
    ]

    x_field = f"{var2['colname']}:{var2['dtype']}"
    facet_field = f"{var1['colname']}:{var1['dtype']}"

    # Point estimate: a black tick at p_x100 (named "top_bar" for parity with the other charts).
    top_bar = filtered_chart.mark_tick(color="black", size=tick_size, thickness=2).encode(
        x=alt.X(x_field, axis=alt.Axis(title=None, ticks=False, labels=False)),
        y=alt.Y("p_x100:Q", axis=alt.Axis(title="% Severe")),
        tooltip=tooltip
    ).properties(width=column_width, height=300)

    # Confidence interval: a silver bar from the lower to the upper CI bound.
    top_errorbar = filtered_chart.mark_bar(color="silver", size=tick_size).encode(
        x=alt.X(x_field, axis=alt.Axis(title=var2_title)),
        y=alt.Y("ci_95_upper_x100:Q", title=""),
        y2=alt.Y2("ci_95_lower_x100:Q", title=""),
        tooltip=tooltip
    ).properties(width=column_width, height=300)

    # Create a bottom bar plot to show country counts
    bottom_bar = alt.Chart(country_df).transform_filter(
        var3_selection
    ).mark_bar().encode(
        x=alt.X(x_field, axis=alt.Axis(title=var2_title)),
        color=alt.Color("country:N", legend=alt.Legend(title="Country", orient="bottom"), scale=country_color_scale),
        y=alt.Y("n:Q", axis=alt.Axis(title="# of patients")),
        tooltip=tooltip + [
            alt.Tooltip("country", title="Country"),
        ]
    ).properties(width=column_width, height=150)

    # CI bar first, tick second, so the tick is drawn on top of the bar.
    top_chart = alt.layer(top_errorbar, top_bar).facet(
        column=alt.Column(
            facet_field,
            sort=var1['values'],
            header=alt.Header(title=None)
        ),
    )

    # The bottom facet hides its column labels; the top facet already shows them.
    bottom_chart = bottom_bar.facet(
        column=alt.Column(
            facet_field,
            sort=var1['values'],
            header=alt.Header(title=None, labels=False)
        ),
    )

    top_and_bottom_chart = alt.vconcat(top_chart, bottom_chart, spacing=10).properties(title={
        "text": [f"Percentage Severe by {var1_title} and {var2_title}"], 
        "dx": 50,
        "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
        "subtitleColor": "gray",
        "anchor": "middle",
    })

    themed_top_and_bottom_chart = apply_theme(top_and_bottom_chart).configure_mark(
        color='orange'
    ).add_selection(
        var3_selection
    )

    return themed_top_and_bottom_chart

In [None]:
# Descriptions of the three stratification variables accepted by
# create_plot_var1_colfacet_var2_x_var3_dropdown: column name, ordered display values, Altair type.
age_group_plot_var = dict(colname="age_group", values=AGE_GROUP_VALUES, dtype="N")

race_plot_var = dict(colname="race", values=RACE_VALUES, dtype="N")

sex_plot_var = dict(colname="sex", values=SEX_VALUES, dtype="N")

In [None]:
# Columns: age group | x-axis: race | dropdown: sex
create_plot_var1_colfacet_var2_x_var3_dropdown(
    var1=age_group_plot_var,
    var2=race_plot_var,
    var3=sex_plot_var,
    column_width=80,
    tick_size=10,
)

In [None]:
# Columns: race | x-axis: age group | dropdown: sex
create_plot_var1_colfacet_var2_x_var3_dropdown(
    var1=race_plot_var,
    var2=age_group_plot_var,
    var3=sex_plot_var,
    column_width=150,
    tick_size=10,
)

In [None]:
# Columns: age group | x-axis: sex | dropdown: race
create_plot_var1_colfacet_var2_x_var3_dropdown(
    var1=age_group_plot_var,
    var2=sex_plot_var,
    var3=race_plot_var,
    column_width=85,
    tick_size=12,
)

In [None]:
# Columns: sex | x-axis: age group | dropdown: race
create_plot_var1_colfacet_var2_x_var3_dropdown(
    var1=sex_plot_var,
    var2=age_group_plot_var,
    var3=race_plot_var,
    column_width=175,
    tick_size=12,
)

In [None]:
# Columns: race | x-axis: sex | dropdown: age group
create_plot_var1_colfacet_var2_x_var3_dropdown(
    var1=race_plot_var,
    var2=sex_plot_var,
    var3=age_group_plot_var,
    column_width=105,
    tick_size=20,
)

In [None]:
# Columns: sex | x-axis: race | dropdown: age group
create_plot_var1_colfacet_var2_x_var3_dropdown(
    var1=sex_plot_var,
    var2=race_plot_var,
    var3=age_group_plot_var,
    column_width=175,
    tick_size=25,
)

# Percentage Severe by Sex and Race, faceted, age group dropdown

In [None]:
# Grid of small panels: one row per sex, one column per race, each showing the % severe
# point with its 95% CI error bar. An age-group dropdown filters the data.
column_width = 100
row_height = 100

# Dropdown bound to a single-value selection on "age_group"; starts on "All".
age_dropdown = alt.binding_select(options=AGE_GROUP_VALUES)
age_selection = alt.selection_single(fields=["age_group"], bind=age_dropdown, name="Age Group", init={"age_group": "All"})

tooltip = [
    alt.Tooltip("race", title="Race"),
    alt.Tooltip("sex", title="Sex"),
    alt.Tooltip("age_group", title="Age Group"),
    alt.Tooltip("ci_95_lower_x100", title="Percentage Severe, 95% CI Lower"),
    alt.Tooltip("p_x100", title="Percentage Severe"),
    alt.Tooltip("ci_95_upper_x100", title="Percentage Severe, 95% CI Upper"),
    alt.Tooltip("num_patients_all", title="Number of Patients"),
    alt.Tooltip("num_patients_ever_severe", title="Number of Patients Ever Severe"),
    alt.Tooltip("num_patients_never_severe", title="Number of Patients Never Severe"),
]

chart = alt.Chart(overall_df)

filtered_chart = chart.transform_filter(
    age_selection
)

# No x encoding: sex and race are expressed purely through the row/column facets.
# Despite its name, "bar" is the point layer.
bar = filtered_chart.mark_point(color="gray", filled=True, size=100).encode(
    y=alt.Y("p_x100:Q", axis=alt.Axis(title="% Severe")),
    tooltip=tooltip
).properties(width=column_width,height=row_height)


errorbar = filtered_chart.mark_errorbar(color="black").encode(
    y=alt.Y("ci_95_upper_x100:Q", title=""), 
    y2=alt.Y2("ci_95_lower_x100:Q", title=""),
    tooltip=tooltip
)

plot = alt.layer(bar, errorbar).facet(
    column=alt.Column(
        "race:N",
        sort=RACE_VALUES,
        header=alt.Header(labelOrient="bottom", title="Race", titleOrient="bottom")
    ),
    row=alt.Row(
        "sex:N",
        sort=SEX_VALUES,
        header=alt.Header(title="Sex")
    ),
).properties(title={
    "text": ["Percentage Severe by Sex and Race"], 
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
})

plot = apply_theme(plot).add_selection(
    age_selection
)

plot

In [None]:
# Static grid (no dropdown): rows = sex, columns = race, x-axis = age group.
# Each panel shows the % severe point per age group with its 95% CI error bar.
column_width = 130
row_height = 110

# One colour per entry of AGE_GROUP_VALUES, paired by position ("All" is black, the first entry).
age_color_scale = alt.Scale(domain=AGE_GROUP_VALUES, range=["#000000", "#C7DFEE", "#BAD8EB", "#AAD0E6", "#9BC8E2", "#8CBEDD", "#7FB5D8", "#73ABD3", "#69A0CD", "#5F96C5", "#D3E5F2"]) 

tooltip = [
    alt.Tooltip("race", title="Race"),
    alt.Tooltip("sex", title="Sex"),
    alt.Tooltip("age_group", title="Age Group"),
    alt.Tooltip("ci_95_lower_x100", title="Percentage Severe, 95% CI Lower"),
    alt.Tooltip("p_x100", title="Percentage Severe"),
    alt.Tooltip("ci_95_upper_x100", title="Percentage Severe, 95% CI Upper"),
    alt.Tooltip("num_patients_all", title="Number of Patients"),
    alt.Tooltip("num_patients_ever_severe", title="Number of Patients Ever Severe"),
    alt.Tooltip("num_patients_never_severe", title="Number of Patients Never Severe"),
]

chart = alt.Chart(overall_df)

# Despite its name, "bar" is the point layer; colour encodes the age group.
bar = chart.mark_point(color="gray", filled=True, size=100).encode(
    x=alt.X("age_group:O", axis=alt.Axis(title=None), sort=AGE_GROUP_VALUES),
    color=alt.Color("age_group:O", legend=alt.Legend(title="Age Group"), sort=AGE_GROUP_VALUES, scale=age_color_scale),
    y=alt.Y("p_x100:Q", axis=alt.Axis(title="% Severe")),
    tooltip=tooltip
).properties(width=column_width,height=row_height)


errorbar = chart.mark_errorbar(color="black").encode(
    x=alt.X("age_group:O", axis=alt.Axis(title="Age Group"), sort=AGE_GROUP_VALUES),
    y=alt.Y("ci_95_upper_x100:Q", title=""), 
    y2=alt.Y2("ci_95_lower_x100:Q", title=""),
    tooltip=tooltip
)

plot = alt.layer(bar, errorbar).facet(
    column=alt.Column(
        "race:N",
        sort=RACE_VALUES,
        header=alt.Header(labelOrient="top", title=None, titleOrient="top")
    ),
    row=alt.Row(
        "sex:N",
        sort=SEX_VALUES,
        header=alt.Header(title="Sex")
    ),
).properties(title={
    "text": ["Percentage Severe by Sex, Race, and Age Group"], 
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
})

plot = apply_theme(plot)

plot

In [None]:
# Variant of the previous grid in which the point shape additionally encodes sex.
column_width = 130
row_height = 110

# One colour per entry of AGE_GROUP_VALUES, paired by position ("All" is black, the first entry).
age_color_scale = alt.Scale(domain=AGE_GROUP_VALUES, range=["#000000", "#C7DFEE", "#BAD8EB", "#AAD0E6", "#9BC8E2", "#8CBEDD", "#7FB5D8", "#73ABD3", "#69A0CD", "#5F96C5", "#D3E5F2"]) 

tooltip = [
    alt.Tooltip("race", title="Race"),
    alt.Tooltip("sex", title="Sex"),
    alt.Tooltip("age_group", title="Age Group"),
    alt.Tooltip("ci_95_lower_x100", title="Percentage Severe, 95% CI Lower"),
    alt.Tooltip("p_x100", title="Percentage Severe"),
    alt.Tooltip("ci_95_upper_x100", title="Percentage Severe, 95% CI Upper"),
    alt.Tooltip("num_patients_all", title="Number of Patients"),
    alt.Tooltip("num_patients_ever_severe", title="Number of Patients Ever Severe"),
    alt.Tooltip("num_patients_never_severe", title="Number of Patients Never Severe"),
]

chart = alt.Chart(overall_df)

# Point layer: colour encodes age group, shape encodes sex (which is also the row facet).
bar = chart.mark_point(color="gray", filled=True, size=100).encode(
    x=alt.X("age_group:O", axis=alt.Axis(title="Age Group"), sort=AGE_GROUP_VALUES),
    shape=alt.Shape("sex:N", legend=alt.Legend(title="Sex")),
    color=alt.Color("age_group:O", legend=alt.Legend(title="Age Group"), sort=AGE_GROUP_VALUES, scale=age_color_scale),
    y=alt.Y("p_x100:Q", axis=alt.Axis(title="% Severe")),
    tooltip=tooltip
).properties(width=column_width,height=row_height)


errorbar = chart.mark_errorbar(color="black").encode(
    x=alt.X("age_group:O", axis=alt.Axis(title="Age Group"), sort=AGE_GROUP_VALUES),
    y=alt.Y("ci_95_upper_x100:Q", title=""), 
    y2=alt.Y2("ci_95_lower_x100:Q", title=""),
    tooltip=tooltip
)

plot = alt.layer(bar, errorbar).facet(
    column=alt.Column(
        "race:N",
        sort=RACE_VALUES,
        header=alt.Header(labelOrient="top", title=None, titleOrient="bottom")
    ),
    row=alt.Row(
        "sex:N",
        sort=SEX_VALUES,
        header=alt.Header(title="Sex")
    ),
).properties(title={
    "text": ["Percentage Severe by Sex, Race, and Age Group"], 
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
})

plot = apply_theme(plot)

plot

In [None]:
# Variant of the previous grid in which the point shape encodes race instead of sex.
column_width = 130
row_height = 110

# One colour per entry of AGE_GROUP_VALUES, paired by position ("All" is black, the first entry).
age_color_scale = alt.Scale(domain=AGE_GROUP_VALUES, range=["#000000", "#C7DFEE", "#BAD8EB", "#AAD0E6", "#9BC8E2", "#8CBEDD", "#7FB5D8", "#73ABD3", "#69A0CD", "#5F96C5", "#D3E5F2"]) 

tooltip = [
    alt.Tooltip("race", title="Race"),
    alt.Tooltip("sex", title="Sex"),
    alt.Tooltip("age_group", title="Age Group"),
    alt.Tooltip("ci_95_lower_x100", title="Percentage Severe, 95% CI Lower"),
    alt.Tooltip("p_x100", title="Percentage Severe"),
    alt.Tooltip("ci_95_upper_x100", title="Percentage Severe, 95% CI Upper"),
    alt.Tooltip("num_patients_all", title="Number of Patients"),
    alt.Tooltip("num_patients_ever_severe", title="Number of Patients Ever Severe"),
    alt.Tooltip("num_patients_never_severe", title="Number of Patients Never Severe"),
]

chart = alt.Chart(overall_df)

# Point layer: colour encodes age group, shape encodes race (which is also the column facet).
bar = chart.mark_point(color="gray", filled=True, size=100).encode(
    x=alt.X("age_group:O", axis=alt.Axis(title="Age Group"), sort=AGE_GROUP_VALUES),
    shape=alt.Shape("race:N", legend=alt.Legend(title="Race")),
    color=alt.Color("age_group:O", legend=alt.Legend(title="Age Group"), sort=AGE_GROUP_VALUES, scale=age_color_scale),
    y=alt.Y("p_x100:Q", axis=alt.Axis(title="% Severe")),
    tooltip=tooltip
).properties(width=column_width,height=row_height)


errorbar = chart.mark_errorbar(color="black").encode(
    x=alt.X("age_group:O", axis=alt.Axis(title="Age Group"), sort=AGE_GROUP_VALUES),
    y=alt.Y("ci_95_upper_x100:Q", title=""), 
    y2=alt.Y2("ci_95_lower_x100:Q", title=""),
    tooltip=tooltip
)

plot = alt.layer(bar, errorbar).facet(
    column=alt.Column(
        "race:N",
        sort=RACE_VALUES,
        header=alt.Header(labelOrient="top", title=None, titleOrient="bottom")
    ),
    row=alt.Row(
        "sex:N",
        sort=SEX_VALUES,
        header=alt.Header(title="Sex")
    ),
).properties(title={
    "text": ["Percentage Severe by Sex, Race, and Age Group"], 
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
})

plot = apply_theme(plot)

plot

# Count by severity and age group, bar plot, country dropdown

In [None]:
# Per-country rows broken down by age group only (race and sex both at their "All" level)
by_age_group_and_country_df = country_df[country_df["race"].eq("All") & country_df["sex"].eq("All")]
by_age_group_and_country_df.head();

In [None]:
# Total across countries to create the rows shown when the country dropdown is on "All".
# numeric_only=True keeps the text columns (country, color) out of the sum on every pandas
# version; both are set explicitly right below.
by_age_group_agg_df = by_age_group_and_country_df.groupby(["age_group", "sex", "race"]).sum(numeric_only=True).reset_index()
by_age_group_agg_df["country"] = "All"
by_age_group_agg_df["color"] = np.nan
# Summed proportions/intervals are meaningless; recompute them from the summed counts.
by_age_group_agg_df = compute_ci(by_age_group_agg_df)
by_age_group_agg_df;

In [None]:
# Add the country == "All" rows to the per-country rows.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0.
by_age_group_and_country_df = pd.concat([by_age_group_and_country_df, by_age_group_agg_df], ignore_index=True)
by_age_group_and_country_df.tail();

In [None]:
# Reshape to long form: the three count columns become (variable, value) pairs and every other
# column is kept as an identifier. The identifier list is built in column order (not from a
# set) so the resulting column order is the same on every run.
by_age_group_and_country_molten_df = by_age_group_and_country_df.melt(
    id_vars=[
        column
        for column in by_age_group_and_country_df.columns
        if column not in ("num_patients_all", "num_patients_ever_severe", "num_patients_never_severe")
    ]
)
# Keep only the ever/never-severe counts of the real age groups. The explicit copy makes the
# column assignment below operate on an independent frame (no SettingWithCopyWarning).
by_age_group_and_country_molten_df = by_age_group_and_country_molten_df.loc[
    (by_age_group_and_country_molten_df["variable"] != "num_patients_all")
    & ~by_age_group_and_country_molten_df["age_group"].isin(["All"])
].copy()
by_age_group_and_country_molten_df["variable"] = by_age_group_and_country_molten_df["variable"].replace(HR_SEVERITY_COLNAME_MAP)
by_age_group_and_country_molten_df;

In [None]:
# Patient counts by severity (x) within age-group columns; a country dropdown filters the data.
column_width = 60

# Dropdown bound to a single-value selection on "country"; starts on the aggregated "All" rows.
country_dropdown = alt.binding_select(options=["All"] + COUNTRY_VALUES)
country_selection = alt.selection_single(fields=["country"], bind=country_dropdown, name="Country", init={"country": "All"})

tooltip = [
    alt.Tooltip("age_group", title="Age Group"),
    alt.Tooltip("variable", title="Severity"),
    alt.Tooltip("value", title="Number of Patients"),
]

chart = alt.Chart(by_age_group_and_country_molten_df)

filtered_chart = chart.transform_filter(
    country_selection
)

# The colour encoding is declared with alt.Color (previously alt.X, the x-position channel
# class): a legend is a property of the colour channel, not of a position channel.
bar = filtered_chart.mark_bar().encode(
    y=alt.Y("value:Q", axis=alt.Axis(title="Number of Patients")),
    x=alt.X("variable:O", axis=alt.Axis(title="Severity"), sort=["Never Severe", "Ever Severe"]),
    color=alt.Color("variable:O", legend=alt.Legend(title="Severity", orient="top-left"), sort=["Never Severe", "Ever Severe"]),
    tooltip=tooltip
).properties(width=column_width,height=300)


plot = bar.facet(
    column=alt.Column(
        "age_group:N",
        sort=AGE_GROUP_VALUES,
        header=alt.Header(labelOrient="bottom", title="Age Group", titleOrient="bottom")
    ),
).properties(title={
    "text": ["Number of Patients by Age Group and Severity"], 
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
})

plot = apply_theme(plot).add_selection(
    country_selection
)

plot

In [None]:
# Patient counts by severity (x) within age-group columns, with bars coloured by country.
column_width = 60

# Drop the aggregated "All" country rows so each patient is counted once.
by_age_group_and_country_molten_without_all_df = by_age_group_and_country_molten_df.loc[by_age_group_and_country_molten_df["country"] != "All"]

# Fixed country -> colour pairing taken from country_color_map.
country_color_scale = alt.Scale(domain=list(country_color_map.keys()), range=list(country_color_map.values()))

tooltip = [
    alt.Tooltip("country", title="Country"),
    alt.Tooltip("age_group", title="Age Group"),
    alt.Tooltip("variable", title="Severity"),
    alt.Tooltip("value", title="Number of Patients"),
]

chart = alt.Chart(by_age_group_and_country_molten_without_all_df)

# The colour encoding is declared with alt.Color (previously alt.X, the x-position channel
# class): a legend is a property of the colour channel, not of a position channel.
bar = chart.mark_bar().encode(
    y=alt.Y("value:Q", axis=alt.Axis(title="Number of Patients")),
    x=alt.X("variable:O", axis=alt.Axis(title="Severity"), sort=["Never Severe", "Ever Severe"]),
    color=alt.Color("country:N", legend=alt.Legend(title="Country", orient="top-left"), sort=COUNTRY_VALUES, scale=country_color_scale),
    tooltip=tooltip
).properties(width=column_width,height=300)


plot = bar.facet(
    column=alt.Column(
        "age_group:N",
        sort=AGE_GROUP_VALUES,
        header=alt.Header(labelOrient="bottom", title="Age Group", titleOrient="bottom")
    ),
).properties(title={
    "text": ["Number of Patients by Age Group, Country, and Severity"], 
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
})

plot = apply_theme(plot)

plot

In [None]:
# Variant of the previous chart in which bar opacity additionally encodes severity.
column_width = 60

# Drop the aggregated "All" country rows so each patient is counted once.
by_age_group_and_country_molten_without_all_df = by_age_group_and_country_molten_df.loc[by_age_group_and_country_molten_df["country"] != "All"]

# Fixed country -> colour pairing taken from country_color_map.
country_color_scale = alt.Scale(domain=list(country_color_map.keys()), range=list(country_color_map.values()))

tooltip = [
    alt.Tooltip("country", title="Country"),
    alt.Tooltip("age_group", title="Age Group"),
    alt.Tooltip("variable", title="Severity"),
    alt.Tooltip("value", title="Number of Patients"),
]

chart = alt.Chart(by_age_group_and_country_molten_without_all_df)

# Opacity and colour are declared with their own channel classes (previously both used
# alt.X, the x-position channel class): legends belong to these channels, not to a position.
bar = chart.mark_bar().encode(
    y=alt.Y("value:Q", axis=alt.Axis(title="Number of Patients")),
    x=alt.X("variable:O", axis=alt.Axis(title="Severity"), sort=["Never Severe", "Ever Severe"]),
    opacity=alt.Opacity("variable:N", legend=alt.Legend(title="Severity", orient="top-left"), sort=["Never Severe", "Ever Severe"]),
    color=alt.Color("country:N", legend=alt.Legend(title="Country", orient="top-left"), sort=COUNTRY_VALUES, scale=country_color_scale),
    tooltip=tooltip
).properties(width=column_width,height=300)


plot = bar.facet(
    column=alt.Column(
        "age_group:N",
        sort=AGE_GROUP_VALUES,
        header=alt.Header(labelOrient="bottom", title="Age Group", titleOrient="bottom")
    ),
).properties(title={
    "text": ["Number of Patients by Age Group, Country, and Severity"], 
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
})

plot = apply_theme(plot)

plot

# Count by severity and race, bar plot, country dropdown

In [None]:
# Per-country rows broken down by race only (age group and sex both at their "All" level)
by_race_and_country_df = country_df[country_df["age_group"].eq("All") & country_df["sex"].eq("All")]
by_race_and_country_df.head();

In [None]:
# Total across countries to create the rows shown when the country dropdown is on "All".
# numeric_only=True keeps the text columns (country, color) out of the sum on every pandas
# version; both are set explicitly right below.
by_race_agg_df = by_race_and_country_df.groupby(["age_group", "sex", "race"]).sum(numeric_only=True).reset_index()
by_race_agg_df["country"] = "All"
by_race_agg_df["color"] = np.nan
# Summed proportions/intervals are meaningless; recompute them from the summed counts.
by_race_agg_df = compute_ci(by_race_agg_df)
by_race_agg_df;

In [None]:
# Add the country == "All" rows to the per-country rows.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0.
by_race_and_country_df = pd.concat([by_race_and_country_df, by_race_agg_df], ignore_index=True)
by_race_and_country_df.tail();

In [None]:
# Reshape to long form: the three count columns become (variable, value) pairs and every other
# column is kept as an identifier. The identifier list is built in column order (not from a
# set) so the resulting column order is the same on every run.
by_race_and_country_molten_df = by_race_and_country_df.melt(
    id_vars=[
        column
        for column in by_race_and_country_df.columns
        if column not in ("num_patients_all", "num_patients_ever_severe", "num_patients_never_severe")
    ]
)
# Keep only the ever/never-severe counts of the real race categories. The explicit copy makes
# the column assignment below operate on an independent frame (no SettingWithCopyWarning).
by_race_and_country_molten_df = by_race_and_country_molten_df.loc[
    (by_race_and_country_molten_df["variable"] != "num_patients_all")
    & ~by_race_and_country_molten_df["race"].isin(["All"])
].copy()
by_race_and_country_molten_df["variable"] = by_race_and_country_molten_df["variable"].replace(HR_SEVERITY_COLNAME_MAP)
by_race_and_country_molten_df;

In [None]:
# Patient counts by severity (x) within race columns; a country dropdown filters the data.
column_width = 60

# Dropdown bound to a single-value selection on "country"; starts on the aggregated "All" rows.
country_dropdown = alt.binding_select(options=["All"] + COUNTRY_VALUES)
country_selection = alt.selection_single(fields=["country"], bind=country_dropdown, name="Country", init={"country": "All"})

tooltip = [
    alt.Tooltip("race", title="Race"),
    alt.Tooltip("variable", title="Severity"),
    alt.Tooltip("value", title="Number of Patients"),
]

chart = alt.Chart(by_race_and_country_molten_df)

filtered_chart = chart.transform_filter(
    country_selection
)

# The colour encoding is declared with alt.Color (previously alt.X, the x-position channel
# class): a legend is a property of the colour channel, not of a position channel.
bar = filtered_chart.mark_bar().encode(
    y=alt.Y("value:Q", axis=alt.Axis(title="Number of Patients")),
    x=alt.X("variable:O", axis=alt.Axis(title="Severity"), sort=["Never Severe", "Ever Severe"]),
    color=alt.Color("variable:O", legend=alt.Legend(title="Severity", orient="right"), sort=["Never Severe", "Ever Severe"]),
    tooltip=tooltip
).properties(width=column_width,height=300)


plot = bar.facet(
    column=alt.Column(
        "race:N",
        sort=RACE_VALUES,
        header=alt.Header(labelOrient="bottom", title="Race", titleOrient="bottom")
    ),
).properties(title={
    "text": ["Number of Patients by Race and Severity"], 
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
})

plot = apply_theme(plot).add_selection(
    country_selection
)

plot

# Count by severity and sex, bar plot, country dropdown

In [None]:
# Per-country rows broken down by sex only (race and age group both at their "All" level)
by_sex_and_country_df = country_df[country_df["race"].eq("All") & country_df["age_group"].eq("All")]
by_sex_and_country_df.head();

In [None]:
# Total across countries to create the rows shown when the country dropdown is on "All".
# numeric_only=True keeps the text columns (country, color) out of the sum on every pandas
# version; both are set explicitly right below.
by_sex_agg_df = by_sex_and_country_df.groupby(["age_group", "sex", "race"]).sum(numeric_only=True).reset_index()
by_sex_agg_df["country"] = "All"
by_sex_agg_df["color"] = np.nan
# Summed proportions/intervals are meaningless; recompute them from the summed counts.
by_sex_agg_df = compute_ci(by_sex_agg_df)
by_sex_agg_df;

In [None]:
# Add the country == "All" rows to the per-country rows.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0.
by_sex_and_country_df = pd.concat([by_sex_and_country_df, by_sex_agg_df], ignore_index=True)
by_sex_and_country_df.tail();

In [None]:
# Reshape to long form: the three count columns become (variable, value) pairs and every other
# column is kept as an identifier. The identifier list is built in column order (not from a
# set) so the resulting column order is the same on every run.
by_sex_and_country_molten_df = by_sex_and_country_df.melt(
    id_vars=[
        column
        for column in by_sex_and_country_df.columns
        if column not in ("num_patients_all", "num_patients_ever_severe", "num_patients_never_severe")
    ]
)
# Keep only the ever/never-severe counts of the real sex categories. The explicit copy makes
# the column assignment below operate on an independent frame (no SettingWithCopyWarning).
by_sex_and_country_molten_df = by_sex_and_country_molten_df.loc[
    (by_sex_and_country_molten_df["variable"] != "num_patients_all")
    & ~by_sex_and_country_molten_df["sex"].isin(["All"])
].copy()
by_sex_and_country_molten_df["variable"] = by_sex_and_country_molten_df["variable"].replace(HR_SEVERITY_COLNAME_MAP)
by_sex_and_country_molten_df;

In [None]:
# Patient counts by severity (x) within sex columns; a country dropdown filters the data.
column_width = 60

# Dropdown bound to a single-value selection on "country"; starts on the aggregated "All" rows.
country_dropdown = alt.binding_select(options=["All"] + COUNTRY_VALUES)
country_selection = alt.selection_single(fields=["country"], bind=country_dropdown, name="Country", init={"country": "All"})

tooltip = [
    alt.Tooltip("sex", title="Sex"),
    alt.Tooltip("variable", title="Severity"),
    alt.Tooltip("value", title="Number of Patients"),
]

chart = alt.Chart(by_sex_and_country_molten_df)

filtered_chart = chart.transform_filter(
    country_selection
)

# The colour encoding is declared with alt.Color (previously alt.X, the x-position channel
# class): a legend is a property of the colour channel, not of a position channel.
bar = filtered_chart.mark_bar().encode(
    y=alt.Y("value:Q", axis=alt.Axis(title="Number of Patients")),
    x=alt.X("variable:O", axis=alt.Axis(title="Severity"), sort=["Never Severe", "Ever Severe"]),
    color=alt.Color("variable:O", legend=alt.Legend(title="Severity", orient="right"), sort=["Never Severe", "Ever Severe"]),
    tooltip=tooltip
).properties(width=column_width,height=300)


plot = bar.facet(
    column=alt.Column(
        "sex:N",
        sort=SEX_VALUES,
        header=alt.Header(labelOrient="bottom", title="Sex", titleOrient="bottom")
    ),
).properties(title={
    "text": ["Number of Patients by Sex and Severity"], 
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
})

plot = apply_theme(plot).add_selection(
    country_selection
)

plot