In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    read_loinc_df,
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    read_full_demographics_df,
    get_visualization_subtitle,
    apply_theme,
)
from web import for_website

alt.data_transformers.disable_max_rows() # Allow charts built from more than 5000 rows

In [None]:
# Data release identifier; passed to get_visualization_subtitle() when the chart title is built.
DATA_RELEASE = "2020-06-25"

In [None]:
# Load the demographics table through the project helper.
# Columns relied on later in this notebook: race, sex, age_group,
# num_patients_all and num_patients_ever_severe.
df = read_full_demographics_df()

In [None]:
# Inspect the frame exactly as loaded, before any cleaning.
df

In [None]:
# Treat the sentinel codes -99 and -999 as missing values so they are ignored
# by the sums below (presumably masked / unavailable counts — confirm against
# the data specification).
df = df.replace([-99, -999], np.nan)

In [None]:
# Some race categories may have zero patients after summing across all sites.
# Select the count column *before* aggregating: summing the whole frame also
# aggregates unrelated (possibly non-numeric) columns, which is wasteful and,
# depending on the pandas version, can concatenate strings or raise.
num_patients_by_race = df.groupby("race")["num_patients_all"].sum()
is_zero_by_race = num_patients_by_race == 0

# Kept as a one-column boolean frame (indexed by race) for backward compatibility.
zero_patients_by_race_df = is_zero_by_race.to_frame()
races_with_zero_patients = is_zero_by_race.index[is_zero_by_race].tolist()
races_with_zero_patients

In [None]:
# Drop the race categories that have no patients at all.
# `.copy()` makes the result an independent frame: later cells assign new
# columns to `df`, which would otherwise raise SettingWithCopyWarning because
# a boolean-filtered frame is tracked as a copy of a slice of the original.
df = df.loc[~df["race"].isin(races_with_zero_patients)].copy()
df.head()

In [None]:
# Lookup tables translating raw category codes into display labels.
# Insertion order matters: the *_VALUES lists derived at the bottom are used
# for dropdown options and facet sort order.
HR_SEVERITY_MAP = {
    "all": "All",
    "ever_severe": "Ever Severe",
    "never_severe": "Never Severe",
}

HR_SEX_MAP = {
    "all": "All",
    "male": "Male",
    "female": "Female",
    "other": "Other",
}

HR_AGE_GROUP_MAP = {
    "all": "All",
    "00to02": "0 - 2",
    "03to05": "3 - 5",
    "06to11": "6 - 11",
    "12to17": "12 - 17",
    "18to25": "18 - 25",
    "26to49": "26 - 49",
    "50to69": "50 - 69",
    "70to79": "70 - 79",
    "80plus": "80 +",
    "other": "Other",
}

HR_RACE_MAP = {
    "all": "All",
    "american_indian": "American Indian or Alaska Native",
    "asian": "Asian",
    "black": "Black",
    "hawaiian_pacific_islander": "Native Hawaiian or Other Pacific Islander",
    "hispanic_latino": "Hispanic or Latino",
    "white": "White",
    "other": "Other",
}

# Count columns are named "num_patients_<severity code>", so their labels
# follow directly from the severity map.
HR_SEVERITY_COLNAME_MAP = {
    f"num_patients_{code}": label for code, label in HR_SEVERITY_MAP.items()
}

# Ordered display labels for each category.
SEVERITY_VALUES = [*HR_SEVERITY_MAP.values()]
SEX_VALUES = [*HR_SEX_MAP.values()]
AGE_GROUP_VALUES = [*HR_AGE_GROUP_MAP.values()]
RACE_VALUES = [*HR_RACE_MAP.values()]

In [None]:
# Swap raw category codes for their display labels; values without a mapping
# entry are left unchanged by Series.replace.
for column, label_map in (
    ("sex", HR_SEX_MAP),
    ("race", HR_RACE_MAP),
    ("age_group", HR_AGE_GROUP_MAP),
):
    df[column] = df[column].replace(label_map)

In [None]:
# Never-severe count is the remainder of all patients after removing the ever-severe ones.
df["num_patients_never_severe"] = df["num_patients_all"].sub(df["num_patients_ever_severe"])

In [None]:
# Preview the frame after relabelling and adding num_patients_never_severe.
df.head()

In [None]:
# Collapse the per-row counts into one row per (sex, race, age_group)
# combination by summing.
# numeric_only=True restricts the sum to the numeric count columns; without it,
# newer pandas versions also try to "sum" any remaining non-numeric columns
# (string concatenation or a TypeError), which is never what is wanted here.
overall_df = (
    df.groupby(["sex", "race", "age_group"])
    .sum(numeric_only=True)
    .reset_index()
)
overall_df.head()

In [None]:
def compute_ci(df):
    """Add the proportion of ever-severe patients and its 95% CI to ``df``.

    The interval is the normal-approximation (Wald) interval
    ``p ± 1.96 * sqrt(p * (1 - p) / n)`` with
    ``p = num_patients_ever_severe / num_patients_all`` and
    ``n = num_patients_all``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``num_patients_ever_severe`` and
        ``num_patients_all``. The frame is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with these columns added or overwritten:
        ``p``, ``n``, ``ci_95_lower``, ``ci_95_upper``, ``np``, ``n1minusp``
        (all on the 0-1 / count scale, unclipped) and ``p_x100``,
        ``ci_95_lower_x100``, ``ci_95_upper_x100`` (percentages clipped to
        the range [0, 100]).

    Notes
    -----
    Rows whose ``num_patients_all`` is zero or missing have an undefined
    proportion: ``p`` and every column derived from it are NaN for such rows
    (rather than ``inf``, which would later be clipped to a misleading 100%).
    """
    z_95 = 1.96  # two-sided 95% critical value of the standard normal

    n = df["num_patients_all"]
    # Mask zero denominators so the division yields NaN instead of +/-inf.
    denominator = n.where(n != 0)

    p = df["num_patients_ever_severe"] / denominator
    df["p"] = p
    df["n"] = n

    # Vectorized over the whole column; NaN propagates through sqrt.
    half_width = z_95 * np.sqrt((p * (1 - p)) / denominator)
    df["ci_95_lower"] = p - half_width
    df["ci_95_upper"] = p + half_width

    # Expected counts n*p and n*(1-p); not used elsewhere in the visible
    # notebook (presumably kept for checking the normal approximation).
    df["np"] = df["n"] * df["p"]
    df["n1minusp"] = df["n"] * (1 - df["p"])

    # Percent-scale copies for plotting, clipped to the valid range [0, 100].
    for column in ("p", "ci_95_lower", "ci_95_upper"):
        df[f"{column}_x100"] = (df[column] * 100).clip(lower=0, upper=100)

    return df

In [None]:
# Attach proportion / confidence-interval columns to both the per-row frame
# and the aggregated frame (per-row first, then the aggregate).
df, overall_df = (compute_ci(frame) for frame in (df, overall_df))

In [None]:
# Preview the aggregated frame with the proportion and CI columns added.
overall_df.head()

In [None]:
# Base chart over the aggregated frame; both layers below derive from it.
chart = alt.Chart(overall_df)

# Dropdown-bound single selection choosing which age group is shown
# (initially "All").
age_dropdown = alt.binding_select(options=AGE_GROUP_VALUES)
age_selection = alt.selection_single(
    name="Age Group",
    fields=["age_group"],
    bind=age_dropdown,
    init={"age_group": "All"},
)

filtered_chart = chart.transform_filter(age_selection)

# Tooltip entries shared by the point layer and the error-bar layer,
# listed as (field, title) pairs in display order.
tooltip = [
    alt.Tooltip(field, title=title)
    for field, title in (
        ("race", "Race"),
        ("sex", "Sex"),
        ("age_group", "Age Group"),
        ("ci_95_lower_x100", "Percentage Severe, 95% CI Lower"),
        ("p_x100", "Percentage Severe"),
        ("ci_95_upper_x100", "Percentage Severe, 95% CI Upper"),
        ("num_patients_all", "Number of Patients"),
        ("num_patients_ever_severe", "Number of Patients Ever Severe"),
        ("num_patients_never_severe", "Number of Patients Never Severe"),
    )
]

# Point estimate: one filled point per sex, coloured by sex.
bar = (
    filtered_chart
    .mark_point(filled=True, size=80)
    .encode(
        x=alt.X("sex:N", axis=alt.Axis(title="Sex")),
        y=alt.Y("p_x100:Q", axis=alt.Axis(title="% Severe")),
        color=alt.Color("sex:N", legend=None),
        tooltip=tooltip,
    )
    .properties(width=150, height=300)
)

# 95% confidence interval drawn between the clipped upper and lower bounds.
errorbar = (
    filtered_chart
    .mark_errorbar()
    .encode(
        x=alt.X("sex:N", axis=alt.Axis(title="Sex")),
        y=alt.Y("ci_95_upper_x100:Q", title=""),
        y2=alt.Y2("ci_95_lower_x100:Q", title=""),
        tooltip=tooltip,
    )
)

# One facet column per race, ordered like RACE_VALUES.
race_facet = alt.Column(
    "race:N",
    sort=RACE_VALUES,
    header=alt.Header(title="Race"),
)
plot_title = {
    "text": ["Demographics"],
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
}

plot = (
    alt.layer(bar, errorbar)
    .facet(column=race_facet)
    .properties(title=plot_title)
)

# Theme first, then mark configuration, then attach the dropdown selection.
plot = (
    apply_theme(plot)
    .configure_mark(color='orange')
    .add_selection(age_selection)
)

plot