In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    read_loinc_df,
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    read_full_demographics_df,
    get_visualization_subtitle,
    apply_theme,
)
from web import for_website

alt.data_transformers.disable_max_rows() # Allow using rows more than 5000

In [None]:
# Data release identifier; passed to get_visualization_subtitle() to build chart subtitles.
DATA_RELEASE = "2020-06-25"

In [None]:
# Load the demographics table through the project helper. Later cells read its
# siteid, sex, age_group, race, num_patients_all and num_patients_ever_severe columns.
df = read_full_demographics_df()

In [None]:
# Preview only the first rows (as the later cells do) instead of rendering the whole table.
df.head()

In [None]:
# Distinct site identifiers, in order of first appearance in the demographics table.
ALL_SITES = df["siteid"].drop_duplicates().tolist()

# Category values used when breaking the demographics down.
ALL_SEVERITY_VALUES = [
    "all",
    "severe",
]
ALL_AGE_GROUPS = [
    "0to2",
    "3to5",
    "6to11",
    "12to17",
    "18to25",
    "26to49",
    "50to69",
    "70to79",
    "80plus",
]
ALL_SEXES = [
    "male",
    "female",
    "other",
]

In [None]:
# Compute percentage of male / female by age group and severity
def _first_count(sex_rows, column, masked_value):
    """Read one patient count, normalising missing rows and masked values.

    Args:
        sex_rows: Rows for a single site / age group / sex combination.
        column: Name of the count column to read.
        masked_value: Replacement used when the stored count is negative.

    Returns:
        0 when ``sex_rows`` is empty, ``masked_value`` when the stored count is
        negative, otherwise the first stored count unchanged.
    """
    if sex_rows.empty:
        return 0
    value = sex_rows[column].iloc[0]
    return masked_value if value < 0 else value


def _percentage(part, total):
    """Return ``part`` as a percentage of ``total``; NaN when ``total`` is 0 or NaN."""
    if pd.isna(total) or total == 0:
        # Avoid ZeroDivisionError / inf; the NaN is turned into 0 by fillna below.
        return np.nan
    return 100 * part / total


# Rows are collected in a list and turned into a frame once; growing a frame
# row by row is quadratic and DataFrame.append no longer exists in pandas 2.x.
age_sex_records = []

# Group by the column name (not a one-element list) so the key is the plain
# site id rather than a 1-tuple on recent pandas versions.
for site_id, site_df in df.loc[df["race"] == "all"].groupby("siteid"):
    for age_group in ALL_AGE_GROUPS:
        match_df = site_df.loc[site_df["age_group"] == age_group]

        counts_by_sex = {}
        for sex in ALL_SEXES:
            sex_rows = match_df.loc[match_df["sex"] == sex]
            # Negative counts become NaN for male / female but 0 for "other".
            masked_value = 0 if sex == "other" else np.nan
            n_all = _first_count(sex_rows, "num_patients_all", masked_value)
            n_severe = _first_count(sex_rows, "num_patients_ever_severe", masked_value)
            counts_by_sex[sex] = {
                "all": n_all,
                "ever_severe": n_severe,
                "never_severe": n_all - n_severe,
            }

        for severity in ("all", "ever_severe", "never_severe"):
            total = sum(counts_by_sex[sex][severity] for sex in ALL_SEXES)
            for sex in ALL_SEXES:
                count = counts_by_sex[sex][severity]
                age_sex_records.append({
                    "siteid": site_id,
                    "age_group": age_group,
                    "sex": sex,
                    "percentage": _percentage(count, total),
                    "severity": severity,
                    "count": count,
                })

age_sex_df = pd.DataFrame(
    age_sex_records,
    columns=["siteid", "sex", "age_group", "severity", "percentage", "count"],
)
age_sex_df = age_sex_df.sort_values(by=['siteid', 'severity', 'age_group', 'sex'])
age_sex_df = age_sex_df.fillna(value=0)
# Only the ever / never severe breakdown is kept; .copy() lets later cells
# assign columns on this frame without a SettingWithCopyWarning.
age_sex_df = age_sex_df.loc[age_sex_df["severity"].isin(["ever_severe", "never_severe"])].copy()

In [None]:
# Site selector shared by the per-site demographic charts.
SITES = age_sex_df["siteid"].unique().tolist()

# Pre-select "APHP" when it is available; otherwise fall back to the first
# site so the dropdown does not start on a value missing from its options.
default_site = "APHP" if ("APHP" in SITES or not SITES) else SITES[0]

site_dropdown = alt.binding_select(options=SITES)
site_selection = alt.selection_single(
    fields=["siteid"],
    bind=site_dropdown,
    name="siteid",
    init={"siteid": default_site},
)

# Percentage of each sex per age group (columns) and severity (rows) for the selected site.
chart = alt.Chart(age_sex_df).add_selection(
    site_selection
).transform_filter(
    site_selection
).mark_bar().encode(
    y="percentage:Q",
    color="sex:N",
    x="sex:N",
    column=alt.Column(
        "age_group:O",
        sort=ALL_AGE_GROUPS,
        header=alt.Header(labelOrient="bottom", title="Age group", titleOrient="bottom")
    ),
    # age_sex_df is filtered to the "ever_severe" / "never_severe" rows, so the
    # sort has to name those values to have any effect.
    row=alt.Row("severity:N", sort=["ever_severe", "never_severe"]),
    tooltip=[
        alt.Tooltip("severity"),
        alt.Tooltip("age_group"),
        alt.Tooltip("sex"),
        alt.Tooltip("percentage")
    ]
).properties(title={
    "text": ["Demographics"], 
    "dx": 50,
    "subtitle": "",
    "subtitleColor": "gray",
    "anchor": "middle",
})
chart

In [None]:
# Human-readable labels for the raw category codes, keyed by column name.
_DISPLAY_LABELS = {
    "severity": {
        "ever_severe": "Ever Severe",
        "never_severe": "Never Severe",
    },
    "sex": {
        "male": "Male",
        "female": "Female",
        "other": "Other",
    },
    "age_group": {
        "0to2": "0 - 2",
        "3to5": "3 - 5",
        "6to11": "6 - 11",
        "12to17": "12 - 17",
        "18to25": "18 - 25",
        "26to49": "26 - 49",
        "50to69": "50 - 69",
        "70to79": "70 - 79",
        "80plus": "80 +",
    },
}

# Swap the codes for their labels, one column at a time.
for _column, _labels in _DISPLAY_LABELS.items():
    age_sex_df[_column] = age_sex_df[_column].replace(_labels)

In [None]:
# Patient counts per sex, faceted by age group (columns) and severity (rows),
# filtered to the site picked in the shared dropdown.
_age_group_order = [
    "0 - 2", "3 - 5", "6 - 11", "12 - 17", "18 - 25",
    "26 - 49", "50 - 69", "70 - 79", "80 +",
]

_count_encodings = dict(
    y=alt.Y("count:Q", axis=alt.Axis(title="Count")),
    color=alt.Color("sex:N", legend=alt.Legend(title="Sex")),
    x=alt.X("sex:N", axis=alt.Axis(title="Sex")),
    column=alt.Column(
        "age_group:O",
        sort=_age_group_order,
        header=alt.Header(labelOrient="bottom", title="Age Group", titleOrient="bottom"),
    ),
    row=alt.Row(
        "severity:N",
        sort=["Ever Severe", "Never Severe"],
        header=alt.Header(title="Severity"),
    ),
    tooltip=[
        alt.Tooltip("severity", title="Severity"),
        alt.Tooltip("age_group", title="Age Group"),
        alt.Tooltip("sex", title="Sex"),
        alt.Tooltip("count", title="Count"),
    ],
)

_count_title = {
    "text": ["Demographics"],
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
}

plot = (
    alt.Chart(age_sex_df)
    .add_selection(site_selection)
    .transform_filter(site_selection)
    .mark_bar()
    .encode(**_count_encodings)
    .properties(title=_count_title)
)

plot = apply_theme(plot)

plot

In [None]:
# Preview only the first rows (as the later cells do) instead of rendering the whole table.
age_sex_df.head()

In [None]:
def _severity_count_plot(severity_df):
    """Build the per-site, per-sex horizontal bar chart of counts by age group.

    Args:
        severity_df: Rows of ``age_sex_df`` to draw (one severity level).

    Returns:
        An Altair chart faceted by site (columns) and sex (rows), with age
        group on the y axis and patient count on the x axis.
    """
    return alt.Chart(severity_df).mark_bar().encode(
        y=alt.Y(
            "age_group:O",
            sort=["0 - 2","3 - 5","6 - 11","12 - 17","18 - 25","26 - 49","50 - 69","70 - 79", "80 +"],
            axis=alt.Axis(title="Age Group")
        ),
        # The horizontal position channel is alt.X (the original passed alt.Y here).
        x=alt.X("count:Q", axis=alt.Axis(title="Count")),
        color=alt.Color("sex:N", legend=alt.Legend(title="Sex", orient="top")),
        opacity=alt.Opacity("severity:N", legend=alt.Legend(title="Severity", orient="top"), sort=["Never Severe", "Ever Severe"]),
        column=alt.Column(
            "siteid:N",
            header=alt.Header(labelOrient="bottom", title="Site", titleOrient="bottom")
        ),
        row=alt.Row(
            "sex:N",
            sort=["Male", "Female", "Other"],
            header=alt.Header(title="Sex")
        ),
        tooltip=[
            alt.Tooltip("severity", title="Severity"),
            alt.Tooltip("age_group", title="Age Group"),
            alt.Tooltip("sex", title="Sex"),
            alt.Tooltip("count", title="Count")
        ]
    ).properties(width=100, height=100)


# Split the table by severity. The "opacity" column is not referenced by the
# encodings in this cell; it is kept so the data attached to each chart is unchanged.
age_sex_df_ever_severe = age_sex_df.loc[age_sex_df["severity"] == "Ever Severe"].copy()
age_sex_df_ever_severe["opacity"] = 1.0
age_sex_df_never_severe = age_sex_df.loc[age_sex_df["severity"] == "Never Severe"].copy()
age_sex_df_never_severe["opacity"] = 0.5

# Both severity levels share one chart definition instead of two pasted copies.
ever_severe_plot = _severity_count_plot(age_sex_df_ever_severe)
never_severe_plot = _severity_count_plot(age_sex_df_never_severe)

# Stack "never severe" above "ever severe" with a common x scale and colour scale.
plot = alt.vconcat(never_severe_plot, ever_severe_plot).resolve_scale(x="shared", color="shared").properties(title={
    "text": ["Demographics"], 
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
})


plot = apply_theme(plot)

plot

In [None]:
# Rebuild the site list from the full demographics table, in order of first appearance.
SITES = df["siteid"].drop_duplicates().tolist()

In [None]:
def _ever_severe_subset(demographics_df, sex):
    """Select the per-site totals for one sex and add the ever-severe proportion.

    Args:
        demographics_df: Demographics table with sex, age_group, race, siteid,
            num_patients_all and num_patients_ever_severe columns.
        sex: Value of the ``sex`` column to select (e.g. "all", "male").

    Returns:
        The rows with the given sex, age_group "all" and race "all", indexed by
        siteid, with an added ``proportion_ever_severe`` column.
    """
    subset_df = demographics_df.loc[
        (demographics_df["sex"] == sex)
        & (demographics_df["age_group"] == "all")
        & (demographics_df["race"] == "all")
    ].set_index("siteid")

    n_all = subset_df["num_patients_all"]
    n_severe = subset_df["num_patients_ever_severe"]
    # Negative counts are sentinel codes (the notebook only replaces -99 / -999
    # with NaN in a later cell); mask them here so they cannot produce bogus ratios.
    subset_df["proportion_ever_severe"] = n_severe.where(n_severe >= 0) / n_all.where(n_all >= 0)
    return subset_df


# One row per site; the columns are filled in below by index alignment on siteid.
proportion_df = pd.DataFrame(index=SITES, data=[], columns=[])

all_df = _ever_severe_subset(df, "all")
male_df = _ever_severe_subset(df, "male")
female_df = _ever_severe_subset(df, "female")
other_df = _ever_severe_subset(df, "other")

proportion_df["proportion_ever_severe_among_all"] = all_df["proportion_ever_severe"]
proportion_df["proportion_ever_severe_among_male"] = male_df["proportion_ever_severe"]
proportion_df["proportion_ever_severe_among_female"] = female_df["proportion_ever_severe"]
proportion_df["proportion_ever_severe_among_other"] = other_df["proportion_ever_severe"]


# Move the site ids out of the index into an "index" column for melting.
proportion_df = proportion_df.reset_index()
proportion_df.head()

In [None]:
# Reshape to long form: one row per (site, proportion column), with the site
# column renamed from "index" to "siteid".
proportion_molten_df = (
    proportion_df
    .melt(id_vars=["index"])
    .rename(columns={"index": "siteid"})
)
proportion_molten_df.head()

In [None]:
# Largest proportions first; the category order below follows this row order.
proportion_molten_df = proportion_molten_df.sort_values(by="value", ascending=False)

CATEGORIES = proportion_molten_df["variable"].drop_duplicates().tolist()

# A category dropdown selection is built here, but the chart below does not
# attach it; the categories are shown side by side as columns instead.
category_dropdown = alt.binding_select(options=CATEGORIES)
category_selection = alt.selection_single(
    fields=["variable"],
    bind=category_dropdown,
    name="Category",
    init={"variable": "proportion_ever_severe_among_all"},
)

_proportion_title = {
    "text": ["Proportion Ever Severe by Site, Among Category"],
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
}

filtered_plot = alt.Chart(proportion_molten_df)

# One bar per site, one column of bars per category.
plot = (
    filtered_plot
    .mark_bar()
    .encode(
        x=alt.X("siteid:N", axis=alt.Axis(title="Site ID")),
        y=alt.Y("value:Q", axis=alt.Axis(title="Proportion Ever Severe")),
        color=alt.Color("variable:N", legend=None),
        column=alt.Column(
            "variable:N",
            sort=CATEGORIES,
            header=alt.Header(title=None),
        ),
    )
    .properties(title=_proportion_title, width=400)
)
plot = apply_theme(plot)
plot

In [None]:
# Distinct sex and race values, in order of first appearance in the demographics table.
SEX_VALUES = df["sex"].drop_duplicates().tolist()
RACE_VALUES = df["race"].drop_duplicates().tolist()

In [None]:
# Turn the sentinel codes -99 and -999 into NaN, one code at a time.
for _masked_code in (-99, -999):
    df = df.replace(_masked_code, np.nan)

In [None]:
# Total patient counts per (sex, race) over all sites, using the rows whose age_group is "all".
race_sex_df = df.loc[df["age_group"] == "all"]
# numeric_only=True keeps the sum to the count columns; without it, recent
# pandas versions concatenate string columns (e.g. siteid), which would then
# show up as extra "variable" rows after the melt below.
race_sex_df = race_sex_df.groupby(["sex", "race"]).sum(numeric_only=True).reset_index()
race_sex_df["num_patients_never_severe"] = race_sex_df["num_patients_all"] - race_sex_df["num_patients_ever_severe"]

# Long form: one row per (sex, race, count column), with short severity labels.
race_sex_molten_df = race_sex_df.melt(id_vars=["sex", "race"])
race_sex_molten_df["variable"] = race_sex_molten_df["variable"].replace({
    "num_patients_all": "All",
    "num_patients_ever_severe": "Ever",
    "num_patients_never_severe": "Never"
})
# Keep only the Ever / Never split; "All" is their sum.
race_sex_molten_df = race_sex_molten_df.loc[race_sex_molten_df["variable"] != "All"]
race_sex_molten_df.head()

In [None]:
# Column order: "all" first, then the remaining race values in the order they
# appear in RACE_VALUES. A list (rather than a set difference) keeps the order
# identical from run to run.
race_order = ["all"] + [race for race in RACE_VALUES if race != "all"]

# Ever / Never severe counts, faceted by race (columns) and sex (rows).
plot = alt.Chart(race_sex_molten_df).mark_bar().encode(
    y=alt.Y("value:Q", axis=alt.Axis(title="Count")),
    color=alt.Color("variable:N", legend=alt.Legend(title="Severity")),
    x=alt.X("variable:N", axis=None),
    column=alt.Column(
        "race:N",
        sort=race_order,
        header=alt.Header(labelOrient="bottom", title="Race", titleOrient="bottom")
    ),
    row=alt.Row(
        "sex:N",
        sort=["all", "male", "female", "other"],
        header=alt.Header(title="Sex")
    ),
    tooltip=[
        alt.Tooltip("variable", title="Severity"),
        # This tooltip shows the race field, so it is labelled "Race".
        alt.Tooltip("race", title="Race"),
        alt.Tooltip("sex", title="Sex"),
        alt.Tooltip("value", title="Count")
    ]
).properties(title={
    "text": ["Demographics"], 
    "dx": 50,
    "subtitle": get_visualization_subtitle(data_release=DATA_RELEASE),
    "subtitleColor": "gray",
    "anchor": "middle",
}, width=100, height=100)

plot = apply_theme(plot)

plot