In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    read_loinc_df,
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    read_full_demographics_df,
    get_visualization_subtitle,
    apply_theme,
)
from web import for_website

# Lift Altair's default 5000-row cap so charts can be built from larger DataFrames.
alt.data_transformers.disable_max_rows()

In [None]:
# Load the demographics table through the project helper. Later cells read its
# siteid, race, age_group, sex, num_patients_all and num_patients_ever_severe columns.
df = read_full_demographics_df()

In [None]:
# Bare expression: render the loaded frame as this cell's output for a quick visual check.
df

In [None]:
# Category values for the demographics breakdowns.

# Distinct site ids, taken from the loaded data.
ALL_SITES = df["siteid"].unique().tolist()

# Severity levels reported for each site.
ALL_SEVERITY_VALUES = [
    "all",
    "severe",
]

# Age buckets, listed from youngest to oldest.
ALL_AGE_GROUPS = [
    "0to2",
    "3to5",
    "6to11",
    "12to17",
    "18to25",
    "26to49",
    "50to69",
    "70to79",
    "80plus",
]

# Sex categories.
ALL_SEXES = [
    "male",
    "female",
    "other",
]

In [None]:
# Compute the percentage of male / female / other patients by age group and severity.
#
# Output: `age_sex_df`, one row per (siteid, severity, age_group, sex) with a
# `percentage` column, sorted by those four keys, with missing percentages set to 0.

# Maps each severity label to the count column it is computed from.
_COUNT_COLUMN_BY_SEVERITY = {
    "all": "num_patients_all",
    "severe": "num_patients_ever_severe",
}


def _sex_count(age_group_df: pd.DataFrame, sex: str, count_column: str) -> float:
    """Return the patient count of one sex inside a single site/age-group slice.

    Parameters
    ----------
    age_group_df : rows of one site restricted to one age group.
    sex : value to look up in the ``sex`` column.
    count_column : name of the count column to read.

    Returns
    -------
    The first matching row's count as a float; ``0.0`` when the slice has no
    row for this sex. A negative count is replaced by ``0.0`` for ``"other"``
    and by ``NaN`` for every other sex.
    """
    counts = age_group_df.loc[age_group_df["sex"] == sex, count_column]
    if counts.empty:
        # A sex that is absent from the data is counted as zero patients.
        return 0.0

    count = float(counts.iloc[0])
    if count < 0:
        # NOTE(review): negative counts look like sentinel values for masked
        # data -- confirm with the data owners. A NaN here makes the whole
        # age group's total (and therefore every percentage) NaN.
        return 0.0 if sex == "other" else np.nan
    return count


def compute_age_sex_percentages(
    demographics_df: pd.DataFrame, age_groups: list, sexes: list
) -> pd.DataFrame:
    """Build the per-site sex distribution for every age group and severity.

    Only rows with ``race == "all"`` are used. For each site, age group and
    severity, the share of each sex is ``100 * count / total``, where
    ``total`` is the sum of the counts over ``sexes``.

    Parameters
    ----------
    demographics_df : frame with ``siteid``, ``race``, ``age_group``, ``sex``,
        ``num_patients_all`` and ``num_patients_ever_severe`` columns.
    age_groups : age-group labels to report, whether or not they occur.
    sexes : sex labels to report, whether or not they occur.

    Returns
    -------
    DataFrame with columns ``siteid``, ``sex``, ``age_group``, ``severity``
    and ``percentage``, sorted by siteid, severity, age_group and sex.
    Percentages that cannot be computed (zero or NaN total) are 0.
    """
    records = []
    race_all_df = demographics_df.loc[demographics_df["race"] == "all"]

    # Group by the column *name*, not a one-element list: a list makes newer
    # pandas yield 1-tuples as keys, which would corrupt the `siteid` values.
    for site_id, site_df in race_all_df.groupby("siteid"):
        for age_group in age_groups:
            age_group_df = site_df.loc[site_df["age_group"] == age_group]

            for severity, count_column in _COUNT_COLUMN_BY_SEVERITY.items():
                counts = {
                    sex: _sex_count(age_group_df, sex, count_column)
                    for sex in sexes
                }
                total = sum(counts.values())

                for sex, count in counts.items():
                    records.append({
                        "siteid": site_id,
                        "sex": sex,
                        "age_group": age_group,
                        "severity": severity,
                        # A zero or NaN total has no meaningful share; keep it
                        # NaN here and let the final fillna turn it into 0.
                        "percentage": 100 * count / total if total > 0 else np.nan,
                    })

    # Build the frame once from the collected records instead of appending
    # row by row (DataFrame.append no longer exists in pandas 2.x).
    return (
        pd.DataFrame(
            records,
            columns=["siteid", "sex", "age_group", "severity", "percentage"],
        )
        .sort_values(by=["siteid", "severity", "age_group", "sex"])
        .fillna(value=0)
    )


age_sex_df = compute_age_sex_percentages(df, ALL_AGE_GROUPS, ALL_SEXES)

In [None]:
# Interactive bar chart of the sex distribution per age group, one facet row
# per severity, filtered to the site picked in a dropdown.
SITES = age_sex_df["siteid"].unique().tolist()

site_dropdown = alt.binding_select(options=SITES)
site_selection = alt.selection_single(
    name="siteid",
    fields=["siteid"],
    bind=site_dropdown,
    init={"siteid": "APHP"},  # site shown before the user picks one
)

# Facet columns: one per age group, in the order listed here.
age_group_order = [
    "0to2", "3to5", "6to11", "12to17", "18to25",
    "26to49", "50to69", "70to79", "80plus",
]
age_group_column = alt.Column(
    "age_group:O",
    sort=age_group_order,
    header=alt.Header(labelOrient="bottom", title="Age group", titleOrient="bottom"),
)

# Facet rows: "severe" above "all".
severity_row = alt.Row("severity:N", sort=["severe", "all"])

hover_fields = [
    alt.Tooltip(field)
    for field in ("severity", "age_group", "sex", "percentage")
]

# Last expression of the cell, so the notebook renders the chart.
(
    alt.Chart(age_sex_df)
    .add_selection(site_selection)
    .transform_filter(site_selection)
    .mark_bar()
    .encode(
        y="percentage:Q",
        color="sex:N",
        x="sex:N",
        column=age_group_column,
        row=severity_row,
        tooltip=hover_fields,
    )
)