In [None]:
# Reload edited modules automatically before each cell runs, so changes to
# utils_1_1 are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    read_loinc_df,
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    read_full_demographics_df,
    get_visualization_subtitle,
    apply_theme,
)
from web import for_website

alt.data_transformers.disable_max_rows() # Lift Altair's default limit so charts can use more than 5000 rows

In [None]:
# Load the demographics table. Later cells rely on these columns: siteid, sex,
# age_group, race, num_patients_all and num_patients_ever_severe.
df = read_full_demographics_df()

In [None]:
# Show the loaded frame as a quick visual sanity check.
df

In [None]:
# Keep only rows in which both patient counts are non-negative.
# NOTE(review): negative values are presumably masked/placeholder counts — confirm.
has_valid_counts = (df["num_patients_all"] >= 0) & (df["num_patients_ever_severe"] >= 0)
df = df.loc[has_valid_counts]

In [None]:
# Compute percentage of male / female by age group and severity
AGE_SEX_COLUMNS = ["siteid", "sex", "age_group", "severity", "percentage"]

# Severity label -> column of `df` holding the matching patient count.
COUNT_COLUMN_BY_SEVERITY = {
    "all": "num_patients_all",
    "severe": "num_patients_ever_severe",
}


def _first_count(group_df, sex, count_column):
    """Return `count_column` of the first `sex` row in `group_df`, or NaN if there is none."""
    matches = group_df.loc[group_df["sex"] == sex, count_column]
    return matches.iloc[0] if len(matches) else np.nan


def compute_age_sex_percentages(demographics_df):
    """Build the per-site male/female share for every age group and severity.

    Only rows with race == "all" are used. For each (siteid, age_group) pair and
    each severity ("all", "severe"), the male and female counts are expressed as
    a percentage of their sum.

    Parameters
    ----------
    demographics_df : pandas.DataFrame
        Must contain the columns siteid, age_group, sex, race,
        num_patients_all and num_patients_ever_severe.

    Returns
    -------
    pandas.DataFrame
        Columns siteid, sex, age_group, severity, percentage. Rows whose
        percentage is not strictly positive (including NaN, i.e. a missing sex
        row or a zero total) and rows for the aggregate age group "all" are
        dropped.
    """
    all_races_df = demographics_df.loc[demographics_df["race"] == "all"]

    # Collect plain dicts and build the frame once: DataFrame.append was removed
    # in pandas 2.0, and growing a frame row by row is quadratic anyway.
    records = []
    for (site_id, age_group), group_df in all_races_df.groupby(["siteid", "age_group"]):
        for severity, count_column in COUNT_COLUMN_BY_SEVERITY.items():
            counts = {
                sex: _first_count(group_df, sex, count_column)
                for sex in ("male", "female")
            }
            total = counts["male"] + counts["female"]
            for sex, count in counts.items():
                # A missing sex row (NaN total) or an empty group (zero total)
                # has no meaningful share; NaN rows are filtered out below.
                if pd.isna(total) or total == 0:
                    percentage = np.nan
                else:
                    percentage = 100 * count / total
                records.append({
                    "siteid": site_id,
                    "age_group": age_group,
                    "sex": sex,
                    "percentage": percentage,
                    "severity": severity,
                })

    result = pd.DataFrame(records, columns=AGE_SEX_COLUMNS)
    result = result.loc[result["percentage"] > 0]
    return result.loc[result["age_group"] != "all"]


age_sex_df = compute_age_sex_percentages(df)

In [None]:
# Sites present in the computed percentages; they populate the dropdown below.
SITES = age_sex_df["siteid"].unique().tolist()

# Dropdown-bound single selection on siteid; the chart is filtered to the chosen site.
site_dropdown = alt.binding_select(options=SITES)
site_selection = alt.selection_single(
    fields=["siteid"],
    bind=site_dropdown,
    name="siteid",
    init={"siteid": "APHP"},
)

# Explicit age order for the column facet instead of the default sort.
age_group_order = ["0to2","3to5","6to11","12to17","18to25","26to49","50to69","70to79", "80plus"]

age_group_facet = alt.Column(
    "age_group:O",
    sort=age_group_order,
    header=alt.Header(labelOrient="bottom", title="Age group", titleOrient="bottom"),
)
severity_facet = alt.Row("severity:N", sort=["severe", "all"])

# One bar per sex, faceted by age group (columns) and severity (rows).
site_bars = (
    alt.Chart(age_sex_df)
    .add_selection(site_selection)
    .transform_filter(site_selection)
    .mark_bar()
)
site_bars.encode(
    x=alt.X("sex:N"),
    y=alt.Y("percentage:Q"),
    color=alt.Color("sex:N"),
    column=age_group_facet,
    row=severity_facet,
)