In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
import datetime
import dateutil.parser
from os.path import join

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    get_visualization_subtitle,
    get_country_color_map,
)
from theme import apply_theme
from web import for_website

alt.data_transformers.disable_max_rows(); # Allow using rows more than 5000

In [None]:
# data_release='2021-04-27'

# Single source of truth for the time waves: maps the wave identifiers used in
# the source tables to the axis labels shown in the charts. Insertion order is
# chronological and is reused below as the x-axis sort order.
consistent_date = {
    '2020-Mar-Apr': "'20 Jan - '20 Apr",
    '2020-May-Jun': "'20 May - '20 Jun",
    '2020-Jul-Aug': "'20 Jul - '20 Aug",
    '2020-Sep-Oct': "'20 Sep - '20 Oct",
    '2020-Nov-2021-Jan': "'20 Nov - '21 Jan"
}

# Wave identifiers in chronological order (used as `value_vars` when melting
# the mortality table). Derived from the mapping so the two cannot drift apart.
date = list(consistent_date)

# Axis labels in chronological order (passed as `sort=` to the x encodings).
new_date = list(consistent_date.values())

# Categorical palette indexed by position in the chart-building functions.
colors = ['#E79F00', '#0072B2', '#D45E00', '#CB7AA7', '#029F73', '#57B4E9']

# Site identifiers and the colour assigned to each one (parallel lists).
sites = [
    'META',
    'APHP', 'FRBDX', 'ICSM', 'UKFR',
    'NWU',
    'BIDMC', 'MGB', 'UCLA', 'UMICH', 'UPENN', 'UPITT',
    'VA1', 'VA2', 'VA3', 'VA4', 'VA5',
]
site_colors = [
    'black',
    '#0072B2', '#0072B2', '#0072B2', '#0072B2',
    '#CB7AA7',
    '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00',
    '#D45E00', '#D45E00', '#D45E00', '#D45E00', '#D45E00',
]


# Demographics

In [None]:
def _demographic_line_chart(data, title, color_scale=None):
    """Build one demographic line chart with a text label at the end of each line.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to plot; the encodings read the ``date``, ``value`` and ``group``
        columns.
    title : str
        Chart title.
    color_scale : alt.Scale, optional
        Scale for the ``group`` colour encoding. When omitted, no ``scale``
        argument is passed to ``alt.Color`` at all.

    Returns
    -------
    alt.LayerChart
        The line layer with the group-name text layer on top.
    """
    color_options = {} if color_scale is None else {"scale": color_scale}

    lines = alt.Chart(
        data
    ).mark_line(
        point=True,
        size=3,
    ).encode(
        x=alt.X("date:N", title=None, axis=alt.Axis(labelAngle=-55, tickCount=5), sort=new_date),
        y=alt.Y('value:Q', axis=alt.Axis(format='.0%'), title="Percentage of Patients"),
        color=alt.Color("group:N", title=None, legend=None, **color_options),
    ).properties(
        title={
            "text": title,
            "anchor": "middle",
            "fontSize": 18
        },
        width=200, height=200
    )

    # The legend is disabled above; instead each line is labelled with its group
    # name next to its point in the last wave.
    labels = lines.mark_text(
        size=16, align='left', dx=25
    ).encode(
        x=alt.X('date:N', sort=new_date), text=alt.Text("group:N"),
    ).transform_filter(
        {"field": "date", "oneOf": ["'20 Nov - '21 Jan"]}
    )

    return lines + labels


def FUNC_DEMOGRAPHICS_BY_WAVE_WITH_LINES(_data):
    """Build the age, sex and race demographic line charts across time waves.

    Parameters
    ----------
    _data : pandas.DataFrame
        Long-format table with ``date``, ``group`` and ``value`` columns.
        Every ``date`` value must be one of the wave keys listed in
        ``wave_labels`` below; any other value raises ``KeyError``.
        The input frame is not modified.

    Returns
    -------
    tuple
        ``(age_chart, sex_chart, race_chart)``, each a layered Altair chart.
        Rows whose ``group`` is not in the corresponding category list are
        left out of that chart.
    """
    # Wave keys as they appear in the demographic tables -> axis labels.
    wave_labels = {
        '2020-03': "'20 Jan - '20 Apr",
        '2020-05': "'20 May - '20 Jun",
        '2020-07': "'20 Jul - '20 Aug",
        '2020-09': "'20 Sep - '20 Oct",
        '2020-11': "'20 Nov - '21 Jan"
    }

    # Categories shown in each of the three charts.
    AGE_GROUPS = ['18-25', '26-49', '50-69', '70-79', '80+']
    SEX_GROUPS = ['Female', 'Male']
    RACE_GROUPS = ['White', 'Black', 'Asian', 'Hispanic and Other']

    d = _data.copy()
    d['date'] = d['date'].apply(lambda wave: wave_labels[wave])

    age_p_line = _demographic_line_chart(
        d[d.group.isin(AGE_GROUPS)],
        "Demographics By Age",
        color_scale=alt.Scale(),
    )
    sex_p_line = _demographic_line_chart(
        d[d.group.isin(SEX_GROUPS)],
        "Demographics By Sex",
        color_scale=alt.Scale(range=['#DC3A11', '#3366CC']),
    )
    race_p_line = _demographic_line_chart(
        d[d.group.isin(RACE_GROUPS)],
        "Demographics By Race",
    )

    return (age_p_line, sex_p_line, race_p_line)






In [None]:
def plot_outcome(country=None):
    """Build the patient-count, mortality-rate and Charlson-index charts for one site.

    Reads the ``table.deceasedRate``, ``table.samplesize`` and ``table.cls``
    CSV files from ``../data/Phase2.1SurvivalRSummariesPublic/ToShare``,
    upper-cases their ``siteid`` column, converts the wave identifiers to the
    shared axis labels and keeps only the rows of the requested site.

    Relies on the module-level ``date``, ``consistent_date``, ``new_date`` and
    ``colors`` definitions.

    Parameters
    ----------
    country : str
        Site id to plot, compared against the upper-cased ``siteid`` column
        (for example ``"META-USA"``). With the default ``None`` no row matches
        and the charts are empty.

    Returns
    -------
    tuple
        ``(sample, outcome, charlson)``: a bar chart of ``N`` per wave, a line
        chart of the mortality rate per wave, and a bar chart of
        ``charlson_score`` per wave.

    Raises
    ------
    KeyError
        If a table contains a wave identifier missing from ``consistent_date``
        or lacks the ``Unnamed: 0`` column that is dropped after loading.
    """
    width = 200
    height = 200
    bar_size = 28
    hollow_point = alt.OverlayMarkDef(filled=False, fill='white', strokeWidth=2)

    # NOTE(review): plot_outcome_demographic reads its tables from a directory
    # spelled "toShare"; confirm which spelling exists on case-sensitive
    # filesystems.
    data_dir = join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare")

    def load_table(filename):
        """Read one summary table, drop the saved index column, upper-case site ids."""
        table = pd.read_csv(join(data_dir, filename)).drop(columns=["Unnamed: 0"])
        table['siteid'] = table['siteid'].str.upper()
        return table

    def to_wave_label(wave):
        """Translate a wave identifier into its axis label."""
        return consistent_date[wave]

    def site_chart(table):
        """Start a chart on ``table`` restricted to the requested site."""
        return alt.Chart(table).transform_filter(
            {'field': 'siteid', 'oneOf': [country]}
        )

    def wave_axis():
        """X encoding shared by the three charts (waves in chronological order)."""
        return alt.X("date:N", title=None, axis=alt.Axis(labelAngle=-55, tickCount=5), sort=new_date)

    # Mortality table is wide (one column per wave); reshape it to long format.
    ddf = pd.melt(
        load_table("table.deceasedRate.toShare.csv"),
        id_vars=['siteid'], value_vars=date, var_name='date', value_name='value',
    )
    sdf = load_table("table.samplesize.toShare.csv")
    cdf = load_table("table.cls.toShare.csv")

    ddf['date'] = ddf['date'].apply(to_wave_label)
    sdf['date'] = sdf['month'].apply(to_wave_label)
    cdf['date'] = cdf['calendar_month'].apply(to_wave_label)

    # Keep only the requested site; copy so the column added below does not
    # write into a slice of the full table.
    ddf = ddf[ddf.siteid == country].copy()
    sdf = sdf[sdf.siteid == country].copy()
    cdf = cdf[cdf.siteid == country].copy()

    # Per-wave sum of this site's values, attached to every row of that wave.
    ddf['reference'] = ddf.groupby('date')['value'].transform('sum')

    sample = site_chart(sdf).mark_bar(
        size=bar_size
    ).encode(
        x=wave_axis(),
        y=alt.Y("N:Q", title=None, scale=alt.Scale(clamp=True), axis=alt.Axis(titleX=-50)),
        color=alt.value(colors[3])
    ).properties(
        title={
            "text": 'Number of Admitted Patients',
            "anchor": 'middle',
            "fontSize": 18,
        },
        width=width,
        height=height
    )

    # NOTE(review): the colour used to be chosen with
    # `colors[2] if c == 'Death Rate' else colors[4]` where `c` was a list, so
    # the comparison was always False and colors[4] was always rendered. The
    # dead branch is removed and the rendered colour kept; switch to colors[2]
    # if that was the intended mortality colour.
    outcome = site_chart(ddf).mark_line(
        point=hollow_point,
        size=2
    ).encode(
        x=wave_axis(),
        y=alt.Y("value:Q", title=None, axis=alt.Axis(format=".0%")),
        color=alt.value(colors[4])
    ).properties(
        title={
            "text": 'Mortality Rate',
            "anchor": 'middle',
            "fontSize": 18,
            "dy": -18
        },
        width=width,
        height=height
    )

    charlson = site_chart(cdf).mark_bar(
        size=bar_size
    ).encode(
        x=wave_axis(),
        y=alt.Y("charlson_score:Q", title=None, scale=alt.Scale(clamp=True), axis=alt.Axis(titleX=-50)),
        color=alt.value(colors[5])
    ).properties(
        title={
            "text": 'Charlson Comorbidity Index',
            "anchor": 'middle',
            "fontSize": 18
        },
        width=width,
        height=height
    )

    return (sample, outcome, charlson)


In [None]:
def plot_outcome_demographic(country=None):
    """Build the one-row summary figure (counts, demographics, outcome) for one site.

    Reads the age and sex tables from
    ``../data/Phase2.1SurvivalRSummariesPublic/toShare``, keeps the rows of the
    requested site, combines them into one long table with ``group``, ``date``
    and ``value`` columns, and lays the resulting demographic charts out next to
    the charts returned by ``plot_outcome``.

    The sex table's ``Freq`` column is treated as the male share; the female
    share is derived as ``1 - Freq``.

    Parameters
    ----------
    country : str
        Site id to plot, compared against the upper-cased ``siteid`` column
        (for example ``"META-USA"``). A leading ``"META-"`` is removed for the
        figure title. A string is required: the default ``None`` fails when the
        title is built.

    Returns
    -------
    alt.HConcatChart
        Charts in order: number of patients, age, sex, Charlson index,
        mortality rate.

    Raises
    ------
    KeyError
        If a ``group`` value has no entry in the display-label table.
    """
    # NOTE(review): plot_outcome reads its tables from a directory spelled
    # "ToShare"; confirm which spelling exists on case-sensitive filesystems.
    data_dir = join("..", "data", "Phase2.1SurvivalRSummariesPublic", "toShare")

    # Source column names -> the names the demographics chart builder expects.
    column_names = {
        'age_group_new': 'group',
        'calendar_date': 'date',
        'Freq': 'value',
    }

    # Raw group codes -> display labels.
    group_labels = {
        '00to25': '0-25',
        '18to25': '18-25',
        '26to49': '26-49',
        '50to69': '50-69',
        '70to79': '70-79',
        '80plus': '80+',
        'female': 'Female',
        'male': 'Male',
        'white': 'White',
        'black': 'Black',
        'Black': 'Black',
        'Asian': 'Asian',
        'Hispanic and Other': 'Hispanic and Other',
        'White': 'White',
        'other': 'Other',
        'other_age': 'Other',
        'other_sex': 'Other',
        'other_race': 'Other'
    }

    def load_site_table(filename):
        """Read one table, keep the requested site's rows, normalise column names."""
        table = pd.read_csv(join(data_dir, filename))
        table['siteid'] = table['siteid'].str.upper()
        return table[table['siteid'] == country].rename(columns=column_names)

    age_df = load_site_table("table.age.noVA.toShare.csv")

    male_df = load_site_table("table.sex.noVA.toShare.csv").assign(group='male')
    female_df = male_df.assign(group='female', value=1 - male_df['value'])

    # DataFrame.append was removed in pandas 2.0; pd.concat keeps the same row
    # order (age, male, female) and the original indexes.
    df = pd.concat([age_df, male_df, female_df])
    df['group'] = df['group'].apply(lambda code: group_labels[code])

    # The race chart is built by the helper but is not part of this figure.
    (age, sex, _race) = FUNC_DEMOGRAPHICS_BY_WAVE_WITH_LINES(df)
    (sample, outcome, charlson) = plot_outcome(country=country)

    plot = alt.hconcat(
        alt.hconcat(alt.hconcat(sample, age, spacing=102), sex, spacing=40).resolve_scale(color='independent', y='independent'),
        alt.hconcat(alt.hconcat(charlson), outcome, spacing=100).resolve_scale(color='independent', y='independent'),
        spacing=40
    )
    plot = plot.properties(
        title={
            "text": [
                country.replace("META-", "")
            ],
            "dx": 120,
        }
    )

    return plot

# Sites rendered in the final figure, one row per site, top to bottom.
countrylist = ["META-USA", "META-FRANCE", "META-GERMANY", "META-ITALY", "META-SPAIN"]

plot = alt.vconcat(
    *[plot_outcome_demographic(country=country) for country in countrylist],
    spacing=30
).resolve_scale(color='independent')

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    axis_label_font_size=14,
    header_label_font_size=16,
    point_size=100
)

# NOTE(review): Altair's own `Chart.save` spells this option `scale_factor`;
# confirm that altair_saver honours the `scalefactor` keyword used here.
save(plot, join("..", "result", "final-outcome-demographic-country.png"), scalefactor=8.0)

# Keep the chart as the last expression so the notebook displays it; a bare
# expression before `save(...)` is never rendered.
plot

