In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
import datetime
import dateutil.parser
from os.path import join

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    get_visualization_subtitle,
    get_country_color_map,
)
from theme import apply_theme
from web import for_website

alt.data_transformers.disable_max_rows(); # Allow using rows more than 5000

In [None]:
data_release = '2021-04-27'

# Raw period keys (as used by the CSV files) -> labels shown on the charts.
consistent_date = {
    '2020-Mar-Apr': "'20 Mar - '20 Apr",
    '2020-May-Jun': "'20 May - '20 Jun",
    '2020-Jul-Aug': "'20 Jul - '20 Aug",
    '2020-Sep-Oct': "'20 Sep - '20 Oct",
    '2020-Nov-2021-Jan': "'20 Nov - '21 Jan",
}

# Raw keys and display labels, both in the dict's (chronological) insertion order.
date = list(consistent_date.keys())
new_date = list(consistent_date.values())

colors = ['#E79F00', '#0072B2', '#D45E00', '#CB7AA7', '#029F73', '#57B4E9']

sites = [
    'META',
    'APHP', 'FRBDX', 'ICSM', 'UKFR',
    'NWU',
    'BIDMC', 'MGB', 'UCLA', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5',
]
# One color per entry of `sites`, in the same order.
site_colors = ['black'] + ['#0072B2'] * 4 + ['#CB7AA7'] + ['#D45E00'] * 11

# All summary tables live in the same folder.
_to_share = join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare")
df = pd.read_csv(join(_to_share, "table.deceasedRate.toShare.csv"))
df2 = pd.read_csv(join(_to_share, "table.severeRate.toShare.csv"))
sdf = pd.read_csv(join(_to_share, "table.summary.toShare.csv"))
cs = pd.read_csv(join(_to_share, "table.cls.toShare.csv"))
st = pd.read_csv(join(_to_share, "table.stay.toShare.csv"))
print(st.head())

# Drop the unnamed leading column from the four tables used below.
df, df2, sdf, cs = (_t.drop(columns=["Unnamed: 0"]) for _t in (df, df2, sdf, cs))

# More readable values: upper-case site ids.
for _table in (df, df2, sdf, cs):
    _table['siteid'] = _table['siteid'].map(lambda site: site.upper())

print(df.siteid.unique().tolist())
print(df2.siteid.unique().tolist())

# Wide -> long for the two rate tables: one row per (site, period).
df, df2 = (
    pd.melt(_t, id_vars=['siteid'], value_vars=date, var_name='date', value_name='value')
    for _t in (df, df2)
)
sdf['date'] = sdf.month
cs['date'] = cs.calendar_month

print(df.date.unique().tolist())
# Replace raw period keys with display labels (an unknown key raises KeyError).
for _table in (df, df2, sdf, cs):
    _table['date'] = _table['date'].map(lambda key: consistent_date[key])

# Add a reference (META): every row carries the META value of its own period.
for _table in (df, df2):
    _meta_rows = _table[_table['siteid'] == 'META']
    _table['reference'] = _table['date'].map(
        lambda period: _meta_rows.loc[_meta_rows['date'] == period, 'value'].sum()
    )

cs.head()

# All Sites

In [None]:
# Panel geometry and mark styling shared by every row of the figure.
width, height, height_Bottom = 80, 100, 60
point = alt.OverlayMarkDef(filled=False, fill='white', strokeWidth=2)
size = 13

# One faceted row per rate; the second row is stacked under the first.
for c in ['Death Rate', 'Severity Rate']:
    is_severity = c == 'Severity Rate'

    d = df2.copy() if is_severity else df.copy()
    # Only the first row shows the site names in its facet header.
    header = alt.Header(title=None, labels=False) if is_severity else alt.Header(title=None)
    x = alt.Axis(ticks=False, labels=False, domain=False, title=None)

    plot = (
        alt.Chart(d)
        .mark_line(point=point, size=2)
        .encode(
            x=alt.X("date:N", title=None, axis=x, sort=new_date),
            y=alt.Y("value:Q", title=c, axis=alt.Axis(format=".0%", titleX=-50)),
            color=alt.Color("siteid:N", scale=alt.Scale(domain=sites, range=site_colors), legend=None),
        )
        .properties(width=width, height=height)
    )

    # Same line drawn from the `reference` (META) column, in light gray.
    ref = plot.encode(
        x=alt.X("date:N", title=None, axis=x, sort=new_date),
        y=alt.Y("reference:Q", title=c, axis=alt.Axis(format=".0%", titleX=-50)),
        color=alt.Color("siteid:N", scale=alt.Scale(domain=sites, range=['#D4D4D4']), legend=None),
    )

    layered = alt.layer(ref, plot).resolve_scale(color='independent')
    plot = layered.facet(column=alt.Column("siteid:N", header=header, sort=sites))

    if is_severity:
        # `res` holds the row built in the previous iteration.
        plot = alt.vconcat(res, plot).resolve_scale(y='independent')

    res = plot


# Charlson Score: one bar panel per site, facet labels hidden.
c = cs.copy()

charlson = (
    alt.Chart(c)
    .mark_bar(size=size)
    .encode(
        x=alt.X(
            "date:N",
            title=None,
            axis=alt.Axis(ticks=False, labels=False, domain=False, title=None),
            sort=new_date,
        ),
        y=alt.Y("charlson_score:Q", title='Charlson', scale=alt.Scale(clamp=True), axis=alt.Axis(titleX=-50)),
        color=alt.Color("siteid:N", scale=alt.Scale(domain=sites, range=site_colors), legend=None),
    )
    .properties(width=width, height=height_Bottom)
    .facet(column=alt.Column("siteid:N", header=alt.Header(title=None, labels=False), sort=sites))
)

# Sample Size
s = sdf.copy()

# Build a synthetic META row per period by summing the numeric columns over all sites.
# `numeric_only=True` is spelled out because the default changed in pandas 2.0;
# without it the string columns would be summed as well.
ms = s.groupby('date').sum(numeric_only=True).reset_index()
ms['siteid'] = 'META'

# `DataFrame.append` was removed in pandas 2.0. `pd.concat` gives the same result:
# META rows first, then the per-site rows.
s = pd.concat([ms, s])

sample = alt.Chart(
        s
    ).mark_bar(
        size=size
    ).encode(
        x=alt.X("date:N", title=None, axis=alt.Axis(labelAngle=-55, tickCount=5), sort=new_date),
        y=alt.Y("N:Q", title='# Patients', scale=alt.Scale(clamp=True), axis=alt.Axis(titleX=-50)),
        color=alt.Color("siteid:N", scale=alt.Scale(domain=sites, range=site_colors), legend=None)
    ).properties(
        width=width,
        height=height_Bottom
    ).facet(
        column=alt.Column("siteid:N", header=alt.Header(title=None, labels=False), sort=sites)
    )
# Stack the rate rows, the Charlson row and the sample-size row; each keeps its own y scale.
plot = alt.vconcat(plot, charlson).resolve_scale(y='independent')
plot = alt.vconcat(plot, sample).resolve_scale(y='independent')

figure_title = {
    "text": ["Event Summary"],
    "dx": 60,
    "subtitle": [
        'Gray Trends Represent Meta-Analysis Results',
        get_visualization_subtitle(data_release=data_release, with_num_sites=False),
    ],
    "subtitleColor": "gray",
}
plot = plot.properties(title=figure_title)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='bottom',
    axis_label_font_size=14,
    header_label_font_size=16,
    point_size=70,
)

plot

# Only "META"

In [None]:
# Square panels for the META-only figure.
width = height = 200
size = 30
point = alt.OverlayMarkDef(filled=False, fill='white', strokeWidth=2)

# One META line panel per rate; the second panel is placed to the right of the first.
for c in ['Death Rate', 'Severity Rate']:
    is_severity = c == 'Severity Rate'

    d = df2.copy() if is_severity else df.copy()
    header = alt.Header(title=None, labels=False) if is_severity else alt.Header(title=None)
    x = (
        alt.Axis(labelAngle=-55, tickCount=5)
        if is_severity
        else alt.Axis(ticks=False, labels=False, domain=False, title=None)
    )

    plot = (
        alt.Chart(d)
        .transform_filter({'field': 'siteid', 'oneOf': ['META']})
        .mark_line(point=point, size=2)
        .encode(
            x=alt.X("date:N", title=None, axis=alt.Axis(labelAngle=-55, tickCount=5), sort=new_date),
            y=alt.Y("value:Q", title=None, axis=alt.Axis(format=".0%")),
            color=alt.value(colors[4] if is_severity else colors[2]),
        )
        .properties(title={"text": c, "anchor": 'middle'}, width=width, height=height)
    )

    if is_severity:
        # `res` holds the panel built in the previous iteration.
        plot = alt.hconcat(res, plot, spacing=30).resolve_scale(y='independent')

    res = plot

    
# Charlson Score (META rows only; APHP and FRBDX rows are removed first).
c = cs.copy()
c = c[~((c.siteid == 'APHP') | (c.siteid == 'FRBDX'))]

charlson = (
    alt.Chart(c)
    .transform_filter({'field': 'siteid', 'oneOf': ['META']})
    .mark_bar(size=size)
    .encode(
        x=alt.X("date:N", title=None, axis=alt.Axis(labelAngle=-55, tickCount=5), sort=new_date),
        y=alt.Y("charlson_score:Q", title=None, scale=alt.Scale(clamp=True), axis=alt.Axis(titleX=-50)),
        color=alt.value(colors[5]),
    )
    .properties(
        title={"text": 'Charlson Score', "anchor": 'middle'},
        width=width,
        height=height,
    )
)

# Sample Size
s = sdf.copy()

# APHP and FRBDX are left out of the META total.
s = s[(s.siteid != 'APHP') & (s.siteid != 'FRBDX')]

# Build a synthetic META row per period by summing the numeric columns over the remaining sites.
# `numeric_only=True` is spelled out because the default changed in pandas 2.0;
# without it the string columns would be summed as well.
ms = s.groupby('date').sum(numeric_only=True).reset_index()
ms['siteid'] = 'META'

# `DataFrame.append` was removed in pandas 2.0. `pd.concat` gives the same result:
# META rows first, then the per-site rows.
s = pd.concat([ms, s])

sample = alt.Chart(
        s
    ).transform_filter(
        {'field': 'siteid', 'oneOf': ['META']}
    ).mark_bar(
        size=size
    ).encode(
        x=alt.X("date:N", title=None, axis=alt.Axis(labelAngle=-55, tickCount=5), sort=new_date),
        y=alt.Y("N:Q", title=None, scale=alt.Scale(clamp=True), axis=alt.Axis(titleX=-50)),
        color=alt.value(colors[3])
    ).properties(
        title={
            "text": 'Sample Size',
            "anchor": 'middle'
        },
        width=width,
        height=height
    )

# Append the Charlson and sample-size panels to the right of the rate panels.
plot = alt.hconcat(plot, charlson, spacing=30).resolve_scale(y='independent')
plot = alt.hconcat(plot, sample, spacing=30).resolve_scale(y='independent')

figure_title = {
    "text": ["Meta Analysis Of Event Summary"],
    "dx": 30,
    "subtitle": [
        get_visualization_subtitle(data_release=data_release, with_num_sites=False),
    ],
    "subtitleColor": "gray",
}
plot = plot.properties(title=figure_title)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='bottom',
    axis_label_font_size=14,
    header_label_font_size=16,
    point_size=100,
)

plot

# New Figure Four Sub-Figures Combined w/o CI

In [None]:
# data_release='2021-04-27'

# Raw period keys (as used by the CSV files) -> labels shown on the charts.
consistent_date = {
    '2020-Mar-Apr': "'20 Mar - '20 Apr",
    '2020-May-Jun': "'20 May - '20 Jun",
    '2020-Jul-Aug': "'20 Jul - '20 Aug",
    '2020-Sep-Oct': "'20 Sep - '20 Oct",
    '2020-Nov-2021-Jan': "'20 Nov - '21 Jan",
}

# Raw keys and display labels, both in the dict's (chronological) insertion order.
date = list(consistent_date.keys())
new_date = list(consistent_date.values())

colors = ['#E79F00', '#0072B2', '#D45E00', '#CB7AA7', '#029F73', '#57B4E9']

sites = [
    'META',
    'APHP', 'FRBDX', 'ICSM', 'UKFR',
    'NWU',
    'BIDMC', 'MGB', 'UCLA', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5',
]
# One color per entry of `sites`, in the same order.
site_colors = ['black'] + ['#0072B2'] * 4 + ['#CB7AA7'] + ['#D45E00'] * 11

# All summary tables live in the same folder.
_to_share = join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare")
df = pd.read_csv(join(_to_share, "table.deceasedRate.toShare.csv"))
df2 = pd.read_csv(join(_to_share, "table.severeRate.toShare.csv"))
sdf = pd.read_csv(join(_to_share, "table.summary.toShare.csv"))
cs = pd.read_csv(join(_to_share, "table.cls.toShare.csv"))
st = pd.read_csv(join(_to_share, "table.stay.toShare.csv"))
print(st.head())

# Drop the unnamed leading column from the four tables used below.
df, df2, sdf, cs = (_t.drop(columns=["Unnamed: 0"]) for _t in (df, df2, sdf, cs))

# More readable values: upper-case site ids.
for _table in (df, df2, sdf, cs):
    _table['siteid'] = _table['siteid'].map(lambda site: site.upper())

print(df.siteid.unique().tolist())
print(df2.siteid.unique().tolist())

# Wide -> long for the two rate tables: one row per (site, period).
df, df2 = (
    pd.melt(_t, id_vars=['siteid'], value_vars=date, var_name='date', value_name='value')
    for _t in (df, df2)
)
sdf['date'] = sdf.month
cs['date'] = cs.calendar_month

print(df.date.unique().tolist())
# Replace raw period keys with display labels (an unknown key raises KeyError).
for _table in (df, df2, sdf, cs):
    _table['date'] = _table['date'].map(lambda key: consistent_date[key])

# Add a reference (META): every row carries the META value of its own period.
for _table in (df, df2):
    _meta_rows = _table[_table['siteid'] == 'META']
    _table['reference'] = _table['date'].map(
        lambda period: _meta_rows.loc[_meta_rows['date'] == period, 'value'].sum()
    )

cs.head()

width, height, size = 180, 200, 30
point = alt.OverlayMarkDef(filled=False, fill='white', strokeWidth=2)

# Only the death-rate table is plotted in this panel.
c = 'Death Rate'
d = df.copy()
header = alt.Header(title=None)
x = alt.Axis(ticks=False, labels=False, domain=False, title=None)

# META line with a fixed 0-0.3 y domain, drawn in black.
plot = (
    alt.Chart(d)
    .transform_filter({'field': 'siteid', 'oneOf': ['META']})
    .mark_line(point=point, size=2)
    .encode(
        x=alt.X("date:N", title=None, axis=alt.Axis(labelAngle=-55, tickCount=5), sort=new_date),
        y=alt.Y("value:Q", title=None, scale=alt.Scale(domain=[0, 0.3])),
        color=alt.value('black'),
    )
    .properties(width=width, height=height)
)

# `res` keeps the untitled version of the panel.
res = plot

plot = plot.properties(
    title={
        "text": ["Overall Mortality Rate"],
        "fontSize": 18,
        "anchor": "middle",
        "subtitleColor": "gray",
    }
)

data_release = '2021-04-29'

# Raw period keys (as used by the CSV file) -> labels shown on the charts.
consistent_date = {
    '2020-Mar-Apr': "'20 Mar - '20 Apr",
    '2020-May-Jun': "'20 May - '20 Jun",
    '2020-Jul-Aug': "'20 Jul - '20 Aug",
    '2020-Sep-Oct': "'20 Sep - '20 Oct",
    '2020-Nov-2021-Jan': "'20 Nov - '21 Jan",
}

# From here on `date` holds the display labels, in chronological order.
date = list(consistent_date.values())

# NOTE(review): `sites` has 16 entries but `site_colors` has 12; neither is used by
# the active code below (only by a commented-out facet) — confirm before reusing them.
sites = ['META', 'APHP', 'FRBDX', 'ICSM', 'NWU', 'BIDMC', 'MGB', 'UCLA', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5']
site_colors = ['black', '#D45E00', '#0072B2', '#CB7AA7', '#E79F00', '#029F73', '#DBD03C'] + ['#57B4E9'] * 5

df = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.score.toShare.csv"))
print(df.head())

# Drop the unnamed leading column and shorten two column names.
df = df.drop(columns=["Unnamed: 0"]).rename(
    columns={'siteid': 'site', 'calendar_month': 'month'}
)

# More readable values: upper-case site ids.
df['site'] = df['site'].map(lambda name: name.upper())

print(df.site.unique().tolist())
print(df.month.unique().tolist())

# Drop "combine" sites
df = df[df.site != "COMBINE"]

# Replace raw period keys with display labels (an unknown key raises KeyError).
df.month = df.month.map(lambda key: consistent_date[key])

# Only the META rows are plotted in this figure.
d = df[df.site == 'META'].copy()

width, height, size = 180, 200, 30
point = alt.OverlayMarkDef(filled=False, fill='white', strokeWidth=2)

# DATA PREPROCESSING: spell out the single-letter risk levels; ratio labels stay as they are.
risk_labels = {
    'L': 'Low',
    'M': 'Medium',
    'H': 'High',
    'H/M': 'H/M',
    'L/M': 'L/M',
}
d['cat'] = d['cat'].map(lambda code: risk_labels[code])



"""
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%% TOP %%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
"""
metric = 'pos'

y_title = '% of Patients By Risk Level'
y_scale = alt.Scale(domain=[0, 1])
colors = ['#7BADD1', '#427BB5', '#14366E']
colorDomain = ['Low', 'Medium', 'High']

# Stacked bars: share of patients per risk level and period (rows where metric == 'pos').
# NOTE(review): the calculate expression reads `datum.variable`, but no `variable` column is
# created in the visible code and the text layer has no matching calculate step — confirm
# that `order` resolves as intended.
bar = (
    alt.Chart(d)
    .transform_calculate(order="{'L':0, 'M': 1, 'H': 2}[datum.variable]")
    .transform_filter({'field': 'metric', 'oneOf': [metric]})
    .encode(
        x=alt.X("month:N", title=None, axis=alt.Axis(labelAngle=-55), sort=date),
        y=alt.Y("value:Q", title=None, axis=alt.Axis(format='.0%'), scale=y_scale),
        color=alt.Color(
            "cat:N",
            title='Risk Level',
            scale=alt.Scale(domain=colorDomain, range=colors),
            legend=alt.Legend(orient='bottom'),
        ),
        order="order:O",
    )
    .properties(width=width, height=height)
    .mark_bar(size=size, stroke='black')
)

# Percentage labels are hidden (opacity 0) for segments of 8% or less.
d['visibility'] = d['value'] > 0.08

text = (
    alt.Chart(d)
    .transform_filter({'field': 'metric', 'oneOf': [metric]})
    .mark_text(size=14, dx=0, dy=5, color='white', baseline='top', fontWeight=500)
    .encode(
        x=alt.X("month:N", title=None, axis=alt.Axis(labelAngle=-55), sort=date),
        y=alt.Y('value:Q', stack='zero'),
        detail='cat:N',
        text=alt.Text('value:Q', format='.0%'),
        order="order:O",
        opacity=alt.Opacity(
            'visibility:N',
            scale=alt.Scale(domain=[True, False], range=[1, 0]),
            legend=None,
        ),
    )
)

# `metric` is 'pos' here, so the labels are always layered on top of the bars.
bar = bar + text

bar = bar.properties(
    title={
        "text": [y_title],
        "fontSize": 18,
        "anchor": "middle",
        "subtitleColor": "gray",
    }
)

"""
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%% Bottom %%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
"""
metric = 'ppv'

y_title = 'Mortality Rate'
colors = ['#00A87E', '#00634B', 'black']
y_scale = alt.Scale(domain=[0, 0.5])

# Wide line chart: one line per risk level (rows where metric == 'ppv').
# NOTE(review): `datum.variable` is not a column created in the visible code — confirm `order`.
line = (
    alt.Chart(d)
    .transform_calculate(order="{'L':0, 'M': 1, 'H': 2}[datum.variable]")
    .transform_filter({'field': 'metric', 'oneOf': [metric]})
    .encode(
        x=alt.X("month:N", title=None, scale=alt.Scale(domain=date), axis=alt.Axis(labelAngle=0)),
        y=alt.Y("value:Q", title=y_title, scale=y_scale),
        color=alt.Color(
            "cat:N",
            title='Risk Level',
            scale=alt.Scale(domain=colorDomain, range=colors),
            legend=alt.Legend(orient='top-right'),
        ),
        order="order:O",
    )
    .properties(width=width * 4 - 40, height=height + 100)
    .mark_line(size=3, point=point, opacity=0.8)
)

line = line.properties(
    title={
        "text": [y_title],
        "fontSize": 18,
        "anchor": "middle",
        "subtitleColor": "gray",
    }
)

"""
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%% Bottom 2 %%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
"""
metric = 'rr'
y_title = 'Ratio Between Risk Levels'
colors = ['#D45E00', '#351800']
colorDomain = ['L/M', 'H/M']
# Values outside 0-3 are clamped to the axis limits.
y_scale = alt.Scale(domain=[0, 3], clamp=True)

# Small line chart: one line per risk ratio (rows where metric == 'rr').
# NOTE(review): `datum.variable` is not a column created in the visible code — confirm `order`.
line2 = (
    alt.Chart(d)
    .transform_calculate(order="{'H/M':0, 'L/M': 1}[datum.variable]")
    .transform_filter({'field': 'metric', 'oneOf': [metric]})
    .encode(
        x=alt.X(
            "month:N",
            title=None,
            scale=alt.Scale(domain=date),
            axis=alt.Axis(grid=False, labelAngle=-55),
        ),
        y=alt.Y("value:Q", title=y_title, scale=y_scale),
        color=alt.Color(
            "cat:N",
            title='Risk Ratio',
            scale=alt.Scale(domain=colorDomain, range=colors),
            legend=alt.Legend(orient='bottom'),
        ),
        order="order:O",
    )
    .properties(width=width, height=height)
    .mark_line(size=3, point=point, opacity=0.8)
)

line2 = line2.properties(
    title={
        "text": [y_title],
        "fontSize": 18,
        "anchor": "middle",
        "subtitleColor": "gray",
    }
)

print(d.site.unique())

# COMBINE: the three small panels form the bottom row; the wide line chart sits on top.
bottom_row = alt.hconcat(plot, bar, line2, spacing=30)
res = bottom_row.resolve_scale(y='independent', color='independent')

stacked = alt.vconcat(line, res, spacing=30)
res = stacked.resolve_scale(y='independent', color='independent')

# STYLE
res = apply_theme(
    res,
    axis_y_title_font_size=16,
    title_anchor='start',
    axis_label_font_size=14,
    header_label_font_size=16,
    point_size=100,
)

res

# New Figure Four Sub-Figures Combined w/ CI
The two columns `ci_l` and `ci_u` hold the lower and upper bounds of the confidence interval (CI). Because the current data files do not include CIs, placeholder bounds are used instead: −10% and +10% of the actual value.

In [None]:
# data_release='2021-04-27'

# Raw period keys (as used by the CSV files) -> labels shown on the charts.
consistent_date = {
    '2020-Mar-Apr': "'20 Mar - '20 Apr",
    '2020-May-Jun': "'20 May - '20 Jun",
    '2020-Jul-Aug': "'20 Jul - '20 Aug",
    '2020-Sep-Oct': "'20 Sep - '20 Oct",
    '2020-Nov-2021-Jan': "'20 Nov - '21 Jan",
}

# Raw keys and display labels, both in the dict's (chronological) insertion order.
date = list(consistent_date.keys())
new_date = list(consistent_date.values())

colors = ['#E79F00', '#0072B2', '#D45E00', '#CB7AA7', '#029F73', '#57B4E9']

sites = [
    'META',
    'APHP', 'FRBDX', 'ICSM', 'UKFR',
    'NWU',
    'BIDMC', 'MGB', 'UCLA', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5',
]
# One color per entry of `sites`, in the same order.
site_colors = ['black'] + ['#0072B2'] * 4 + ['#CB7AA7'] + ['#D45E00'] * 11

# All summary tables live in the same folder.
_to_share = join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare")
df = pd.read_csv(join(_to_share, "table.deceasedRate.toShare.csv"))
df2 = pd.read_csv(join(_to_share, "table.severeRate.toShare.csv"))
sdf = pd.read_csv(join(_to_share, "table.summary.toShare.csv"))
cs = pd.read_csv(join(_to_share, "table.cls.toShare.csv"))
st = pd.read_csv(join(_to_share, "table.stay.toShare.csv"))
print(st.head())

# Drop the unnamed leading column from the four tables used below.
df, df2, sdf, cs = (_t.drop(columns=["Unnamed: 0"]) for _t in (df, df2, sdf, cs))

# More readable values: upper-case site ids.
for _table in (df, df2, sdf, cs):
    _table['siteid'] = _table['siteid'].map(lambda site: site.upper())

print(df.siteid.unique().tolist())
print(df2.siteid.unique().tolist())

# Wide -> long for the two rate tables: one row per (site, period).
df, df2 = (
    pd.melt(_t, id_vars=['siteid'], value_vars=date, var_name='date', value_name='value')
    for _t in (df, df2)
)
sdf['date'] = sdf.month
cs['date'] = cs.calendar_month

print(df.date.unique().tolist())
# Replace raw period keys with display labels (an unknown key raises KeyError).
for _table in (df, df2, sdf, cs):
    _table['date'] = _table['date'].map(lambda key: consistent_date[key])

# Add a reference (META): every row carries the META value of its own period.
for _table in (df, df2):
    _meta_rows = _table[_table['siteid'] == 'META']
    _table['reference'] = _table['date'].map(
        lambda period: _meta_rows.loc[_meta_rows['date'] == period, 'value'].sum()
    )

cs.head()

width = 180
height = 200
size = 30
point = alt.OverlayMarkDef(filled=False, fill='white', strokeWidth=2)

# Placeholder confidence bounds: 90% and 110% of the value.
# Need to be removed if we have actual CI information in the files
df['ci_l'] = df.value * 0.9
df['ci_u'] = df.value * 1.1

# Only the death-rate table is plotted in this panel.
c = 'Death Rate'
d = df.copy()
header = alt.Header(title=None)
x = alt.Axis(ticks=False, labels=False, domain=False, title=None)

# META line with a fixed 0-0.3 y domain, drawn in black.
plot = alt.Chart(
    d
).transform_filter(
    {'field': 'siteid', 'oneOf': ['META']}
).mark_line(
    point=point,
    size=2
).encode(
    x=alt.X("date:N", title=None, axis=alt.Axis(labelAngle=-55, tickCount=5), sort=new_date),
    y=alt.Y("value:Q", title=None, scale=alt.Scale(domain=[0, 0.3])),
    color=alt.value('black')
).properties(
    width=width,
    height=height
)

# `res` keeps the plain line panel (no error bars, no title).
res = plot

# Error bars spanning ci_l..ci_u, derived from the line chart so they share its data and filter.
tick = plot.mark_errorbar(
    opacity=0.7
).encode(
    x=alt.X("date:N", title=None, axis=alt.Axis(labelAngle=-55, tickCount=5), sort=new_date),
    # The y title is None to match the line layer. The previous code passed `y_title`,
    # which this cell only assigns further down, so the panel either picked up a stale
    # title from an earlier cell or failed with NameError on a fresh kernel.
    y=alt.Y("ci_l:Q", title=None),
    y2=alt.Y2("ci_u:Q"),
    strokeWidth=alt.value(1)
)

plot = (tick + plot)

plot = plot.properties(
    title={
        "text": [
            "Overall Mortality Rate"
        ],
        "fontSize": 18,
        "anchor": "middle",
        "subtitleColor": "gray",
    }
)

data_release = '2021-04-29'

# Raw period keys (as used by the CSV file) -> labels shown on the charts.
consistent_date = {
    '2020-Mar-Apr': "'20 Mar - '20 Apr",
    '2020-May-Jun': "'20 May - '20 Jun",
    '2020-Jul-Aug': "'20 Jul - '20 Aug",
    '2020-Sep-Oct': "'20 Sep - '20 Oct",
    '2020-Nov-2021-Jan': "'20 Nov - '21 Jan",
}

# From here on `date` holds the display labels, in chronological order.
date = list(consistent_date.values())

# NOTE(review): `sites` has 16 entries but `site_colors` has 12; neither is used by
# the active code below (only by a commented-out facet) — confirm before reusing them.
sites = ['META', 'APHP', 'FRBDX', 'ICSM', 'NWU', 'BIDMC', 'MGB', 'UCLA', 'UMICH', 'UPENN', 'UPITT', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5']
site_colors = ['black', '#D45E00', '#0072B2', '#CB7AA7', '#E79F00', '#029F73', '#DBD03C'] + ['#57B4E9'] * 5

df = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "ToShare", "table.score.toShare.csv"))
print(df.head())

# Placeholder confidence bounds: 90% and 110% of the value.
# Need to be removed if we have actual CI information in the files
df['ci_l'] = df['value'] * 0.9
df['ci_u'] = df['value'] * 1.1

# Drop the unnamed leading column and shorten two column names.
df = df.drop(columns=["Unnamed: 0"]).rename(
    columns={'siteid': 'site', 'calendar_month': 'month'}
)

# More readable values: upper-case site ids.
df['site'] = df['site'].map(lambda name: name.upper())

print(df.site.unique().tolist())
print(df.month.unique().tolist())

# Drop "combine" sites
df = df[df.site != "COMBINE"]

# Replace raw period keys with display labels (an unknown key raises KeyError).
df.month = df.month.map(lambda key: consistent_date[key])

# Only the META rows are plotted in this figure.
d = df[df.site == 'META'].copy()

width, height, size = 180, 200, 30
point = alt.OverlayMarkDef(filled=False, fill='white', strokeWidth=2)

# DATA PREPROCESSING: spell out the single-letter risk levels; ratio labels stay as they are.
risk_labels = {
    'L': 'Low',
    'M': 'Medium',
    'H': 'High',
    'H/M': 'H/M',
    'L/M': 'L/M',
}
d['cat'] = d['cat'].map(lambda code: risk_labels[code])



"""
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%% TOP %%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
"""
metric = 'pos'

y_title = '% of Patients By Risk Level'
y_scale = alt.Scale(domain=[0, 1])
colors = ['#7BADD1', '#427BB5', '#14366E']
colorDomain = ['Low', 'Medium', 'High']

# Stacked bars: share of patients per risk level and period (rows where metric == 'pos').
# NOTE(review): the calculate expression reads `datum.variable`, but no `variable` column is
# created in the visible code and the text layer has no matching calculate step — confirm
# that `order` resolves as intended.
bar = (
    alt.Chart(d)
    .transform_calculate(order="{'L':0, 'M': 1, 'H': 2}[datum.variable]")
    .transform_filter({'field': 'metric', 'oneOf': [metric]})
    .encode(
        x=alt.X("month:N", title=None, axis=alt.Axis(labelAngle=-55), sort=date),
        y=alt.Y("value:Q", title=None, axis=alt.Axis(format='.0%'), scale=y_scale),
        color=alt.Color(
            "cat:N",
            title='Risk Level',
            scale=alt.Scale(domain=colorDomain, range=colors),
            legend=alt.Legend(orient='bottom'),
        ),
        order="order:O",
    )
    .properties(width=width, height=height)
    .mark_bar(size=size, stroke='black')
)

# Percentage labels are hidden (opacity 0) for segments of 8% or less.
d['visibility'] = d['value'] > 0.08

text = (
    alt.Chart(d)
    .transform_filter({'field': 'metric', 'oneOf': [metric]})
    .mark_text(size=14, dx=0, dy=5, color='white', baseline='top', fontWeight=500)
    .encode(
        x=alt.X("month:N", title=None, axis=alt.Axis(labelAngle=-55), sort=date),
        y=alt.Y('value:Q', stack='zero'),
        detail='cat:N',
        text=alt.Text('value:Q', format='.0%'),
        order="order:O",
        opacity=alt.Opacity(
            'visibility:N',
            scale=alt.Scale(domain=[True, False], range=[1, 0]),
            legend=None,
        ),
    )
)

# `metric` is 'pos' here, so the labels are always layered on top of the bars.
bar = bar + text

bar = bar.properties(
    title={
        "text": [y_title],
        "fontSize": 18,
        "anchor": "middle",
        "subtitleColor": "gray",
    }
)

"""
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%% Bottom %%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
"""
metric = 'ppv'

y_title = 'Mortality Rate'
colors = ['#00A87E', '#00634B', 'black']
y_scale = alt.Scale(domain=[0, 0.5])

# Wide line chart: one line per risk level (rows where metric == 'ppv').
# NOTE(review): `datum.variable` is not a column created in the visible code — confirm `order`.
line = (
    alt.Chart(d)
    .transform_calculate(order="{'L':0, 'M': 1, 'H': 2}[datum.variable]")
    .transform_filter({'field': 'metric', 'oneOf': [metric]})
    .encode(
        x=alt.X("month:N", title=None, scale=alt.Scale(domain=date), axis=alt.Axis(labelAngle=0)),
        y=alt.Y("value:Q", title=y_title, scale=y_scale),
        color=alt.Color(
            "cat:N",
            title='Risk Level',
            scale=alt.Scale(domain=colorDomain, range=colors),
            legend=alt.Legend(orient='top-right'),
        ),
        order="order:O",
    )
    .properties(width=width * 4 - 40, height=height + 100)
    .mark_line(size=3, point=point, opacity=0.8)
)

# Error bars spanning ci_l..ci_u, derived from the line chart so they share its data,
# filter and color encoding.
# NOTE(review): this x axis (grid=False, labelAngle=-55) differs from the line layer's
# (labelAngle=0); the two are layered with `+`, so confirm which settings are intended.
tick = line.mark_errorbar(opacity=0.7).encode(
    x=alt.X(
        "month:N",
        title=None,
        scale=alt.Scale(domain=date),
        axis=alt.Axis(grid=False, labelAngle=-55),
    ),
    y=alt.Y("ci_l:Q", title=y_title),
    y2=alt.Y2("ci_u:Q"),
    strokeWidth=alt.value(1),
)

line = tick + line

line = line.properties(
    title={
        "text": [y_title],
        "fontSize": 18,
        "anchor": "middle",
        "subtitleColor": "gray",
    }
)

"""
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%% Bottom 2 %%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
"""
metric = 'rr'
y_title = 'Ratio Between Risk Levels'
colors = ['#D45E00', '#351800']
colorDomain = ['L/M', 'H/M']
# Values outside 0-3 are clamped to the axis limits.
y_scale = alt.Scale(domain=[0, 3], clamp=True)

# Small line chart: one line per risk ratio (rows where metric == 'rr').
# NOTE(review): `datum.variable` is not a column created in the visible code — confirm `order`.
line2 = (
    alt.Chart(d)
    .transform_calculate(order="{'H/M':0, 'L/M': 1}[datum.variable]")
    .transform_filter({'field': 'metric', 'oneOf': [metric]})
    .encode(
        x=alt.X(
            "month:N",
            title=None,
            scale=alt.Scale(domain=date),
            axis=alt.Axis(grid=False, labelAngle=-55),
        ),
        y=alt.Y("value:Q", title=y_title, scale=y_scale),
        color=alt.Color(
            "cat:N",
            title='Risk Ratio',
            scale=alt.Scale(domain=colorDomain, range=colors),
            legend=alt.Legend(orient='bottom'),
        ),
        order="order:O",
    )
    .properties(width=width, height=height)
    .mark_line(size=3, point=point, opacity=0.8)
)

# Error bars spanning ci_l..ci_u, derived from the line chart so they share its data,
# filter and color encoding.
tick = line2.mark_errorbar(opacity=0.7).encode(
    x=alt.X(
        "month:N",
        title=None,
        scale=alt.Scale(domain=date),
        axis=alt.Axis(grid=False, labelAngle=-55),
    ),
    y=alt.Y("ci_l:Q", title=y_title),
    y2=alt.Y2("ci_u:Q"),
    strokeWidth=alt.value(1),
)

line2 = tick + line2

line2 = line2.properties(
    title={
        "text": [y_title],
        "fontSize": 18,
        "anchor": "middle",
        "subtitleColor": "gray",
    }
)

print(d.site.unique())

# COMBINE: the three small panels form the bottom row; the wide line chart sits on top.
bottom_row = alt.hconcat(plot, bar, line2, spacing=30)
res = bottom_row.resolve_scale(y='independent', color='independent')

stacked = alt.vconcat(line, res, spacing=30)
res = stacked.resolve_scale(y='independent', color='independent')

# STYLE
res = apply_theme(
    res,
    axis_y_title_font_size=16,
    title_anchor='start',
    axis_label_font_size=14,
    header_label_font_size=16,
    point_size=100,
)

res

# Demographics

In [None]:
def FUNC_DEMOGRAPHICS_BY_WAVE_WITH_LINES(_data):
    """Build the "Demographics" figure: age, sex and race line panels side by side.

    Every panel is a line chart (with point markers) of ``value``, formatted
    as a percentage, over ``date``; lines are colored by ``siteid`` and the
    panel is faceted into one column per ``group``.

    Parameters
    ----------
    _data : pandas.DataFrame
        Long-format table. The columns read here are ``group``, ``date``,
        ``value`` and ``siteid``. A copy is taken, so the argument itself is
        not modified.

    Returns
    -------
    Altair chart
        The age, sex and race panels concatenated horizontally with a shared
        y scale and the title "Demographics".
    """
    d = _data.copy()

    # Group labels that select the rows shown in each panel.
    AGE_GROUPS = ['0-25', '26-49', '50-69', '70-79', '80+']
    SEX_GROUPS = ['Female', 'Male']
    RACE_GROUPS = ['White', 'Black', 'Asian', 'Hispanic and Other']
    COUNTRY_COLORS = ['#0072B2', '#E79F00', '#029F73', '#D45E00', '#CB7AA7']

    # Visual parameters shared by every panel.
    width = 130
    titleX = -60
    padding = 0.3

    def _faceted_lines(groups, header_title, y_title):
        """Return the faceted line panel for the rows whose ``group`` is in `groups`."""
        return alt.Chart(
            d[d.group.isin(groups)]
        ).mark_line(
            point=True,
            size=3,
        ).encode(
            x=alt.X(
                'date:N',
                title=None,
                scale=alt.Scale(padding=padding),
                axis=alt.Axis(grid=False, labels=True, ticks=False, domain=False, tickMinStep=1, labelAngle=-55),
            ),
            y=alt.Y('value:Q', axis=alt.Axis(format='.0%', titleX=titleX), title=y_title),
            color=alt.Color("siteid:N", title=None, scale=alt.Scale(range=COUNTRY_COLORS))
        ).properties(
            width=width, height=300
        ).facet(
            spacing=2,
            column=alt.Column(
                "group:N",
                header=alt.Header(labelOrient="bottom", title=header_title, titleOrient="bottom")
            )
        )

    # Only the left-most panel carries a y-axis title; the y scale is shared below.
    age_p_line = _faceted_lines(AGE_GROUPS, 'Age', "Percentage of Patients")
    sex_p_line = _faceted_lines(SEX_GROUPS, 'Sex', None)
    race_p_line = _faceted_lines(RACE_GROUPS, 'Race', None)

    # Assemble the three panels into one figure.
    final_chart = alt.hconcat(
        age_p_line, sex_p_line, race_p_line, spacing=40
    ).resolve_scale(y='shared').properties(
        title={
            "text": "Demographics",
            "dx": 80,
            "subtitleColor": "gray",
        }
    )

    return final_chart


# Load the age, sex and race summary tables, stack them into one long table with
# the columns `group`, `date`, `value`, `siteid`, and plot the meta-analysis rows.
# NOTE(review): the first cell of this notebook reads from ".../ToShare" (capital T)
# while the paths below use "toShare"; on a case-sensitive filesystem only one
# spelling can exist -- confirm which is correct.
df = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "toShare", "table.age.toShare.csv"))

df = df.rename(columns={
    'group': 'group',
    'age_group_new': 'group',
    'calendar_date': 'date',
    'Freq': 'value',
})

sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "toShare", "table.sex.toShare.csv"))

sdf = sdf.rename(columns={
    'group': 'group',
    'age_group_new': 'group',
    'calendar_date': 'date',
    'Freq': 'value',
})

# Every row of the sex table is labelled 'male'; the 'female' rows are derived
# as the complement (1 - value) of the same rows.
sdf['group'] = 'male'

fsdf = sdf.copy()
fsdf['group'] = 'female'
fsdf['value'] = 1 - fsdf['value']

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call
# (the original row index is kept, as append did by default).
sdf = pd.concat([sdf, fsdf])

df = pd.concat([df, sdf])

rdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "toShare", "table.race.toShare.csv"))

rdf = rdf.rename(columns={
    'group': 'group',
    'race_new': 'group',
    'age_group_new': 'group',
    'calendar_date': 'date',
    'Freq': 'value',
})

df = pd.concat([df, rdf])

# Strip the 'meta-' prefix from site ids.
df.siteid = df.siteid.apply(lambda x: x.replace('meta-', ''))

df = df.rename(columns={
    'group': 'group',
    'age_group_new': 'group',
    'calendar_date': 'date',
    'race_new': 'group',
    'Freq': 'value',
})

# Raw group code -> display label. Built once instead of once per row; a code
# missing from this table still raises KeyError, as before.
group_labels = {
    '00to25': '0-25',
    '26to49': '26-49',
    '50to69': '50-69',
    '70to79': '70-79',
    '80plus': '80+',
    'female': 'Female',
    'male': 'Male',
    'white': 'White',
    'black': 'Black',
    'Black': 'Black',
    'Asian': 'Asian',
    'Hispanic and Other': 'Hispanic and Other',
    'White': 'White',
    'other': 'Other',
    'other_age': 'Other',
    'other_sex': 'Other',
    'other_race': 'Other'
}
df.group = df.group.apply(lambda raw: group_labels[raw])

# Plot only the rows whose site id is exactly 'meta'.
plot = FUNC_DEMOGRAPHICS_BY_WAVE_WITH_LINES(df[df.siteid == 'meta'])

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='top-left'
)

plot

In [None]:
def FUNC_DEMOGRAPHICS_BY_WAVE_WITH_LINES(_data):
    """Build the "Demographics" figure: age and sex line panels side by side.

    Every panel is a line chart (with point markers) of ``value``, formatted
    as a percentage, over ``date``; lines are colored by ``siteid`` and the
    panel is faceted into one column per ``group``.

    NOTE: this definition rebinds the name already defined in the previous
    cell; once this cell has run, the age/sex/race variant is no longer
    reachable under this name.

    Parameters
    ----------
    _data : pandas.DataFrame
        Long-format table. The columns read here are ``group``, ``date``,
        ``value`` and ``siteid``. A copy is taken, so the argument itself is
        not modified.

    Returns
    -------
    Altair chart
        The age and sex panels concatenated horizontally with a shared
        y scale and the title "Demographics".
    """
    d = _data.copy()

    # Group labels that select the rows shown in each panel.
    AGE_GROUPS = ['0-25', '26-49', '50-69', '70-79', '80+']
    SEX_GROUPS = ['Female', 'Male']
    COUNTRY_COLORS = ['#0072B2', '#E79F00', '#029F73', '#D45E00', '#CB7AA7']

    # Visual parameters shared by every panel.
    width = 130
    titleX = -60
    padding = 0.3

    def _faceted_lines(groups, header_title, y_title):
        """Return the faceted line panel for the rows whose ``group`` is in `groups`."""
        return alt.Chart(
            d[d.group.isin(groups)]
        ).mark_line(
            point=True,
            size=3,
        ).encode(
            x=alt.X(
                'date:N',
                title=None,
                scale=alt.Scale(padding=padding),
                axis=alt.Axis(grid=False, labels=True, ticks=False, domain=False, tickMinStep=1, labelAngle=-55),
            ),
            y=alt.Y('value:Q', axis=alt.Axis(format='.0%', titleX=titleX), title=y_title),
            color=alt.Color("siteid:N", title=None, scale=alt.Scale(range=COUNTRY_COLORS))
        ).properties(
            width=width, height=300
        ).facet(
            spacing=2,
            column=alt.Column(
                "group:N",
                header=alt.Header(labelOrient="bottom", title=header_title, titleOrient="bottom")
            )
        )

    # Only the left-most panel carries a y-axis title; the y scale is shared below.
    # The race panel that used to be built here was never added to the figure,
    # so it is no longer constructed.
    age_p_line = _faceted_lines(AGE_GROUPS, 'Age', "Percentage of Patients")
    sex_p_line = _faceted_lines(SEX_GROUPS, 'Sex', None)

    # Assemble the two panels into one figure.
    final_chart = alt.hconcat(
        age_p_line, sex_p_line, spacing=40
    ).resolve_scale(y='shared').properties(
        title={
            "text": "Demographics",
            "dx": 80,
            "subtitleColor": "gray",
        }
    )

    return final_chart


# Load the age, sex and race summary tables, stack them into one long table with
# the columns `group`, `date`, `value`, `siteid`, and plot every non-meta site.
# NOTE(review): the first cell of this notebook reads from ".../ToShare" (capital T)
# while the paths below use "toShare"; on a case-sensitive filesystem only one
# spelling can exist -- confirm which is correct.
df = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "toShare", "table.age.toShare.csv"))

df = df.rename(columns={
    'group': 'group',
    'age_group_new': 'group',
    'calendar_date': 'date',
    'Freq': 'value',
})

sdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "toShare", "table.sex.toShare.csv"))

sdf = sdf.rename(columns={
    'group': 'group',
    'age_group_new': 'group',
    'calendar_date': 'date',
    'Freq': 'value',
})

# Every row of the sex table is labelled 'male'; the 'female' rows are derived
# as the complement (1 - value) of the same rows.
sdf['group'] = 'male'

fsdf = sdf.copy()
fsdf['group'] = 'female'
fsdf['value'] = 1 - fsdf['value']

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call
# (the original row index is kept, as append did by default).
sdf = pd.concat([sdf, fsdf])

df = pd.concat([df, sdf])

rdf = pd.read_csv(join("..", "data", "Phase2.1SurvivalRSummariesPublic", "toShare", "table.race.toShare.csv"))

rdf = rdf.rename(columns={
    'group': 'group',
    'race_new': 'group',
    'age_group_new': 'group',
    'calendar_date': 'date',
    'Freq': 'value',
})

df = pd.concat([df, rdf])

# Strip the 'meta-' prefix from site ids.
df.siteid = df.siteid.apply(lambda x: x.replace('meta-', ''))

df = df.rename(columns={
    'group': 'group',
    'age_group_new': 'group',
    'calendar_date': 'date',
    'race_new': 'group',
    'Freq': 'value',
})

# Raw group code -> display label. Built once instead of once per row; a code
# missing from this table still raises KeyError, as before.
group_labels = {
    '00to25': '0-25',
    '26to49': '26-49',
    '50to69': '50-69',
    '70to79': '70-79',
    '80plus': '80+',
    'female': 'Female',
    'male': 'Male',
    'white': 'White',
    'black': 'Black',
    'Black': 'Black',
    'Asian': 'Asian',
    'Hispanic and Other': 'Hispanic and Other',
    'White': 'White',
    'other': 'Other',
    'other_age': 'Other',
    'other_sex': 'Other',
    'other_race': 'Other'
}
df.group = df.group.apply(lambda raw: group_labels[raw])

# Plot every row whose site id is not 'meta'.
plot = FUNC_DEMOGRAPHICS_BY_WAVE_WITH_LINES(df[df.siteid != 'meta'])

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='top-left'
)

plot

# The code below is not used

In [None]:
def risk(_d, metric='pos'):
    """Return a per-site faceted chart for one risk-score metric.

    Parameters
    ----------
    _d : pandas.DataFrame
        Table providing the `site`, `cat`, `metric`, `month` and `value`
        columns read below. It is copied before anything is modified.
    metric : str
        Rows are filtered to this metric. 'pos' draws bars with percentage
        labels on top; any other value draws lines with point markers.
        'ppv' and 'rr' pick their own color ranges, and 'rr' its own
        category domain.

    Returns
    -------
    Altair chart
        The chart faceted into one column per site, with a title and the
        data-release subtitle.
    """
    frame = _d.copy()

    # ---- data preprocessing ----
    category_names = {
        'L':'Low Risk',
        'M': 'Medium Risk',
        'H': 'High Risk',
        'H/M': 'High/Medium',
        'L/M': 'Low/Medium'
    }
    frame.loc[frame.site == 'combine', 'site'] = 'All Sites'
    # An unknown category code raises KeyError.
    frame.cat = frame.cat.apply(lambda code: category_names[code])
    # Flag used to hide the text label of values at or below 8%.
    frame['visibility'] = frame['value'] > 0.08

    # ---- metric-dependent settings ----
    is_pos = metric == 'pos'

    if is_pos:
        y_title = '% of Patients in Each Category'
        colors = ['#7BADD1', '#427BB5', '#14366E']
    else:
        y_title = '% of Event in Each Category'
        if metric == 'ppv':
            colors = ['#A8DED1', '#3FA86F', '#005A24']
        else:
            colors = ['red', 'salmon']

    if metric == 'rr':
        colorDomain = ['High/Medium', 'Low/Medium']
    else:
        colorDomain = ['Low Risk', 'Medium Risk', 'High Risk']

    if is_pos or metric == 'ppv':
        y_scale = alt.Scale(domain=[0, 1])
    else:
        y_scale = alt.Scale()

    # ---- base chart (mark chosen below) ----
    # NOTE(review): the `order` expression indexes by `datum.variable`, while the
    # rest of this function reads the `cat` field -- confirm a `variable` field exists.
    encoded = alt.Chart(frame).transform_calculate(
        order="{'Low Risk':0, 'Medium Risk': 1, 'High Risk': 2}[datum.variable]"
    ).transform_filter(
        {'field': 'metric', 'oneOf': [metric]}
    ).encode(
        x=alt.X("month:N", title='Month', scale=alt.Scale(domain=['Mar-Apr', 'May-Jun', 'Jul-Aug', 'Sep-Oct', 'Since Nov'])),
        y=alt.Y("value:Q", title=y_title, axis=alt.Axis(format='.0%'), scale=y_scale),
        color=alt.Color("cat:N", title='Category', scale=alt.Scale(domain=colorDomain, range=colors)),
        order="order:O"
    ).properties(
        width=300
    )

    if is_pos:
        # Percentage labels layered over the bars.
        labels = alt.Chart(frame).transform_filter(
            {'field': 'metric', 'oneOf': [metric]}
        ).mark_text(
            size=16, dx=0, dy=5, color='white', baseline='top', fontWeight=500
        ).encode(
            x=alt.X('month:N'),
            y=alt.Y('value:Q', stack='zero'),
            detail='cat:N',
            text=alt.Text('value:Q', format='.0%'),
            order="order:O",
            opacity=alt.Opacity('visibility:N', scale=alt.Scale(domain=[True, False], range=[1, 0]))
        )
        panel = encoded.mark_bar(size=50, stroke='black') + labels
    else:
        panel = encoded.mark_line(size=3, point=True, opacity=0.8)

    # One column per site.
    faceted = panel.facet(
        column=alt.Column('site:N', header=alt.Header(title=None)),
    )

    # ---- title ----
    heading = "Distribution of Risk Scores" if is_pos else "Event Rate of Risk Scores"
    return faceted.properties(
        title={
            "text": [heading],
            "dx": 80,
            "subtitle": [
                get_visualization_subtitle(data_release=data_release, with_num_sites=False)
            ],
            "subtitleColor": "gray",
        }
    )

In [None]:
# Build the distribution ('pos') and event-rate ('ppv') figures from the same table.
# NOTE(review): `df` was last rebound by the demographics cells above, whose visible
# columns are group/date/value/siteid, while risk() reads `site`, `cat`, `metric`
# and `month` -- confirm which table this cell is meant to receive.
pos = risk(df, metric='pos')
ppv = risk(df, metric='ppv')

# Stack the two figures vertically; each keeps its own color scale and x scale.
res = alt.vconcat(pos, ppv, spacing=30).resolve_scale(color='independent', x='independent')

res = apply_theme(
    res,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    header_label_font_size=16
)

res.display()

In [None]:
# Work on a copy so the relabeling below does not touch `df`.
d = df.copy()

# Chart width and bar thickness used by the charts defined later in this cell.
width = 300
size = 50


"""
DATA PREPROCESSING...
"""
# Display the 'combine' site as 'All Sites'.
d.loc[d.site == 'combine', 'site'] = 'All Sites'
# Expand the short category codes to display labels; an unknown code raises KeyError.
d.cat = d.cat.apply(lambda x: {
    'L':'Low', 
    'M': 'Medium', 
    'H': 'High',
    'H/M': 'H/M',
    'L/M': 'L/M'
}[x])
# Previous, longer labels (kept for reference):
# d.cat = d.cat.apply(lambda x: {
#     'L':'Low Risk', 
#     'M': 'Medium Risk', 
#     'H': 'High Risk',
#     'H/M': 'High/Medium',
#     'L/M': 'Low/Medium'
# }[x])




"""
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%% TOP %%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
"""
# Settings for the top panel. Note that `colors` rebinds the notebook-level
# `colors` list defined in the configuration cell.
metric='pos'

y_title = '% of Patients in Each Category'
y_scale = alt.Scale(domain=[0, 1])
colors = ['#7BADD1', '#427BB5', '#14366E']
# colorDomain = ['Low Risk', 'Medium Risk', 'High Risk']
colorDomain = ['Low', 'Medium', 'High']
# colorDomain = ['L', 'M', 'H']

# Base chart for the 'pos' rows; the mark is chosen below.
# NOTE(review): the `order` lookup is keyed by 'L'/'M'/'H' and indexed with
# `datum.variable`, while `cat` now holds 'Low'/'Medium'/'High' -- confirm the
# `variable` field exists and carries the short codes.
bar = alt.Chart(
    d
).transform_calculate(
#     order="{'Low Risk':0, 'Medium Risk': 1, 'High Risk': 2}[datum.variable]"  
    order="{'L':0, 'M': 1, 'H': 2}[datum.variable]"  
).transform_filter(
    {'field': 'metric', 'oneOf': [metric]}
).encode(
    x=alt.X("month:N", title='Month', scale=alt.Scale(domain=['Mar-Apr', 'May-Jun', 'Jul-Aug', 'Sep-Oct', 'Since Nov'])),
    y=alt.Y("value:Q", title=y_title, axis=alt.Axis(format='.0%'), scale=y_scale),
    color=alt.Color("cat:N", title='Risk', scale=alt.Scale(domain=colorDomain, range=colors)),
    order="order:O"
).properties(
    width=width
)

bar = bar.mark_bar(
    size=size, stroke='black'
)


# Flag used to hide the text label of values at or below 8%.
d['visibility'] = d['value'] > 0.08

# Percentage labels for the bars; opacity 0 hides the labels flagged False above.
text = alt.Chart(
    d
).transform_filter(
    {'field': 'metric', 'oneOf': [metric]}
).mark_text(size=16, dx=0, dy=5, color='white', baseline='top', fontWeight=500).encode(
    x=alt.X('month:N'),
    y=alt.Y('value:Q', stack='zero'),
    detail='cat:N',
    text=alt.Text('value:Q', format='.0%'),
    order="order:O",
    opacity=alt.Opacity('visibility:N', scale=alt.Scale(domain=[True, False], range=[1, 0]), legend=None)
)

# `metric` was set to 'pos' at the top of this section, so the labels are always layered on.
if metric == 'pos':
    bar = (bar + text)

# One column per site.
bar = bar.facet(
    column=alt.Column('site:N', header=alt.Header(title=None)),
    spacing=157
)

"""
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%% Bottom %%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
"""
# Settings for the event-rate lines; `colorDomain` is still the Low/Medium/High
# list set for the top panel.
metric='ppv'

y_title = '% of Event in Each Category'
colors = ['#A8DED1', '#3FA86F', '#005A24']
y_scale = alt.Scale(domain=[0, 1])

# Base chart for the 'ppv' rows.
line = alt.Chart(
    d
).transform_calculate(
#     order="{'Low Risk':0, 'Medium Risk': 1, 'High Risk': 2}[datum.variable]"  
    order="{'L':0, 'M': 1, 'H': 2}[datum.variable]"  
).transform_filter(
    {'field': 'metric', 'oneOf': [metric]}
).encode(
    x=alt.X("month:N", title='Month', scale=alt.Scale(domain=['Mar-Apr', 'May-Jun', 'Jul-Aug', 'Sep-Oct', 'Since Nov'])),
    y=alt.Y("value:Q", title=y_title, axis=alt.Axis(format='.0%'), scale=y_scale),
    color=alt.Color("cat:N", title='Risk', scale=alt.Scale(domain=colorDomain, range=colors)),
    order="order:O"
).properties(
    width=width
)


# Draw as a line with point markers.
line = line.mark_line(
    size=3, point=True, opacity=0.8
)


"""
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%% Bottom 2 %%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
"""
# Settings for the risk-ratio lines; `colorDomain` is rebound to the ratio labels here.
metric='rr'
y_title = 'Ratio Between Risk Categories'
colors = ['red', 'salmon']
# colorDomain = ['High/Medium', 'Low/Medium']
colorDomain = ['H/M', 'L/M']
y_scale = alt.Scale(domain=[0, 4.2])

# Base chart for the 'rr' rows.
# NOTE(review): the y axis uses the '.0%' format although the title describes a ratio
# on a 0..4.2 domain -- confirm the percent formatting is intended.
line2 = alt.Chart(
    d
).transform_calculate(
#     order="{'High/Medium':0, 'Low/Medium': 1}[datum.variable]"  
    order="{'H/M':0, 'L/M': 1}[datum.variable]"  
).transform_filter(
    {'field': 'metric', 'oneOf': [metric]}
).encode(
    x=alt.X("month:N", title='Month', scale=alt.Scale(domain=['Mar-Apr', 'May-Jun', 'Jul-Aug', 'Sep-Oct', 'Since Nov'])),
    y=alt.Y("value:Q", title=y_title, axis=alt.Axis(format='.0%'), scale=y_scale),
    color=alt.Color("cat:N", title='Risk Ratio', scale=alt.Scale(domain=colorDomain, range=colors)),
    order="order:O"
).properties(
    width=width
)


line2 = line2.mark_line(
    size=3, point=True, opacity=0.8
)

# line = alt.layer(line, line2, data=d).resolve_scale(y='independent', color='independent')
# .facet(
#     column='site' # alt.Column('site:N', header=alt.Header(title=None))
# )

# for site in d.site.unique()

# For each listed site, layer the event-rate and ratio lines (independent y and
# color scales), filter the layer to that site, and concatenate the results.
# `line` is rebound from the single chart to the concatenated chart.
line = alt.concat(*(
    alt.layer(line, line2, title={
        "text": site,
        "dx": 200}).transform_filter(alt.datum.site == site).resolve_scale(y='independent', color='independent')
    for site in ['All Sites', 'MGB', 'BIDMC', 'VA1', 'VA2', 'VA3', 'VA4', 'VA5']
), spacing=20).resolve_scale(color='shared')

# Debug output: list the distinct sites present in `d`.
print(d.site.unique())

"""
COMBINE
"""
# Title and data-release subtitle for the faceted bar panel.
top = bar.properties(
    title={
        "text": [
            f"Distribution of Risk Scores"
        ],
        "dx": 80,
        "subtitle": [
            get_visualization_subtitle(data_release=data_release, with_num_sites=False)
        ], 
        "subtitleColor": "gray",
    }
)

# Title and data-release subtitle for the concatenated per-site line panels.
bot = line.properties(
    title={
        "text": [
            f"Event Rate of Risk Scores"
        ],
        "dx": 80,
        "subtitle": [
            get_visualization_subtitle(data_release=data_release, with_num_sites=False)
        ], 
        "subtitleColor": "gray",
    }
)

# Stack the two panels; each keeps its own color scale.
res = alt.vconcat(top, bot, spacing=30).resolve_scale(color='independent')

"""
STYLE
"""

# Apply the project theme with the sizes and legend position chosen for this figure.
res = apply_theme(
    res,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='bottom',
    header_label_font_size=16
)

# Last expression of the cell: display the themed chart.
res

In [None]:
# Work on a copy so the relabeling below does not touch `df`.
d = df.copy()

# Chart dimensions: `height` is used by the bar panel, `height2` by the two line
# panels; `size` is the bar thickness.
width = 280
height = 200
height2 = 140
size = 50


"""
DATA PREPROCESSING...
"""
# Display the 'combine' site as 'All Sites'.
d.loc[d.site == 'combine', 'site'] = 'All Sites'
# Expand the short category codes to display labels; an unknown code raises KeyError.
d.cat = d.cat.apply(lambda x: {
    'L':'Low', 
    'M': 'Medium', 
    'H': 'High',
    'H/M': 'H/M',
    'L/M': 'L/M'
}[x])
# Previous, longer labels (kept for reference):
# d.cat = d.cat.apply(lambda x: {
#     'L':'Low Risk', 
#     'M': 'Medium Risk', 
#     'H': 'High Risk',
#     'H/M': 'High/Medium',
#     'L/M': 'Low/Medium'
# }[x])




"""
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%% TOP %%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
"""
# Settings for the top panel. Note that `colors` rebinds the notebook-level
# `colors` list defined in the configuration cell.
metric='pos'

y_title = '% of Patients in Each Category'
y_scale = alt.Scale(domain=[0, 1])
colors = ['#7BADD1', '#427BB5', '#14366E']
# colorDomain = ['Low Risk', 'Medium Risk', 'High Risk']
colorDomain = ['Low', 'Medium', 'High']
# colorDomain = ['L', 'M', 'H']

# Base chart for the 'pos' rows; the mark is chosen below.
# NOTE(review): the `order` lookup is keyed by 'L'/'M'/'H' and indexed with
# `datum.variable`, while `cat` now holds 'Low'/'Medium'/'High' -- confirm the
# `variable` field exists and carries the short codes.
bar = alt.Chart(
    d
).transform_calculate(
#     order="{'Low Risk':0, 'Medium Risk': 1, 'High Risk': 2}[datum.variable]"  
    order="{'L':0, 'M': 1, 'H': 2}[datum.variable]"  
).transform_filter(
    {'field': 'metric', 'oneOf': [metric]}
).encode(
    x=alt.X("month:N", title='Month', scale=alt.Scale(domain=['Mar-Apr', 'May-Jun', 'Jul-Aug', 'Sep-Oct', 'Since Nov']), axis=alt.Axis(grid=True)),
    y=alt.Y("value:Q", title=y_title, axis=alt.Axis(format='.0%'), scale=y_scale),
    color=alt.Color("cat:N", title='Risk', scale=alt.Scale(domain=colorDomain, range=colors)),
    order="order:O"
).properties(
    width=width,
    height=height
)

bar = bar.mark_bar(
    size=size, stroke='black'
)


# Flag used to hide the text label of values at or below 8%.
d['visibility'] = d['value'] > 0.08

# Percentage labels for the bars; opacity 0 hides the labels flagged False above.
text = alt.Chart(
    d
).transform_filter(
    {'field': 'metric', 'oneOf': [metric]}
).mark_text(size=16, dx=0, dy=5, color='white', baseline='top', fontWeight=500).encode(
    x=alt.X('month:N'),
    y=alt.Y('value:Q', stack='zero'),
    detail='cat:N',
    text=alt.Text('value:Q', format='.0%'),
    order="order:O",
    opacity=alt.Opacity('visibility:N', scale=alt.Scale(domain=[True, False], range=[1, 0]), legend=None)
)

# `metric` was set to 'pos' at the top of this section, so the labels are always layered on.
if metric == 'pos':
    bar = (bar + text)

# One column per site.
bar = bar.facet(
    column=alt.Column('site:N', header=alt.Header(title=None)),
)

"""
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%% Bottom %%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
"""
# Settings for the event-rate lines; `colorDomain` is still the Low/Medium/High
# list set for the top panel.
metric='ppv'

y_title = '% of Risk Event'
# The first palette is immediately overridden by the second assignment.
colors = ['#A8DED1', '#3FA86F', '#005A24']
colors = ['#00A87E', '#00634B', 'black']
y_scale = alt.Scale(domain=[0, 0.6])

# Base chart for the 'ppv' rows; the x axis keeps only its grid lines
# (ticks, labels, domain line and title are switched off).
line = alt.Chart(
    d
).transform_calculate(
#     order="{'Low Risk':0, 'Medium Risk': 1, 'High Risk': 2}[datum.variable]"  
    order="{'L':0, 'M': 1, 'H': 2}[datum.variable]"  
).transform_filter(
    {'field': 'metric', 'oneOf': [metric]}
).encode(
    x=alt.X("month:N", title='Month', scale=alt.Scale(domain=['Mar-Apr', 'May-Jun', 'Jul-Aug', 'Sep-Oct', 'Since Nov']), axis=alt.Axis(grid=True, ticks=False, labels=False, domain=False, title=None)),
    y=alt.Y("value:Q", title=y_title, axis=alt.Axis(format='.0%'), scale=y_scale),
    color=alt.Color("cat:N", title='Risk', scale=alt.Scale(domain=colorDomain, range=colors)),
    order="order:O"
).properties(
    width=width,
    height=height2
)


# Draw as a line with point markers, one column per site.
line = line.mark_line(
    size=3, point=True, opacity=0.8
).facet(
    column=alt.Column('site:N', header=alt.Header(title=None)),
)



"""
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%% Bottom 2 %%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
"""
# Settings for the risk-ratio lines; `colorDomain` is rebound to the ratio labels here.
metric='rr'
y_title = 'Ratio Between Risks'
colors = ['#D45E00', '#351800']
# colorDomain = ['High/Medium', 'Low/Medium']
colorDomain = ['L/M', 'H/M']
y_scale = alt.Scale(domain=[0, 4.2])

# Base chart for the 'rr' rows.
# NOTE(review): the y axis uses the '.0%' format although the title describes a ratio
# on a 0..4.2 domain -- confirm the percent formatting is intended.
line2 = alt.Chart(
    d
).transform_calculate(
#     order="{'High/Medium':0, 'Low/Medium': 1}[datum.variable]"  
    order="{'H/M':0, 'L/M': 1}[datum.variable]"  
).transform_filter(
    {'field': 'metric', 'oneOf': [metric]}
).encode(
    x=alt.X("month:N", title='Month', scale=alt.Scale(domain=['Mar-Apr', 'May-Jun', 'Jul-Aug', 'Sep-Oct', 'Since Nov']), axis=alt.Axis(grid=True)),
    y=alt.Y("value:Q", title=y_title, axis=alt.Axis(format='.0%'), scale=y_scale),
    color=alt.Color("cat:N", title='Risk Ratio', scale=alt.Scale(domain=colorDomain, range=colors)),
    order="order:O",
#     shape="site:N"
).properties(
    width=width,
    height=height2
)

# Draw as a line with point markers, one column per site; the facet header labels
# are switched off for this row.
line2 = line2.mark_line(
    size=3, opacity=0.8, point=True
).facet(
    column=alt.Column('site:N', header=alt.Header(title=None, labels=False)),
)






"""
COMBINE
"""
# Title and data-release subtitle for the faceted bar panel.
top = bar.properties(
    title={
        "text": [
            f"Distribution of Risk Scores"
        ],
        "dx": 180,
        "subtitle": [
            get_visualization_subtitle(data_release=data_release, with_num_sites=False)
        ], 
        "subtitleColor": "gray",
    }
)

# Title and data-release subtitle for the faceted event-rate panel.
line = line.properties(
    title={
        "text": [
            f"Event Rate of Risk Scores"
        ],
        "dx": 180,
        "subtitle": [
            get_visualization_subtitle(data_release=data_release, with_num_sites=False)
        ], 
        "subtitleColor": "gray",
    }
)

# The ratio panel is left without a title of its own (disabled below).
# line2 = line2.properties(
#     title={
#         "text": [
#             f"Risk Ratio"
#         ],
#         "dx": 180,
#         "subtitle": [
#             get_visualization_subtitle(data_release=data_release, with_num_sites=False)
#         ], 
#         "subtitleColor": "gray",
#     }
# )

# Stack the three panels; each keeps its own color scale.
res = alt.vconcat(top, line, line2, spacing=10).resolve_scale(color='independent')

"""
STYLE
"""

# Apply the project theme with the sizes and legend position chosen for this figure.
res = apply_theme(
    res,
    axis_y_title_font_size=14,
    axis_title_font_size=14,
    axis_label_font_size=12,
    title_anchor='start',
    legend_orient='left',
    header_label_font_size=16
)

# Last expression of the cell: display the themed chart.
res