In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
import datetime
import dateutil.parser
from os.path import join

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    get_visualization_subtitle,
    get_country_color_map,
)
from theme import apply_theme
from web import for_website

# Lift Altair's row limit so that charts can embed data frames larger than the
# default cap. The trailing semicolon is the notebook idiom for suppressing the
# cell's output.
alt.data_transformers.disable_max_rows(); # Allow using rows more than 5000

In [None]:
# Data release date string for this set of figures.
data_release = '2021-04-27'

# Load the per-country, per-wave case counts. Only the total count is kept:
# the CSV's unnamed index column and the severe-case count are dropped.
case_numbers_path = join("..", "data", "1.1.resurgence", "case rate and severity risk", "case_numbers.csv")
df = (
    pd.read_csv(case_numbers_path)
    .rename(columns={"n.all": "n_all", "n.severe": "n_severe"})
    .drop(columns=['Unnamed: 0', 'n_severe'])
)

# Facet order for the countries and the colour assigned to each of them.
COUNTRY = ['ALL', 'BRAZIL', 'FRANCE', 'GERMANY', 'ITALY', 'SPAIN', 'USA']
COUNTRY_COLOR = ['black', '#CB7AA7', '#0072B2', '#E79F00', '#029F73', '#57B4E9', '#D45E00']

# Relabel the waves for display. Any value other than 'early' / 'late'
# raises KeyError, so unexpected labels in the CSV are not silently kept.
wave_labels = {'early': 'First', 'late': 'Second'}
df['wave'] = df['wave'].apply(lambda wave: wave_labels[wave])

df

In [None]:
# Country-level hospitalization counts per wave: one bar per wave on a log
# scale, labelled with the exact count, in one facet column per country.
d = df.copy()

bars = (
    alt.Chart(d)
    # Rows with a non-positive count are removed before plotting; presumably
    # because they cannot be placed on the log-scaled y axis.
    .transform_filter(alt.datum.n_all > 0)
    .mark_bar(size=35)
    .encode(
        x=alt.X("wave:N", title=None, axis=alt.Axis(labels=False)),
        y=alt.Y("n_all:Q", title=None, scale=alt.Scale(type='log')),
        color=alt.Color("wave:N", scale=alt.Scale(range=['#D45E00', '#0072B2']), title='Wave'),
    )
    .properties(width=100, height=250)
)

# Count labels sitting just above each bar. The layer is derived from the bar
# chart, so it inherits the filter, the x encoding and the colour encoding.
# NOTE(review): the inherited colour encoding most likely takes precedence over
# the mark-level color='white' -- confirm which one is intended.
text = bars.mark_text(
    size=16, dx=0, dy=-5, color='white', baseline='bottom', fontWeight=500
).encode(
    y=alt.Y("n_all:Q", title=None),
    text=alt.Text('n_all:Q', format=','),
)

plot = alt.layer(bars, text).facet(
    column=alt.Column("country:N", header=alt.Header(title=None), sort=COUNTRY)
).resolve_scale(
    color='shared'
).properties(
    title={
        "text": ["Country-Level Hospitalizations By Wave"],
        "dx": 45,
        "subtitleColor": "gray",
    }
)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='bottom',
    legend_title_orient='left',
    axis_label_font_size=14,
    header_label_font_size=16,
    point_size=100
)

plot

In [None]:
# Load the weekly hospitalization percentages per country (tab-separated file)
# and normalise the column names to the lower-case names the chart cell uses.
weekly_rates_path = join("..", "data", "1.1.resurgence", "case rate and severity risk", "percCountCountry_AndAll_Weeks.csv")
df = pd.read_csv(weekly_rates_path, sep='\t').rename(
    columns={"Country": "country", "weeks": "week"}
)

# Upper-case the country names so they match the entries of COUNTRY below.
df['country'] = df['country'].apply(lambda name: name.upper())

# Legend order for the countries and the colour assigned to each of them.
COUNTRY = ['ALL', 'BRAZIL', 'FRANCE', 'GERMANY', 'ITALY', 'SPAIN', 'USA']
COUNTRY_COLOR = ['black', '#CB7AA7', '#0072B2', '#E79F00', '#029F73', '#57B4E9', '#D45E00']

df

In [None]:
def _centered_moving_average(series):
    """Smooth one country's weekly series with a three-point moving average.

    The series is shifted forward by one step, averaged over a window of three
    (fewer at the start, via ``min_periods=1``), and then shifted back by two
    steps. The net effect is that each point becomes the mean of its previous,
    current and next value. Because of the final backward shift, the last two
    points of the series come out as NaN.
    """
    trailing_mean = series.shift().rolling(3, min_periods=1).mean().fillna(series)
    return trailing_mean.shift(-2)


# Weekly intensity rate of hospitalization per country, smoothed over time.
d = df.copy().sort_values('week')

# Moving average using three time points (previous, current, next).
# `transform` is used instead of `apply` because it always returns a result
# aligned to d's own index. On pandas >= 2.0 `GroupBy.apply` prepends the group
# key to the index, and the result can then no longer be assigned to a column.
d['percentage'] = d.groupby('country')['percentage'].transform(_centered_moving_average)

# Keep weeks up to the end of February 2021. This is a string comparison, so
# it assumes `week` holds ISO-formatted (YYYY-MM-DD) dates -- TODO confirm.
d = d[d.week <= '2021-02-28']

plot = alt.Chart(
    d
).mark_line(
    size=2.5,
    point=alt.OverlayMarkDef(filled=True, strokeWidth=4, opacity=0.7),
    opacity=0.7
).encode(
    # Date format reference: https://github.com/d3/d3-time-format#locale_format
    x=alt.X("week:T", title=None, axis=alt.Axis(format=("%B, %Y"), tickCount=7.9, labelAngle=0)),
    y=alt.Y("percentage:Q", title=None, axis=alt.Axis(format=".0%")),
    color=alt.Color("country:N", scale=alt.Scale(domain=COUNTRY, range=COUNTRY_COLOR), title='Country'),
).properties(
    width=850,
    height=350
)

plot = plot.properties(
    title={
        "text": [
            "Intensity Rate Of Hospitalization Over Time"
        ],
        "dx": 35,
        "subtitleColor": "gray",
    }
)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='top-right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    point_size=30,
    axis_tick_color='black'
)

plot

In [None]:
# Load the severity-shift estimates and keep only the risk-ratio rows.
df = pd.read_csv(join("..", "data", "1.1.resurgence", "case rate and severity risk", "severity_shift_random_effects.csv"))

df = df.rename(columns={
    "ci.lwr": "ci_l",
    "ci.upr": "ci_u",
    "n.all": "n",
    "weeks": "week"
})

# Take an explicit copy of the filtered rows: the column assignments below
# would otherwise be made on a slice of the original frame and trigger
# pandas' SettingWithCopyWarning.
df = df[df.effect_size == 'risk_ratio'].copy()

# Interval of one `se` on either side of the point estimate `pp`.
df['e_l'] = df.pp - df.se
df['e_u'] = df.pp + df.se

# Turn the significance flag into a display label. Only values that compare
# equal to True get the 'p<0.05' label; everything else becomes 'False'.
df['sig'] = df['sig'].apply(lambda x: 'p<0.05' if x == True else 'False')

# Axis/legend order for the countries and the colour assigned to each of them.
COUNTRY = ['ALL', 'BRAZIL', 'FRANCE', 'GERMANY', 'ITALY', 'SPAIN', 'USA']
COUNTRY_COLOR = ['black', '#CB7AA7', '#0072B2', '#E79F00', '#029F73', '#57B4E9', '#D45E00']

df

In [None]:
# Relative risk per country: a diamond at the point estimate, a black bar
# spanning ci_l..ci_u, and a dashed vertical guide at a relative risk of 1.
# Only the rows with day == 0 are plotted.
d = df[df.day == 0]

points = (
    alt.Chart(d)
    .mark_point(size=180, filled=True, shape='diamond', opacity=1)
    .encode(
        y=alt.Y("country:O", title=None, axis=alt.Axis(labelAngle=0, tickCount=20, labels=True)),
        x=alt.X("pp:Q", title="Relative Risk", scale=alt.Scale(zero=False, clamp=True), axis=alt.Axis(labelAngle=0, tickCount=10)),
        color=alt.Color("country:N", scale=alt.Scale(domain=COUNTRY, range=COUNTRY_COLOR), title='Country'),
    )
    .properties(width=750, height=350)
)

# Dashed reference line at x = 1.
baseline_df = pd.DataFrame({'baseline': [1, 1, 1, 1, 1, 1, 1], 'country': COUNTRY})
guide = (
    alt.Chart(baseline_df)
    .mark_rule(color='gray', strokeDash=[3,3], opacity=0.5)
    .encode(x=alt.X('baseline:Q'))
)

# Interval bars, derived from the point chart so that they inherit its data,
# size and colour encoding; stroke colour and width are then fixed values.
# NOTE(review): the y channel here is the country, yet its title reads
# 'Days Since Admission' -- looks like a copy-paste leftover; confirm whether
# it shows up in the rendered axis.
tick = points.mark_errorbar(
    opacity=0.7, color='black'
).encode(
    y=alt.Y("country:O", title='Days Since Admission'),
    x=alt.X("ci_l:Q", title="Relative Risk"),
    x2=alt.X2("ci_u:Q"),
    stroke=alt.value('black'),
    strokeWidth=alt.value(2)
)

# Layer order: interval bars first, then the diamonds, then the guide.
plot = alt.layer(tick, points, guide).properties(
    title={
        "text": [
            "Relative Risk Of Severe Disease In Second Compared To First Wave",
        ],
        "dx": 60,
        "subtitleColor": "gray",
    }
)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='top-right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16
)

plot

In [None]:
# Load the severity-shift estimates and keep the absolute-risk rows of the
# 'dayX' cohort.
df = pd.read_csv(join("..", "data", "1.1.resurgence", "case rate and severity risk", "severity_shift_random_effects.csv"))

df = df.rename(columns={
    "ci.lwr": "ci_l",
    "ci.upr": "ci_u",
    "n.all": "n",
    "weeks": "week"
})

# Take an explicit copy of the filtered rows: the column assignments below
# would otherwise be made on a slice of the original frame and trigger
# pandas' SettingWithCopyWarning.
df = df[(df.effect_size == 'risk') & (df.cohort == 'dayX')].copy()

# Interval of one `se` on either side of the point estimate `pp`.
df['e_l'] = df.pp - df.se
df['e_u'] = df.pp + df.se

# Capitalise the wave labels (first letter upper-case, the rest lower-case).
df['wave'] = df['wave'].apply(lambda x: x.capitalize())

# Turn the significance flag into a display label. Only values that compare
# equal to True get the 'p<0.05' label; everything else becomes 'False'.
df['sig'] = df['sig'].apply(lambda x: 'p<0.05' if x == True else 'False')

# Facet order for the countries and the colour assigned to each of them.
COUNTRY = ['ALL', 'BRAZIL', 'FRANCE', 'GERMANY', 'ITALY', 'SPAIN', 'USA']
COUNTRY_COLOR = ['black', '#CB7AA7', '#0072B2', '#E79F00', '#029F73', '#57B4E9', '#D45E00']

df

In [None]:
# Absolute risk per wave: one bar per wave with a black ci_l..ci_u interval,
# in one facet column per country. Only the rows with day == 0 are plotted.
#
# The filtered rows are copied explicitly so that relabelling `wave` below
# does not assign into a slice of `df` (SettingWithCopyWarning).
d = df[df.day == 0].copy()

# 'Early' becomes 'First'; every other value is labelled 'Second'.
d['wave'] = d['wave'].apply(lambda x: 'First' if x == 'Early' else 'Second')

plot = alt.Chart(
    d
).mark_bar(
    size=35,
    opacity=1
).encode(
    x=alt.X("wave:N", title=None, axis=alt.Axis(labelAngle=0, tickCount=20, labels=False)),
    y=alt.Y("pp:Q", title='Absolute Risk', scale=alt.Scale(zero=False, clamp=True), axis=alt.Axis(labelAngle=0, tickCount=10, format='%')),
    color=alt.Color("wave:N", scale=alt.Scale(range=['#D45E00', '#0072B2']), title=None),
).properties(
    width=100,
    height=250
)

# The baseline `guide` rule that used to be built here was never added to the
# chart, so it has been removed.

# Interval bars, derived from the bar chart so that they inherit its data,
# size and colour encoding; stroke colour and width are then fixed values.
# NOTE(review): the x channel here is the wave, yet its title reads
# 'Days Since Admission' -- looks like a copy-paste leftover; confirm whether
# it shows up in the rendered axis.
tick = plot.mark_errorbar(
    opacity=0.7, color='black'
).encode(
    x=alt.X("wave:O", title='Days Since Admission'),
    y=alt.Y("ci_l:Q", title='Absolute Risk'),
    y2=alt.Y2("ci_u:Q"),
    stroke=alt.value('black'),
    strokeWidth=alt.value(2)
)

plot = (plot + tick).facet(
    # Bug aligning labels. https://github.com/altair-viz/altair/issues/1878
    column=alt.Column("country:N", header=alt.Header(title=None), sort=COUNTRY),
    spacing=10
).resolve_scale(color='shared')

plot = plot.properties(
    title={
        "text": [
            "Absolute Risk For Severe COVID-19 In The First and Second Waves",
        ],
        "dx": 60,
        "subtitleColor": "gray",
    }
)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='top-left',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    header_label_orient='bottom'
)

plot

In [None]:
# Load the length-of-stay table and keep only the rows whose site id contains
# "meta".
df = pd.read_csv(join("..", "data", "1.1.resurgence", "case rate and severity risk", "table.stay.rmDead.toShare.csv"))

# Take an explicit copy of the filtered rows: `siteid` and `week` are rewritten
# below, and assigning into a slice of the original frame would trigger
# pandas' SettingWithCopyWarning.
df = df[df.siteid.str.contains("meta")].copy()

# Strip the 'meta-' prefix and upper-case what is left.
df['siteid'] = df['siteid'].apply(lambda x: x.replace('meta-', '').upper())

COUNTRY = ['ALL', 'BRAZIL', 'FRANCE', 'GERMANY', 'ITALY', 'SPAIN', 'USA']
COUNTRY_COLOR = ['black', '#CB7AA7', '#0072B2', '#E79F00', '#029F73', '#57B4E9', '#D45E00']
# Colours for the two waves.
WAVE_COLOR = ['#D45E00', '#0072B2']

# Replace the raw week codes with the labels shown on the x axis.
df['week'] = df['week'].apply(lambda x: x.replace('week1', '1 Week').replace('week2', '2 Weeks').replace('week3', '> 3 Weeks'))

df.head()

In [None]:
# Proportion of patients by length of stay: for every week bucket the 'First'
# and 'Second' wave bars are drawn side by side, each with a thin black
# ci_l..ci_u bar on top, in one facet column per site id.
barSize = 30
barGap = 10
height = 320

# Width of one facet panel and the sideways shift that moves a wave's bar to
# the left (negative) or right (positive) of the week's band centre.
panel_width = (barSize + barGap)*6
wave_offset = barSize/2.0 + 2


def _wave_bars(wave, x_offset, color):
    """Bars of column `n` for a single wave, shifted sideways by `x_offset`."""
    return alt.Chart(
        df
    ).mark_bar(
        size=barSize,
        xOffset=x_offset
    ).encode(
        x=alt.X('week:O', title=None, axis=alt.Axis()),
        y=alt.Y('n', title='Proportion of Patients', axis=alt.Axis(format='%'), scale=alt.Scale(domain=[0, 1])),
        color=color,
    ).transform_filter(
        {'field': 'wave', 'oneOf': [wave]}
    ).properties(
        width=panel_width,
        height=height
    )


def _wave_interval(wave, x_offset):
    """One-pixel-wide black bars from `ci_l` to `ci_u` for a single wave."""
    return alt.Chart(
        df
    ).mark_bar(
        size=1,
        xOffset=x_offset,
        color='black'
    ).encode(
        x=alt.X('week:O', title=None, axis=alt.Axis()),
        y='ci_l:Q',
        y2='ci_u:Q',
        opacity=alt.value(1),
        color=alt.value('black')
    ).transform_filter(
        {'field': 'wave', 'oneOf': [wave]}
    ).properties(
        width=panel_width,
        height=height
    )


# Only the second wave's colour encoding carries the legend title.
first = _wave_bars('First', -wave_offset, alt.Color('wave:N', scale=alt.Scale(range=WAVE_COLOR)))
firstError = _wave_interval('First', -wave_offset)
second = _wave_bars('Second', wave_offset, alt.Color('wave:N', scale=alt.Scale(range=WAVE_COLOR), title='Wave'))
secondError = _wave_interval('Second', wave_offset)

layers = alt.layer(first, firstError, second, secondError).resolve_scale(opacity='independent')

# NOTE(review): the facet order starts with 'META', whereas the other cells
# order their facets with 'ALL' first -- confirm it matches the siteid values.
plot = layers.facet(
    column=alt.Column('siteid:N', title=None, sort=['META', 'BRAZIL', 'FRANCE', 'GERMANY', 'ITALY', 'SPAIN', 'USA'])
).properties(
    title={
        'text': 'Proportion of Patients By Length Of Stay',
        'dx': 60
    }
)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='top-left',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    header_label_orient='top'
)

plot

In [None]:
# Load the censoring rates, give the columns identifier-friendly names and
# drop the CSV's unnamed index column.
censoring_path = join("..", "data", "1.1.resurgence", "case rate and severity risk", "KM_censoring_rate.csv")
df = (
    pd.read_csv(censoring_path, sep=',')
    .rename(columns={
        "censor.rate": "censor_rate",
        "std.err": "std_error",
        "95cl_lwr": "ci_l",
        "95cl_upr": "ci_u",
    })
    .drop(columns=['Unnamed: 0'])
)

# Upper-case the country names so they match the entries of COUNTRY below.
df['country'] = df['country'].apply(lambda name: name.upper())

# This figure uses a five-entry country list (no BRAZIL or SPAIN), with the
# same colours those countries have in the other figures.
COUNTRY = ['ALL', 'FRANCE', 'GERMANY', 'ITALY', 'USA']
COUNTRY_COLOR = ['black', '#0072B2', '#E79F00', '#029F73', '#D45E00']

df

In [None]:
# Censoring rate by days since admission: a line with point markers plus
# ci_l..ci_u error bars, in one facet column per country.
d = df.copy()

# Everything the two layers have in common: data, x axis, colour and size.
base = alt.Chart(
    d
).encode(
    x=alt.X("day:Q", title="Days Since Admission", axis=alt.Axis(tickCount=10, labelAngle=0), scale=alt.Scale(padding=10, nice=False)),
    color=alt.Color("country:N", scale=alt.Scale(domain=COUNTRY, range=COUNTRY_COLOR), title='Country'),
).properties(
    width=200,
    height=200
)

line = base.mark_line(
    size=3.5,
    point=alt.OverlayMarkDef(filled=True, strokeWidth=4, opacity=0.7),
    opacity=0.7
).encode(
    y=alt.Y("censor_rate:Q", title=None, axis=alt.Axis(format=".0%")),
)

error = base.mark_errorbar(
    opacity=0.7
).encode(
    y=alt.Y("ci_l:Q", title=None, axis=alt.Axis(format=".0%")),
    y2=alt.Y2("ci_u:Q"),
)

plot = alt.layer(line, error).facet(
    column=alt.Column("country:N", header=alt.Header(title=None), sort=COUNTRY)
).resolve_scale(
    color='shared'
).properties(
    title={
        "text": ["Censor Rate Across Hospitalization Days"],
        "dx": 35,
    }
)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    header_label_orient='top',
    point_size=50,
    axis_tick_color='black'
)

plot