In [20]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
import datetime
import dateutil.parser
from os.path import join

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    get_visualization_subtitle,
    get_country_color_map,
)
from theme import apply_theme
from web import for_website

# Lift Altair's default 5000-row limit so that larger data frames can be charted.
# The trailing semicolon suppresses the call's return value in the cell output.
alt.data_transformers.disable_max_rows();

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
data_release = '2021-04-27'

# Read the per-country, per-wave case counts, switch the dotted column names to
# snake_case, and keep only the total count: the CSV's unnamed index column and
# the severe-case count are discarded.
df = (
    pd.read_csv(join("..", "data", "1.1.resurgence", "case rate and severity risk", "case_numbers.csv"))
    .rename(columns={"n.all": "n_all", "n.severe": "n_severe"})
    .drop(columns=['Unnamed: 0', 'n_severe'])
)

# Country names in display order, and a parallel list of colours (one per country).
COUNTRY = ['ALL', 'BRAZIL', 'FRANCE', 'GERMANY', 'ITALY', 'SPAIN', 'USA']
COUNTRY_COLOR = ['black', '#CB7AA7', '#0072B2', '#E79F00', '#029F73', '#57B4E9', '#D45E00']

# Relabel the waves for display. Any value other than 'early' / 'late' raises a
# KeyError rather than silently passing through.
df["wave"] = [{'early': 'First', 'late': 'Second'}[wave] for wave in df["wave"]]

df

Unnamed: 0,wave,country,n_all
0,First,FRANCE,10158
1,Second,FRANCE,5023
2,First,GERMANY,267
3,Second,GERMANY,931
4,First,ITALY,3352
5,Second,ITALY,2416
6,First,SPAIN,2369
7,Second,SPAIN,3730
8,First,USA,16150
9,Second,USA,34565


In [22]:
d = df.copy()

# Bar layer: one bar per wave showing n_all on a log-scaled y axis. Rows whose
# n_all is not strictly positive are filtered out first.
plot = (
    alt.Chart(d)
    .transform_filter(alt.datum.n_all > 0)
    .mark_bar(size=35)
    .encode(
        x=alt.X("wave:N", title=None, axis=alt.Axis(labels=False)),
        y=alt.Y("n_all:Q", title=None, scale=alt.Scale(type='log')),
        color=alt.Color("wave:N", scale=alt.Scale(range=['#D45E00', '#0072B2']), title='Wave'),
    )
    .properties(width=100, height=250)
)

# Label layer: derived from the bar layer, so it keeps the filter, the colour
# encoding and the chart size. It prints the comma-formatted n_all value,
# anchored at its bottom edge and nudged 5px upwards.
text = plot.mark_text(
    size=16,
    dx=0,
    dy=-5,
    color='white',
    baseline='bottom',
    fontWeight=500,
).encode(
    x=alt.X("wave:N", title=None, axis=alt.Axis(labels=False)),
    y=alt.Y("n_all:Q", title=None),
    text=alt.Text('n_all:Q', format=','),
)

# Stack both layers, split them into one column per country (ordered as in
# COUNTRY) with a shared colour scale, add the figure title, and finally apply
# the project theme.
plot = apply_theme(
    (plot + text)
    .facet(column=alt.Column("country:N", header=alt.Header(title=None), sort=COUNTRY))
    .resolve_scale(color='shared')
    .properties(
        title={
            "text": ["Country-Level Hospitalizations By Wave"],
            "dx": 45,
            "subtitleColor": "gray",
        }
    ),
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='bottom',
    legend_title_orient='left',
    axis_label_font_size=14,
    header_label_font_size=16,
    point_size=100,
)

plot

In [23]:
# Read the tab-separated weekly percentages and normalise the column names
# ("Country" -> "country", "weeks" -> "week").
df = pd.read_csv(
    join("..", "data", "1.1.resurgence", "case rate and severity risk", "percCountCountry_AndAll_Weeks.csv"),
    sep='\t',
).rename(columns={"Country": "country", "weeks": "week"})

# Upper-case the country names (the entries of COUNTRY below are upper-case too).
df["country"] = [name.upper() for name in df["country"]]

# Country names in display order, and a parallel list of colours (one per country).
COUNTRY = ['ALL', 'BRAZIL', 'FRANCE', 'GERMANY', 'ITALY', 'SPAIN', 'USA']
COUNTRY_COLOR = ['black', '#CB7AA7', '#0072B2', '#E79F00', '#029F73', '#57B4E9', '#D45E00']

df

Unnamed: 0,week,country,percentage
0,2020-03-16,FRANCE,0.098412
1,2020-05-11,FRANCE,0.004150
2,2020-05-18,FRANCE,0.003821
3,2020-05-04,FRANCE,0.006917
4,2020-08-24,FRANCE,0.007180
...,...,...,...
499,2021-03-22,ALL,0.000000
500,2021-05-03,ALL,0.000000
501,2021-04-26,ALL,0.000000
502,2021-05-24,ALL,0.000000


In [24]:
d = df.copy()

# Rolling windows below operate on row order, so order the rows chronologically first.
d = d.sort_values('week')

# Centred three-point moving average per country (previous, current, next week).
#
# How the expression produces a centred window:
#   shift()                 -> row i holds the value of row i-1
#   rolling(3).mean()       -> row i holds mean(x[i-3], x[i-2], x[i-1])
#   fillna(x)               -> the first row (no predecessor) falls back to its raw value
#   shift(-2)               -> row i holds mean(x[i-1], x[i], x[i+1])
# Because of min_periods=1 the first row averages only the rows that exist
# (current and next). The final shift leaves the last two rows of every country
# as NaN.
# NOTE(review): those trailing rows are presumably removed by the date cutoff
# below -- confirm that every country has at least two weeks after the cutoff.
#
# `transform` is used instead of `apply` because it always returns a result
# indexed like `d`. With pandas >= 2.0, `groupby(...).apply(...)` prepends the
# group key to the index for this kind of like-indexed result, and assigning
# that back to a column fails.
d['percentage'] = d.groupby('country').percentage.transform(
    lambda x: x.shift().rolling(3, min_periods=1).mean().fillna(x).shift(-2)
)

# Keep weeks up to the end of February 2021. This is a plain comparison against
# a string; it assumes `week` holds ISO-formatted (YYYY-MM-DD) dates -- TODO confirm.
d = d[d.week <= '2021-02-28']

# Weekly percentage over time: one semi-transparent line with point markers per
# country, coloured by pairing COUNTRY with COUNTRY_COLOR. Size and title are
# set in a single properties() call.
plot = (
    alt.Chart(d)
    .mark_line(
        size=2.5,
        point=alt.OverlayMarkDef(filled=True, strokeWidth=4, opacity=0.7),
        opacity=0.7,
    )
    .encode(
        # Date labels follow d3-time-format: https://github.com/d3/d3-time-format#locale_format
        x=alt.X("week:T", title=None, axis=alt.Axis(format=("%B, %Y"), tickCount=7.9, labelAngle=0)),
        y=alt.Y("percentage:Q", title=None, axis=alt.Axis(format=".0%")),
        color=alt.Color("country:N", scale=alt.Scale(domain=COUNTRY, range=COUNTRY_COLOR), title='Country'),
    )
    .properties(
        width=850,
        height=350,
        title={
            "text": ["Intensity Rate Of Hospitalization Over Time"],
            "dx": 35,
            "subtitleColor": "gray",
        },
    )
)

# Apply the project theme; the legend sits in the top-right corner of the plot.
plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='top-right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    point_size=30,
    axis_tick_color='black',
)

plot

In [25]:
# Read the random-effects results and normalise the dotted / plural column names.
df = pd.read_csv(
    join("..", "data", "1.1.resurgence", "case rate and severity risk", "severity_shift_random_effects.csv")
).rename(columns={
    "ci.lwr": "ci_l",
    "ci.upr": "ci_u",
    "n.all": "n",
    "weeks": "week",
})

# Only the risk-ratio rows are needed here.
df = df[df.effect_size == 'risk_ratio']

# Add pp +/- se bounds and turn the significance flag into a display label.
# `== True` is kept on purpose so that only values comparing equal to True are
# labelled 'p<0.05'; everything else (including missing values) becomes 'False'.
df = df.assign(
    e_l=df.pp - df.se,
    e_u=df.pp + df.se,
    sig=['p<0.05' if flag == True else 'False' for flag in df['sig']],
)

# Country names in display order, and a parallel list of colours (one per country).
COUNTRY = ['ALL', 'BRAZIL', 'FRANCE', 'GERMANY', 'ITALY', 'SPAIN', 'USA']
COUNTRY_COLOR = ['black', '#CB7AA7', '#0072B2', '#E79F00', '#029F73', '#57B4E9', '#D45E00']

df

Unnamed: 0,country,day,wave,cohort,effect_size,pp,se,ci_l,ci_u,x,n,sig,e_l,e_u
4,ALL,0,early_to_late,dayX,risk_ratio,0.903636,0.008093,0.889416,0.918084,,,p<0.05,0.895543,0.911729
9,ALL,1,early_to_late,dayX,risk_ratio,0.911748,0.008018,0.897531,0.926190,,,p<0.05,0.903730,0.919766
14,ALL,2,early_to_late,dayX,risk_ratio,0.921576,0.007954,0.907320,0.936055,,,p<0.05,0.913622,0.929530
19,ALL,3,early_to_late,dayX,risk_ratio,0.927719,0.007961,0.913356,0.942309,,,p<0.05,0.919758,0.935680
24,ALL,4,early_to_late,dayX,risk_ratio,0.902439,0.032184,0.847270,0.961200,,,p<0.05,0.870255,0.934623
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
504,BRAZIL,10,early_to_late,dayX,risk_ratio,0.441176,0.473825,0.174295,1.116709,,,False,-0.032648,0.915001
509,BRAZIL,11,early_to_late,dayX,risk_ratio,0.276316,0.622772,0.081525,0.936526,,,p<0.05,-0.346456,0.899088
514,BRAZIL,12,early_to_late,dayX,risk_ratio,,,,,,,False,,
519,BRAZIL,13,early_to_late,dayX,risk_ratio,1.000000,0.500000,0.375311,2.664456,,,False,0.500000,1.500000


In [26]:
d = df.copy()

# Only the estimates for day 0 are plotted.
d = d[d.day == 0]

# Point layer: one filled diamond per country at its point estimate (pp),
# coloured by pairing COUNTRY with COUNTRY_COLOR. The legend is hidden because
# the country names already label the y axis.
plot = alt.Chart(
    d
).mark_point(
    size=180,
    filled=True,
    shape='diamond',
    opacity=1
).encode(
    y=alt.Y("country:O", title=None, axis=alt.Axis(labelAngle=0, tickCount=20, labels=True)),
    x=alt.X("pp:Q", title=None, scale=alt.Scale(zero=False, clamp=True), axis=alt.Axis(labelAngle=0, tickCount=10)),
    color=alt.Color("country:N", scale=alt.Scale(domain=COUNTRY, range=COUNTRY_COLOR), legend=None, title='Country'),
).properties(
    width=750,
    height=350
)

# Vertical dashed reference line at a risk ratio of 1.
# A single row is enough: with one row per country, seven identical rules were
# drawn on top of each other and the stacked strokes cancelled out opacity=0.5.
guide = alt.Chart(
    pd.DataFrame({'baseline': [1]})
).mark_rule(color='gray', strokeDash=[3,3], opacity=0.5).encode(
    x=alt.X('baseline:Q')
)

# Error-bar layer: horizontal black bar from ci_l to ci_u for every country.
# The y title is None, like in the point layer. It used to be
# 'Days Since Admission', which does not describe this axis (it lists
# countries) and could surface as the axis title once the layers are merged.
tick = plot.mark_errorbar(
    opacity=0.7, color='black'
).encode(
    y=alt.Y("country:O", title=None),
    x=alt.X("ci_l:Q"),
    x2=alt.X2("ci_u:Q"),
    stroke=alt.value('black'),
    strokeWidth=alt.value(2)
)

# Draw order: error bars at the bottom, then the diamonds, then the reference line.
plot = (tick + plot + guide)

plot = plot.properties(
    title={
        "text": [
            "Relative Risk Of Severe Disease In Second Compared To First Wave",
        ],
        "dx": 60,
        "subtitleColor": "gray",
    }
)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='right',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16
)

plot

In [27]:
# Read the random-effects results and normalise the dotted / plural column names.
df = pd.read_csv(
    join("..", "data", "1.1.resurgence", "case rate and severity risk", "severity_shift_random_effects.csv")
).rename(columns={
    "ci.lwr": "ci_l",
    "ci.upr": "ci_u",
    "n.all": "n",
    "weeks": "week",
})

# Keep the absolute-risk rows of the 'dayX' cohort.
df = df[(df.effect_size == 'risk') & (df.cohort == 'dayX')]

# Add pp +/- se bounds, capitalise the wave names for display, and turn the
# significance flag into a label. `== True` is kept on purpose so that only
# values comparing equal to True are labelled 'p<0.05'; everything else
# (including missing values) becomes 'False'.
df = df.assign(
    e_l=df.pp - df.se,
    e_u=df.pp + df.se,
    wave=[label.capitalize() for label in df.wave],
    sig=['p<0.05' if flag == True else 'False' for flag in df['sig']],
)

# Country names in display order, and a parallel list of colours (one per country).
COUNTRY = ['ALL', 'BRAZIL', 'FRANCE', 'GERMANY', 'ITALY', 'SPAIN', 'USA']
COUNTRY_COLOR = ['black', '#CB7AA7', '#0072B2', '#E79F00', '#029F73', '#57B4E9', '#D45E00']

df

Unnamed: 0,country,day,wave,cohort,effect_size,pp,se,ci_l,ci_u,x,n,sig,e_l,e_u
1,ALL,0,Early,dayX,risk,0.404033,0.537430,0.335662,0.476345,15835.0,32452.0,False,-0.133396,0.941463
3,ALL,0,Late,dayX,risk,0.349138,0.551375,0.263658,0.445561,19870.0,47035.0,False,-0.202238,0.900513
6,ALL,1,Early,dayX,risk,0.406724,0.547484,0.320620,0.498969,15128.0,30780.0,False,-0.140760,0.954208
8,ALL,1,Late,dayX,risk,0.346086,0.559766,0.248412,0.458726,19253.0,45175.0,False,-0.213680,0.905852
11,ALL,2,Early,dayX,risk,0.432304,0.543421,0.351208,0.517198,14423.0,28722.0,False,-0.111117,0.975724
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
513,BRAZIL,12,Late,dayX,risk,0.416667,0.142319,0.137722,0.695611,5.0,12.0,p<0.05,0.274348,0.558985
516,BRAZIL,13,Early,dayX,risk,0.500000,0.204124,0.099917,0.900083,3.0,6.0,False,0.295876,0.704124
518,BRAZIL,13,Late,dayX,risk,0.500000,0.144338,0.217098,0.782902,6.0,12.0,False,0.355662,0.644338
521,BRAZIL,14,Early,dayX,risk,0.250000,0.153093,-0.050062,0.550062,2.0,8.0,False,0.096907,0.403093


In [28]:
d = df.copy()

# Only the estimates for day 0 are plotted.
d = d[d.day == 0]

# Bar layer: one bar per wave showing the point estimate (pp) as a percentage.
# The wave labels on the x axis are hidden; the colour legend identifies the waves.
plot = alt.Chart(
    d
).mark_bar(
    size=35,
    opacity=1
).encode(
    x=alt.X("wave:N", title=None, axis=alt.Axis(labelAngle=0, tickCount=20, labels=False)),
    y=alt.Y("pp:Q", title='Absolute Risk', scale=alt.Scale(zero=False, clamp=True), axis=alt.Axis(labelAngle=0, tickCount=10, format='%')),
    color=alt.Color("wave:N", scale=alt.Scale(range=['#D45E00', '#0072B2']), title=None),
).properties(
    width=100,
    height=250
)

# NOTE(review): `guide` is not added to the chart in this cell (only the bars
# and the error bars are layered below). It is kept unchanged because other
# cells of the notebook may still refer to the name.
guide = alt.Chart(
    pd.DataFrame({'baseline': [1, 1, 1, 1, 1, 1, 1], 'country': COUNTRY})
).mark_rule(color='gray', strokeDash=[3,3], opacity=0.5).encode(
    x=alt.X('baseline:Q')
)

# Error-bar layer: vertical black bar from ci_l to ci_u on top of every bar.
# The x title is None, like in the bar layer. It used to be
# 'Days Since Admission', which does not describe this axis (it shows the two
# waves) and could surface as the axis title once the layers are merged.
tick = plot.mark_errorbar(
    opacity=0.7, color='black'
).encode(
    x=alt.X("wave:O", title=None),
    y=alt.Y("ci_l:Q", title='Absolute Risk'),
    y2=alt.Y2("ci_u:Q"),
    stroke=alt.value('black'),
    strokeWidth=alt.value(2)
)

# One column per country (ordered as in COUNTRY) sharing a single colour scale.
plot = (plot + tick).facet(
    # Bug aligning labels. https://github.com/altair-viz/altair/issues/1878
    column=alt.Column("country:N", header=alt.Header(title=None), sort=COUNTRY),
    spacing=10
).resolve_scale(color='shared')

plot = plot.properties(
    title={
        "text": [
            "Absolute Risk For Severe COVID-19 In The First and Second Waves",
        ],
        "dx": 60,
        "subtitleColor": "gray",
    }
)

plot = apply_theme(
    plot,
    axis_y_title_font_size=16,
    title_anchor='start',
    legend_orient='top-left',
    legend_title_orient='top',
    axis_label_font_size=14,
    header_label_font_size=16,
    header_label_orient='bottom'
)

plot