In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
import datetime
import dateutil.parser
from os.path import join
import re

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    read_loinc_df,
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    read_full_daily_counts_df,
    get_visualization_subtitle,
    get_country_color_map,
    apply_theme,
    merge_single_site_country_pediatric_name,
)
from web import for_website

alt.data_transformers.disable_max_rows(); # Allow using rows more than 5000

## US pediatric hospitalization data

Download CDC weekly hospitalizations by age group from
https://gis.cdc.gov/grasp/COVIDNet/COVID19_5.html

Click "Download data", unzip, then move "Weekly_Data_Counts.csv" to `data/Weekly_Data_Counts.csv`

## France pediatric hospitalization data

Download `4CE_FRANCE_pedia.xlsx` from the `#pediatrics` Slack channel and move it to `data/4CE_FRANCE_pedia.xlsx`.

## UK pediatric hospitalization data

https://www.rcpch.ac.uk/resources/covid-19-service-evaluation-audit-care-needs-children-admitted-hospital-england#downloadBox

Download the "Number of children admitted to hospital with positive test for COVID-19" CSV, rename it, and move it to `data/4CE_UK_pedia.csv`.


In [None]:
# Data release tag; in the cells visible here it is only referenced from commented-out subtitle code.
DATA_RELEASE = "2020-10-26"
# Cohort label interpolated into the names passed to for_website(...).
COHORT = "Pediatric"
# When True, country names are remapped with merge_single_site_country_pediatric_name before aggregation.
MERGE_SINGLE_SITE_COUNTRIES = False
# Order used for the `sort=` argument of the per-country facet columns.
COUNTRY_SORT = ["France", "Germany", "Singapore", "Spain", "UK", "USA"]

In [None]:
# Initial date bounds (each offset by one hour; reason for the offset is not recorded here).
# NOTE(review): both names are rebound later — the gap-filling cell assigns `max_date` from the data and
# `min_date` inside its per-site loop — so these values only hold until that cell runs.
min_date = datetime.datetime(2020, 3, 3) + datetime.timedelta(hours=1)
max_date = datetime.datetime(2020, 10, 9) + datetime.timedelta(hours=1)

In [None]:
CATEGORY = "category"
CATEGORY_OF_INTEREST = "new_positive_cases"

In [None]:
# Total population per country.
COUNTRY_POPULATION = {
    # From https://data.worldbank.org/indicator/SP.POP.TOTL
    "France": 67059887,
    "USA": 328239523,
    "Germany": 83132799,
    "Italy": 60297396,
    "Singapore": 5703569,
    "Spain": 47076781,
    "UK": 66834405,
}
# NOTE(review): COUNTRY_HOSP_DISCHARGE is assigned twice in this cell. The second assignment further
# down replaces this dict, so the "USA" value in effect afterwards is 8222, not 10906.2.
COUNTRY_HOSP_DISCHARGE = {
    # From https://data.oecd.org/healthcare/hospital-discharge-rates.htm
    "France": 18553.0,
    "USA": 10906.2, # https://hcup-us.ahrq.gov/faststats/NationalTrendsServlet
    "Germany": 25478.4,
    "Italy": 11414.6,
    "Singapore": 12700.4, # https://www.moh.gov.sg/resources-statistics/healthcare-institution-statistics/hospital-admission-rates-by-age-and-sex/hospital-admission-rates-by-age-and-sex-2017
    "Spain": 10470.5,
    "UK": 12869.4,
}
# Per-country population for the pediatric cohort (presumably; inferred from the `_PED` suffix and sources).
# "Spain" and "UK" have no source URL recorded.
COUNTRY_POPULATION_PED = {
    "France": 15252608, # https://www.insee.fr/en/statistiques/2382597?sommaire=2382613#consulter-sommaire
    "USA": 73197414, # https://www.census.gov/quickfacts/fact/table/US/PST045219
    "Germany": 15302675, # https://www.destatis.de/EN/Themes/Society-Environment/Population/Current-Population/Tables/lrbev01.html
    "Italy": 10295800, # https://www.unicef.org/infobycountry/italy_statistics.html#123
    "Singapore": 803400, # https://www.singstat.gov.sg/find-data/search-by-theme/population/population-and-population-structure/latest-data
    "Spain": 8383100,
    "UK": 13304200,
}
# Second assignment of COUNTRY_HOSP_DISCHARGE: identical to the first one except for the "USA" entry.
# NOTE(review): possibly intended as a separate pediatric constant — confirm before relying on either value.
COUNTRY_HOSP_DISCHARGE = {
    "France": 18553.0,
    "USA": 8222, # https://hcup-us.ahrq.gov/faststats/NationalTrendsServlet
    "Germany": 25478.4,
    "Italy": 11414.6,
    "Singapore": 12700.4, # https://www.moh.gov.sg/resources-statistics/healthcare-institution-statistics/hospital-admission-rates-by-age-and-sex/hospital-admission-rates-by-age-and-sex-2017
    "Spain": 10470.5,
    "UK": 12869.4,
}

In [None]:
df = read_full_daily_counts_df()

## Restrict to pediatric sites

In [None]:
df.head()

In [None]:
# Keep only the rows flagged as pediatric, then drop the flag column since it is constant afterwards.
is_pediatric = df["pediatric"] == True
df = df.loc[is_pediatric].drop(columns=["pediatric"])
df.head()

In [None]:
df = df.sort_values(by="calendar_date", ascending=True)

In [None]:
df.loc[df["siteid"] == "KUMCPED"]

In [None]:
df.loc[df["siteid"] == "NWUPED"].head(50)

In [None]:
# Drop the dataset containing maternity info.
df = df.loc[~df["siteid"].isin(["APHPPED"])]

In [None]:
df["num_sites"] = 1

In [None]:
# Read the sites participating table to obtain the per-site obfuscation values.
# The TSV is loaded without a header row (the first two lines are skipped), so the
# columns of interest are addressed by position and named via `sites_column_map`.
sites_df = pd.read_csv(join("..", "data", "Health_Systems_Participating.tsv"), sep='\t', skiprows=2, header=None, thousands=',')
sites_column_map = {
    0: "site_name",
    1: "siteid",
    2: "city",
    3: "country",
    4: "patient_type",
    6: "adult_num_hosp",
    7: "adult_num_beds",
    8: "adult_num_yearly_discharge",
    10: "ped_num_hosp",
    11: "ped_num_beds",
    12: "ped_num_yearly_discharge",
    18: "ped_obfusc_mask_threshold",
}
sites_df = sites_df.rename(columns=sites_column_map)
sites_df = sites_df[list(sites_column_map.values())]

# Keep only sites whose patient type includes pediatric patients.
sites_df["pediatric"] = sites_df["patient_type"].isin(["Pediatric", "Adult & Pediatric"])
sites_df = sites_df.loc[sites_df["pediatric"]]
# Site ids get a "PED" suffix so they can be looked up by the `siteid` values of the
# daily-count frame; "APHP" is special-cased to "APHPPEDHOSP".
sites_df["siteid"] = sites_df["siteid"].apply(lambda x: "APHPPEDHOSP" if x == "APHP" else f"{x}PED")
sites_df = sites_df.dropna(subset=["site_name"])
sites_df = sites_df.set_index("siteid")


def _parse_obfusc_threshold(raw):
    """Turn a raw obfuscation-threshold cell into an integer fill value.

    Missing cells and cells without any digit in their first five characters
    give 0. Otherwise the digits found in the first five characters are read
    as an integer and reduced by one (only when positive).
    """
    if pd.isna(raw):
        return 0
    # Raw string: '\D' in a normal string literal is an invalid escape sequence.
    digits = re.sub(r'\D', '', raw[:5])
    threshold = int(digits) if digits else 0
    return threshold - 1 if threshold > 0 else threshold


sites_df["ped_obfusc_mask_threshold"] = sites_df["ped_obfusc_mask_threshold"].apply(_parse_obfusc_threshold)
sites_df.head()

In [None]:
df["num_hosps"] = df["siteid"].apply(lambda sid: sites_df.at[sid, "ped_num_hosp"] if pd.notna(sites_df.at[sid, "ped_num_hosp"]) else 1)

In [None]:
# Replace missing values with 0.5*site's obfuscation value
# (values <= -99 are treated as the "missing" sentinel in both count columns).
for count_col in ("cumulative_patients_all", "num_patients_in_hospital_on_this_date"):
    df[count_col] = df.apply(
        lambda row, col=count_col: 0.5*sites_df.at[row["siteid"], "ped_obfusc_mask_threshold"] if row[col] <= -99 else row[col],
        axis='columns',
    )

In [None]:
df.loc[(df["country"] == "USA") & (df["calendar_date"] == "2020-05-18")]

In [None]:
df.loc[(df["country"] == "USA") & (df["calendar_date"] == "2020-05-19")]

## If site is missing data for a particular date, use the most recent previous data point for that date

In [None]:
# NOTE(review): this cell rebinds the notebook-level names `max_date` (here) and `min_date`
# (inside the loop, once per site). Later cells that read `min_date` therefore see the minimum
# date of the last site iterated, not the value configured near the top of the notebook.
max_date = df["calendar_date"].max()
max_date_str = str(max_date).split(" ")[0]

site_frames = []
# Group by the scalar key so `siteid` is a plain value on every pandas version.
for siteid, cd_df in df.groupby("siteid"):
    min_date = cd_df["calendar_date"].min()
    min_date_str = str(min_date).split(" ")[0]

    first_day = dateutil.parser.parse(min_date_str)
    num_days = (dateutil.parser.parse(max_date_str) - first_day).days

    cd_df = cd_df.copy()
    cd_df["calendar_date"] = cd_df["calendar_date"].astype(str)

    # First reported row for every date string of this site.
    rows_by_date = {}
    for record in cd_df.to_dict('records'):
        rows_by_date.setdefault(record["calendar_date"], record)

    # Walk the calendar from the site's first day up to (not including) the global last day.
    # For a day without a report, repeat the most recent reported row, but mark it as
    # contributing no sites/hospitals.
    filled_rows = []
    prev_date_row = None
    for day_offset in range(num_days):
        curr_date_str = str(first_day + datetime.timedelta(days=day_offset)).split(" ")[0]
        if curr_date_str in rows_by_date:
            prev_date_row = rows_by_date[curr_date_str]
        else:
            filled_rows.append(dict(prev_date_row, calendar_date=curr_date_str, num_sites=0, num_hosps=0))

    # Add all filled rows at once (DataFrame.append is gone in pandas 2.x and was quadratic here).
    if filled_rows:
        cd_df = pd.concat([cd_df, pd.DataFrame(filled_rows)], ignore_index=True)
    site_frames.append(cd_df)

all_date_country_df = pd.concat(site_frames, ignore_index=True) if site_frames else pd.DataFrame()
def convert_date(date_str):
    """Parse a date string with dateutil, returning NaN when it cannot be parsed.

    Args:
        date_str: Text to parse.

    Returns:
        A `datetime.datetime`, or `np.nan` when dateutil rejects the input
        (unparseable text, a non-string value, or an out-of-range date).
    """
    try:
        return dateutil.parser.parse(date_str)
    # Only parsing failures are swallowed; KeyboardInterrupt and the like propagate.
    except (ValueError, OverflowError, TypeError):
        return np.nan
all_date_country_df["calendar_date"] = all_date_country_df["calendar_date"].apply(convert_date)
df = all_date_country_df

In [None]:
# Remove last day which has a big drop off for the US cumulative counts
df = df.loc[df["calendar_date"] <= (max_date - datetime.timedelta(days=1))]

In [None]:
df["num_patients_in_hospital_minus_severe_on_this_date"] = df["num_patients_in_hospital_on_this_date"] - df["num_patients_in_hospital_and_severe_on_this_date"]

In [None]:
df = df.loc[df["siteid"] != "NWUPED"]

In [None]:
country_color_map = get_country_color_map(merge_single_site_countries=MERGE_SINGLE_SITE_COUNTRIES, pediatric=True)

In [None]:
if MERGE_SINGLE_SITE_COUNTRIES:
    df["country"] = df["country"].apply(merge_single_site_country_pediatric_name)

# Sum the per-site rows into one row per country and date.
# `numeric_only=True` is spelled out because the default changed across pandas versions:
# without it, newer pandas also "sums" text columns such as `siteid` by concatenating them.
country_sum_df = df.groupby(["country", "calendar_date"]).sum(numeric_only=True).reset_index()
country_sum_df.head()

In [None]:
df.to_csv("test_this.csv")

In [None]:
# Derive per-day increases from the cumulative counts, one country at a time.
country_frames = []
for country, country_df in country_sum_df.groupby("country"):
    country_df = country_df.copy()
    for severity in ("all", "severe", "dead"):
        cumulative = country_df[f"cumulative_patients_{severity}"].values
        # The first row of a country has no predecessor, so its increase is NaN.
        country_df[f"cum_diff_{severity}"] = np.concatenate((np.array([np.nan]), np.diff(cumulative)))
    # Computed from the unclipped differences, then clipped like the others below.
    country_df["cum_diff_all_minus_severe"] = country_df["cum_diff_all"] - country_df["cum_diff_severe"]

    # Negative increases (a cumulative count that went down) are floored at zero.
    diff_cols = ["cum_diff_all", "cum_diff_severe", "cum_diff_dead", "cum_diff_all_minus_severe"]
    country_df[diff_cols] = country_df[diff_cols].clip(lower=0)

    country_frames.append(country_df)

# One concat instead of repeated DataFrame.append (removed in pandas 2.x).
country_sum_df = pd.concat(country_frames, ignore_index=True) if country_frames else country_sum_df.copy()
country_sum_df.tail()

In [None]:
country_sum_molten_df = country_sum_df.melt(id_vars=["country", "calendar_date", "num_sites", "num_hosps"])
country_sum_molten_df.head()

# Daily counts by country, with `num_patients_in_hospital_minus_severe_on_this_date` and `num_patients_in_hospital_and_severe_on_this_date` as color, country as column facet

In [None]:
# Faceted chart of patients currently in hospital per country (line, top) with the
# number of contributing hospitals underneath (bars, bottom).
# `COUNTRIES` is also read by the later JHU cell to filter its rows.
COUNTRIES = country_sum_molten_df["country"].unique().tolist()

column_width = 200

num_in_hospital_by_country_molten_df = country_sum_molten_df.loc[country_sum_molten_df["variable"].isin([
    "num_patients_in_hospital_on_this_date",
    "num_patients_in_hospital_minus_severe_on_this_date",
    "num_patients_in_hospital_and_severe_on_this_date"
])].copy()
num_in_hospital_by_country_molten_df["variable"] = num_in_hospital_by_country_molten_df["variable"].replace({
    "num_patients_in_hospital_on_this_date": "All",
    "num_patients_in_hospital_minus_severe_on_this_date": "All minus Severe",
    "num_patients_in_hospital_and_severe_on_this_date": "Severe",
})

# Only the "All" series is plotted; the two severity splits selected above are dropped again here.
num_in_hospital_by_country_molten_df = num_in_hospital_by_country_molten_df.loc[num_in_hospital_by_country_molten_df["variable"] == "All"]

# Neither scale is referenced by an encoding in this cell (the color encoding below is commented out).
country_color_scale = alt.Scale(domain=list(country_color_map.keys()), range=list(country_color_map.values()))
severity_color_scale = alt.Scale(domain=["All"], range=["black"])



filtered_plot = alt.Chart(num_in_hospital_by_country_molten_df)

tooltip = [
    alt.Tooltip("country", title="Country"),
    alt.Tooltip("calendar_date", title="Date"),
    alt.Tooltip("variable", title="Variable"),
    alt.Tooltip("value", title="Number of patients"),
    alt.Tooltip("num_sites", title="Number of sites"),
    alt.Tooltip("num_hosps", title="Number of hospitals"),
]

# Upper bound of the explicit y domain used for the line chart.
max_value = num_in_hospital_by_country_molten_df["value"].max()

top_plot = filtered_plot.mark_line(color="black").encode(
    x=alt.X("calendar_date", axis=alt.Axis(title=None)),
    y=alt.Y("value:Q", axis=alt.Axis(title="# of patients in hospital"), scale=alt.Scale(domain=[0.0, max_value])),
    tooltip=tooltip
).properties(width=column_width).facet(
    column=alt.Column(
        "country:N",
        sort=COUNTRY_SORT,
        header=alt.Header(title=None)
    )
    , spacing=40, bounds="flush"
)

# Headroom of two above the largest hospital count for the bar chart's y domain.
num_hosps_max = num_in_hospital_by_country_molten_df["num_hosps"].max()
num_hosps_max += 2

bottom_plot = filtered_plot.mark_bar(size=1.5, color="black").encode(
    x=alt.X("calendar_date", axis=alt.Axis(title="Date")),
    y=alt.Y("num_hosps:Q", axis=alt.Axis(title="# of hospitals"), scale=alt.Scale(domain=[0, num_hosps_max])),
    #color=alt.Color("country:N", legend=alt.Legend(title="Country"), scale=country_color_scale),
    tooltip=tooltip
).properties(height=80, width=column_width).facet(
    column=alt.Column(
        "country:N",
        sort=COUNTRY_SORT,
        header=alt.Header(title=None, labels=False),
    )
    , spacing=40, bounds="flush"
)

# Stack the two faceted charts; title and subtitle are deliberately left blank.
plot = alt.vconcat(top_plot, bottom_plot).resolve_scale(x="shared", color="independent").properties(title={
    #"text": ["Daily Counts by Country"], 
    "text": [""],
    "dx": 50,
    #"subtitle": get_visualization_subtitle(data_release=DATA_RELEASE, num_sites=NUM_SITES, cohort=COHORT),
    "subtitle": "",
    "subtitleColor": "gray",
    "anchor": "middle",
})

plot = apply_theme(plot)

for_website(plot, f"Daily Counts {COHORT}", "Num hospitalized patients by country", df=num_in_hospital_by_country_molten_df)

plot

In [None]:
# Faceted chart of the daily increase in hospitalized patients per country (line, top)
# with the number of contributing hospitals underneath (bars, bottom).
COUNTRIES = country_sum_molten_df["country"].unique().tolist()

column_width = 200

num_in_hospital_by_country_molten_df = country_sum_molten_df.loc[country_sum_molten_df["variable"].isin([
    "cum_diff_all",
])].copy()
num_in_hospital_by_country_molten_df["variable"] = num_in_hospital_by_country_molten_df["variable"].replace({
    "cum_diff_all": "All",
})

# `country_color_scale` is not referenced by an encoding in this cell (the color encoding below is commented out).
country_color_scale = alt.Scale(domain=list(country_color_map.keys()), range=list(country_color_map.values()))
# The domain has one entry while the range lists two colors.
severity_color_scale = alt.Scale(domain=["All"], range=["#A9A9A9", "#000000"])


filtered_plot = alt.Chart(num_in_hospital_by_country_molten_df)

tooltip = [
    alt.Tooltip("country", title="Country"),
    alt.Tooltip("calendar_date", title="Date"),
    alt.Tooltip("variable", title="Variable"),
    alt.Tooltip("value", title="Number of patients"),
    alt.Tooltip("num_sites", title="Number of sites"),
    alt.Tooltip("num_hosps", title="Number of hospitals"),
]

# Unlike the other plot cells, the facets here are sorted by `COUNTRIES` (order of appearance
# in the data) rather than by `COUNTRY_SORT`.
top_plot = filtered_plot.mark_line().encode(
    x=alt.X("calendar_date", axis=alt.Axis(title=None)),
    y=alt.Y("value:Q", axis=alt.Axis(title="Number of new hospitalized patients")),
    color=alt.Color("variable:N", legend=None, scale=severity_color_scale),
    tooltip=tooltip
).properties(width=column_width).facet(
    column=alt.Column(
        "country:N",
        sort=COUNTRIES,
        header=alt.Header(title=None)
    )
    , spacing=40, bounds="flush"
)

bottom_plot = filtered_plot.mark_bar(size=1.5).encode(
    x=alt.X("calendar_date", axis=alt.Axis(title="Date")),
    y=alt.Y("num_hosps:Q", axis=alt.Axis(title="# of hospitals")),
    #color=alt.Color("country:N", legend=alt.Legend(title="Country"), scale=country_color_scale),
    tooltip=tooltip
).properties(height=80, width=column_width).facet(
    column=alt.Column(
        "country:N",
        sort=COUNTRIES,
        header=alt.Header(title=None, labels=False),
    )
    , spacing=40, bounds="flush"
)

plot = alt.vconcat(top_plot, bottom_plot).resolve_scale(x="shared", color="independent").properties(title={
    "text": ["Daily Counts by Country"], 
    "dx": 50,
    #"subtitle": get_visualization_subtitle(data_release=DATA_RELEASE, num_sites=NUM_SITES, cohort=COHORT),
    "subtitleColor": "gray",
    "anchor": "middle",
})

plot = apply_theme(plot)

# NOTE(review): a later cell calls for_website with the same two names — confirm whether the
# later export is meant to replace this one.
for_website(plot, f"Daily Counts {COHORT}", "New hospitalized patients by country", df=num_in_hospital_by_country_molten_df)

plot

## Load the JHU CSSE data

In [None]:
# Confirmed-cases time series from the JHU CSSE repository; the URL embeds a specific commit hash.
jhu_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/e8d823ef0828d4b659a29958403227632e71d158/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
jhu_df = pd.read_csv(jhu_url)

jhu_df = jhu_df.rename(columns={"Country/Region": "country", "Province/State": "state"})
jhu_df = jhu_df.drop(columns=["Lat", "Long"])

# Countries have different ids in the JHU data than in the 4CE data
country_map = {
    "US": "USA",
    "United Kingdom": "UK"
}

jhu_df["country"] = jhu_df["country"].apply(lambda c: country_map[c] if c in country_map else c)
# Keep only the rows that have no province/state value.
jhu_df = jhu_df.loc[~pd.notna(jhu_df["state"])]
jhu_df = jhu_df.drop(columns=["state"])
if MERGE_SINGLE_SITE_COUNTRIES:
    # Use the pediatric helper, which is the one this notebook imports and applies to the 4CE
    # frame; the adult variant referenced here before is not imported and raised NameError.
    jhu_df["country"] = jhu_df["country"].apply(merge_single_site_country_pediatric_name)
jhu_df = jhu_df.loc[jhu_df["country"].isin(COUNTRIES)]

# Wide (one column per date) -> long (one row per country and date).
jhu_df = jhu_df.melt(id_vars=["country"], var_name="date", value_name="cumulative_count")
jhu_df["date"] = jhu_df["date"].astype(str)
jhu_df = jhu_df.groupby(["country", "date"]).sum().reset_index()

jhu_df["date"] = jhu_df["date"].apply(convert_date)
jhu_df = jhu_df.sort_values(by="date", ascending=True)
# NOTE(review): `min_date` / `max_date` are whatever earlier cells last bound them to
# (the gap-filling cell rebinds both), not necessarily the values configured at the top.
jhu_df = jhu_df.loc[(jhu_df["date"] >= min_date) & (jhu_df["date"] <= max_date)]
jhu_df["date_str"] = jhu_df["date"].astype(str)

jhu_df_freeze = jhu_df.copy()

# Per-country daily counts and rate-of-change columns.
jhu_country_frames = []
for country, country_df in jhu_df.groupby("country"):
    country_df = country_df.copy()
    # Daily new cases; the first day of each country has no predecessor and is NaN.
    country_df["count"] = np.concatenate((np.array([np.nan]), np.diff(country_df["cumulative_count"].values)))
    # Zero cumulative counts become NaN so they do not act as a zero denominator below.
    country_df["cumulative_count"] = country_df["cumulative_count"].replace(0, np.nan)

    country_df["N0"] = country_df["cumulative_count"].shift(1) # N0 is the total case up to the day before
    country_df["n1"] = country_df["count"] # n1 is the case number this day
    country_df["n2"] = country_df["n1"].shift(1) # n2 is the case number yesterday

    country_df["percent_increase"] = (country_df["n1"] / country_df["N0"]) * 100

    country_df['R'] = country_df["percent_increase"] # TODO: is this correct?
    # TODO: update CI formula
    country_df['C'] = country_df['R'] - 1
    # Column-wise arithmetic; same values as evaluating the formulas row by row.
    country_df['standard_error'] = (country_df['R'] + np.power(country_df['R'], 2)) / country_df['n2']
    ci_half_width = 1.96 * np.sqrt(country_df['standard_error'])
    country_df['95_CI_below'] = country_df['C'] - ci_half_width
    country_df['95_CI_above'] = country_df['C'] + ci_half_width
    country_df = country_df.replace([np.inf, -np.inf], np.nan)

    jhu_country_frames.append(country_df)

# One concat instead of repeated DataFrame.append (removed in pandas 2.x).
if jhu_country_frames:
    jhu_roc_df = pd.concat(jhu_country_frames, ignore_index=True)
else:
    jhu_roc_df = pd.DataFrame(index=[], data=[], columns=["country", "date", "cumulative_count"])
jhu_roc_df

In [None]:
def get_jhu_cumulative_count(date_str, country):
    """Look up the JHU cumulative case count for one country on one day.

    Args:
        date_str: Value to match against the `date_str` column of `jhu_roc_df`.
        country: Value to match against the `country` column of `jhu_roc_df`.

    Returns:
        The `cumulative_count` of the first matching row, or 0 when no row
        matches or an expected column is missing.
    """
    try:
        return jhu_roc_df.loc[(jhu_roc_df["date_str"] == date_str) & (jhu_roc_df["country"] == country)].reset_index().iloc[0]["cumulative_count"]
    # IndexError: no matching row; KeyError: a column is absent. Anything else propagates.
    except (IndexError, KeyError):
        return 0
# Start plotting after country has 100 cases
count_threshold = 100
jhu_roc_df["jhu_past_100"] = jhu_roc_df["cumulative_count"] >= count_threshold

## Compute centered rolling means of new hospitalized 4CE patients (14-day window) and new JHU CSSE cases (7-day window) per country to enable comparison

In [None]:
country_sum_df.head()

In [None]:
# Smooth the daily increase per country with a 14-day centered rolling mean.
# The first and last days of each country lack a full window and become NaN.
ravg_frames = []
for country, country_df in country_sum_df.groupby("country"):
    country_df = country_df.copy()
    country_df["cum_diff_all"] = country_df["cum_diff_all"].rolling(14, center=True).mean()
    ravg_frames.append(country_df)

# One concat instead of repeated DataFrame.append (removed in pandas 2.x).
country_sum_ravg_df = pd.concat(ravg_frames, ignore_index=True) if ravg_frames else pd.DataFrame(index=[], data=[], columns=[])
country_sum_ravg_df.head()

In [None]:
country_sum_ravg_molten_df = country_sum_ravg_df.melt(id_vars=["country", "calendar_date", "num_sites", "num_hosps"])
country_sum_ravg_molten_df.head()

In [None]:
# Smooth the JHU daily case counts per country with a 7-day mean.
# The trailing window is shifted back by three days, which centers it on each day.
jhu_ravg_frames = []
for country, country_df in jhu_roc_df.groupby("country"):
    country_df = country_df.copy()
    country_df["cum_diff_all"] = country_df["count"].rolling(7).mean().shift(-3)
    jhu_ravg_frames.append(country_df)

# One concat instead of repeated DataFrame.append (removed in pandas 2.x).
jhu_ravg_df = pd.concat(jhu_ravg_frames, ignore_index=True) if jhu_ravg_frames else pd.DataFrame(index=[], data=[], columns=[])
jhu_ravg_df.head()

In [None]:
num_jhu_cases_by_country_molten_df = jhu_ravg_df

In [None]:
num_jhu_cases_by_country_molten_df.head()

In [None]:
# Three stacked, per-country faceted charts:
#   top    - smoothed number of new 4CE patients,
#   middle - smoothed number of new JHU CSSE cases,
#   bottom - number of contributing 4CE hospitals.
COUNTRIES = country_sum_ravg_molten_df["country"].unique().tolist()

column_width = 200

num_in_hospital_by_country_molten_df = country_sum_ravg_molten_df.loc[country_sum_ravg_molten_df["variable"].isin([
    "cum_diff_all",
])].copy()
num_in_hospital_by_country_molten_df["variable"] = num_in_hospital_by_country_molten_df["variable"].replace({
    "cum_diff_all": "All",
})

# Neither scale is referenced by an encoding in this cell.
country_color_scale = alt.Scale(domain=list(country_color_map.keys()), range=list(country_color_map.values()))
severity_color_scale = alt.Scale(domain=["All"], range=["#A9A9A9", "#000000"])


filtered_plot = alt.Chart(num_in_hospital_by_country_molten_df)

tooltip = [
    alt.Tooltip("country", title="Country"),
    alt.Tooltip("calendar_date", title="Date"),
    alt.Tooltip("variable", title="Variable"),
    alt.Tooltip("value", title="Number of patients"),
    alt.Tooltip("num_sites", title="Number of sites"),
    alt.Tooltip("num_hosps", title="Number of hospitals"),
]

# The 4CE series plotted here is smoothed with a 14-day rolling window, so the axis title
# says 14-day (it used to say 7-day, which only holds for the JHU series in the middle chart).
top_plot = filtered_plot.mark_line(color="black").encode(
    x=alt.X("calendar_date", axis=alt.Axis(title=None)),
    y=alt.Y("value:Q", axis=alt.Axis(title="# of new 4CE patients, 14-day mean")),
    tooltip=tooltip
).properties(width=column_width).facet(
    column=alt.Column(
        "country:N",
        sort=COUNTRY_SORT,
        header=alt.Header(title=None)
    )
    , spacing=30, bounds="flush"
)

middle_plot = alt.Chart(num_jhu_cases_by_country_molten_df).mark_line().encode(
    x=alt.X("date", axis=alt.Axis(title=None)),
    y=alt.Y("cum_diff_all:Q", axis=alt.Axis(title="# of new cases, 7-day mean")),
).properties(height=100, width=column_width).facet(
    column=alt.Column(
        "country:N",
        sort=COUNTRY_SORT,
        header=alt.Header(title=None, labels=False)
    )
    , spacing=30, bounds="flush"
)

bottom_plot = filtered_plot.mark_bar(size=1.5, color="black").encode(
    x=alt.X("calendar_date", axis=alt.Axis(title="Date")),
    y=alt.Y("num_hosps:Q", axis=alt.Axis(title="# of hospitals")),
    tooltip=tooltip
).properties(height=80, width=column_width).facet(
    column=alt.Column(
        "country:N",
        sort=COUNTRY_SORT,
        header=alt.Header(title=None, labels=False),
    )
    , spacing=30, bounds="flush"
)

plot = alt.vconcat(top_plot, middle_plot, bottom_plot).resolve_scale(x="shared", color="independent").properties(title={
    #"text": ["Daily Counts by Country"], 
    "text": [""], 
    "dx": 50,
    #"subtitle": get_visualization_subtitle(data_release=DATA_RELEASE, num_sites=NUM_SITES, cohort=COHORT),
    "subtitleColor": "gray",
    "anchor": "middle",
})

plot = apply_theme(plot)

# NOTE(review): an earlier cell calls for_website with the same two names — confirm whether
# this export is meant to replace that one.
for_website(plot, f"Daily Counts {COHORT}", "New hospitalized patients by country", df=num_in_hospital_by_country_molten_df)

plot

In [None]:
def convert_date_us(date_str):
    """Parse a date string from the US data with dateutil, returning NaN on failure.

    Args:
        date_str: Text to parse.

    Returns:
        A `datetime.datetime`, or `np.nan` when dateutil rejects the input
        (unparseable text, a non-string value, or an out-of-range date).
    """
    try:
        return dateutil.parser.parse(date_str)
    # Only parsing failures are swallowed; KeyboardInterrupt and the like propagate.
    except (ValueError, OverflowError, TypeError):
        return np.nan

In [None]:
# AAP data
"""
us_df = pd.read_csv(join("..", "data", "aap_hospitalization_counts.csv"))
us_df = us_df.rename(columns={"Date": "date"})
us_df["date"] = us_df["date"].apply(convert_date_us)
us_df["cum_ped_count"] = us_df["Cumulative_Child"]
us_df["ped_count"] = us_df["child_weekly_count"]
us_df["country"] = "USA"
us_df = compute_increase_per_day(us_df, first_delta=7)
us_df.head()
"""

In [None]:
def compute_increase_per_day(df, first_delta=None):
    """Convert per-interval counts in `ped_count` into per-day counts.

    The frame is modified in place and also returned. Rows are assumed to be in
    chronological order already; the function does not sort.

    Args:
        df: Frame with a datetime `date` column and a numeric `ped_count` column.
        first_delta: Number of days the first row's count covers. When None,
            the first row gets a gap of 0 and therefore a NaN per-day value.

    Returns:
        `df` with two added columns, `days_since_newyear` (days since
        2020-01-01) and `date_delta` (days since the previous row), and with
        `ped_count` divided by `date_delta`. Divisions by a zero gap give NaN.
    """
    df["days_since_newyear"] = (df["date"] - datetime.datetime(year=2020, month=1, day=1)).apply(lambda dt: dt.days)
    first_gap = 0 if first_delta is None else first_delta
    df["date_delta"] = np.concatenate((np.array([first_gap]), np.diff(df["days_since_newyear"].values)))
    # A zero gap yields +inf, -inf (negative count) or NaN (zero count); clear both infinities.
    df["ped_count"] = (df["ped_count"] / df["date_delta"]).replace([np.inf, -np.inf], np.nan)
    return df

In [None]:
# Facility-level HHS file: one row per hospital and collection week.
hhs_df = pd.read_csv(join("..", "data", "reported_hospital_capacity_admissions_facility-level_weekly_average_timeseries_20201207.csv"))
hhs_col = "previous_day_admission_pediatric_covid_confirmed_7_day_sum"
hhs_df = hhs_df[["hospital_pk", "collection_week", hhs_col]]
hhs_df = hhs_df.rename(columns={"collection_week": "date", hhs_col: "ped_count"})
# Negative values are floored at zero before summing across hospitals.
hhs_df["ped_count"] = hhs_df["ped_count"].clip(lower=0)
# `numeric_only=True` keeps the text column `hospital_pk` out of the sum; newer pandas would
# otherwise concatenate every hospital id per week into one long string.
hhs_df = hhs_df.groupby("date").sum(numeric_only=True).reset_index()
hhs_df["date"] = hhs_df["date"].apply(convert_date_us)
hhs_df["country"] = "USA"
us_df = hhs_df
# The source column is a 7-day sum, so the first row is also treated as covering 7 days.
us_df = compute_increase_per_day(us_df, first_delta=7)
us_df.head()

In [None]:
fr_df = pd.read_excel(join("..", "data", "4CE_FRANCE_pedia.xlsx"))
# The pediatric count is the sum of the second and third columns of the sheet.
second_col, third_col = fr_df.columns.values.tolist()[1], fr_df.columns.values.tolist()[2]
fr_df["ped_count"] = fr_df[second_col] + fr_df[third_col]
# The date header in the sheet carries a trailing space.
fr_df = fr_df.rename(columns={"Date ": "date"}).assign(country="France")
# The two source columns are no longer needed once they have been summed.
fr_df = fr_df.drop(columns=list(fr_df.columns[1:3]))
fr_df = compute_increase_per_day(fr_df)
fr_df.tail()

In [None]:
ge_df = pd.read_csv(join("..", "data", "Germany_Weekly_Pediatric_Inpatients.csv"), sep=";")
ge_df = ge_df[ge_df.columns.values.tolist()[:4]]

def week_num_to_date(week_num):
    """Map a week number to a date: 2019-12-28 plus `week_num` whole weeks."""
    week_zero = datetime.datetime(year=2019, month=12, day=28)
    return week_zero + datetime.timedelta(weeks=week_num)
ge_df["date"] = ge_df["Week in 2020"].apply(week_num_to_date)
ge_df["ped_count"] = ge_df["Total Number of recorded inpatient pediatric cases"]
ge_df["country"] = "Germany"
ge_df = compute_increase_per_day(ge_df)
ge_df.head(7)

In [None]:
def convert_date_sp(date_str):
    """Parse a date string from the Spain data with dateutil, returning NaN on failure.

    Args:
        date_str: Text to parse.

    Returns:
        A `datetime.datetime`, or `np.nan` when dateutil rejects the input
        (unparseable text, a non-string value, or an out-of-range date).
    """
    try:
        return dateutil.parser.parse(date_str)
    # Only parsing failures are swallowed; KeyboardInterrupt and the like propagate.
    except (ValueError, OverflowError, TypeError):
        return np.nan

In [None]:
sp_df = pd.read_csv(join("..", "data", "Spain_Pediatric_Hospitalizations.csv"))
sp_df["date"] = sp_df["Date"].apply(convert_date_sp)
# The cumulative pediatric count is the sum of the three age-band columns.
sp_df["ped_count_cum"] = sp_df["Age 0-2"] + sp_df["Age 2-4"] + sp_df["Age 5-14"]
sp_df["country"] = "Spain"
# Sort chronologically before differencing the cumulative series.
sp_df = sp_df.sort_values(by="date", ascending=True)
# Index by the raw "Date" string so that a single day can be addressed by label below.
sp_df = sp_df.set_index("Date")
# Row-to-row increase of the cumulative count; the first row is set to 0.0.
sp_df["ped_count"] = np.concatenate((np.array([0.0]), np.diff(sp_df["ped_count_cum"].values)))
# Blank out the value for the row labelled "7/15/20".
# NOTE(review): the reason is not recorded here; also confirm the label exists in the file,
# since a label-based write to a missing label may add a new row instead.
sp_df.at["7/15/20", "ped_count"] = np.nan
# Divide each increase by the number of days since the previous row.
sp_df = compute_increase_per_day(sp_df)
sp_df.head()

In [None]:
def convert_date_uk(date_str):
    """Parse a year-less date string from the UK data, assuming the year 2020.

    "-2020" is appended to the input before it is handed to dateutil.

    Args:
        date_str: Text without a year component.

    Returns:
        A `datetime.datetime` in 2020, or `np.nan` when the input is not a
        string or dateutil cannot parse the result.
    """
    try:
        return dateutil.parser.parse(date_str + '-2020')
    # TypeError also covers non-string input, for which the concatenation itself fails.
    except (ValueError, OverflowError, TypeError):
        return np.nan

In [None]:
uk_df = pd.read_csv(join("..", "data", "4CE_UK_pedia.csv"))

# Derive the three standard columns (appended in this order) from the raw file.
uk_df = uk_df.assign(
    date=uk_df["Date of test"].apply(convert_date_uk),
    ped_count=uk_df["Number of positive COVID tests per day"],
    country="UK",
)
# The first three columns of the raw file are dropped once the derived ones exist.
uk_df = uk_df.drop(columns=list(uk_df.columns[0:3]))
uk_df = compute_increase_per_day(uk_df)
uk_df.head()

In [None]:
# Singapore has no country-level frame among the ones combined below, so a single
# zero-count row stands in for it.
singapore_placeholder = {
    "date": datetime.datetime(year=2020, month=3, day=11),
    "country": "Singapore",
    "ped_count": 0.0,
}
other_df = pd.DataFrame(data=[singapore_placeholder])

# Stack all country-level frames; negative per-day counts are floored at zero.
country_hosp_molten_df = pd.concat([fr_df, uk_df, ge_df, sp_df, us_df, other_df])
country_hosp_molten_df["ped_count"] = country_hosp_molten_df["ped_count"].clip(lower=0)
# Constant column; used as the row facet field by the per-country plots.
country_hosp_molten_df["row"] = 0

In [None]:
country_hosp_molten_df.head()

## Trim at October 9, 2020

In [None]:
# Keep only rows strictly before the cutoff (October 9 itself is excluded).
max_datetime = datetime.datetime(year=2020, month=10, day=9)
before_cutoff_4ce = country_sum_ravg_molten_df["calendar_date"] < max_datetime
country_sum_ravg_molten_df = country_sum_ravg_molten_df.loc[before_cutoff_4ce]
before_cutoff_country = country_hosp_molten_df["date"] < max_datetime
country_hosp_molten_df = country_hosp_molten_df.loc[before_cutoff_country]

In [None]:
# Setup and top row (smoothed new 4CE patients per country) of the combined figure.
COUNTRIES = country_sum_ravg_molten_df["country"].unique().tolist()

column_width = 160
column_spacing = 42

# NOTE(review): these two values are taken from `num_in_hospital_by_country_molten_df` as left by
# an earlier cell (it is rebuilt from the trimmed data just below) and are not read again in the
# part of this cell shown here.
min_date = num_in_hospital_by_country_molten_df["calendar_date"].min()
max_date = num_in_hospital_by_country_molten_df["calendar_date"].max()

num_in_hospital_by_country_molten_df = country_sum_ravg_molten_df.loc[country_sum_ravg_molten_df["variable"].isin([
    "cum_diff_all",
])].copy()
num_in_hospital_by_country_molten_df["variable"] = num_in_hospital_by_country_molten_df["variable"].replace({
    "cum_diff_all": "All",
})

# Neither scale is referenced by an encoding in the part of this cell shown here.
country_color_scale = alt.Scale(domain=list(country_color_map.keys()), range=list(country_color_map.values()))
severity_color_scale = alt.Scale(domain=["All"], range=["#A9A9A9", "#000000"])


filtered_plot = alt.Chart(num_in_hospital_by_country_molten_df)

# Tooltip for the 4CE line chart.
tooltip_4ce = [
    alt.Tooltip("country", title="Country"),
    alt.Tooltip("calendar_date", title="Date"),
    alt.Tooltip("value", title="Number of new 4CE patients"),
    alt.Tooltip("num_sites", title="Number of 4CE sites"),
    alt.Tooltip("num_hosps", title="Number of 4CE hospitals"),
]
# Tooltip for the country-level charts built by get_country_plot.
tooltip_country = [
    alt.Tooltip("country", title="Country"),
    alt.Tooltip("date", title="Date"),
    alt.Tooltip("ped_count", title="Number of country-level pediatric hospitalizations"),
]

# NOTE(review): `date_domain` is not assigned anywhere in the part of the notebook visible here —
# confirm it is defined before this cell runs on a fresh kernel.
top_plot = filtered_plot.mark_line(color="black").encode(
    x=alt.X("calendar_date", axis=alt.Axis(title=None), scale=alt.Scale(domain=date_domain)),
    y=alt.Y("value:Q", axis=alt.Axis(title="# of new 4CE patients per day")),
    tooltip=tooltip_4ce
).properties(height=220, width=column_width).facet(
    column=alt.Column(
        "country:N",
        sort=COUNTRY_SORT,
        header=alt.Header(title=None)
    )
    , spacing=column_spacing+2, bounds="flush"
)

def get_country_plot(country_name):
    """Build the country-level pediatric hospitalization line chart for one country.

    Reads the notebook-level names `country_hosp_molten_df`, `date_domain`,
    `tooltip_country`, `column_width`, `column_spacing` and `COUNTRY_SORT`.

    Parameters
    ----------
    country_name : str
        Value compared against the "country" column of `country_hosp_molten_df`.

    Returns
    -------
    The Altair chart produced by `.facet(row="row:N", ...)` on a line chart of
    "ped_count" over "date" for the selected rows.
    """
    country_hosp_molten_df_stratified = country_hosp_molten_df.loc[country_hosp_molten_df["country"] == country_name]

    max_count = country_hosp_molten_df_stratified["ped_count"].max()
    # `max()` is NaN when the country has no rows (or only missing counts), and a
    # maximum of 0 would give a zero-height [0, 0] domain. Use the same [0, 1]
    # fallback that was already hard-coded for Singapore in those cases.
    if country_name == "Singapore" or pd.isna(max_count) or max_count <= 0:
        count_domain = [0, 1]
    else:
        # Leave 10% headroom above the largest observed count.
        count_domain = [0, max_count * 1.1]

    cp = alt.Chart(country_hosp_molten_df_stratified).mark_line().encode(
        x=alt.X("date", axis=alt.Axis(title=None), scale=alt.Scale(domain=date_domain)),
        y=alt.Y("ped_count:Q", axis=alt.Axis(title=None), scale=alt.Scale(domain=count_domain)),
        tooltip=tooltip_country
    ).properties(height=150, width=column_width)

    # Only the first country in COUNTRY_SORT carries the row title; the others
    # get an untitled header with a larger title padding.
    if country_name == COUNTRY_SORT[0]:
        header = alt.Header(title="# of new hosp. in country", labels=False, titlePadding=0)
    else:
        header = alt.Header(title=None, labels=False, titlePadding=50)

    # NOTE(review): faceting on "row:N" assumes `country_hosp_molten_df` has a
    # "row" column - confirm where that column is created.
    cp = cp.facet(
        row=alt.Row("row:N", header=header)
        , spacing=column_spacing-2, bounds="flush"
    )
    return cp

# One country-level chart per entry of COUNTRY_SORT, in that order.
country_plots = [get_country_plot(country_name) for country_name in COUNTRY_SORT]

# Stack the 4CE facet row on top of the horizontally concatenated country charts.
plot = alt.vconcat(top_plot, alt.hconcat(*country_plots))
plot = plot.resolve_scale(x="shared", color="independent")
# The title text is an empty string and no subtitle is set for this figure.
plot = plot.properties(
    title=dict(
        text=[""],
        dx=50,
        subtitleColor="gray",
        anchor="middle",
    )
)
plot = apply_theme(plot)

for_website(
    plot,
    f"Daily Counts {COHORT}",
    "New hospitalized patients by country with country rate",
    df=num_in_hospital_by_country_molten_df,
)

plot

In [None]:
# Tooltip-only copy of "value": rows for the listed countries carry the literal
# string "hidden" instead of the number; all other rows keep their "value".
country_sum_ravg_molten_df["value_tt"] = country_sum_ravg_molten_df.apply(
    lambda record: (
        record["value"]
        if record["country"] not in ["Singapore", "Germany", "UK", "Spain"]
        else "hidden"
    ),
    axis=1,
)

In [None]:
COUNTRIES = country_sum_ravg_molten_df["country"].unique().tolist()

column_width = 120
column_spacing = 40

# Rebuild the filtered frame first, then derive the date range from it.
# Previously min/max were read before this assignment, i.e. from whatever frame
# an earlier cell had left under this name (NameError if that cell was not run).
num_in_hospital_by_country_molten_df = country_sum_ravg_molten_df.loc[country_sum_ravg_molten_df["variable"].isin([
    "cum_diff_all",
])].copy()
num_in_hospital_by_country_molten_df["variable"] = num_in_hospital_by_country_molten_df["variable"].replace({
    "cum_diff_all": "All",
})

min_date = num_in_hospital_by_country_molten_df["calendar_date"].min()
max_date = num_in_hospital_by_country_molten_df["calendar_date"].max()

# x-axis domain (first to last calendar date) used by the charts of this figure.
date_domain = [alt.DateTime(year=min_date.year, month=min_date.month, date=min_date.day), alt.DateTime(year=max_date.year, month=max_date.month, date=max_date.day)]

country_color_scale = alt.Scale(domain=list(country_color_map.keys()), range=list(country_color_map.values()))
severity_color_scale = alt.Scale(domain=["All"], range=["#A9A9A9", "#000000"])


filtered_plot = alt.Chart(num_in_hospital_by_country_molten_df)

# Tooltip for the 4CE panels; it shows "value_tt" rather than "value".
tooltip_4ce = [
    alt.Tooltip(field, title=label)
    for field, label in (
        ("country", "Country"),
        ("calendar_date", "Date"),
        ("value_tt", "Number of new 4CE hospitalizations"),
        ("num_sites", "Number of 4CE sites"),
        ("num_hosps", "Number of 4CE hospitals"),
    )
]
# Tooltip for the panels drawn from `country_hosp_molten_df`.
tooltip_country = [
    alt.Tooltip(field, title=label)
    for field, label in (
        ("country", "Country"),
        ("date", "Date"),
        ("ped_count", "Number of new country-level pediatric hospitalizations"),
    )
]

# Top row: one 4CE line chart per country, as facet columns in COUNTRY_SORT order.
top_plot = filtered_plot.mark_line(color="black")
top_plot = top_plot.encode(
    x=alt.X("calendar_date", axis=alt.Axis(title=None), scale=alt.Scale(domain=date_domain)),
    y=alt.Y("value:Q", axis=alt.Axis(title="# of new 4CE patients per day")),
    tooltip=tooltip_4ce,
)
top_plot = top_plot.properties(width=column_width, height=200)
top_plot = top_plot.facet(
    column=alt.Column("country:N", sort=COUNTRY_SORT, header=alt.Header(title=None)),
    spacing=column_spacing,
    bounds="flush",
)

# Middle row: country-level pediatric counts, one facet column per country,
# each with its own y scale.
middle_plot = alt.Chart(country_hosp_molten_df).mark_line()
middle_plot = middle_plot.encode(
    x=alt.X("date", axis=alt.Axis(title=None), scale=alt.Scale(domain=date_domain)),
    y=alt.Y("ped_count:Q", axis=alt.Axis(title=None)),
    tooltip=tooltip_country,
)
middle_plot = middle_plot.properties(height=100, width=column_width)
middle_plot = middle_plot.facet(
    column=alt.Column(
        "country:N",
        sort=COUNTRY_SORT,
        header=alt.Header(title=None, labels=False),
    ),
    row=alt.Row(
        "row:N",
        header=alt.Header(title="# of new hosp. in country", labels=False, titlePadding=30),
    ),
    spacing=column_spacing,
    bounds="flush",
)
middle_plot = middle_plot.resolve_scale(y='independent')

# Bottom row: thin bars of "num_hosps" per date, faceted by country.
bottom_plot = filtered_plot.mark_bar(size=1.5, color="black")
bottom_plot = bottom_plot.encode(
    x=alt.X("calendar_date", axis=alt.Axis(title="Date"), scale=alt.Scale(domain=date_domain)),
    y=alt.Y("num_hosps:Q", axis=alt.Axis(title="# of hospitals")),
    tooltip=tooltip_4ce,
)
bottom_plot = bottom_plot.properties(height=80, width=column_width)
bottom_plot = bottom_plot.facet(
    column=alt.Column(
        "country:N",
        sort=COUNTRY_SORT,
        header=alt.Header(title=None, labels=False),
    ),
    spacing=column_spacing,
    bounds="flush",
)

# Stack the three faceted rows, sharing the x scale across them.
plot = alt.vconcat(top_plot, middle_plot, bottom_plot)
plot = plot.resolve_scale(x="shared", color="independent")
plot = plot.properties(
    title=dict(
        text=["Pediatric Hospitalization Counts by Country"],
        dx=50,
        subtitle=get_visualization_subtitle(data_release=DATA_RELEASE, with_num_sites=False, cohort=COHORT),
        subtitleColor="gray",
        anchor="middle",
    )
)
plot = apply_theme(plot)

for_website(
    plot,
    f"Daily Counts {COHORT}",
    "New hospitalized patients by country with country rate",
    df=num_in_hospital_by_country_molten_df,
)

plot

In [None]:
COUNTRIES = country_sum_ravg_molten_df["country"].unique().tolist()

column_width = 500

# Keep only the "cum_diff_all" rows and relabel that variable as "All".
num_in_hospital_by_country_molten_df = (
    country_sum_ravg_molten_df
    .loc[country_sum_ravg_molten_df["variable"].isin(["cum_diff_all"])]
    .assign(variable=lambda frame: frame["variable"].replace({"cum_diff_all": "All"}))
)

country_color_scale = alt.Scale(
    domain=list(country_color_map.keys()),
    range=list(country_color_map.values()),
)
severity_color_scale = alt.Scale(domain=["All"], range=["#A9A9A9", "#000000"])

min_date = num_in_hospital_by_country_molten_df["calendar_date"].min()
max_date = num_in_hospital_by_country_molten_df["calendar_date"].max()

# First and last calendar date as Altair DateTime objects.
date_domain = [
    alt.DateTime(year=bound.year, month=bound.month, date=bound.day)
    for bound in (min_date, max_date)
]
date_brush = alt.selection(type='interval', encodings=['x'])

# Dropdown bound to the "country" field, initialised to the first country.
dailycount_dropdown = alt.binding_select(options=COUNTRIES)
dailycount_selection = alt.selection_single(
    fields=["country"],
    bind=dailycount_dropdown,
    name="Country",
    init={"country": COUNTRIES[0]},
)


# Base chart of the 4CE frame, restricted to the country picked in the dropdown.
filtered_plot = alt.Chart(num_in_hospital_by_country_molten_df)
filtered_plot = filtered_plot.transform_filter(dailycount_selection)

# Tooltip for the 4CE panels; it shows "value_tt" rather than "value".
tooltip_4ce = [
    alt.Tooltip(field, title=label)
    for field, label in (
        ("country", "Country"),
        ("calendar_date", "Date"),
        ("value_tt", "Number of new 4CE hospitalizations"),
        ("num_sites", "Number of 4CE sites"),
        ("num_hosps", "Number of 4CE hospitals"),
    )
]
# Tooltip for the panel drawn from `country_hosp_molten_df`.
tooltip_country = [
    alt.Tooltip(field, title=label)
    for field, label in (
        ("country", "Country"),
        ("date", "Date"),
        ("ped_count", "Number of new country-level pediatric hospitalizations"),
    )
]

top_plot = filtered_plot.mark_line(color="black")
top_plot = top_plot.encode(
    x=alt.X("calendar_date", axis=alt.Axis(title=None)),
    y=alt.Y("value:Q", axis=alt.Axis(title="# of new 4CE patients per day")),
    tooltip=tooltip_4ce,
)
top_plot = top_plot.properties(width=column_width)

# Country-level pediatric counts for the country picked in the dropdown.
middle_plot = alt.Chart(country_hosp_molten_df).transform_filter(dailycount_selection)
middle_plot = middle_plot.mark_line()
middle_plot = middle_plot.encode(
    x=alt.X("date", axis=alt.Axis(title=None)),
    y=alt.Y("ped_count:Q", axis=alt.Axis(title="# of new hosp. in country")),
    tooltip=tooltip_country,
)
middle_plot = middle_plot.properties(height=100, width=column_width)

# Thin bars of "num_hosps" per date for the same selection.
bottom_plot = filtered_plot.mark_bar(size=1.5, color="black")
bottom_plot = bottom_plot.encode(
    x=alt.X("calendar_date", axis=alt.Axis(title="Date")),
    y=alt.Y("num_hosps:Q", axis=alt.Axis(title="# of hospitals")),
    tooltip=tooltip_4ce,
)
bottom_plot = bottom_plot.properties(height=80, width=column_width)

# Stack the three panels, sharing the x scale across them.
plot = alt.vconcat(top_plot, middle_plot, bottom_plot)
plot = plot.resolve_scale(x="shared", color="independent")
plot = plot.properties(
    title=dict(
        text=["Pediatric Hospitalization Counts by Country"],
        dx=50,
        subtitle=get_visualization_subtitle(data_release=DATA_RELEASE, with_num_sites=False, cohort=COHORT),
        subtitleColor="gray",
        anchor="middle",
    )
)

# Theme the figure, then attach the country dropdown selection to it.
plot = apply_theme(plot)
plot = plot.add_selection(dailycount_selection)

for_website(
    plot,
    f"Daily Counts {COHORT}",
    "New hospitalized patients by country with country rate and dropdown",
    df=num_in_hospital_by_country_molten_df,
)

plot