In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    read_loinc_df,
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    read_full_daily_counts_df,
    get_visualization_subtitle,
    get_country_color_map,
    apply_theme,
)
from web import for_website

alt.data_transformers.disable_max_rows(); # Lift Altair's default 5000-row limit so larger DataFrames can be passed to alt.Chart

In [None]:
# Data release identifier; passed to get_visualization_subtitle() for every chart subtitle below.
DATA_RELEASE = "2020-07-21"

In [None]:
# Load the daily counts DataFrame through the project helper and preview its first rows.
df = read_full_daily_counts_df()
df.head()

In [None]:
# Turn the numeric placeholder codes -99 and -999 into NaN so they are ignored
# by later arithmetic and aggregation instead of being counted as real values.
# NOTE(review): presumably these codes mark masked / unavailable counts --
# confirm against the data specification.
df = (
    df
    .replace(-99, np.nan)
    .replace(-999, np.nan)
)

In [None]:
# Non-severe in-hospital count = all in-hospital patients minus the severe ones.
# A NaN in either input column propagates to the result.
df["num_patients_in_hospital_minus_severe_on_this_date"] = df[
    "num_patients_in_hospital_on_this_date"
].sub(df["num_patients_in_hospital_and_severe_on_this_date"])

In [None]:
# Country -> color mapping; its keys and values are used below as the domain and range of the country color scale.
country_color_map = get_country_color_map()

In [None]:
# Line chart of the in-hospital patient count over time, one line per site.
plot = (
    alt.Chart(df)
    .mark_line()
    .encode(
        x=alt.X("calendar_date", axis=alt.Axis(title="Date")),
        y=alt.Y(
            "num_patients_in_hospital_on_this_date:Q",
            axis=alt.Axis(title="Number of Patients in Hospital"),
        ),
        color=alt.Color("siteid:N", legend=alt.Legend(title="Site")),
    )
    .properties(
        title=dict(
            text=["Number of Hospitalized Patients by Site"],
            dx=50,
            subtitle=get_visualization_subtitle(data_release=DATA_RELEASE),
            subtitleColor="gray",
            anchor="middle",
        )
    )
)

# Apply the shared project styling before displaying the chart.
plot = apply_theme(plot)

plot

In [None]:
# Give every row a site count of 1 so that the per-group sum below also yields
# the number of rows (sites) contributing to each country/date.
df["num_sites"] = 1

# Sum all numeric columns per country and date.
# numeric_only=True makes the intent explicit: non-numeric columns (e.g. the
# site identifier) must not be aggregated. Older pandas dropped them silently,
# but newer pandas versions try to "sum" object columns, which would leak a
# non-numeric variable into the melted frame and the charts built from it.
# NaN values are skipped by the sum (pandas default).
country_sum_df = (
    df.groupby(["country", "calendar_date"])
    .sum(numeric_only=True)
    .reset_index()
)
country_sum_df.head()

In [None]:
# Reshape from wide to long: every column other than the id columns becomes a
# (variable, value) pair, which lets one chart switch between variables.
country_sum_molten_df = pd.melt(
    country_sum_df,
    id_vars=["country", "calendar_date", "num_sites"],
)
country_sum_molten_df.head()

# Daily counts by country, with country as color, variable as dropdown

In [None]:
# Dropdown options: every distinct variable name in the melted frame.
CATEGORIES = country_sum_molten_df["variable"].unique().tolist()

# Fixed country -> color assignment taken from the shared color map.
country_color_scale = alt.Scale(
    domain=[*country_color_map.keys()],
    range=[*country_color_map.values()],
)

# Single-value selection bound to a dropdown widget; the initial choice is the
# in-hospital patient count.
dailycount_dropdown = alt.binding_select(options=CATEGORIES)
dailycount_selection = alt.selection_single(
    name="Value",
    fields=["variable"],
    bind=dailycount_dropdown,
    init=dict(variable="num_patients_in_hospital_on_this_date"),
)

# Base chart showing only the rows of the currently selected variable.
filtered_plot = alt.Chart(country_sum_molten_df).transform_filter(dailycount_selection)

# Tooltip fields shared by both panels.
tooltip = [
    alt.Tooltip(field, title=label)
    for field, label in [
        ("country", "Country"),
        ("calendar_date", "Date"),
        ("variable", "Variable"),
        ("value", "Value"),
        ("num_sites", "Number of sites"),
    ]
]

# Upper panel: selected variable over time, one line per country.
top_plot = filtered_plot.mark_line().encode(
    x=alt.X("calendar_date", axis=alt.Axis(title=None)),
    y=alt.Y("value:Q", axis=alt.Axis(title="Number of patients")),
    color=alt.Color(
        "country:N",
        legend=alt.Legend(title="Country"),
        scale=country_color_scale,
    ),
    tooltip=tooltip,
)

# Lower panel: number of contributing sites per date, as thin bars.
bottom_plot = (
    filtered_plot.mark_bar(size=1.5)
    .encode(
        x=alt.X("calendar_date", axis=alt.Axis(title="Date")),
        y=alt.Y("num_sites:Q", axis=alt.Axis(title="# of sites")),
        color=alt.Color(
            "country:N",
            legend=alt.Legend(title="Country"),
            scale=country_color_scale,
        ),
        tooltip=tooltip,
    )
    .properties(height=80)
)

# Stack the panels with a common x axis and a common color legend.
plot = (
    alt.vconcat(top_plot, bottom_plot)
    .resolve_scale(x="shared", color="shared")
    .properties(
        title=dict(
            text=["Daily Counts by Country"],
            dx=50,
            subtitle=get_visualization_subtitle(data_release=DATA_RELEASE),
            subtitleColor="gray",
            anchor="middle",
        )
    )
)

# The selection is registered on the themed top-level chart so the dropdown is rendered.
plot = apply_theme(plot).add_selection(dailycount_selection)

plot


In [None]:
# Write the current `plot` to an HTML file (path is relative to the working directory).
plot.save("daily_counts_variable_dropdown.html")

# Daily counts by country, with `num_patients_in_hospital_minus_severe_on_this_date` and `num_patients_in_hospital_and_severe_on_this_date` as color, country as dropdown

In [None]:
# Dropdown options: every distinct country in the melted frame.
COUNTRIES = country_sum_molten_df["country"].unique().tolist()

# Raw variable name -> legend label for the two in-hospital severity series.
severity_labels = {
    "num_patients_in_hospital_minus_severe_on_this_date": "All minus Severe",
    "num_patients_in_hospital_and_severe_on_this_date": "Severe",
}

# Keep only the two severity series (copied so relabelling does not touch the
# source frame), then swap the raw variable names for their labels.
num_in_hospital_by_country_molten_df = country_sum_molten_df.loc[
    country_sum_molten_df["variable"].isin(list(severity_labels))
].copy()
num_in_hospital_by_country_molten_df["variable"] = (
    num_in_hospital_by_country_molten_df["variable"].replace(severity_labels)
)

# Color scales: per-country colors from the shared map, gray/black for severity.
country_color_scale = alt.Scale(
    domain=[*country_color_map.keys()],
    range=[*country_color_map.values()],
)
severity_color_scale = alt.Scale(
    domain=["All minus Severe", "Severe"],
    range=["#A9A9A9", "#000000"],
)

# Single-country selection bound to a dropdown widget; starts on "USA".
country_dropdown = alt.binding_select(options=COUNTRIES)
country_selection = alt.selection_single(
    name="Country",
    fields=["country"],
    bind=country_dropdown,
    init=dict(country="USA"),
)

# Base chart showing only the rows of the currently selected country.
filtered_plot = alt.Chart(num_in_hospital_by_country_molten_df).transform_filter(country_selection)

# Tooltip fields shared by both panels.
tooltip = [
    alt.Tooltip(field, title=label)
    for field, label in [
        ("country", "Country"),
        ("calendar_date", "Date"),
        ("variable", "Variable"),
        ("value", "Number of patients"),
        ("num_sites", "Number of sites"),
    ]
]

# Upper panel: one line per severity series.
top_plot = filtered_plot.mark_line().encode(
    x=alt.X("calendar_date", axis=alt.Axis(title=None)),
    y=alt.Y("value:Q", axis=alt.Axis(title="Number of patients")),
    color=alt.Color(
        "variable:N",
        legend=alt.Legend(title="Severity"),
        scale=severity_color_scale,
    ),
    tooltip=tooltip,
)

# Lower panel: number of contributing sites per date, in the country's color
# and without a legend.
bottom_plot = (
    filtered_plot.mark_bar(size=1.5)
    .encode(
        x=alt.X("calendar_date", axis=alt.Axis(title="Date")),
        y=alt.Y("num_sites:Q", axis=alt.Axis(title="# of sites")),
        color=alt.Color("country:N", legend=None, scale=country_color_scale),
        tooltip=tooltip,
    )
    .properties(height=80)
)

# Stack the panels; the color scales stay independent because the panels color
# by different fields (severity vs. country).
plot = (
    alt.vconcat(top_plot, bottom_plot)
    .resolve_scale(x="shared", color="independent")
    .properties(
        title=dict(
            text=["Daily Counts by Country and Severity"],
            dx=50,
            subtitle=get_visualization_subtitle(data_release=DATA_RELEASE),
            subtitleColor="gray",
            anchor="middle",
        )
    )
)

# The selection is registered on the themed top-level chart so the dropdown is rendered.
plot = apply_theme(plot).add_selection(country_selection)

plot

In [None]:
# Write the current `plot` to an HTML file (path is relative to the working directory).
plot.save("daily_counts_country_dropdown.html")

# Cumulative daily counts by country, with `cumulative_patients_all`, `cumulative_patients_severe`, `cumulative_patients_dead` as color, country as dropdown

In [None]:
# Dropdown options: every distinct country in the melted frame.
COUNTRIES = country_sum_molten_df["country"].unique().tolist()

# Raw variable name -> legend label for the three cumulative series.
cumulative_labels = {
    "cumulative_patients_all": "All",
    "cumulative_patients_severe": "Severe",
    "cumulative_patients_dead": "Dead",
}

# Keep only the cumulative series (copied so relabelling does not touch the
# source frame), then swap the raw variable names for their labels.
cumulative_by_country_molten_df = country_sum_molten_df.loc[
    country_sum_molten_df["variable"].isin(list(cumulative_labels))
].copy()
cumulative_by_country_molten_df["variable"] = (
    cumulative_by_country_molten_df["variable"].replace(cumulative_labels)
)

country_color_scale = alt.Scale(
    domain=[*country_color_map.keys()],
    range=[*country_color_map.values()],
)
# NOTE(review): this scale is defined but not referenced by any encoding in
# this cell; the line chart below uses the default color scheme.
severity_color_scale = alt.Scale(
    domain=["All minus Severe", "Severe"],
    range=["#A9A9A9", "#000000"],
)

# Single-country selection bound to a dropdown widget; starts on "USA".
country_dropdown = alt.binding_select(options=COUNTRIES)
country_selection = alt.selection_single(
    name="Country",
    fields=["country"],
    bind=country_dropdown,
    init=dict(country="USA"),
)

# Base chart showing only the rows of the currently selected country.
filtered_plot = alt.Chart(cumulative_by_country_molten_df).transform_filter(country_selection)

# Tooltip fields shared by both panels.
tooltip = [
    alt.Tooltip(field, title=label)
    for field, label in [
        ("country", "Country"),
        ("calendar_date", "Date"),
        ("variable", "Variable"),
        ("value", "Cumulative number of patients"),
        ("num_sites", "Number of sites"),
    ]
]

# Upper panel: one line per cumulative series.
top_plot = filtered_plot.mark_line().encode(
    x=alt.X("calendar_date", axis=alt.Axis(title=None)),
    y=alt.Y("value:Q", axis=alt.Axis(title="Cumulative number of patients")),
    color=alt.Color("variable:N", legend=alt.Legend(title="Severity")),
    tooltip=tooltip,
)

# Lower panel: number of contributing sites per date, in the country's color
# and without a legend.
bottom_plot = (
    filtered_plot.mark_bar(size=1.5)
    .encode(
        x=alt.X("calendar_date", axis=alt.Axis(title="Date")),
        y=alt.Y("num_sites:Q", axis=alt.Axis(title="# of sites")),
        color=alt.Color("country:N", legend=None, scale=country_color_scale),
        tooltip=tooltip,
    )
    .properties(height=80)
)

# Stack the panels; the color scales stay independent because the panels color
# by different fields (series vs. country).
plot = (
    alt.vconcat(top_plot, bottom_plot)
    .resolve_scale(x="shared", color="independent")
    .properties(
        title=dict(
            text=["Cumulative Daily Counts by Country and Severity"],
            dx=50,
            subtitle=get_visualization_subtitle(data_release=DATA_RELEASE),
            subtitleColor="gray",
            anchor="middle",
        )
    )
)

# The selection is registered on the themed top-level chart so the dropdown is rendered.
plot = apply_theme(plot).add_selection(country_selection)

plot

In [None]:
# Write the current `plot` to an HTML file (path is relative to the working directory).
plot.save("cumulative_daily_counts_country_dropdown.html")

# Daily counts by country, with `num_patients_in_hospital_minus_severe_on_this_date` and `num_patients_in_hospital_and_severe_on_this_date` as color, country as column facet

In [None]:
# Facet column order: countries in order of first appearance in the melted frame.
COUNTRIES = country_sum_molten_df["country"].unique().tolist()

# Width of each per-country facet column.
column_width = 200

# Raw variable name -> legend label for the two in-hospital severity series.
severity_labels = {
    "num_patients_in_hospital_minus_severe_on_this_date": "All minus Severe",
    "num_patients_in_hospital_and_severe_on_this_date": "Severe",
}

# Keep only the two severity series (copied so relabelling does not touch the
# source frame), then swap the raw variable names for their labels.
num_in_hospital_by_country_molten_df = country_sum_molten_df.loc[
    country_sum_molten_df["variable"].isin(list(severity_labels))
].copy()
num_in_hospital_by_country_molten_df["variable"] = (
    num_in_hospital_by_country_molten_df["variable"].replace(severity_labels)
)

# NOTE(review): country_color_scale is not referenced in this cell because the
# country color encoding on the bar chart is commented out.
country_color_scale = alt.Scale(
    domain=[*country_color_map.keys()],
    range=[*country_color_map.values()],
)
severity_color_scale = alt.Scale(
    domain=["All minus Severe", "Severe"],
    range=["#A9A9A9", "#000000"],
)

# No selection filter here: every country is drawn in its own facet column.
filtered_plot = alt.Chart(num_in_hospital_by_country_molten_df)

# Tooltip fields shared by both rows of panels.
tooltip = [
    alt.Tooltip(field, title=label)
    for field, label in [
        ("country", "Country"),
        ("calendar_date", "Date"),
        ("variable", "Variable"),
        ("value", "Number of patients"),
        ("num_sites", "Number of sites"),
    ]
]

# Upper row: severity lines, one facet column per country.
top_plot = (
    filtered_plot.mark_line()
    .encode(
        x=alt.X("calendar_date", axis=alt.Axis(title=None)),
        y=alt.Y("value:Q", axis=alt.Axis(title="Number of patients")),
        color=alt.Color(
            "variable:N",
            legend=alt.Legend(title="Severity"),
            scale=severity_color_scale,
        ),
        tooltip=tooltip,
    )
    .properties(width=column_width)
    .facet(
        column=alt.Column(
            "country:N",
            sort=COUNTRIES,
            header=alt.Header(title=None),
        )
    )
)

# Lower row: number of contributing sites per date, faceted in the same column
# order; header labels are hidden because the upper row already shows them.
bottom_plot = (
    filtered_plot.mark_bar(size=1.5)
    .encode(
        x=alt.X("calendar_date", axis=alt.Axis(title="Date")),
        y=alt.Y("num_sites:Q", axis=alt.Axis(title="# of sites")),
        #color=alt.Color("country:N", legend=alt.Legend(title="Country"), scale=country_color_scale),
        tooltip=tooltip,
    )
    .properties(height=80, width=column_width)
    .facet(
        column=alt.Column(
            "country:N",
            sort=COUNTRIES,
            header=alt.Header(title=None, labels=False),
        )
    )
)

# Stack the two faceted rows under one title.
plot = (
    alt.vconcat(top_plot, bottom_plot)
    .resolve_scale(x="shared", color="independent")
    .properties(
        title=dict(
            text=["Daily Counts by Country and Severity"],
            dx=50,
            subtitle=get_visualization_subtitle(data_release=DATA_RELEASE),
            subtitleColor="gray",
            anchor="middle",
        )
    )
)

# Apply the shared project styling before displaying the chart.
plot = apply_theme(plot)

plot

In [None]:
# Write the current `plot` to an HTML file (path is relative to the working directory).
plot.save("daily_counts_country_columns.html")

# Cumulative daily counts by country, with `cumulative_patients_all`, `cumulative_patients_severe`, `cumulative_patients_dead` as color, country as column facet

In [None]:
# Facet column order: countries in order of first appearance in the melted frame.
COUNTRIES = country_sum_molten_df["country"].unique().tolist()

# Width of each per-country facet column.
column_width = 200

# Raw variable name -> legend label for the three cumulative series.
cumulative_labels = {
    "cumulative_patients_all": "All",
    "cumulative_patients_severe": "Severe",
    "cumulative_patients_dead": "Dead",
}

# Keep only the cumulative series (copied so relabelling does not touch the
# source frame), then swap the raw variable names for their labels.
cumulative_by_country_molten_df = country_sum_molten_df.loc[
    country_sum_molten_df["variable"].isin(list(cumulative_labels))
].copy()
cumulative_by_country_molten_df["variable"] = (
    cumulative_by_country_molten_df["variable"].replace(cumulative_labels)
)

# NOTE(review): neither scale below is referenced by an encoding in this cell;
# the line chart uses the default color scheme and the country color encoding
# on the bar chart is commented out.
country_color_scale = alt.Scale(
    domain=[*country_color_map.keys()],
    range=[*country_color_map.values()],
)
severity_color_scale = alt.Scale(
    domain=["All minus Severe", "Severe"],
    range=["#A9A9A9", "#000000"],
)

# No selection filter here: every country is drawn in its own facet column.
filtered_plot = alt.Chart(cumulative_by_country_molten_df)

# Tooltip fields shared by both rows of panels.
tooltip = [
    alt.Tooltip(field, title=label)
    for field, label in [
        ("country", "Country"),
        ("calendar_date", "Date"),
        ("variable", "Variable"),
        ("value", "Cumulative number of patients"),
        ("num_sites", "Number of sites"),
    ]
]

# Upper row: cumulative series as lines, one facet column per country.
top_plot = (
    filtered_plot.mark_line()
    .encode(
        x=alt.X("calendar_date", axis=alt.Axis(title=None)),
        y=alt.Y("value:Q", axis=alt.Axis(title="Cumulative number of patients")),
        color=alt.Color("variable:N", legend=alt.Legend(title="Severity")),
        tooltip=tooltip,
    )
    .properties(width=column_width)
    .facet(
        column=alt.Column(
            "country:N",
            sort=COUNTRIES,
            header=alt.Header(title=None),
        )
    )
)

# Lower row: number of contributing sites per date, faceted in the same column
# order; header labels are hidden because the upper row already shows them.
bottom_plot = (
    filtered_plot.mark_bar(size=1.5)
    .encode(
        x=alt.X("calendar_date", axis=alt.Axis(title="Date")),
        y=alt.Y("num_sites:Q", axis=alt.Axis(title="# of sites")),
        #color=alt.Color("country:N", legend=None, scale=country_color_scale),
        tooltip=tooltip,
    )
    .properties(height=80, width=column_width)
    .facet(
        column=alt.Column(
            "country:N",
            sort=COUNTRIES,
            header=alt.Header(title=None, labels=False),
        )
    )
)

# Stack the two faceted rows under one title.
plot = (
    alt.vconcat(top_plot, bottom_plot)
    .resolve_scale(x="shared", color="independent")
    .properties(
        title=dict(
            text=["Cumulative Daily Counts by Country and Severity"],
            dx=50,
            subtitle=get_visualization_subtitle(data_release=DATA_RELEASE),
            subtitleColor="gray",
            anchor="middle",
        )
    )
)

# Apply the shared project styling before displaying the chart.
plot = apply_theme(plot)

plot

In [None]:
# Write the current `plot` to an HTML file (path is relative to the working directory).
plot.save("cumulative_daily_counts_country_columns.html")