In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import us
import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
from vega_datasets import data
import datetime
import dateutil.parser
from os.path import join

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    get_visualization_subtitle,
    get_country_color_map,
    apply_theme,
)
from web import for_website

alt.data_transformers.disable_max_rows(); # Disable Altair's max-rows check so charts can embed more than 5000 rows

In [None]:
# Map each value of the sample table's "state" column to the matching "id"
# value, and load the US states TopoJSON feature used as the base layer for
# the choropleth maps further down.
population_df = pd.read_csv(data.population_engineers_hurricanes.url)
state_to_id = dict(
    zip(population_df["state"].tolist(), population_df["id"].tolist())
)
del population_df  # only needed to build the lookup

states = alt.topo_feature(data.us_10m.url, 'states')

In [None]:
def convert_date_us(date_str):
    """Parse a date string into a ``datetime``; return NaN when it cannot be parsed.

    Parameters
    ----------
    date_str : str or datetime.datetime
        Date text in any format accepted by ``dateutil.parser.parse``.
        A value that is already a ``datetime`` instance is returned unchanged,
        so re-running a cell that applies this function to an
        already-converted column does not wipe the dates.

    Returns
    -------
    datetime.datetime or float
        The parsed datetime, or ``np.nan`` if the value is not a parseable
        date (wrong type, unknown format, or out-of-range components).
    """
    if isinstance(date_str, datetime.datetime):
        return date_str
    try:
        return dateutil.parser.parse(date_str)
    # Only swallow parse failures; a bare ``except`` would also hide
    # KeyboardInterrupt and genuine programming errors.
    except (ValueError, TypeError, OverflowError):
        return np.nan

In [None]:
def convert_state(abbr):
    """Return the integer FIPS code of the state that ``us.states.lookup`` finds for ``abbr``.

    NOTE(review): if the lookup finds no match it presumably returns ``None``,
    in which case the ``.fips`` access raises ``AttributeError`` -- confirm.
    """
    state = us.states.lookup(abbr)
    fips_code = state.fips
    return int(fips_code)

In [None]:
# Load the HHS facility-level weekly hospital capacity time series
# (snapshot file dated 20201207) from the repository's data directory.
hhs_df = pd.read_csv(
    join(
        "..",
        "data",
        "reported_hospital_capacity_admissions_facility-level_weekly_average_timeseries_20201207.csv",
    )
)
hhs_df.head()

In [None]:
# Attach the numeric FIPS id of each row's state; the choropleth lookups below join on it.
hhs_df["id"] = hhs_df["state"].map(convert_state)

In [None]:
# Column under investigation; it is renamed to the short alias "ped_count" below.
hhs_col = "previous_day_admission_pediatric_covid_confirmed_7_day_sum"

# Keep only the columns used in this notebook and shorten the long field names.
hhs_df = hhs_df[["hospital_pk", "state", "id", "collection_week", "hospital_subtype", "is_metro_micro", hhs_col]]
hhs_df = hhs_df.rename(columns={"collection_week": "date", hhs_col: "ped_count"})

# Floor negative values at zero.
# NOTE(review): negatives are presumably suppression sentinels -- confirm
# against the HHS data dictionary.
hhs_df["ped_count"] = hhs_df["ped_count"].clip(lower=0)

# Snapshot of the cleaned row-level frame; later sections restart from it.
orig_hhs_df = hhs_df.copy()

# One row per (date, hospital). numeric_only=True keeps the aggregation to the
# numeric columns: pandas >= 2.0 no longer drops string columns silently and
# would otherwise concatenate "state" / "hospital_subtype" values.
hhs_df = hhs_df.groupby(by=["date", "hospital_pk"]).sum(numeric_only=True).reset_index()

# has_any counts every reporting hospital; has_ped flags those with a positive
# count (NaN compares False, matching the previous notna-and-positive check).
hhs_df["has_any"] = 1
hhs_df["has_ped"] = hhs_df["ped_count"] > 0
hhs_df.head()

In [None]:
# Collapse to one row per date: has_any becomes the number of hospitals seen
# on that date and has_ped the number with a positive count.
# numeric_only=True stops pandas >= 2.0 from concatenating every hospital_pk
# string into one giant value that would then be embedded in the chart data.
hhs_df = hhs_df.groupby("date").sum(numeric_only=True).reset_index()

# Convert the date strings to datetimes for plotting.
hhs_df["date"] = hhs_df["date"].apply(convert_date_us)
hhs_df.head()

In [None]:
# Line chart: per date, the number of hospitals with a positive value.
plot = (
    alt.Chart(hhs_df)
    .mark_line()
    .encode(
        x=alt.X("date"),
        y=alt.Y("has_ped:Q"),
    )
    .properties(title=f"Number of hospitals reporting > 0 value for {hhs_col} column")
)
plot

In [None]:
# Share of reporting hospitals (per date) that had a positive value, in percent.
hhs_df["pct_with_ped"] = hhs_df["has_ped"].div(hhs_df["has_any"]).mul(100)

plot = (
    alt.Chart(hhs_df)
    .mark_line()
    .encode(
        x=alt.X("date"),
        y=alt.Y("pct_with_ped:Q"),
    )
    .properties(title=f"Percentage of hospitals reporting > 0 value for {hhs_col} column")
)
plot

In [None]:
# Restart from the cleaned row-level snapshot and total each hospital's counts
# over all dates (one row per state id / hospital). groupby returns a new frame,
# so the snapshot itself is left untouched.
# numeric_only=True keeps string columns such as "date" and "state" out of the
# sum; pandas >= 2.0 would otherwise concatenate them.
hhs_df = (
    orig_hhs_df
    .groupby(by=["id", "hospital_pk"])
    .sum(numeric_only=True)
    .reset_index()
)

# has_any counts every hospital; has_ped flags hospitals whose total is positive,
# i.e. that reported a positive value on at least one date.
hhs_df["has_any"] = 1
hhs_df["has_ped"] = hhs_df["ped_count"] > 0
hhs_df.head()

In [None]:
# Group by state id: has_any / has_ped become per-state hospital counts.
# numeric_only=True avoids concatenating the hospital_pk strings on pandas >= 2.0.
hhs_df = hhs_df.groupby("id").sum(numeric_only=True).reset_index()
hhs_df.head()

In [None]:
# Choropleth: per state, the number of hospitals with a positive value on at
# least one date. The per-state counts are joined onto the map shapes by "id".
plot = (
    alt.Chart(states)
    .mark_geoshape()
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(data=hhs_df, key='id', fields=['has_ped']),
    )
    .encode(color=alt.Color("has_ped:Q"))
    .project(type='albersUsa')
    .properties(
        title=f"Number of hospitals reporting > 0 value for {hhs_col} column (on at least one date)",
        width=500,
        height=300,
    )
)
plot

In [None]:
# Flag states in which no hospital ever reported a positive value.
hhs_df["has_zero_ped"] = hhs_df["has_ped"].eq(0).map({True: "Yes", False: "No"})

# Choropleth of that Yes/No flag, joined onto the map shapes by "id".
plot = (
    alt.Chart(states)
    .mark_geoshape()
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(data=hhs_df, key='id', fields=['has_zero_ped']),
    )
    .encode(color=alt.Color("has_zero_ped:N"))
    .project(type='albersUsa')
    .properties(
        title=f"States reporting zero or missing for {hhs_col} column (on every date)",
        width=500,
        height=300,
    )
)
plot

In [None]:
# Restart from the cleaned row-level snapshot for the hospital-subtype breakdown.
hhs_df = orig_hhs_df.copy(deep=True)
hhs_df.head()

In [None]:
# Total each hospital's counts over all dates, keeping its state and subtype.
# numeric_only=True keeps the string "date" column out of the sum; pandas >= 2.0
# would otherwise concatenate it.
# NOTE(review): groupby drops rows whose key is NaN by default, so hospitals
# without a hospital_subtype are excluded here -- confirm that is intended.
hhs_df = (
    hhs_df
    .groupby(by=["state", "hospital_pk", "hospital_subtype"])
    .sum(numeric_only=True)
    .reset_index()
)

# has_any counts every hospital; has_ped flags hospitals with a positive total.
hhs_df["has_any"] = 1
hhs_df["has_ped"] = hhs_df["ped_count"] > 0
hhs_df.head()

In [None]:
# Count hospitals per state and subtype (has_any = all, has_ped = with a positive total).
# numeric_only=True avoids concatenating the hospital_pk strings on pandas >= 2.0.
hhs_df = hhs_df.groupby(by=["state", "hospital_subtype"]).sum(numeric_only=True).reset_index()
hhs_df.head()

In [None]:
# Small multiples (one panel per state): number of hospitals of each subtype.
plot = (
    alt.Chart(hhs_df)
    .mark_bar()
    .encode(
        x=alt.X("hospital_subtype:N"),
        y=alt.Y("has_any:Q"),
        color=alt.Color("hospital_subtype:N"),
        facet=alt.Facet('state:O', columns=14),
    )
    .properties(
        title="Hospital types by state (adult and pediatric)",
        height=140,
    )
)
plot

In [None]:
# Small multiples (one panel per state): hospitals of each subtype that
# reported a positive value on at least one date.
plot = (
    alt.Chart(hhs_df)
    .mark_bar()
    .encode(
        x=alt.X("hospital_subtype:N"),
        y=alt.Y("has_ped:Q"),
        color=alt.Color("hospital_subtype:N"),
        facet=alt.Facet('state:O', columns=14),
    )
    .properties(
        title=f"Hospital types by state reporting > 0 value for {hhs_col} column (on at least one date) ",
        height=140,
    )
)
plot

In [None]:
# Restart from the cleaned row-level snapshot for the metro/micro breakdown.
hhs_df = orig_hhs_df.copy(deep=True)
hhs_df.head()

In [None]:
# Total each hospital's counts over all dates, keeping its state and metro/micro flag.
# numeric_only=True keeps string columns ("date", "hospital_subtype") out of the
# sum; pandas >= 2.0 would otherwise concatenate them.
hhs_df = (
    hhs_df
    .groupby(by=["state", "hospital_pk", "is_metro_micro"])
    .sum(numeric_only=True)
    .reset_index()
)

# has_any counts every hospital; has_ped flags hospitals with a positive total.
hhs_df["has_any"] = 1
hhs_df["has_ped"] = hhs_df["ped_count"] > 0
hhs_df.head()

In [None]:
# Count hospitals per state and metro/micro flag.
# numeric_only=True avoids concatenating the hospital_pk strings on pandas >= 2.0.
hhs_df = hhs_df.groupby(by=["state", "is_metro_micro"]).sum(numeric_only=True).reset_index()

# Replace the truthy/falsy flag with a readable label for the chart legend.
hhs_df["is_metro_micro"] = hhs_df["is_metro_micro"].map(lambda flag: "Yes" if flag else "No")
hhs_df.head()

In [None]:
# Small multiples (one panel per state): hospitals with a positive value on at
# least one date, split by the metro/micro flag.
plot = (
    alt.Chart(hhs_df)
    .mark_bar()
    .encode(
        x=alt.X("is_metro_micro:N"),
        y=alt.Y("has_ped:Q"),
        color=alt.Color("is_metro_micro:N"),
        facet=alt.Facet('state:O', columns=14),
    )
    .properties(
        title=f"Population served by state reporting > 0 value for {hhs_col} column (on at least one date) ",
        width=60,
        height=100,
    )
)
plot