# Comparison with pennsignals BayesCHIME

Source: https://github.com/pennsignals/chime_sims

In [None]:
from os import getcwd, path

from datetime import timedelta

from scipy.stats import beta, gamma, norm, expon
from scipy.optimize import curve_fit


import pandas as pd
from numpy import linspace, sqrt, arange

from gvar import gvar
from gvar import mean as gv_mean
from gvar import sdev as gv_sdev

from plotly.subplots import make_subplots
from plotly.graph_objects import Scatter, Histogram

from models import seir_step, one_minus_logistic_fcn, FitFcn
from utils.plotting import add_gvar_scatter, hex_to_rgba

In [None]:
# Directory with the input CSV files, resolved against the current working directory.
DATA = path.abspath(path.join(getcwd(), "data", "penn"))

In [None]:
# Label that prefixes the names of the parameter and time-series files.
location = "Downtown"

parameters = pd.read_csv(path.join(DATA, f"{location}_parameters.csv"))

# Time series: drop columns that are entirely empty, zero-fill the remaining
# gaps, index by date and cast every column to int.
data = pd.read_csv(path.join(DATA, f"{location}_ts.csv"), parse_dates=["date"])
data = data.dropna(how="all", axis=1).fillna(0)
data = data.set_index("date").astype(int)

parameters

## Read in priors

In [None]:
# Maps BayesCHIME parameter names (the values of the ``param`` column in the
# parameters table and the column names of the posterior samples) to the names
# under which the fitted values are stored in this notebook.
# Parameters without an entry here are skipped when the priors and posteriors
# are processed.
PARAMETER_MAP = {
    "n_hosp": "hospitalized_initial",
    "hosp_prop": "hospitalization_rate",
    "recovery_days": "recovery_days_i",
    "logistic_k": "social_distance_halfing_days",
    "logistic_x0": "social_distance_delay",
    "logistic_L": "ratio",
    "beta": "beta_i",
    "incubation_days": "incubation_days",
    "nu": "nu",
    "hosp_LOS": "length_of_stay",
}

In [None]:
# scipy.stats distribution families selectable through the ``distribution`` argument.
DIST_MAP = {
    "beta": beta,
    "gamma": gamma,
}


def get_dist_df(distribution, p1, p2, **kwargs):
    """Tabulate the pdf of a prior and return it with the frozen distribution.

    Arguments:
        distribution: Key of ``DIST_MAP`` ("beta" or "gamma").
        p1: Shape parameter ``a`` of the distribution.
        p2: Shape parameter ``b`` for "beta"; the ``scale`` for anything else.
        **kwargs: Further entries of a parameter row. Only ``param`` is read
            (it fills the ``param`` column); everything else is ignored so the
            function can be called as ``get_dist_df(**row)``.

    Returns:
        Tuple ``(data, dist)``. ``data`` is a DataFrame with 100 rows and the
        columns ``x`` (evenly spaced between the 1% and 99% quantiles), ``y``
        (pdf evaluated at ``x``), ``dist`` (the distribution name) and
        ``param``. ``dist`` is the frozen scipy.stats distribution.

    Raises:
        KeyError: If ``distribution`` is not a key of ``DIST_MAP``.
    """
    shape_args = {"a": p1}
    shape_args["b" if distribution == "beta" else "scale"] = p2

    dist = DIST_MAP[distribution](**shape_args)
    x = linspace(dist.ppf(0.01), dist.ppf(0.99), 100)

    data = pd.DataFrame({"x": x, "y": dist.pdf(x)})
    data["dist"] = distribution
    data["param"] = kwargs.get("param")

    return data, dist

In [None]:
def fit_norm_dist(x, y, dist, param):
    """Approximate a tabulated pdf by a normal distribution.

    A least-squares fit of ``norm.pdf`` to the points ``(x, y)`` is run, started
    from the mean and standard deviation of ``dist``.

    Arguments:
        x: Points at which the pdf was tabulated.
        y: Pdf values at ``x``.
        dist: Frozen scipy.stats distribution providing the starting values.
        param: Label written to the ``param`` column of the result.

    Returns:
        Tuple ``(data, dist)``. ``data`` is a DataFrame with the columns ``x``,
        ``y`` (fitted normal pdf at ``x``), ``dist`` (always "normal") and
        ``param``. ``dist`` is the fitted frozen normal distribution.
    """
    start_mean, start_var = dist.stats(moments="mv")
    best_fit, _ = curve_fit(
        norm.pdf, xdata=x, ydata=y, p0=(start_mean, sqrt(start_var))
    )
    fitted = norm(*best_fit)

    table = pd.DataFrame({"x": x, "y": fitted.pdf(x)})
    table["dist"] = "normal"
    table["param"] = param

    return table, fitted

In [None]:
dfs = []
mapped_pars = {}

for idx, row in parameters.iterrows():

    # Parameters without an entry in PARAMETER_MAP are not compared.
    if row["param"] not in PARAMETER_MAP:
        continue

    mapped_param = PARAMETER_MAP[row["param"]]

    # Constants have no prior distribution to tabulate.
    if row["distribution"] == "constant":
        continue

    # Tabulate the prior itself, then its normal approximation on the same grid.
    prior_df, dist = get_dist_df(**row)
    tmp_df, dist = fit_norm_dist(prior_df.x, prior_df.y, dist, row["param"])
    dfs.extend([prior_df, tmp_df])


df = pd.concat(dfs, ignore_index=True)
df.head()

In [None]:
params = df.param.unique()

col_wrap = 3
n_cols = min(col_wrap, len(params))
# Ceiling division by col_wrap so the grid always has the row that
# `i // col_wrap + 1` selects below. The former `len(params) // 4 + 1`
# produced too few rows for e.g. 7 or 10 parameters.
n_rows = max(1, -(-len(params) // col_wrap))

fig = make_subplots(
    cols=n_cols,
    rows=n_rows,
    shared_xaxes=False,
    shared_yaxes=False,
    subplot_titles=params,
)

# One subplot per parameter: the prior as markers, its normal fit as a line.
for i, param in enumerate(params):
    tmp_df = df.query("param == @param")
    for color, dist in zip(["blue", "green"], tmp_df.dist.unique()):
        ttmp_df = tmp_df.query("dist == @dist")
        fig.add_trace(
            Scatter(
                x=ttmp_df.x,
                y=ttmp_df.y,
                name=dist,
                line_color=color,
                mode="lines" if dist == "normal" else "markers",
                showlegend=False,
            ),
            col=i % col_wrap + 1,
            row=i // col_wrap + 1,
        )

fig.show()

## Read in posteriors

In [None]:
# Posterior samples; the "Unnamed: 0" column is discarded right after loading.
posterior_df = pd.read_csv(path.join(DATA, "posteriors.csv"))
posterior_df = posterior_df.drop(columns="Unnamed: 0")

In [None]:
cols = [col for col in posterior_df.columns if col in PARAMETER_MAP]

col_wrap = 3
n_cols = min(col_wrap, len(cols))
# Ceiling division by col_wrap so the grid always has the row that
# `i // col_wrap + 1` selects below. The former `len(cols) // 4 + 1`
# produced too few rows for e.g. 7 or 10 columns.
n_rows = max(1, -(-len(cols) // col_wrap))

# Mapped parameter name -> (mean, standard deviation) of the normal fit.
posterior_fits = {}

fig = make_subplots(
    cols=n_cols,
    rows=n_rows,
    shared_xaxes=False,
    shared_yaxes=False,
    subplot_titles=cols,
)

for i, col in enumerate(cols):
    xx = posterior_df[col].values
    # Maximum-likelihood normal fit to the posterior samples.
    mean, scale = norm.fit(xx)
    dist = norm(mean, scale)
    x = linspace(dist.ppf(0.001), dist.ppf(0.999), 100)
    y = dist.pdf(x)
    posterior_fits[PARAMETER_MAP[col]] = (mean, scale)
    fig.add_trace(
        Scatter(
            x=x,
            y=y,
            name="Normal fit",
            mode="lines",
            line_color="blue",
            # Legend entries come from the first subplot so they also appear
            # when only a single column is plotted.
            showlegend=i == 0,
        ),
        col=i % col_wrap + 1,
        row=i // col_wrap + 1,
    )
    fig.add_trace(
        Histogram(
            x=xx,
            name="Posterior CHIME",
            histnorm="probability density",
            marker_color="indianred",
            showlegend=i == 0,
            nbinsx=20,
        ),
        col=i % col_wrap + 1,
        row=i // col_wrap + 1,
    )

fig.show()

In [None]:
# Forecast table indexed by date; the index frequency is inferred from the dates.
penn_forecast = pd.read_csv(path.join(DATA, "forecast.csv"), parse_dates=["date"])
penn_forecast = penn_forecast.set_index("date")
penn_forecast.index.freq = pd.infer_freq(penn_forecast.index)
penn_forecast.head()

In [None]:
# Turn every fitted (mean, sdev) pair into a gvar.
PP = {key: gvar(mean, sdev) for key, (mean, sdev) in posterior_fits.items()}

# NOTE(review): scipy's signature is expon.ppf(q, loc=0, scale=1), so the second
# argument shifts the distribution and this evaluates to
# 1 / <mean incubation_days> + ln(100). If the 99% quantile of an exponential
# with mean <incubation_days> was intended, that would be
# expon.ppf(0.99, scale=<mean incubation_days>). Confirm before changing: the
# offset also shifts the model dates that are compared against fixed dates later.
offset = expon.ppf(0.99, loc=1 / posterior_fits["incubation_days"][0])
PP["social_distance_delay"] += offset

print("\n".join(f"{key}={val}" for key, val in PP.items()))

In [None]:
# Point values come from the ``base`` column of the parameters table; the
# hospitalization rate is the gvar fitted to the posterior.
pp = parameters.set_index("param")
n_hosp, mkt_share, region_pop = (
    pp.loc[name, "base"] for name in ("n_hosp", "mkt_share", "region_pop")
)
hosp_prop = PP["hospitalization_rate"]

# Scale the hospitalized count by market share and hospitalization rate.
total_infections = n_hosp / mkt_share / hosp_prop
initial_susceptible = region_pop - total_infections

In [None]:
# All initial infections are placed in the exposed compartment.
XX = {
    "date": penn_forecast.index,
    "initial_susceptible": region_pop - total_infections,
    "initial_exposed": total_infections,
    "initial_infected": 0,
    "initial_recovered": 0,
    "initial_hospitalized": n_hosp / mkt_share,
}
# pop() removes length_of_stay from PP; only the integer part of its mean is kept.
XX["length_of_stay"] = int(PP.pop("length_of_stay").mean)
print("\n".join(f"{key}={val}" for key, val in XX.items() if key != "date"))

In [None]:
# Day grid 0..49 on which the distancing curve is evaluated.
x_days = arange(0, 50)

social_distance = one_minus_logistic_fcn(
    x=x_days,
    x0=PP["social_distance_delay"],
    decay_width=PP["social_distance_halfing_days"],
    ratio=PP["ratio"],
)

In [None]:
fig = make_subplots(subplot_titles=["Relative effective social distancing"])
add_gvar_scatter(
    fig,
    x=x_days,
    y=social_distance,
    gv_mode="band",
    color="#e377c2",
    showlegend=False,
)
# The x axis counts days from the first forecast date.
fig.update_layout(
    xaxis_title=f"Days since {penn_forecast.index[0].date()}",
    yaxis_title=r"$\beta(t)/\beta$",
)
fig.show()

In [None]:
# Build the model callable from the SEIR step function, with
# one_minus_logistic_fcn supplied as beta_i_fcn.
fcn_sir = FitFcn(seir_step, beta_i_fcn=one_minus_logistic_fcn)
# Evaluate it with XX and PP; the result is used as a DataFrame below.
# NOTE(review): presumably XX holds the fixed inputs and PP the uncertain
# parameters -- confirm against models.FitFcn.
sir_df = fcn_sir(XX, PP)
sir_df.head()

In [None]:
col = "hospitalized_new"
# Trailing space is intentional: the quantile label is appended directly.
col_penn = "Hospitalized Admits "

color = "#1f77b4"

fig = make_subplots(subplot_titles=["50% CI comparison", "Residuals"], rows=2)

# Move the model dates back by the offset that was added to the distancing delay.
tmp_df = sir_df.copy()
tmp_df.index -= timedelta(days=offset)

add_gvar_scatter(
    fig,
    x=tmp_df.index,
    y=tmp_df[col].values * mkt_share,
    gv_mode="band",
    color="#e377c2",
    showlegend=True,
    n_sigmas=0.674,
    name="Normal (shifted by -offset)",
    col=1,
    row=1,
)

# CHIME band: 25% line first, then median and 75% each filled down to the
# previous trace. Only the median contributes a legend entry.
for quantile, trace_kwargs in (
    ("25%", {"fill": None, "showlegend": False}),
    (
        "Median",
        {"fill": "tonexty", "fillcolor": hex_to_rgba(color, 0.2), "name": "CHIME"},
    ),
    (
        "75%",
        {
            "fill": "tonexty",
            "fillcolor": hex_to_rgba(color, 0.2),
            "showlegend": False,
        },
    ),
):
    fig.add_trace(
        Scatter(
            x=penn_forecast.index,
            y=penn_forecast[col_penn + quantile],
            mode="lines",
            line={"color": color, "shape": "linear"},
            **trace_kwargs,
        ),
        col=1,
        row=1,
    )

# One gvar per forecast date: median as mean, full 25%-75% width as sdev.
# NOTE(review): the bands are drawn at n_sigmas=0.674, i.e. the 50% interval is
# treated as +-0.674 sdev; under that convention the sdev would be
# (75% - 25%) / (2 * 0.674). Confirm whether using the full width is intended.
s = penn_forecast.apply(
    lambda row: gvar(
        row[col_penn + "Median"], row[col_penn + "75%"] - row[col_penn + "25%"]
    ),
    axis=1,
)

tmp_df.index = tmp_df.index.to_period()
# NOTE(review): the two hard-coded dates have to select equally many rows for
# the element-wise subtraction; they depend on the forecast range and on offset.
chime_window = s[:"2020-09-17"]
diff = chime_window.values - tmp_df.loc["2020-03-06":, col].values * mkt_share

add_gvar_scatter(
    fig,
    x=chime_window.index,
    y=diff,
    gv_mode="band",
    color="#e377c2",
    n_sigmas=0.674,
    name="normal (shifted by -offset)",
    col=1,
    row=2,
    y_min=-100,
    showlegend=False,
)


fig.update_layout(
    yaxis_title="Covid 19 Hospital Admits",
    xaxis_title="Date",
    yaxis2_title="CHIME - Normal",
    height=800,
)
fig.show()