In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import plotly.graph_objects as go
import plotly.express as px
import us

In [2]:
# Load an array that was previously saved in NumPy's .npy format.
# NOTE(review): this is an absolute, machine-specific path, so the cell only
# runs on the author's machine -- consider a configurable data directory.
# NOTE(review): presumably 2021 mortality rates (going by the file name);
# the array's shape and contents are not inspected in this notebook section.
mortality_path = '/Users/alessandropreviero/Desktop/vaccine-allocation/data/outputs/mortality-rates-2021.npy'
mortality_m = np.load(mortality_path)

In [3]:
# Input locations. Despite the `_dir` suffix, both names hold paths to CSV
# *files*: each one is ultimately handed to pd.read_csv further down.
# NOTE(review): absolute, machine-specific paths -- the notebook will not run
# elsewhere without editing these two lines.
cdc_dir = '/Users/alessandropreviero/Desktop/vaccine-allocation/data/inputs/cdc-data.csv'
predictions_dir = '/Users/alessandropreviero/Desktop/vaccine-allocation/data/inputs/delphi-predictions.csv'

In [4]:
def load_and_clean_delphi_predictions(path: str) -> pd.DataFrame:
    """Load DELPHI predictions from CSV and return cleaned, province-level US rows.

    Steps:
      1. Read the CSV at ``path``.
      2. Keep rows whose ``Country`` is ``"US"`` and whose ``Province`` is an
         actual value (neither the text ``"None"`` nor missing).
      3. Add ``recovering`` (row-wise sum of ``AR``, ``DHR``, ``DQR``) and
         ``dying`` (row-wise sum of ``AD``, ``DHD``, ``DQD``).
      4. Keep a fixed subset of columns and rename them to descriptive names.
      5. Parse the ``date`` column into datetimes.

    Args:
        path: Location of the predictions CSV (anything ``pd.read_csv`` accepts).

    Returns:
        DataFrame with columns ``state``, ``date``, ``susceptible``,
        ``exposed``, ``infectious``, ``recovered``, ``deceased``,
        ``total_detected_cases``, ``total_detected_deaths``, ``recovering``
        and ``dying``.

    Raises:
        KeyError: If any of the expected input columns is absent from the CSV.
    """
    raw = pd.read_csv(path)

    # Keep US rows that carry a real province.  Depending on the pandas
    # version, read_csv may parse the literal text "None" as a missing value;
    # a bare `!= "None"` comparison is True for NaN and would let those rows
    # through, so both representations are excluded explicitly.
    province = raw["Province"]
    keep = (
        (raw["Country"] == "US")
        & province.notna()
        & (province != "None")
    )

    # Take an explicit copy so the column assignments below write to an
    # independent frame (no SettingWithCopyWarning, no ambiguity about
    # whether `raw` is modified).
    df = raw.loc[keep].copy()

    # Aggregate intermediary states
    df["recovering"] = df[["AR", "DHR", "DQR"]].sum(axis=1)
    df["dying"] = df[["AD", "DHD", "DQD"]].sum(axis=1)

    # Select relevant columns and rename
    column_names = {
        "Province": "state",
        "Day": "date",
        "S": "susceptible",
        "E": "exposed",
        "I": "infectious",
        "R": "recovered",
        "D": "deceased",
        "DT": "total_detected_cases",
        "DD": "total_detected_deaths",
    }
    selected = list(column_names) + ["recovering", "dying"]
    df = df[selected].rename(columns=column_names)

    # Cast date column as datetime object
    df["date"] = pd.to_datetime(df["date"])

    return df

In [5]:
# Age brackets defining the risk classes.  A CDC row is assigned to a class
# when its own [min_age, max_age] range lies inside the class bounds.
RISK_CLASSES = [
    dict(min_age=0.0, max_age=9.0),
    dict(min_age=10.0, max_age=49.0),
    dict(min_age=50.0, max_age=59.0),
    dict(min_age=60.0, max_age=69.0),
    dict(min_age=70.0, max_age=79.0),
    dict(min_age=80.0, max_age=np.inf)
]
# When True, CDC-derived rates are rescaled to match the overall mortality
# rate implied by the predictions; when False they are used as-is.
RESCALE_BASELINE = True
# Derived from RISK_CLASSES so the two can never drift apart.
N_RISK_CLASSES = len(RISK_CLASSES)


def get_baseline_mortality_rate_estimates(
        cdc_df: pd.DataFrame,
        predictions_df: pd.DataFrame,
        start_date: dt.datetime,
        end_date: dt.datetime,
) -> np.ndarray:
    """Estimate one baseline mortality rate per risk class.

    For every entry of ``RISK_CLASSES`` the rate is ``deaths / cases`` summed
    over the CDC rows whose age range falls inside the class, multiplied by a
    rescaling factor.  With ``RESCALE_BASELINE`` enabled the factor is the
    ratio between the mortality rate implied by the predictions and the
    overall CDC mortality rate; otherwise it is ``1.0``.

    Args:
        cdc_df: Table with ``min_age``, ``max_age``, ``cases`` and ``deaths``
            columns.
        predictions_df: Table with ``date``, ``total_detected_cases`` and
            ``total_detected_deaths`` columns.
        start_date: First date (inclusive) of the prediction window.
        end_date: Last date (inclusive) of the prediction window.

    Returns:
        1-D float array with ``len(RISK_CLASSES)`` entries, ordered like
        ``RISK_CLASSES``.
    """
    # Compute rescaling factor
    in_window = (
        (predictions_df["date"] >= start_date)
        & (predictions_df["date"] <= end_date)
    )
    # NOTE(review): max() is taken per column over *all* rows in the window,
    # so the two maxima may come from different rows (e.g. different states)
    # -- confirm this is the intended aggregate.
    predicted_cases, predicted_deaths = predictions_df[in_window][
        ["total_detected_cases", "total_detected_deaths"]
    ].max()
    cdc_mortality_rate = cdc_df["deaths"].sum() / cdc_df["cases"].sum()
    delphi_mortality_rate = predicted_deaths / predicted_cases
    rescaling_factor = delphi_mortality_rate / cdc_mortality_rate if RESCALE_BASELINE else 1.0

    # Compute baseline mortality rates.  The output is sized from
    # RISK_CLASSES itself, so the loop below can neither index out of bounds
    # nor leave unfilled trailing zeros.
    baseline_mortality_rate = np.zeros(len(RISK_CLASSES))
    for k, risk_class in enumerate(RISK_CLASSES):
        in_class = (
            (cdc_df["min_age"] >= risk_class["min_age"])
            & (cdc_df["max_age"] <= risk_class["max_age"])
        )
        cases, deaths = cdc_df[in_class][["cases", "deaths"]].sum()
        baseline_mortality_rate[k] = deaths / cases * rescaling_factor
    return baseline_mortality_rate

In [6]:
# Date window handed to the baseline mortality-rate estimation.
start_date = dt.datetime(year=2021, month=1, day=30)
end_date = dt.datetime(year=2021, month=4, day=30)

# The CDC table is read as-is; the predictions go through the cleaning helper.
cdc_df = pd.read_csv(cdc_dir)
predictions_df = load_and_clean_delphi_predictions(predictions_dir)

In [8]:
# One baseline mortality rate per risk class over the chosen date window.
base_mort = get_baseline_mortality_rate_estimates(
    cdc_df=cdc_df,
    predictions_df=predictions_df,
    start_date=start_date,
    end_date=end_date,
)
# Show the rates as percentages.
100 * base_mort

array([7.29949255e-03, 1.14570647e-01, 8.47606558e-01, 2.18148436e+00,
       5.85956893e+00, 1.44331664e+01])

In [23]:



# Replicate the six per-class rates into a (51, 6, 91) array so that
# final_mort[i, k, t] == base_mort[k] for every i and t.
# NOTE(review): 51 is presumably the number of regions (states + DC) and 91
# the number of days in the 30 Jan - 30 Apr 2021 window -- TODO confirm and
# derive both from the data instead of hard-coding them.
final_mort = np.tile(np.reshape(base_mort, (1, 6, 1)), (51, 1, 91))
final_mort.shape

(51, 6, 91)