In [1]:
import os

import numpy as np
import pandas as pd
import us

In [2]:
# Directory that holds the input CSV files. Set the VACCINE_ALLOCATION_DATA_DIR
# environment variable to run the notebook on another machine; the default is
# the location the notebook was originally written against.
DATA_DIR = os.environ.get(
    'VACCINE_ALLOCATION_DATA_DIR',
    '/Users/alessandropreviero/Desktop/vaccine-allocation/data/inputs',
)

cdc_df = pd.read_csv(os.path.join(DATA_DIR, 'cdc-data.csv'))
# Only the path is stored here; the file itself is read by
# load_and_clean_delphi_params in a later cell.
delphi_path = os.path.join(DATA_DIR, 'delphi-parameters.csv')
pop_df = pd.read_csv(os.path.join(DATA_DIR, 'population.csv'))

In [3]:
# Risk classes are age bands, each stored as a dict with "min_age" and
# "max_age" bounds. The last band has no upper limit.
RISK_CLASSES = [
    {"min_age": lower, "max_age": upper}
    for lower, upper in (
        (0.0, 9.0),
        (10.0, 49.0),
        (50.0, 59.0),
        (60.0, 69.0),
        (70.0, 79.0),
        (80.0, np.inf),
    )
]
N_RISK_CLASSES = len(RISK_CLASSES)

# All 50 US states plus Washington D.C.
N_REGIONS = 51

def load_and_clean_delphi_params(path: str) -> pd.DataFrame:
    """Load the DELPHI parameter CSV and return the US province-level rows.

    A row is kept when its ``Country`` is ``"US"`` and its ``Province`` is a
    real name, i.e. neither the literal text ``"None"`` nor a missing value.
    The ``Continent``, ``Country`` and ``MAPE`` columns are dropped, the
    remaining columns are renamed to snake_case, and ``start_date`` is parsed
    into datetimes.

    Args:
        path: Location of the DELPHI parameters CSV file.

    Returns:
        A DataFrame indexed by ``state`` with one row per kept province.
    """
    raw = pd.read_csv(path)

    # The country-level row is marked with the text "None" in the Province
    # column. Depending on the pandas version, read_csv may parse that text as
    # a missing value (NaN), and NaN != "None" evaluates to True, so the
    # missing-value case has to be excluded explicitly as well.
    province = raw["Province"]
    is_us_province = (
        (raw["Country"] == "US")
        & province.notna()
        & (province != "None")
    )

    column_names = {
        "Province": "state",
        "Data Start Date": "start_date",
        "Median Day of Action": "intervention_time",
        "Rate of Action": "intervention_rate",
        "Infection Rate": "infection_rate",
        "Rate of Death": "death_rate",
        "Mortality Rate": "mortality_rate",
        "Rate of Mortality Rate Decay": "mortality_rate_decay",
        "Internal Parameter 1": "exposed_initial_param",
        "Internal Parameter 2": "infected_initial_param",
        "Jump Magnitude": "jump_magnitude",
        "Jump Time": "jump_time",
        "Jump Decay": "jump_decay",
    }

    # Build the result as a chain of new frames rather than mutating a
    # filtered slice in place.
    params = (
        raw[is_us_province]
        .drop(["Continent", "Country", "MAPE"], axis=1)
        .rename(columns=column_names)
        .assign(start_date=lambda frame: pd.to_datetime(frame["start_date"]))
    )

    return params.set_index("state")

In [4]:
def get_population_by_state_and_risk_class(pop_df: pd.DataFrame, risk_classes=None) -> np.ndarray:
    """Sum population per state and risk class.

    Args:
        pop_df: Frame with ``state``, ``min_age``, ``max_age`` and
            ``population`` columns.
        risk_classes: Optional list of dicts with ``min_age`` and ``max_age``
            keys. Defaults to the module-level ``RISK_CLASSES``.

    Returns:
        Array of shape ``(n_states, n_risk_classes)``. Rows follow the order
        in which states first appear in ``pop_df``; columns follow the order
        of ``risk_classes``. A row of ``pop_df`` counts towards a class when
        its age band lies entirely inside the class bounds (both inclusive).
    """
    if risk_classes is None:
        risk_classes = RISK_CLASSES

    states = list(pop_df["state"].unique())
    population = np.zeros((len(states), len(risk_classes)))

    for j, state in enumerate(states):
        # Select the state's rows once rather than once per risk class.
        state_rows = pop_df[pop_df["state"] == state]
        for k, risk_class in enumerate(risk_classes):
            in_class = (
                (state_rows["min_age"] >= risk_class["min_age"])
                & (state_rows["max_age"] <= risk_class["max_age"])
            )
            population[j, k] = state_rows.loc[in_class, "population"].sum()
    return population


In [5]:
pop_mat = get_population_by_state_and_risk_class(pop_df)
# Fail fast when the population file does not yield one row per region and
# one column per risk class; later cells rely on exactly that layout.
assert pop_mat.shape == (N_REGIONS, N_RISK_CLASSES), pop_mat.shape
pop_mat.shape

(51, 6)

In [7]:
# Read the DELPHI parameter file and keep the cleaned US rows, indexed by state.
delphi_df = load_and_clean_delphi_params(delphi_path)

In [9]:
# Deaths divided by cases for each risk class, pooled over every CDC row
# whose age band lies inside the class bounds.
baseline_mortality_rate = np.zeros(N_RISK_CLASSES)
for k, risk_class in enumerate(RISK_CLASSES):
    within_class = (
        cdc_df["min_age"].ge(risk_class["min_age"])
        & cdc_df["max_age"].le(risk_class["max_age"])
    )
    class_totals = cdc_df.loc[within_class, ["cases", "deaths"]].sum()
    cases, deaths = class_totals["cases"], class_totals["deaths"]
    baseline_mortality_rate[k] = deaths / cases

In [10]:
# Mortality of every risk class relative to the first class (ages 0-9).
# Dividing the whole array at once keeps the length tied to
# baseline_mortality_rate instead of a hard-coded class count.
mort_scaling = baseline_mortality_rate / baseline_mortality_rate[0]
mort_scaling

array([1.00000000e+00, 1.56957003e+01, 1.16118559e+02, 2.98854249e+02,
       8.02736477e+02, 1.97728353e+03])

In [11]:
# Number of time steps to project.
# NOTE(review): presumably days, matching the DELPHI time unit — confirm.
N_TIME_STEPS = 88

# Axes: (region, risk class, time step).
mortalities_approx = np.zeros(shape=(N_REGIONS, N_RISK_CLASSES, N_TIME_STEPS))
states = list(delphi_df.index.unique())  # not used below; rows are addressed by position

# Extract the two parameter columns once instead of rebuilding a row Series
# with .iloc on every iteration.
base_mortality = delphi_df["mortality_rate"].to_numpy()
mortality_decay = delphi_df["mortality_rate_decay"].to_numpy()
time_steps = np.arange(N_TIME_STEPS)

# NOTE(review): row i of pop_mat and row i of delphi_df are assumed to
# describe the same state — confirm that both inputs share one ordering.
for i in range(N_REGIONS):
    # State-level mortality rate at every time step (vector of length
    # N_TIME_STEPS): the fitted rate scaled by an arctan decay term.
    # NOTE(review): the 100 looks like a time offset and the 20 like the
    # decay time scale of the DELPHI model — confirm.
    delphi_mort = base_mortality[i] * (
        1 + 2 / np.pi * np.arctan(-(100 + time_steps) / 20 * mortality_decay[i])
    )

    # Population-weighted average of the per-class scaling factors.
    avg_scaling = np.dot(mort_scaling, pop_mat[i, :]) / pop_mat[i, :].sum()

    # Rate of the reference class such that the population-weighted mean of
    # the per-class rates equals the state-level rate.
    small_mort = delphi_mort / avg_scaling

    # Entry [k, t] is mort_scaling[k] * small_mort[t].
    mortalities_approx[i, :, :] = np.outer(mort_scaling, small_mort)

In [46]:
# Uncomment to save the approximated mortality rates to disk:
# np.save('mortality-rates-2021-heur.npy', mortalities_approx)

In [14]:
# Spot-check a single entry: region row 45, risk class index 5, time index 61.
mortalities_approx[45, 5, 61]

0.10655545057165831