In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import nnls
from sklearn.base import RegressorMixin
from sklearn.linear_model import RidgeCV

In [None]:
from pyseir import load_data
from pyseir.inference.infer_t0 import infer_t0
from pyseir.models.suppression_policies import distancing_measure_suppression
from pyseir.inference.infer_rt import RtInferenceEngine

In [None]:
# Load the whitelist table and keep the `fips` values of the rows whose
# `inference_ok` flag is True.
whitelist = load_data.load_whitelist()
whitelist_fips = whitelist.loc[whitelist["inference_ok"] == True, "fips"]

In [None]:
# Load the public-implementations (NPI) table and select the whitelisted rows by label.
# NOTE(review): `.loc[whitelist_fips]` presumes df_npi is indexed by FIPS code and that
# every whitelisted FIPS is present in that index — confirm against load_data.
df_npi = load_data.load_public_implementations_data()
df_npi_whitelisted = df_npi.loc[whitelist_fips]

In [None]:
def make_intervention_design(row):
    """Turn one row of per-intervention values into a step-function indicator table.

    Parameters
    ----------
    row : pandas.Series
        Indexed by intervention name; each value is used as a row label of the
        result (presumably the date the intervention took effect — confirm
        against the loader).

    Returns
    -------
    pandas.DataFrame
        One row per distinct value found in `row` (sorted), one column per
        intervention name. A cell is 1 from the row where that intervention's
        value first appears onward, and 0 before it.
    """
    # Key each entry by (value, intervention) so that unstacking level 1 pivots the
    # intervention names into columns.
    # `.items()` replaces `.iteritems()`, which was removed in pandas 2.0.
    ts = pd.Series({(v, k): 1 for k, v in row.items()})
    # Forward-fill so an intervention stays "on" once started; anything still missing
    # (rows before the start) becomes 0. `.ffill()` replaces the deprecated
    # `fillna(method='ffill')`.
    return ts.unstack(level=1).ffill().fillna(0)

# Build one intervention indicator table per whitelisted row, keyed by the row's index label.
npi_ts = {idx: make_intervention_design(row) for idx, row in df_npi_whitelisted.iterrows()}

In [None]:
%%time
# Per-FIPS design tables (NPI columns + the chosen R_t series), collected for later concat.
intervention_ts_accum = []
interventions = ['500_gatherings', '50_gatherings', 'entertainment_gym',
       'federal_guidelines', 'foreign_travel_ban', 'public_schools',
       'restaurant_dine-in', 'stay_at_home']
# Records which R_t column was selected for each FIPS.
fips_series_choice = {}
for k, npi in npi_ts.items():
    try:
        engine = RtInferenceEngine(fips=k)
        r_t_all = engine.infer_all(plot=False, shift_deaths=0)
    except KeyError:
        # Best effort: skip any FIPS for which inference raises KeyError.
        print(f"{k} not inferred")
        continue
    # Select the MAP estimate with the most history, i.e. the "MAP" column with the
    # largest number of strictly positive entries. The vectorized comparison replaces
    # the deprecated element-wise `applymap`.
    map_columns = [c for c in r_t_all.columns if "MAP" in c]
    idx = (r_t_all[map_columns].fillna(0) > 0).sum().idxmax()
    fips_series_choice[k] = idx
    rt = r_t_all[idx].rename("Rt")
    # Combine the NPI timeseries for the fips `k` with the timeseries for the chosen series for R_t.
    # The outer join keeps every index label from either side; gaps are forward-filled and
    # whatever is still missing (leading rows) becomes 0.
    # `.ffill()` replaces the deprecated `fillna(method='ffill')`.
    fips_design = (
        pd.merge(npi, rt, how='outer', right_index=True, left_index=True)
        .ffill()
        .fillna(0)
    )
    # Take the cumulative sum of the interventions columns to get the number of rows
    # (days, when the index is daily) the intervention has been in place
    fips_design[interventions] = fips_design[interventions].cumsum()
    # Tag every row with its fips so it can become an index level
    fips_design["fips"] = k
    # Remove rows where R_t is zero (including the zero-filled rows before R_t starts),
    # then append `fips` as a second index level without mutating a filtered slice in place.
    fips_design = fips_design[fips_design["Rt"] > 0].set_index("fips", append=True)
    intervention_ts_accum.append(fips_design)


In [None]:
# Convert the {fips: chosen R_t column} mapping into a Series indexed by fips.
fips_series_choice = pd.Series(fips_series_choice)
# Display the fips whose selected series is anything other than "Rt_MAP__new_cases".
fips_series_choice[fips_series_choice != "Rt_MAP__new_cases"]

In [None]:
# Stack the per-fips design tables into one long frame (rows keep their two-level index).
intervention_ts = pd.concat(intervention_ts_accum)

In [None]:
# Name the two index levels and put `fips` first.
# The guard makes this cell idempotent: without it, a second run would relabel the
# already reordered levels as ["date", "fips"] and silently swap their names.
if list(intervention_ts.index.names) != ["fips", "date"]:
    intervention_ts.index = intervention_ts.index.rename(["date", "fips"])
    intervention_ts.index = intervention_ts.index.reorder_levels(["fips", "date"])

In [None]:
class NNLSModel(RegressorMixin):
    """Linear regression with non-negative coefficients, solved with `scipy.optimize.nnls`.

    Parameters
    ----------
    fit_intercept : bool, default False
        When True, a constant column of ones is appended to the design matrix so an
        intercept is estimated as well. The intercept goes through the same solver,
        so it is also constrained to be non-negative.

    Attributes (set by `fit`)
    -------------------------
    coef_ : ndarray of shape (n_features,)
        Non-negative weight for each feature.
    intercept_ : float
        Fitted intercept, or 0.0 when `fit_intercept` is False.
    residual_ : float
        Second value returned by `nnls` (the residual norm of the fit).
    """

    def __init__(self, fit_intercept=False):
        self.fit_intercept = fit_intercept

    def fit(self, X, y):
        """Fit the model to `X` (n_samples, n_features) and `y` (n_samples,); returns self."""
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        if self.fit_intercept:
            # One entry per *sample* (row). The original sized this column from the
            # number of features, which made hstack fail for any non-square X.
            X = np.hstack((X, np.ones((X.shape[0], 1))))
        solution, self.residual_ = nnls(X, y)
        if self.fit_intercept:
            # The last coefficient belongs to the appended ones-column.
            self.coef_, self.intercept_ = solution[:-1], float(solution[-1])
        else:
            self.coef_, self.intercept_ = solution, 0.0
        return self

    def predict(self, X):
        """Return `X @ coef_ + intercept_` for `X` of shape (n_samples, n_features)."""
        return np.dot(X, self.coef_) + self.intercept_


In [None]:
%%time
# Per-lag results: {lag: {"model": fitted estimator, "score": R^2, <intervention>: coefficient}}
ridge_regressors = {}
nonneg_regressors = {}
# Baseline per fips: the maximum R_t in that fips' series.
R0 = intervention_ts.groupby("fips")["Rt"].max()
# Fractional drop of R_t relative to that baseline (0 at the peak, approaching 1 as R_t -> 0).
intervention_ts["Rt_percent_decrease"] = 1 - intervention_ts["Rt"] / R0

for lag in range(29):
    # Pair the interventions at row t with the R_t decrease `lag` rows later, within
    # each fips. groupby().shift() replaces the hand-rolled slice/re-index shift, which
    # went through a tuple-valued index and wrote into groupby slices.
    shifted_decrease = intervention_ts.groupby("fips")["Rt_percent_decrease"].shift(-lag)
    # The last `lag` rows of each fips have no target and are dropped.
    lagged_df = intervention_ts.assign(Rt_percent_decrease=shifted_decrease).dropna()
    # Regress on 0/1 "intervention active" indicators rather than the cumulative counts.
    # The vectorized comparison replaces the deprecated element-wise `applymap`.
    X = (lagged_df[interventions] > 0).astype(int)
    y = lagged_df["Rt_percent_decrease"]

    ridge = RidgeCV(fit_intercept=False)
    ridge_regressors[lag] = {"model": ridge.fit(X, y), "score": ridge.score(X, y)}
    for i, c in zip(interventions, ridge.coef_):
        ridge_regressors[lag][i] = c

    nonneg_model = NNLSModel()
    nonneg_regressors[lag] = {"model": nonneg_model.fit(X, y), "score": nonneg_model.score(X, y)}
    for i, c in zip(interventions, nonneg_model.coef_):
        nonneg_regressors[lag][i] = c

In [None]:
# One row per lag: the fitted ridge model, its score and one coefficient column per intervention.
ridge_results = pd.DataFrame(ridge_regressors).T
# Display only the coefficients and the score.
ridge_results[interventions + ["score"]]

In [None]:
# One row per lag: the fitted non-negative model, its score and one coefficient column per intervention.
nonneg_results = pd.DataFrame(nonneg_regressors).T
# Display only the coefficients and the score.
nonneg_results[interventions + ["score"]]

In [None]:
# Row (lag) of the ridge results with the highest score.
ridge_results.iloc[ridge_results["score"].values.argmax()]

In [None]:
# Row (lag) of the non-negative results with the highest score.
nonneg_results.iloc[nonneg_results["score"].values.argmax()]

In [None]:
# The lag models were fit on 0/1 "intervention active" indicators, whereas
# `intervention_ts[interventions]` holds cumulative counts. Binarize the features the
# same way before predicting; otherwise predictions grow with the count.
lag8_features = (intervention_ts[interventions] > 0).astype(int)
intervention_ts["predicted_change_lag8"] = ridge_results.iloc[8]["model"].predict(lag8_features)

In [None]:
# Pick a random (fips, date) index entry and keep its fips. Fips with more rows are
# proportionally more likely to be chosen.
random_fips = np.random.choice(intervention_ts.index.values)[0]
fips_ts = intervention_ts.xs(random_fips)

fig, ax = plt.subplots(figsize=(5, 5))
fips_ts["predicted_change_lag8"].plot(ax=ax)
# Second y-scale over the *same* x-axis. The original combined `twiny()` with
# `secondary_y=True`, which added a spurious top x-axis and a third axes.
ax2 = ax.twinx()
fips_ts["Rt_percent_decrease"].plot(style='r', ax=ax2)
ax.set_title(f"fips {random_fips}")
ax.set_ylabel("predicted_change_lag8")
ax2.set_ylabel("Rt_percent_decrease")
print(random_fips)