In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

from datetime import datetime

from db_queries import get_location_metadata

import statsmodels.api as sm

pd.options.display.max_rows = 200

import warnings
warnings.simplefilter('ignore')


In [None]:
def slope_func(df, window, slope_var):
    """Average per-row change of a cumulative series inside one window.

    Assumes `slope_var` is cumulative, so the within-window change is taken
    as the largest value minus the smallest one.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Window', 'Window length' and `slope_var`.
    window : int
        Value of 'Window' identifying the rows to use.
    slope_var : str
        Name of the column whose slope is wanted.

    Returns
    -------
    float
        (max - min) of `slope_var` over the window divided by the window's
        'Window length'.
    """
    in_window = df['Window'] == window
    values = df.loc[in_window, slope_var]
    # 'Window length' is constant within a window, so the first entry suffices.
    n_rows = df.loc[in_window, 'Window length'].values[0]

    return (values.max() - values.min()) / n_rows


def get_slope(df, threshold=-15):
    """Build the daily log-rate series and per-window slopes for one location.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows for a single location, indexed by that location's id (the index
        must hold exactly one unique value). Required columns: 'Date',
        'Confirmed case rate', 'Testing rate', 'Death rate' and 'population'
        (one unique value). The rates are treated as cumulative. The frame
        passed in is not modified.
    threshold : float, default -15
        Rows whose 'ln(death rate)' is below this value are dropped before
        the windows are numbered.

    Returns
    -------
    df : pandas.DataFrame
        One row per calendar day that passed the threshold, with the input
        columns plus 'intercept', the three 'ln(...)' columns, 'Window'
        (0-based block of up to 7 consecutive rows), 'Window length' (rows in
        that block) and 'location_id'.
    coef_df : pandas.DataFrame
        One row per window with 'Window', 'Case slope', 'Testing slope',
        'Death slope', 'location_id' and 'Weekly tests'. Empty (but with the
        same columns) when no row passes the threshold.

    Notes
    -----
    Each slope is (max - min) of the log rate inside the window divided by the
    number of rows in the window.
    NOTE(review): a full 7-row window therefore spreads six day-to-day steps
    over a divisor of 7 -- confirm this is intended.
    """
    # (rate column, log column, slope column) for the three tracked series.
    series_spec = (
        ('Confirmed case rate', 'ln(case rate)', 'Case slope'),
        ('Testing rate', 'ln(testing rate)', 'Testing slope'),
        ('Death rate', 'ln(death rate)', 'Death slope'),
    )

    location_id = df.index.unique().item()

    # assign() returns a new frame, so the caller's data is left untouched.
    df = df.assign(intercept=1).sort_values('Date').set_index('Date')
    # Reindex to one row per day; days absent from the input repeat the
    # previous day's row.
    df = df.asfreq('D', method='pad')

    # Zero or missing rates are replaced by a tenth of a person per capita so
    # that the log is finite.
    floor_rate = 0.1 / df['population'].unique().item()
    for rate_col, log_col, _ in series_spec:
        rate = df[rate_col].ffill()
        df[rate_col] = rate.mask(rate.isnull() | (rate == 0), floor_rate)
        df[log_col] = np.log(df[rate_col])

    # Copy so the columns added below are written to an independent frame
    # rather than to a filtered view.
    df = df.loc[df['ln(death rate)'] >= threshold].copy()

    # Number the remaining rows in consecutive blocks of 7; the last block may
    # be shorter.
    df['Window'] = np.arange(len(df)) // 7
    df['Window length'] = df.groupby('Window')['Window'].transform('count')

    by_window = df.groupby('Window')
    log_cols = [log_col for _, log_col, _ in series_spec]
    slope_names = {log_col: slope_col for _, log_col, slope_col in series_spec}

    rise = by_window[log_cols].max() - by_window[log_cols].min()
    coef_df = (
        rise.div(by_window.size(), axis=0)
        .rename(columns=slope_names)
        .reset_index()
    )
    coef_df['location_id'] = location_id

    # Change in the (cumulative) testing rate between the window's smallest
    # and largest value, back on the natural scale.
    log_tests = by_window['ln(testing rate)']
    weekly_tests = np.exp(log_tests.max()) - np.exp(log_tests.min())
    # Both frames come from the same groupby, so rows are already aligned.
    coef_df['Weekly tests'] = weekly_tests.to_numpy()

    df = df.reset_index()
    df['location_id'] = location_id

    return df, coef_df


In [None]:
# Locations: most-detailed entries whose hierarchy path starts at location 102.
loc_df = get_location_metadata(location_set_id=111, location_set_version_id=664)
loc_df = loc_df.loc[loc_df['path_to_top_parent'].str.startswith('102,')]
loc_df = loc_df.loc[loc_df['most_detailed'] == 1]
loc_df = loc_df.loc[:, ['location_id', 'location_name']].reset_index(drop=True)
keep_ids = loc_df['location_id'].to_list()

# Cases and deaths, restricted to the locations above.
df = (
    pd.read_csv('/ihme/covid-19/model-inputs/best/full_data.csv')
    .assign(Date=lambda d: pd.to_datetime(d['Date']))
    .loc[lambda d: d['location_id'].isin(keep_ids)]
    .assign(**{
        'location_id': lambda d: d['location_id'].astype(int),
        'Confirmed case rate': lambda d: d['Confirmed'] / d['population'],
    })
    .loc[:, ['location_id', 'Date', 'Confirmed case rate', 'Death rate', 'population']]
    .sort_values(['location_id', 'Date'])
    .reset_index(drop=True)
)

# Test counts; the file stores dates as day.month.year in a 'date' column.
test_df = (
    pd.read_csv('/ihme/covid-19/snapshot-data/best/covid_onedrive/Testing/us_states_tests.csv')
    .assign(Date=lambda d: pd.to_datetime(d['date'], format='%d.%m.%Y'))
    .loc[lambda d: d['location_id'].isin(keep_ids)]
    .assign(location_id=lambda d: d['location_id'].astype(int))
    .rename(index=str, columns={'totaltestresults': 'Tests'})
)

# Keep only location-days present in both sources (default inner merge on the
# shared 'location_id' and 'Date' columns) and express tests per capita.
df = (
    df.merge(test_df[['location_id', 'Date', 'Tests']])
    .assign(**{'Testing rate': lambda d: d['Tests'] / d['population']})
    .drop(columns='Tests')
    .sort_values(['location_id', 'Date'])
    .reset_index(drop=True)
)


In [None]:
# Cases and testing keep their own dates.
case_df = df[['location_id', 'Date', 'Confirmed case rate', 'Testing rate', 'population']].copy()

# Deaths are re-dated 8 days earlier, so after the merge each row pairs the
# case/testing values of a day with the death rate recorded 8 days later.
death_df = (
    df[['location_id', 'Date', 'Death rate']]
    .assign(Date=lambda d: d['Date'] - pd.Timedelta(days=8))
)

# Default inner merge on the shared 'location_id' and 'Date' columns.
shift_df = case_df.merge(death_df).set_index('location_id')


In [None]:
# Per-location daily series and per-window slopes. The list selector
# `.loc[[location_id]]` always yields a DataFrame, even when a location has a
# single row (a scalar selector would return a Series there).
slope_output = [get_slope(shift_df.loc[[location_id]]) for location_id in shift_df.index.unique()]
full_df = pd.concat([s[0] for s in slope_output]).reset_index(drop=True)
slope_df = pd.concat([s[1] for s in slope_output]).reset_index(drop=True)
full_df = full_df.merge(slope_df, on=['Window', 'location_id'])

# One indicator per window actually present in the data, instead of assuming
# exactly eight weeks.
week_labels = [f'Week {window + 1}' for window in sorted(slope_df['Window'].unique())]

mod_df = slope_df.copy()
mod_df['intercept'] = 1
# mod_df['Testing rate'] = np.exp(mod_df['ln(testing rate)'])
mod_df['Window'] = 'Week ' + (mod_df['Window'] + 1).astype(str)
# dtype=float keeps the design matrix numeric; boolean dummies mixed with
# float columns turn into an object array when handed to statsmodels.
mod_df = pd.concat([mod_df, pd.get_dummies(mod_df['Window'], dtype=float)], axis=1)

# Death slope regressed on case slope, weekly tests and a separate level for
# each week (the week indicators play the role of the intercept).
dep_var = 'Death slope'
indep_vars = ['Case slope', 'Weekly tests'] + week_labels
mod = sm.OLS(mod_df[dep_var], mod_df[indep_vars]).fit()
mod_df['predicted_slope'] = mod.predict()

# Apply the fitted model to every daily row via its window's covariates.
full_df['Window'] = 'Week ' + (full_df['Window'] + 1).astype(str)
full_df = pd.concat([full_df, pd.get_dummies(full_df['Window'], dtype=float)], axis=1)
full_df['predicted_slope'] = mod.predict(full_df[indep_vars])

mod.summary()


In [None]:
# Observed death slope (x) against the model's fitted slope (y); the red
# segment is the y = x reference from 0 to 0.4.
fig, ax = plt.subplots()
ax.scatter(mod_df['Death slope'], mod_df['predicted_slope'])
ax.plot((0, 0.4), (0, 0.4), color='red')
plt.show()
slope_df

In [None]:
# Case slope against death slope, one colour per week, with each week's fitted
# line drawn from the case-slope coefficient plus that week's level (the
# 'Weekly tests' term is not included in the line).
fig, ax = plt.subplots(figsize=(11, 8.5))
case_slopes = np.arange(-0.01, 0.61, 0.01)  # x grid shared by every fitted line
for window in slope_df['Window'].unique():
    week = f'Week {window+1}'
    in_week = slope_df['Window'] == window
    ax.scatter(slope_df.loc[in_week, 'Case slope'],
               slope_df.loc[in_week, 'Death slope'],
               label=week)
    fit = mod.params['Case slope'] * case_slopes + mod.params[week]
    ax.plot(case_slopes, fit, alpha=0.5)
# y = x reference line
ax.plot((-0.01, 0.6), (-0.01, 0.6),
        color='black', alpha=0.5, linestyle='--')
ax.legend(fontsize=10)
ax.set_xlabel('Slope in ln(cumulative reported case rate)', fontsize=12)
ax.set_ylabel('Slope in ln(cumulative death rate)', fontsize=12)
ax.set_ylim(-0.01, 0.6)
ax.set_xlim(-0.01, 0.6)
plt.show()


In [None]:
# slope_df['slope(c) - slope(d)'] = slope_df['Case slope'] - slope_df['Death slope']

# plt.figure(figsize=(11, 8.5))
# for window in slope_df['Window'].unique():
#     plt.scatter(slope_df.loc[slope_df['Window'] == window, 'Testing slope'],
#                 slope_df.loc[slope_df['Window'] == window, 'slope(c) - slope(d)'],
#                 label=window)
# plt.plot((0, 0.6), (0, 0.6))
# plt.axhline(0, color='black', alpha=0.5)
# plt.xlabel('Testing slope')
# plt.ylabel('Case slope - death slope')
# plt.legend()
# plt.show()


In [None]:
# One PDF page per location: cumulative deaths (left) and day-to-day change
# (right), each drawn for the 'deaths' series and the model-based
# 'pred_deaths' series.
with PdfPages('/ihme/homes/rmbarber/covid-19/obs_v_fromcases.pdf') as pdf:
    for location_id in full_df['location_id'].unique():
        location_name = loc_df.loc[loc_df['location_id'] == location_id, 'location_name'].item()
        plot_df = full_df.loc[full_df['location_id'] == location_id].reset_index(drop=True)

        # Start one predicted step below the first observed value so that the
        # first cumulative prediction equals the first observation.
        start_cumulative = plot_df['ln(death rate)'][0] - plot_df['predicted_slope'][0]
        #plot_df['predicted_cumulative'] = start_cumulative + plot_df['predicted_slope'].cumsum()
        for window in plot_df.Window.unique():
            in_window = plot_df['Window'] == window
            plot_df.loc[in_window, 'predicted_cumulative'] = \
                start_cumulative + plot_df.loc[in_window, 'predicted_slope'].cumsum()
            # The next window continues from this window's highest value.
            start_cumulative = plot_df.loc[in_window, 'predicted_cumulative'].max()

        # Back from log rates to counts.
        plot_df['deaths'] = np.exp(plot_df['ln(death rate)']) * plot_df['population']
        plot_df['pred_deaths'] = np.exp(plot_df['predicted_cumulative']) * plot_df['population']

        fig, ax = plt.subplots(1, 2, figsize=(16.5, 8.5))
        ax[0].plot(plot_df['Date'], plot_df['deaths'])
        ax[0].plot(plot_df['Date'], plot_df['pred_deaths'])

        # First differences of the cumulative series.
        ax[1].plot(plot_df['Date'][1:], np.diff(plot_df['deaths'].values))
        ax[1].plot(plot_df['Date'][1:], np.diff(plot_df['pred_deaths'].values))

        fig.suptitle(location_name)
        pdf.savefig(fig)
        # Release the figure once it is on the page; otherwise one open figure
        # per location stays in memory until the kernel is restarted.
        plt.close(fig)
