In [2]:
import sys
sys.path.append('../') 

In [3]:
%load_ext autoreload
%autoreload 2
import sklearn
import copy
import numpy as np

import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
# from viz import viz
from bokeh.plotting import figure, show, output_notebook, output_file, save
from functions import merge_data
from sklearn.model_selection import RandomizedSearchCV
import load_data


from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

from fit_and_predict import fit_and_predict

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# Load the pickled predictions table that the following cells analyse.
# The path is relative to the notebook's working directory.
PREDS_PICKLE_PATH = "multi_day_2.pkl"
preds_df = pd.read_pickle(PREDS_PICKLE_PATH)

In [6]:
def l1(arr1,arr2):
    """Mean absolute difference between two equally long sequences.

    Parameters
    ----------
    arr1, arr2 : sequence or array-like of numbers
        Values to compare position by position. Both must have the same,
        non-zero length along their first axis.

    Returns
    -------
    The sum of ``|arr1[i] - arr2[i]|`` over ``i`` divided by ``len(arr1)``.
    For 1-D inputs this is a scalar; for higher-dimensional inputs the
    reduction runs over the first axis only.

    Raises
    ------
    ValueError
        If the inputs differ in length or are empty. Pairing the inputs with
        ``zip`` would silently drop the unmatched tail and still divide by
        ``len(arr1)``, yielding a wrong error figure.
    """
    first = np.asarray(arr1)
    second = np.asarray(arr2)
    if len(first) != len(second):
        raise ValueError(
            f"l1 needs inputs of equal length, got {len(first)} and {len(second)}"
        )
    if len(first) == 0:
        raise ValueError("l1 is undefined for empty inputs")
    # Reduce over axis 0 only, so each position contributes one term.
    return np.abs(first - second).sum(axis=0) / len(first)

In [7]:
# Last entry of each row's 'deaths' sequence, used as the observed value.
outcome = np.array([series[-1] for series in preds_df['deaths'].values])

# residuals maps a column label to four l1 errors, always in the order
# exponential, shared exponential, shared exponential + demographics, ensemble.
residuals = {}
for days_ahead in [1, 2, 3]:
    for lower_threshold in [0, 10]:
        # The mask below is a strict inequality, so the label says '>'
        # (it previously said '>=' while rows equal to the threshold were
        # in fact excluded).
        colname = f'{days_ahead} day, deaths>{lower_threshold}'
        above_threshold = outcome > lower_threshold
        observed = outcome[above_threshold]
        # Explicit, ordered list of prediction columns for this horizon.
        prediction_keys = [
            f'predicted_deaths_exponential_{days_ahead}',
            f'predicted_deaths_shared_exponential_{days_ahead}',
            f'predicted_deaths_shared_exponential_{days_ahead}_demographics',
            f'predicted_deaths_ensemble_{days_ahead}',
        ]
        residuals[colname] = [
            # Compare the last predicted value of each retained row
            # against the observed value.
            l1(observed, [p[-1] for p in preds_df[key][above_threshold]])
            for key in prediction_keys
        ]

In [81]:
def highlight_max(s):
    '''
    Highlight the minimum of a Series in yellow.

    NOTE: despite its name, this function marks the smallest value(s),
    not the largest. The name is kept so existing references keep working.

    Parameters: s -- a pandas Series (e.g. one column handed over by
    ``Styler.apply``).
    Returns: a list with one CSS string per element of ``s``:
    'background-color: yellow' where the element equals ``s.min()``,
    '' elsewhere. Ties are all highlighted.
    '''
    is_min = s == s.min()
    return ['background-color: yellow' if v else '' for v in is_min]

In [8]:
# One row per method, one column per (horizon, threshold) label.
# The row labels must follow the order in which the errors were appended
# to each list in `residuals`.
method_labels = ['exponential', 'shared', 'demographics', 'ensemble']
res_df = (
    pd.DataFrame(residuals, index=method_labels)
    .astype(float)
    .round(2)
)
# Display the table with the smallest entry of each column highlighted.
res_df.style.highlight_min().format("{:.2f}")

Unnamed: 0,"1 day, deaths>=0","1 day, deaths>=10","2 day, deaths>=0","2 day, deaths>=10","3 day, deaths>=0","3 day, deaths>=10"
exponential,1.32,10.21,2.11,13.58,3.42,21.04
shared,1.81,7.25,3.79,12.56,6.02,13.21
demographics,2.12,8.7,4.58,7.97,8.03,10.62
ensemble,1.46,8.55,2.35,12.37,3.12,9.5


In [9]:
import plotly.express as px

In [10]:
# Last entry of each row's 'deaths' sequence (observed value) and of each
# row's 3-day-ahead ensemble prediction.
outcome = np.array([series[-1] for series in preds_df['deaths'].values])
three_day_pred = np.array([p[-1] for p in preds_df['predicted_deaths_ensemble_3']])
print(three_day_pred)

preds_df['true_outcome'] = outcome
preds_df['3_day_ahead_pred'] = three_day_pred

# Keep the filtered rows under their own name instead of rebinding preds_df.
# Overwriting preds_df with the subset made earlier cells that read preds_df
# give different results when re-run after this one.
preds_over_10 = preds_df[preds_df['true_outcome'] > 10]
fig = px.scatter(preds_over_10, x='true_outcome', y='3_day_ahead_pred')

[280.80177531 164.12438191 176.03251198 ...   2.10943328   2.10943328
   2.10943328]


In [17]:
# List the column labels available in the predictions table.
preds_df.columns

Index(['predicted_deaths_exponential_1',
       'predicted_deaths_shared_exponential_1',
       'predicted_deaths_shared_exponential_1_demographics',
       'predicted_deaths_ensemble_1', 'predicted_deaths_exponential_2',
       'predicted_deaths_shared_exponential_2_demographics',
       'predicted_deaths_exponential_3',
       'predicted_deaths_shared_exponential_3_demographics',
       'predicted_deaths_exponential_7',
       'predicted_deaths_shared_exponential_7_demographics',
       'predicted_deaths_shared_exponential_2', 'predicted_deaths_ensemble_2',
       'predicted_deaths_shared_exponential_3', 'predicted_deaths_ensemble_3',
       'predicted_deaths_shared_exponential_7', 'predicted_deaths_ensemble_7',
       'countyFIPS', 'CountyNamew/StateAbbrev', 'deaths', 'true_outcome',
       '3_day_ahead_pred'],
      dtype='object')

In [44]:
# Keep only rows whose true_outcome exceeds 50 and label every point with
# the value of its 'CountyNamew/StateAbbrev' column.
preds_df_2 = preds_df.loc[preds_df['true_outcome'] > 50]
fig = px.scatter(
    preds_df_2,
    x='true_outcome',
    y='3_day_ahead_pred',
    text='CountyNamew/StateAbbrev',
)

In [50]:
# Place each text label underneath its marker.
fig.update_traces(textposition='bottom center')

# Diagonal from (50, 50) to (320, 320), referenced to the x and y axes:
# points lying on it have a prediction equal to the observed value.
fig.add_shape(
    type="line",
    xref="x", yref="y",
    x0=50, y0=50,
    x1=320, y1=320,
    line=dict(color="LightSeaGreen", width=3),
)

# Apply all layout settings in one call: log-scaled axes, axis titles,
# font, and a horizontally centred title.
fig.update_layout(
    xaxis_type="log",
    yaxis_type="log",
    xaxis_title="Actual deaths",
    yaxis_title="3 day ahead prediction",
    font=dict(family='sans-serif', size=12),
    title={
        'text': "Actual deaths by 3/29 vs. our predictions on 3/26",
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
)

In [15]:
# Render the figure currently bound to `fig`.
fig.show()

In [None]:
# Collect ensemble predictions for several target days into one table and
# pickle it.
# NOTE(review): train_df, test_df, outcome_type and df are not defined in any
# cell visible in this notebook, and this cell has no execution count --
# confirm it runs on a fresh kernel.
preds_dict = {}
for d in [1, 2, 3, 7]:
    # The return value is discarded; the line after the call reads
    # test_df[f'predicted_deaths_ensemble_{d}'], so fit_and_predict presumably
    # adds that column to test_df in place -- TODO confirm.
    fit_and_predict(train_df, 
                    test_df, 
                    outcome=outcome_type, 
                    mode='eval_mode',
                    method='ensemble',
                    demographic_vars=[],
                    target_day=np.array([d]))
    # Keep only the last element of each row's prediction sequence.
    preds_dict[d] = np.array([p[-1] for p in test_df[f'predicted_deaths_ensemble_{d}']])
# NOTE(review): FIPS is taken from test_df while outcome and county are taken
# from df -- verify both frames hold the same rows in the same order.
preds_dict['FIPS'] = test_df['countyFIPS']
preds_dict['outcome'] = np.array([df[outcome_type].values[i][-1] for i in range(len(df))])
preds_dict['county'] = df['CountyNamew/StateAbbrev'].values
# NOTE(review): this rebinds preds_df, replacing the frame loaded from
# "multi_day_2.pkl" at the top of the notebook, and writes to a different
# file name ('multi_day_1.pkl') than the one read there.
preds_df = pd.DataFrame(preds_dict)
preds_df.to_pickle('multi_day_1.pkl')