In [1]:
import sys
sys.path.append('../') 

In [2]:
%load_ext autoreload
%autoreload 2
import sklearn
import copy
import numpy as np

import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
# from viz import viz
from bokeh.plotting import figure, show, output_notebook, output_file, save
from functions import merge_data
from sklearn.model_selection import RandomizedSearchCV
import load_data


from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

from fit_and_predict import fit_and_predict

  import pandas.util.testing as tm


In [71]:
# Load the pickled predictions table; later cells read its 'deaths' and
# 'predicted_deaths_*' columns.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# from a trusted source.
preds_df = pd.read_pickle("multi_day_6.pkl")

In [4]:
def l1(arr1,arr2):
    """Return the mean absolute difference between paired elements.

    Elements are paired with ``zip`` (so pairing stops at the shorter input),
    their absolute differences are summed, and the total is divided by
    ``len(arr1)``.
    """
    total = 0
    for left, right in zip(arr1, arr2):
        # Plain ``+`` (not ``+=``) mirrors what the built-in ``sum`` does.
        total = total + np.abs(left - right)
    return total / len(arr1)

In [72]:
# The last entry of every row's 'deaths' sequence is used as the outcome.
outcome = np.array([series[-1] for series in preds_df['deaths'].values])

# residuals maps a column label to the list of l1 errors, appended in the
# order: exponential, shared_exponential, shared_exponential + demographics,
# ensemble.
residuals = {}
for days_ahead in (1, 2, 3):
    for lower_threshold in (0, 20, 80):
        # NOTE(review): the label says 'deaths>=' but the filter is a strict
        # '>' -- confirm which one is intended.
        colname = f'{days_ahead} day, deaths>={lower_threshold}'
        above = outcome > lower_threshold  # same mask for every method
        observed = outcome[above]
        errors = residuals[colname] = []
        for method in ('exponential', 'shared_exponential', 'ensemble'):
            key = f'predicted_deaths_{method}_{days_ahead}'
            preds = list(preds_df[key][above])
            errors.append(l1(observed, preds))
            if method != 'shared_exponential':
                continue
            # NOTE(review): the demographics predictions take the last element
            # of each entry (p[-1]) while the plain ones above are used as-is
            # -- confirm the two kinds of column really differ in shape.
            key = f'predicted_deaths_{method}_{days_ahead}_demographics'
            preds = [p[-1] for p in preds_df[key][above]]
            errors.append(l1(observed, preds))

In [6]:
def highlight_max(s):
    '''
    Return one CSS string per element of ``s``: a yellow background for each
    element equal to ``s.min()`` and an empty string for the rest.

    NOTE: despite its name, this function marks the minimum, not the maximum.
    '''
    lowest = s.min()
    styles = []
    for value in s:
        styles.append('background-color: yellow' if value == lowest else '')
    return styles

In [73]:
# Row labels must follow the order in which the residuals cell appends its
# errors: exponential, shared_exponential, its demographics variant, ensemble.
method_labels = ['exponential', 'shared', 'demographics', 'ensemble']
res_df = (
    pd.DataFrame(residuals, index=method_labels)
    .astype(float)
    .round(2)
)
# Styled view: highlight_min() with its default settings, two-decimal format.
res_df.style.highlight_min().format("{:.2f}")

Unnamed: 0,"1 day, deaths>=0","1 day, deaths>=20","1 day, deaths>=80","2 day, deaths>=0","2 day, deaths>=20","2 day, deaths>=80","3 day, deaths>=0","3 day, deaths>=20","3 day, deaths>=80"
exponential,1.28,10.8,22.07,2.86,22.57,41.04,4.03,29.76,23.34
shared,1.48,5.82,8.29,3.53,11.77,20.2,6.21,19.61,42.49
demographics,1.81,8.8,16.87,4.52,20.58,43.63,9.29,13.79,22.27
ensemble,1.18,6.84,11.76,2.31,13.16,24.29,3.4,13.4,16.17


In [8]:
import plotly.express as px

In [26]:
# The last entry of every row's 'deaths' sequence is used as the outcome.
outcome = np.array([preds_df['deaths'].values[i][-1] for i in range(len(preds_df))])
preds_df['true_outcome'] = outcome

# Last element of each 3-day-ahead ensemble prediction, computed once and
# reused for both the printout and the new column.
three_day_pred = [p[-1] for p in preds_df['predicted_deaths_ensemble_3']]
print(np.array(three_day_pred))
preds_df['3_day_ahead_pred'] = three_day_pred

# Keep rows with more than 10 recorded deaths. The explicit .copy() makes the
# filtered frame independent of the original, so later cells that add columns
# to preds_df do not trigger pandas' SettingWithCopyWarning.
preds_df = preds_df[preds_df.true_outcome > 10].copy()
fig = px.scatter(preds_df, x='true_outcome', y='3_day_ahead_pred')

[288.53517429 187.91087188 188.56513915 ...   2.05381077   2.05381077
   2.05381077]


In [15]:
# List the columns currently present in preds_df.
preds_df.keys()

Index(['predicted_deaths_ensemble_1',
       'predicted_deaths_shared_exponential_1_demographics',
       'predicted_deaths_exponential_1',
       'predicted_deaths_shared_exponential_1',
       'predicted_deaths_exponential_2',
       'predicted_deaths_shared_exponential_2',
       'predicted_deaths_shared_exponential_2_demographics',
       'predicted_deaths_ensemble_2', 'predicted_deaths_exponential_3',
       'predicted_deaths_shared_exponential_3',
       'predicted_deaths_shared_exponential_3_demographics',
       'predicted_deaths_ensemble_3', 'countyFIPS', 'CountyNamew/StateAbbrev',
       'deaths', 'true_outcome', '3_day_ahead_pred'],
      dtype='object')

In [27]:
# Restrict to rows whose recorded outcome exceeds 80 and label every point
# with its county/state name.
high_outcome_mask = preds_df['true_outcome'] > 80
preds_df_2 = preds_df[high_outcome_mask]
fig = px.scatter(
    preds_df_2,
    x='true_outcome',
    y='3_day_ahead_pred',
    text='CountyNamew/StateAbbrev',
)

In [28]:
# Place each text label below its marker.
fig.update_traces(textposition='bottom center')

# Reference line with endpoints x0 = y0 = 80 and x1 = y1 = 320, positioned
# relative to the x / y axes.
fig.add_shape(
    type="line",
    xref="x",
    yref="y",
    x0=80,
    y0=80,
    x1=320,
    y1=320,
    line=dict(color="LightSeaGreen", width=3),
)

# Single layout update: log axes, axis labels, font and a centred title.
# Kept as the last expression so the notebook displays the returned figure.
fig.update_layout(
    xaxis_type="log",
    yaxis_type="log",
    xaxis_title="Actual deaths",
    yaxis_title="3 day ahead prediction",
    font=dict(family='sans-serif', size=12),
    title=dict(
        text="Actual deaths by 3/29 vs. our predictions on 3/26",
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
)

In [None]:
# Render the figure held in `fig`.
fig.show()

In [31]:
# Alias the county/state column under a name usable with attribute access.
preds_df['CountyName'] = preds_df['CountyNamew/StateAbbrev']

# Hand-picked counties to show in the next figure.
selected_counties = [
    'Wayne, MI',
    'Orleans, LA',
    'Los Angeles, CA',
    'Santa Clara, CA',
    'Snohomish, WA',
    'Dougherty, GA',
]
preds_df_3 = preds_df[preds_df['CountyName'].isin(selected_counties)]
fig = px.scatter(
    preds_df_3,
    x='true_outcome',
    y='3_day_ahead_pred',
    text='CountyNamew/StateAbbrev',
)

In [None]:
# Place each text label below its marker.
fig.update_traces(textposition='bottom center')

# Reference line with endpoints x0 = y0 = 10 and x1 = y1 = 80, positioned
# relative to the x / y axes.
fig.add_shape(
    type="line",
    xref="x",
    yref="y",
    x0=10,
    y0=10,
    x1=80,
    y1=80,
    line=dict(color="LightSeaGreen", width=3),
)

# Single layout update: log axes, axis labels, font and a centred title.
# Kept as the last expression so the notebook displays the returned figure.
fig.update_layout(
    xaxis_type="log",
    yaxis_type="log",
    xaxis_title="Actual deaths",
    yaxis_title="3 day ahead prediction",
    font=dict(family='sans-serif', size=12),
    title=dict(
        text="Recorded deaths by 3/29 vs. our predictions on 3/26",
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
)

In [16]:
# Abridged view with the raw ensemble prediction columns, the recorded
# outcome and the county name. Requires the 'true_outcome' and 'CountyName'
# columns created in earlier cells.
# NOTE(review): preds_df_abr is reassigned by a later cell (the one selecting
# '3_day_pred', '2_day_pred', '1_day_pred'), so this selection is overwritten.
preds_df_abr = preds_df[['predicted_deaths_ensemble_1', 
                         'predicted_deaths_ensemble_2',
                        'predicted_deaths_ensemble_3',
                        'true_outcome',
                        'CountyName']]

In [34]:
# For each horizon keep only the last element of the ensemble prediction.
# The 3, 2, 1 order fixes the order in which the new columns are created.
for horizon in (3, 2, 1):
    source_column = f'predicted_deaths_ensemble_{horizon}'
    preds_df[f'{horizon}_day_pred'] = [p[-1] for p in preds_df[source_column]]

# NOTE(review): this label says 3/30 while the figure titles elsewhere in the
# notebook say 3/29 -- confirm which date the recorded deaths refer to.
preds_df['Recorded deaths 3/30'] = preds_df['true_outcome']

In [35]:
# Columns of the abridged table, in display/export order.
abridged_columns = [
    '3_day_pred',
    '2_day_pred',
    '1_day_pred',
    'Recorded deaths 3/30',
    'CountyName',
]
preds_df_abr = preds_df[abridged_columns]

In [36]:
# Display the abridged predictions table.
preds_df_abr

Unnamed: 0,3_day_pred,2_day_pred,1_day_pred,Recorded deaths 3/30,CountyName
1841,288.535174,339.942444,316.247157,305.0,"Queens, NY"
1824,187.910872,273.286416,239.75613,216.0,"Kings, NY"
1803,188.565139,247.304473,240.532654,215.0,"Bronx, NY"
2941,171.575725,183.973404,165.554366,150.0,"King, WA"
1831,106.926821,140.500309,125.229781,119.0,"New York, NY"
1121,107.389246,106.009022,88.076551,86.0,"Orleans, LA"
1285,81.180554,74.967307,70.088589,83.0,"Wayne, MI"
1843,102.618831,67.332868,56.770569,58.0,"Richmond, NY"
1830,66.310665,56.047427,44.193175,48.0,"Nassau, NY"
1266,94.063903,58.916275,39.812686,48.0,"Oakland, MI"


In [37]:
# Export the abridged table to CSV in the current working directory; the row
# index is written as well because index=False is not passed.
preds_df_abr.to_csv("ensemble_predictions.csv")

In [44]:
# Successive differences of the 'deaths' sequence stored at position 6.
# NOTE(review): presumably day-over-day new deaths if the sequence is
# cumulative -- confirm.
np.diff(preds_df['deaths'].values[6])

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  1.,  0.,  2.,  1.,  1.,  3.,  5.,  8.,  5., 11.,
        9., 10., 27.])

In [69]:
# Five separate one-element zero arrays (each list slot is its own array).
weighted_preds = [np.zeros(1) for _ in range(5)]

In [70]:
# Inspect the first array in weighted_preds.
weighted_preds[0]

array([0.])