In [1]:
import sys
sys.path.append('../') 

In [2]:
%load_ext autoreload
%autoreload 2
import sklearn
import copy
import numpy as np

import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
# from viz import viz
from bokeh.plotting import figure, show, output_notebook, output_file, save
from functions import merge_data
from sklearn.model_selection import RandomizedSearchCV
import load_data


from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

from fit_and_predict import fit_and_predict

  import pandas.util.testing as tm


In [3]:
# Load the saved forecast table from the working directory. Later cells index
# the 'deaths' and 'predicted_deaths_*' entries with [-1], so each of those
# cells holds a sequence rather than a scalar.
# NOTE(review): unpickling can execute arbitrary code; only load a trusted file.
preds_df = pd.read_pickle("multi_day_2.pkl")

In [4]:
def l1(arr1, arr2):
    """Return the mean absolute difference between two paired sequences.

    Elements are paired positionally with ``zip`` and the summed absolute
    differences are divided by ``len(arr1)``.

    NOTE(review): ``zip`` stops at the shorter input while the divisor is
    always ``len(arr1)``, so callers are expected to pass equal-length inputs.
    """
    total = 0
    for left, right in zip(arr1, arr2):
        total = total + np.abs(left - right)
    return total / len(arr1)

In [5]:
# Mean absolute error (via l1) of every forecaster, for each horizon and each
# death-count filter. Each column's list is filled in the order:
# exponential, shared_exponential, shared_exponential + demographics, ensemble.
outcome = np.array([deaths[-1] for deaths in preds_df['deaths'].values])
residuals = {}
for days_ahead in [1, 2, 3]:
    prediction_keys = [
        f'predicted_deaths_exponential_{days_ahead}',
        f'predicted_deaths_shared_exponential_{days_ahead}',
        f'predicted_deaths_shared_exponential_{days_ahead}_demographics',
        f'predicted_deaths_ensemble_{days_ahead}',
    ]
    for lower_threshold in [0, 10]:
        # The filter is strict (outcome > threshold), so the label uses '>'
        # as well; the previous 'deaths>=' label misdescribed the rows scored.
        keep = outcome > lower_threshold
        observed = outcome[keep]
        colname = f'{days_ahead} day, deaths>{lower_threshold}'
        residuals[colname] = [
            l1(observed, [p[-1] for p in preds_df[key][keep]])
            for key in prediction_keys
        ]

In [6]:
def highlight_max(s):
    '''
    Return one CSS string per element of a Series, shading the smallest
    value(s) yellow and leaving every other element unstyled ('').

    NOTE: despite the name, the comparison is against s.min(), so it is the
    minimum that gets highlighted.
    '''
    lowest = s.min()
    return ['background-color: yellow' if value == lowest else '' for value in s]

In [7]:
# One row per forecaster; the labels must follow the order in which the errors
# were appended to each list in `residuals`.
method_labels = ['exponential', 'shared', 'demographics', 'ensemble']
res_df = pd.DataFrame(residuals, index=method_labels).astype(float).round(2)
# Shade the smallest error and show two decimals.
res_df.style.highlight_min().format("{:.2f}")

Unnamed: 0,"1 day, deaths>=0","1 day, deaths>=10","2 day, deaths>=0","2 day, deaths>=10","3 day, deaths>=0","3 day, deaths>=10"
exponential,1.34,9.67,2.29,14.84,3.63,22.91
shared,1.8,6.49,3.83,11.7,6.08,12.35
demographics,2.09,7.85,4.57,7.87,7.88,10.84
ensemble,1.46,7.89,2.44,12.5,3.21,10.15


In [8]:
import plotly.express as px

In [9]:
# Attach the last entry of each 'deaths' sequence and of each 3-day ensemble
# forecast as scalar columns, then keep rows with more than 10 deaths.
outcome = np.array([deaths[-1] for deaths in preds_df['deaths'].values])
preds_df['true_outcome'] = outcome
three_day_pred = np.array([p[-1] for p in preds_df['predicted_deaths_ensemble_3']])
print(three_day_pred)
preds_df['3_day_ahead_pred'] = three_day_pred
# .copy() makes the filtered frame independent of the original, so the column
# assignments made on preds_df in later cells do not raise
# SettingWithCopyWarning.
preds_df = preds_df[preds_df.true_outcome > 10].copy()
fig = px.scatter(preds_df, x='true_outcome', y='3_day_ahead_pred')

[280.47081928 164.04023725 175.94218632 ...   2.14455634   2.14455634
   2.14455634]


In [15]:
# List the column labels currently present on the forecast table.
preds_df.columns

Index(['predicted_deaths_ensemble_1',
       'predicted_deaths_shared_exponential_1_demographics',
       'predicted_deaths_exponential_1',
       'predicted_deaths_shared_exponential_1',
       'predicted_deaths_exponential_2',
       'predicted_deaths_shared_exponential_2',
       'predicted_deaths_shared_exponential_2_demographics',
       'predicted_deaths_ensemble_2', 'predicted_deaths_exponential_3',
       'predicted_deaths_shared_exponential_3',
       'predicted_deaths_shared_exponential_3_demographics',
       'predicted_deaths_ensemble_3', 'countyFIPS', 'CountyNamew/StateAbbrev',
       'deaths', 'true_outcome', '3_day_ahead_pred'],
      dtype='object')

In [10]:
# Scatter only the rows whose true_outcome exceeds 80, labelling each point
# with its county/state string.
above_80 = preds_df['true_outcome'] > 80
preds_df_2 = preds_df[above_80]
fig = px.scatter(
    preds_df_2,
    x='true_outcome',
    y='3_day_ahead_pred',
    text='CountyNamew/StateAbbrev',
)

In [None]:
# Put each text label below its marker and switch both axes to log scale.
fig.update_traces(textposition='bottom center')
fig.update_layout(xaxis_type="log", yaxis_type="log")

# Reference diagonal from (80, 80) to (320, 320), positioned in axis
# coordinates (xref/yref bound to the x and y axes).
fig.add_shape(
    type="line", xref="x", yref="y",
    x0=80, y0=80, x1=320, y1=320,
    line=dict(color="LightSeaGreen", width=3),
)

# Centred title, axis labels and font applied in a single layout update.
plot_title = "Actual deaths by 3/29 vs. our predictions on 3/26"
fig.update_layout(
    title={'text': plot_title, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
    xaxis_title="Actual deaths",
    yaxis_title="3 day ahead prediction",
    font=dict(family='sans-serif', size=12),
)

In [None]:
# Render the current figure.
fig.show()

In [12]:
# Expose the county/state label under a shorter column name; later cells
# select 'CountyName' from preds_df.
preds_df['CountyName'] = preds_df['CountyNamew/StateAbbrev']

# Hand-picked counties to plot.
selected_counties = [
    'Wayne, MI',
    'Orleans, LA',
    'Los Angeles, CA',
    'Santa Clara, CA',
    'Snohomish, WA',
    'Dougherty, GA',
]
preds_df_3 = preds_df[preds_df['CountyName'].isin(selected_counties)]
fig = px.scatter(
    preds_df_3,
    x='true_outcome',
    y='3_day_ahead_pred',
    text='CountyNamew/StateAbbrev',
)

In [None]:
# Put each text label below its marker and switch both axes to log scale.
fig.update_traces(textposition='bottom center')
fig.update_layout(xaxis_type="log", yaxis_type="log")

# Reference diagonal from (10, 10) to (80, 80), positioned in axis
# coordinates (xref/yref bound to the x and y axes).
fig.add_shape(
    type="line", xref="x", yref="y",
    x0=10, y0=10, x1=80, y1=80,
    line=dict(color="LightSeaGreen", width=3),
)

# Centred title, axis labels and font applied in a single layout update.
plot_title = "Recorded deaths by 3/29 vs. our predictions on 3/26"
fig.update_layout(
    title={'text': plot_title, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
    xaxis_title="Actual deaths",
    yaxis_title="3 day ahead prediction",
    font=dict(family='sans-serif', size=12),
)

In [16]:
# Select the raw ensemble prediction columns plus outcome and county name.
# NOTE(review): preds_df_abr is reassigned from scalar columns a few cells
# below, so this selection is superseded before it is displayed or saved.
ensemble_columns = [
    'predicted_deaths_ensemble_1',
    'predicted_deaths_ensemble_2',
    'predicted_deaths_ensemble_3',
    'true_outcome',
    'CountyName',
]
preds_df_abr = preds_df[ensemble_columns]

In [17]:
# For each horizon, keep only the last element of the ensemble forecast as a
# scalar column ('3_day_pred', '2_day_pred', '1_day_pred', added in that order).
for horizon in (3, 2, 1):
    forecasts = preds_df[f'predicted_deaths_ensemble_{horizon}']
    preds_df[f'{horizon}_day_pred'] = [p[-1] for p in forecasts]

# Presentation-friendly alias of the observed count.
preds_df['Recorded deaths'] = preds_df['true_outcome']

In [18]:
# Abbreviated table: scalar ensemble forecasts, the observed count and the
# county label, in display order.
summary_columns = [
    '3_day_pred',
    '2_day_pred',
    '1_day_pred',
    'Recorded deaths',
    'CountyName',
]
preds_df_abr = preds_df[summary_columns]

In [19]:
# Display the abbreviated prediction table.
preds_df_abr

Unnamed: 0,3_day_pred,2_day_pred,1_day_pred,Recorded deaths,CountyName
1841,280.470819,233.100785,269.767828,253,"Queens, NY"
1803,164.040237,153.740151,192.646812,188,"Bronx, NY"
1824,175.942186,154.131096,211.072976,185,"Kings, NY"
2941,142.494289,155.249127,158.673913,144,"King, WA"
1831,97.256739,91.600224,114.239915,103,"New York, NY"
1121,91.446599,87.586452,86.36197,73,"Orleans, LA"
1285,65.674691,62.528152,58.976603,56,"Wayne, MI"
1843,69.606319,70.735256,53.774192,46,"Richmond, NY"
1852,59.399289,43.394429,45.95581,40,"Suffolk, NY"
583,34.572107,30.60334,35.053611,40,"Cook, IL"


In [20]:
# Write the abbreviated prediction table to a CSV in the working directory.
# No index argument is passed, so pandas' default applies and the row index is
# written as the first column.
preds_df_abr.to_csv("ensemble_predictions.csv")