In [2]:
import pandas as pd
import numpy as np
import os, time, stat, io, glob, pickle
from scipy.stats import gamma, norm
from sklearn.linear_model import LinearRegression
from IPython.core.display import display, HTML

import matplotlib.pyplot as plt
import matplotlib as mpl
plt.style.use('fivethirtyeight')
import plotly.io as pio

%load_ext autoreload
%autoreload 2
from covid_data_helper import *
from coronita_chart_helper import *
from coronita_model_helper import *
from coronita_web_helper import *
from coronita_bokeh_helper import *

from matplotlib.backends.backend_pdf import PdfPages
from PyPDF2 import PdfFileWriter, PdfFileReader

def footnote_str_maker():
    """Build the two-line chart footnote: author credit, then a creation timestamp.

    The timestamp is the current local time formatted as
    ``"%d %b, %Y at %I:%M %p"``.
    """
    template = 'Author: Michael Donnelly | twtr: @donnellymjd | www.covidoutlook.info\nChart created on {}'
    created_at = pd.Timestamp.today().strftime("%d %b, %Y at %I:%M %p")
    return template.format(created_at)

def add_plotly_footnote(fig):
    """Add the author-credit and creation-date annotations below a figure.

    Calls ``fig.update_layout(annotations=[...])`` with two paper-anchored,
    left-aligned annotations and returns the same ``fig`` object.
    """
    def _paper_note(y_pos, text):
        # Both footnotes share every layout attribute except height and text.
        return dict(x = 0, y = y_pos, font_size=10, showarrow=False,
                    xref='paper', yref='paper',
                    xanchor='left', yanchor='auto', xshift=0, yshift=0,
                    text=text)

    author_text = 'Author: Michael Donnelly | twtr: <a href="https://twitter.com/donnellymjd">@donnellymjd</a> | <a href="http://{0}">{0}</a>'.format(
        'www.michaeldonnel.ly')
    created_text = 'Chart created on {}'.format(pd.Timestamp.today().strftime("%d %b %Y"))

    footnotes = [_paper_note(-0.06, author_text), _paper_note(-0.09, created_text)]
    fig.update_layout(annotations=footnotes)
    return fig


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
## DATA INGESTION ##
# Every get_* loader below comes from the star-imported project helper modules
# (their internals are not visible in this notebook). Each call binds one
# notebook-level name that later cells read.

df_st_testing = get_covid19_tracking_data()

df_census = get_census_pop()

counties_geo = get_counties_geo()

# df_jhu_counties = get_jhu_counties()

# Copy of the state testing data with death/positive renamed to deaths/cases
# and the 'code' index level pivoted into columns.
df_st_testing_fmt = df_st_testing.copy()
df_st_testing_fmt = df_st_testing_fmt.rename(columns={'death':'deaths','positive':'cases'}).unstack('code')

try:
    df_interventions = get_state_policy_events()
except Exception as exc:
    # Deliberately best-effort: fall back to an empty frame, but say why so a
    # broken source is not mistaken for "no interventions". Catching Exception
    # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
    print('Could not load state policy events ({}: {}); continuing with an empty frame.'.format(
        type(exc).__name__, exc))
    df_interventions = pd.DataFrame()

df_goog_mob_us = get_goog_mvmt_us()
df_goog_mob_state = get_goog_mvmt_state(df_goog_mob_us)

df_counties = get_complete_county_data(df_census, df_goog_mob_us)

df_hhs_hosp = get_hhs_hosp()

df_can = get_can_data()
df_cancounties = get_can_counties_data()

df_vax_hes = get_vax_hesitancy_data()

df_nys_pub = get_nysdoh_data()


State Testing Data Last Observation:  20210307
Got COVID19 Tracking Data
Got CDC Death Data
Got HHS testing data
Got HHS hospital data.
Got Census Data
Got counties geo json
Got KFF Policy dates
Got Google Movement Data
Got NYS DOH data
Got NYT county level data.
Got NYC DOH data
Got Complete County Time Series Data
Got HHS hospitalization data.
Got COVID Act Now data.
Got Census Data
Got HHS Vaccine Hesitancy Data.
Got NYS DOH data


In [4]:
# For each file family below, pick the match with the greatest os.path.getctime.
# NOTE: max() raises ValueError if a glob pattern matches nothing.

# Previous forecast frame.
list_of_files = glob.glob('./output/nys_fore/df_fore_allregions_*.pkl') # * means all if need specific format then *.csv
latest_file = max(list_of_files, key=os.path.getctime)
print(latest_file)
df_fore_allregions = pd.read_pickle(latest_file)

# Previous per-region model dictionaries.
# NOTE(review): pickle.load can execute arbitrary code; only load files this
# pipeline wrote itself.
list_of_files = glob.glob('./output/nys_fore/allregion_model_dicts_*.pkl') # * means all if need specific format then *.csv
latest_file = max(list_of_files, key=os.path.getctime)
print(latest_file)
with open(latest_file, 'rb') as handle:
    allregion_model_dicts = pickle.load(handle)

# NYS data workbook: sheet_name=None returns a dict of {sheet name: DataFrame}.
list_of_files = glob.glob('./data/202*-NYS-COVID-19-Data-vShare.xlsx')
latest_file = max(list_of_files, key=os.path.getctime)
print(latest_file)
dict_of_dfs = pd.read_excel(latest_file, sheet_name=None, skiprows=0, engine='openpyxl')

# Interventions workbook; the first spreadsheet column is dropped.
list_of_files = glob.glob('./data/*-Interventions-v*.xlsx')
latest_file = max(list_of_files, key=os.path.getctime)
print(latest_file)
df_interventions = pd.read_excel(latest_file, engine='openpyxl').iloc[:,1:]

df_interventions = df_interventions.rename(columns={'Date':'dt','Region(s)':'region','Event':'event_name'})
keepevents = ['State of emergency declaration','Schools closed','Closure of non-essential businesses']
# Keep every region-specific event, but only the listed statewide ones.
# .copy() makes the filtered result an independent frame, so the column
# assignment below is not a write onto a slice of the unfiltered frame.
df_interventions = df_interventions[((df_interventions.region == 'Statewide (all)')
                        & (df_interventions.event_name.isin(keepevents)))
                       | (df_interventions.region != 'Statewide (all)')].copy()
df_interventions['social_distancing_direction'] = 'holiday'
df_interventions.head()


./output/nys_fore/df_fore_allregions_20210422.pkl
./output/nys_fore/allregion_model_dicts_20210422.pkl
./data/20210426-NYS-COVID-19-Data-vShare.xlsx
./data/20210426-Interventions-v1000.xlsx


Unnamed: 0,dt,region,event_name,Source Title,Source Link,social_distancing_direction
3,2020-03-07,Statewide (all),State of emergency declaration,NYS EO 202,https://www.governor.ny.gov/news/no-202-declar...,holiday
9,2020-03-18,Statewide (all),Schools closed,NYS EO 202.4,https://www.governor.ny.gov/news/no-2024-conti...,holiday
13,2020-03-22,Statewide (all),Closure of non-essential businesses,NYS EO 202.8,https://www.governor.ny.gov/news/no-2028-conti...,holiday
14,2020-05-15,Central New York,Phase I re-opening (Construction and Manufactu...,NYS EO 202.31,https://www.governor.ny.gov/news/no-20231-cont...,holiday
15,2020-05-15,Finger Lakes,Phase I re-opening (Construction and Manufactu...,NYS EO 202.31,https://www.governor.ny.gov/news/no-20231-cont...,holiday


In [5]:
def read_nys_priv_hist_data(df_hist, skiprows=1, df_census=df_census):
    """Reshape one wide county-by-date worksheet into a long numeric frame.

    Parameters
    ----------
    df_hist : DataFrame
        Raw worksheet. Row ``skiprows - 1`` holds the real column headers
        (including ``'County'`` and, optionally, ``'Region'`` and
        ``"Governor's Economic Region"``); the rows after it hold the data.
        The caller's frame is not modified.
    skiprows : int
        Number of leading rows up to and including the header row.
    df_census : DataFrame
        Unused. Kept so existing calls keep working. The default is bound to
        the notebook-level ``df_census`` when this function is defined.

    Returns
    -------
    DataFrame
        Indexed by ``(county, nys_region, gov_econ_region, dt)`` with a single
        column (labelled ``0``) of values coerced to numeric; values that
        cannot be parsed become NaN.

    Raises
    ------
    KeyError
        If the header row does not contain ``'County'``; the caller relies on
        this to retry with a different ``skiprows``.
    """
    # Take the header row and slice the data rows into a fresh copy BEFORE
    # assigning column labels, so the caller's worksheet keeps its own columns.
    header = df_hist.iloc[skiprows-1]
    df_hist = df_hist.iloc[skiprows:].copy()
    df_hist.columns = header

    df_hist['County'] = df_hist['County'].replace({'St.Lawrence':'St. Lawrence'})
    df_hist.columns.name = 'dt'
    # Drop the 'Total' row and rows that are empty in every column.
    df_hist = df_hist.loc[df_hist['County'] !='Total'].dropna(how='all')

    if 'Region' in df_hist.columns:
        df_hist = df_hist.loc[~df_hist['Region'].isnull()]
    else:
        df_hist['nys_region'] = np.nan

    if 'Governor\'s Economic Region' not in df_hist.columns:
        df_hist['gov_econ_region'] = np.nan

    df_hist = df_hist.rename(columns = {'County':'county',
                              'Region':'nys_region',
                              'Governor\'s Economic Region':'gov_econ_region'})

    # Wide -> long: the remaining (date) columns become the 'dt' index level.
    df_hist = df_hist.set_index(['county', 'nys_region', 'gov_econ_region'])
    df_hist = pd.to_numeric(df_hist.stack('dt'), errors='coerce')
    df_hist = df_hist.reset_index()
    df_hist['dt'] = pd.to_datetime(df_hist['dt']).dt.normalize()
    df_hist = df_hist.set_index(['county', 'nys_region', 'gov_econ_region','dt'])

    return df_hist

def get_nys_static_data(df_census, df_region_source=None):
    """Build the static NY county table: population plus region assignments.

    Parameters
    ----------
    df_census : DataFrame
        Must contain ``state``, ``SUMLEV``, ``county``, ``fips``, ``pop2019``
        and ``pop2019_18plus``. Only rows with ``state == 'NY'`` and
        ``SUMLEV == 50`` are kept.
    df_region_source : DataFrame, optional
        Any frame whose columns or index levels include ``county``,
        ``nys_region`` and ``gov_econ_region``. When omitted, the
        notebook-level ``df_nys`` is used, so existing calls are unchanged.

    Returns
    -------
    DataFrame
        One row per kept census row, left-joined on ``county`` to the distinct
        (county, nys_region, gov_econ_region) combinations.
    """
    if df_region_source is None:
        # Backward-compatible fallback to the global built in a later cell.
        df_region_source = df_nys

    county_regions = df_region_source.reset_index()[
        ['county','nys_region','gov_econ_region']].drop_duplicates()

    is_ny_county = (df_census['state']=='NY') & (df_census['SUMLEV']==50)
    df_ny_pop = df_census.loc[is_ny_county,
                             ['state','county','fips','pop2019', 'pop2019_18plus']].copy()
    return pd.merge(df_ny_pop, county_regions, on='county', how='left')


In [6]:
# Parse every county-by-date worksheet into long form and stack them, tagging
# each row with the worksheet it came from. These three tabs are skipped here;
# 'BED county' is parsed separately in the next cell.
skip_tabs = ['Summary', 'ICU county', 'BED county']
tab_frames = []

for tab in dict_of_dfs.keys():
    print(tab)
    if tab in skip_tabs:
        print('passing this one: ', tab)
        continue

    # The header is on the first row for some tabs and the second for others:
    # try the first, and retry with the second if parsing fails.
    try:
        this_tab = read_nys_priv_hist_data(dict_of_dfs[tab], 1)
    except Exception:
        this_tab = read_nys_priv_hist_data(dict_of_dfs[tab], 2)
    this_tab.columns = ['value']
    this_tab['metric'] = tab
    tab_frames.append(this_tab)

# Concatenate once instead of growing a frame inside the loop (avoids repeated
# copying and concatenation with an empty frame).
df_nys = pd.concat(tab_frames, axis=0) if tab_frames else pd.DataFrame()

df_ny_pop = get_nys_static_data(df_census)
# Pivot worksheet names into columns: one row per county/region/date.
df_nys = df_nys.reset_index().set_index(
    ['county', 'nys_region', 'gov_econ_region','dt','metric']).unstack('metric')['value']
df_nys = df_nys.sort_index()

display(df_ny_pop.head())
display(df_nys.head())


Summary
passing this one:  Summary
Daily tests_county
Positive tests_county
Total hospitalizations_county
Net change total hosp_county
Gross change total hosp_county
Total ICU intubated_county
Net change ICU intubated_county
Cumulative discharge_county
Change in discharge_county
Cumulative hosp deaths_county
New_hosp_admissions_county
Chng hosp death_county_reported
New hosp death_county_occurred
ICU county
passing this one:  ICU county
BED county
passing this one:  BED county


Unnamed: 0,state,county,fips,pop2019,pop2019_18plus,nys_region,gov_econ_region
0,NY,Albany,36001,305506,242266.258,Rest of NYS,Capital Region
1,NY,Allegany,36003,46091,36550.163,Rest of NYS,Western New York
2,NY,Bronx,36005,1418207,1124638.151,NYC,New York City
3,NY,Broome,36007,190488,151056.984,Rest of NYS,Southern Tier
4,NY,Cattaraugus,36009,76117,60360.781,Rest of NYS,Western New York


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,metric,Change in discharge_county,Chng hosp death_county_reported,Cumulative discharge_county,Cumulative hosp deaths_county,Daily tests_county,Gross change total hosp_county,Net change ICU intubated_county,Net change total hosp_county,New hosp death_county_occurred,New_hosp_admissions_county,Positive tests_county,Total ICU intubated_county,Total hospitalizations_county
county,nys_region,gov_econ_region,dt,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Albany,Rest of NYS,Capital Region,2020-03-03,,,,,0.0,,,,,,0.0,,
Albany,Rest of NYS,Capital Region,2020-03-04,,,,,0.0,,,,0.0,,0.0,,
Albany,Rest of NYS,Capital Region,2020-03-05,,,,,3.0,,,,,,0.0,,
Albany,Rest of NYS,Capital Region,2020-03-06,,,,,0.0,,,,,,0.0,,
Albany,Rest of NYS,Capital Region,2020-03-07,,,,,5.0,,,,,,0.0,,


In [7]:
def _read_bed_county_table(df_bed_raw, occurrence, value_name, n_rows=62):
    """Extract one of the stacked tables on the 'BED county' worksheet in long form.

    The start of each table is located by the ``occurrence``-th (0-based)
    'Albany' entry in the first column. The row just above it supplies the
    column headers (unless the table starts at row 0), and ``n_rows`` rows are
    kept from there (62 presumably being one per NYS county -- TODO confirm).

    Returns ``(table, start_row)`` where ``table`` has columns ``county``,
    ``dt`` and ``value_name``.
    """
    df_table = df_bed_raw.copy()
    df_table.iloc[:,0] = df_table.iloc[:,0].replace({'St.Lawrence':'St. Lawrence',
                                                     'New York State': 'Total'
                                                    })
    first_col = df_table.iloc[:,0]
    start_row = first_col[first_col == 'Albany'].index[occurrence]
    if start_row > 0:
        df_table.columns = df_table.iloc[start_row-1]
    # .loc slicing is label-based and inclusive at both ends.
    df_table = df_table.loc[start_row:start_row + n_rows - 1]
    df_table = df_table.rename(columns = {'County':'county'})
    df_table = df_table.set_index(['county'])
    df_table.columns.name = 'dt'
    df_table = pd.to_numeric(df_table.stack('dt'), errors='coerce')
    df_table = df_table.reset_index()
    df_table['dt'] = pd.to_datetime(df_table['dt']).dt.normalize()
    df_table = df_table.rename(columns={0:value_name})
    return df_table, start_row


# First table on the sheet -> hosp_beds_tot, joined onto df_nys by county and date.
df_hist, first_albany = _read_bed_county_table(dict_of_dfs['BED county'], 0, 'hosp_beds_tot')
df_nys = pd.merge(df_nys.reset_index(), df_hist, how='left', on=['county','dt']).set_index(df_nys.index.names)

# Second table on the sheet -> hosp_beds_avail.
df_hist, second_albany = _read_bed_county_table(dict_of_dfs['BED county'], 1, 'hosp_beds_avail')
df_nys = pd.merge(df_nys.reset_index(), df_hist, how='left', on=['county','dt']).set_index(df_nys.index.names)
df_nys.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Change in discharge_county,Chng hosp death_county_reported,Cumulative discharge_county,Cumulative hosp deaths_county,Daily tests_county,Gross change total hosp_county,Net change ICU intubated_county,Net change total hosp_county,New hosp death_county_occurred,New_hosp_admissions_county,Positive tests_county,Total ICU intubated_county,Total hospitalizations_county,hosp_beds_tot,hosp_beds_avail
county,nys_region,gov_econ_region,dt,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Albany,Rest of NYS,Capital Region,2020-03-03,,,,,0.0,,,,,,0.0,,,,
Albany,Rest of NYS,Capital Region,2020-03-04,,,,,0.0,,,,0.0,,0.0,,,,
Albany,Rest of NYS,Capital Region,2020-03-05,,,,,3.0,,,,,,0.0,,,,
Albany,Rest of NYS,Capital Region,2020-03-06,,,,,0.0,,,,,,0.0,,,,
Albany,Rest of NYS,Capital Region,2020-03-07,,,,,5.0,,,,,,0.0,,,,


In [8]:
# cases_per100k per county FIPS, taken from the last row of the date-indexed
# county panel (presumably the most recent date -- assumes df_counties is
# sorted by date; TODO confirm).
latest_cases_per100k = df_counties.droplevel([1,2])[['cases_per100k']].unstack('fips').iloc[-1].unstack(0).reset_index()

# Drop any cases_per100k left by a previous run of this cell, so re-running it
# refreshes the column instead of producing cases_per100k_x / cases_per100k_y.
df_ny_pop = pd.merge(df_ny_pop.drop(columns=['cases_per100k'], errors='ignore'),
                     latest_cases_per100k,
                     how='left', on='fips')
df_ny_pop.to_csv('./data/nys_region_fips.csv')
df_ny_pop.head()

Unnamed: 0,state,county,fips,pop2019,pop2019_18plus,nys_region,gov_econ_region,cases_per100k
0,NY,Albany,36001,305506,242266.258,Rest of NYS,Capital Region,7847.963706
1,NY,Allegany,36003,46091,36550.163,Rest of NYS,Western New York,7033.911176
2,NY,Bronx,36005,1418207,1124638.151,NYC,New York City,
3,NY,Broome,36007,190488,151056.984,Rest of NYS,Southern Tier,9322.372013
4,NY,Cattaraugus,36009,76117,60360.781,Rest of NYS,Western New York,6923.5519


In [9]:
# Tag each row of the public NYS DOH series with its county's economic region
# and FIPS code (left join on county name).
df_nys_pub_enhanced = df_nys_pub.reset_index().merge(
    df_ny_pop[['county','gov_econ_region','fips']],
    how='left',
    on='county',
)

df_nys_pub_enhanced.head()


Unnamed: 0,county,dt,new_positives,cumulative_number_of_positives,total_number_of_tests_performed,cumulative_number_of_tests_performed,gov_econ_region,fips
0,Albany,2020-03-01,0,0,0,0,Capital Region,36001
1,Albany,2020-03-02,0,0,0,0,Capital Region,36001
2,Albany,2020-03-03,0,0,0,0,Capital Region,36001
3,Albany,2020-03-04,0,0,0,0,Capital Region,36001
4,Albany,2020-03-05,0,0,3,3,Capital Region,36001


In [10]:
# Mobility columns carried from the county panel.
mvmt_cols = [
    'retail_and_recreation_percent_change_from_baseline',
    'grocery_and_pharmacy_percent_change_from_baseline',
    'parks_percent_change_from_baseline',
    'transit_stations_percent_change_from_baseline',
    'workplaces_percent_change_from_baseline',
    'residential_percent_change_from_baseline',
]

# NY rows of the county panel, flattened, then inner-joined to the region
# labels on fips.
df_ny_mvmt = (
    df_counties.query('state=="NY"')[['pop2019','cases_per100k']+mvmt_cols]
    .copy()
    .reset_index()
    .merge(df_ny_pop[['fips','nys_region','gov_econ_region']], on='fips')
)
df_ny_mvmt.head()

Unnamed: 0,dt,state,county,fips,pop2019,cases_per100k,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline,nys_region,gov_econ_region
0,2020-02-15,NY,Albany,36001,,,8.0,-4.0,45.0,4.0,0.0,0.0,Rest of NYS,Capital Region
1,2020-02-16,NY,Albany,36001,,,7.0,-6.0,8.0,-1.0,2.0,-1.0,Rest of NYS,Capital Region
2,2020-02-17,NY,Albany,36001,,,11.0,2.0,-23.0,-7.0,-41.0,9.0,Rest of NYS,Capital Region
3,2020-02-18,NY,Albany,36001,,,-7.0,-7.0,-33.0,0.0,-9.0,3.0,Rest of NYS,Capital Region
4,2020-02-19,NY,Albany,36001,,,4.0,-3.0,-21.0,2.0,-8.0,2.0,Rest of NYS,Capital Region


In [11]:
this_region_type = 'gov_econ_region'

# Map input: the 'state' column is overwritten with the region label before it
# becomes part of the index.
df_counties_4map = (
    df_ny_mvmt[['dt','state','county','fips',this_region_type,'cases_per100k']]
    .assign(state=lambda frame: frame[this_region_type])
    .set_index(['dt','state','county','fips'])
)
df_counties_4map.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,gov_econ_region,cases_per100k
dt,state,county,fips,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-02-15,Capital Region,Albany,36001,Capital Region,
2020-02-16,Capital Region,Albany,36001,Capital Region,
2020-02-17,Capital Region,Albany,36001,Capital Region,
2020-02-18,Capital Region,Albany,36001,Capital Region,
2020-02-19,Capital Region,Albany,36001,Capital Region,


In [12]:
# FIPS codes of the counties assigned to one economic region.
region_name = 'New York City'
l_fips = df_ny_pop.loc[df_ny_pop['gov_econ_region'] == region_name, 'fips'].tolist()
l_fips

['36005', '36047', '36061', '36081', '36085']

In [13]:
# Vaccinations initiated, summed per date across the region's counties.
df_cancounties.query('fips in @l_fips')['actuals.vaccinationsInitiated'].groupby(level='dt').sum()

dt
2020-01-22          0.0
2020-01-23          0.0
2020-01-24          0.0
2020-01-25          0.0
2020-01-26          0.0
                ...    
2021-04-20    3333996.0
2021-04-21          0.0
2021-04-22          0.0
2021-04-23          0.0
2021-04-24          0.0
Name: actuals.vaccinationsInitiated, Length: 459, dtype: float64

In [14]:
# Vaccine-hesitancy rows for the region's counties.
df_vax_hes.query('fips in @l_fips')


Unnamed: 0,fips,county,state,est_vax_hes,est_vax_hes_strong,pop2019,pop2019_18plus,est_vax_hes_pop_18plus,est_vax_hes_strong_pop_18plus
1949,36047,"Kings County, New York",NY,0.14,0.06,2559903,2030003.079,284200.43106,121800.18474
2001,36081,"Queens County, New York",NY,0.11,0.05,2253858,1787309.394,196604.03334,89365.4697
2038,36005,"Bronx County, New York",NY,0.16,0.07,1418207,1124638.151,179942.10416,78724.67057
2043,36061,"New York County, New York",NY,0.11,0.05,1628706,1291563.858,142072.02438,64578.1929
2065,36085,"Richmond County, New York",NY,0.12,0.05,476143,377581.399,45309.76788,18879.06995


In [15]:
# Column totals over the region's hesitancy rows, keeping the four population counts.
df_vax_hes.query('fips in @l_fips').sum().loc[
    ['pop2019', 'pop2019_18plus', 'est_vax_hes_pop_18plus', 'est_vax_hes_strong_pop_18plus']]

pop2019                               8336817
pop2019_18plus                    6611095.881
est_vax_hes_pop_18plus           848128.36082
est_vax_hes_strong_pop_18plus    373347.58786
dtype: object

In [16]:
# Column totals over the region's rows of the static county table
# (string columns are concatenated by .sum()).
df_ny_pop.query('fips in @l_fips').sum()

state                                                     NYNYNYNYNY
county                              BronxKingsNew YorkQueensRichmond
fips                                       3600536047360613608136085
pop2019                                                      8336817
pop2019_18plus                                           6611095.881
nys_region                                           NYCNYCNYCNYCNYC
gov_econ_region    New York CityNew York CityNew York CityNew Yor...
cases_per100k                                                    0.0
dtype: object

In [17]:
def make_model_dict_nys(region_name, df_ny_pop, df_nys, covid_params, d_to_forecast = 75,
                        df_mvmt=pd.DataFrame(), df_interventions=pd.DataFrame(), df_can=pd.DataFrame()):
    """Assemble the model input dictionary for one NYS economic region.

    Parameters
    ----------
    region_name : str
        A value of ``df_ny_pop.gov_econ_region``; also used as the region key
        into ``df_nys``.
    df_ny_pop : DataFrame
        Static county table with ``gov_econ_region``, ``fips``, ``pop2019`` and
        ``pop2019_18plus`` columns.
    df_nys : DataFrame
        Metrics in columns; ``df_nys[metric][region_name]`` must yield that
        region's date-indexed series (the caller passes the county data summed
        by region and date).
    covid_params : dict
        Base parameters; copied, never modified.
    d_to_forecast : int-like
        Stored as ``int`` under ``'d_to_forecast'``.
    df_mvmt, df_interventions : DataFrame
        Stored unchanged in the result. The shared default frames are never
        mutated here.
    df_can : DataFrame
        County-level vaccination data, queried by ``fips`` and grouped on a
        ``dt`` index level. When empty (the default) or None, the
        notebook-level ``df_cancounties`` is used instead.

    Returns
    -------
    dict
        Keys set here: ``region_code``, ``region_name``, ``tot_pop``,
        ``tot_pop_18plus``, ``df_hist``, ``covid_params``, ``df_rts``,
        ``d_to_forecast``, ``df_mvmt``, ``df_interventions``, ``df_vax_hes``,
        ``df_vax_fore``, ``footnote_str``, ``chart_title``; plus whatever
        ``est_all_rts`` adds (at least ``df_rts_conf``).

    Notes
    -----
    Vaccine-hesitancy figures are read from the notebook-level ``df_vax_hes``.
    """
    model_dict = {}

    l_fips = df_ny_pop[df_ny_pop.gov_econ_region == region_name]['fips'].to_list()
    fips_filter = f'fips in {l_fips}'
    df_region_pop = df_ny_pop.query(fips_filter)

    model_dict['region_code'] = region_name
    model_dict['region_name'] = region_name
    # Sum only the numeric column needed instead of summing (and concatenating)
    # every column, including the string ones.
    model_dict['tot_pop'] = df_region_pop['pop2019'].sum()
    model_dict['tot_pop_18plus'] = df_region_pop['pop2019_18plus'].sum()

    model_dict['df_hist'] = pd.DataFrame()

    # Each series is used only when it has more than 14 non-null observations.
    if df_nys['Cumulative hosp deaths_county'][model_dict['region_code']].dropna().shape[0] > 14:
        model_dict['df_hist']['deaths_tot'] = df_nys['Cumulative hosp deaths_county'][model_dict['region_code']]
        # Blank out 2020-03-28 and interpolate across it.
        model_dict['df_hist'].loc['2020-03-28', 'deaths_tot'] = np.nan
        model_dict['df_hist']['deaths_tot'] = model_dict['df_hist']['deaths_tot'].interpolate()
        deaths_daily = model_dict['df_hist']['deaths_tot'].diff()
        # Negative daily changes are treated as missing.
        model_dict['df_hist']['deaths_daily'] = deaths_daily.mask(deaths_daily < 0)

    if df_nys['Total hospitalizations_county'][model_dict['region_code']].dropna().shape[0] > 14:
        model_dict['df_hist']['hosp_concur'] = df_nys['Total hospitalizations_county'][model_dict['region_code']]

    if df_nys['Gross change total hosp_county'][model_dict['region_code']].dropna().shape[0] > 14:
        # Copy so the NaN assignment below never writes into the caller's frame.
        hosp_admits = df_nys['Gross change total hosp_county'][model_dict['region_code']].copy()
        # If the series peaks on 2020-03-28, blank that day and the next.
        if hosp_admits.idxmax() == pd.Timestamp('2020-03-28'):
            hosp_admits.loc['2020-03-28':'2020-03-29'] = np.nan
        model_dict['df_hist']['hosp_admits'] = hosp_admits.mask(hosp_admits < 0)

    # Cases and tests arrive as daily counts: accumulate, then re-difference and
    # mask negatives.
    model_dict['df_hist']['cases_tot'] = df_nys['Positive tests_county'][model_dict['region_code']].cumsum()
    cases_daily = model_dict['df_hist']['cases_tot'].diff()
    model_dict['df_hist']['cases_daily'] = cases_daily.mask(cases_daily < 0)
    model_dict['df_hist']['pos_neg_tests_tot'] = df_nys['Daily tests_county'][model_dict['region_code']].cumsum()
    pos_neg_tests_daily = model_dict['df_hist']['pos_neg_tests_tot'].diff()
    model_dict['df_hist']['pos_neg_tests_daily'] = pos_neg_tests_daily.mask(pos_neg_tests_daily < 0)

    model_dict['df_hist']['hosp_beds_tot'] = df_nys['hosp_beds_tot'][model_dict['region_code']]
    model_dict['df_hist']['hosp_beds_avail'] = df_nys['hosp_beds_avail'][model_dict['region_code']]

    model_dict['covid_params'] = covid_params.copy()

    model_dict = est_all_rts(model_dict)
    model_dict['df_rts'] = model_dict['df_rts_conf'].unstack().swaplevel(axis=1)['rt']
    # basic_r0 is set to the maximum of the weighted-average Rt series.
    model_dict['covid_params']['basic_r0'] = model_dict['df_rts']['weighted_average'].max()

    model_dict['d_to_forecast'] = int(d_to_forecast)

    model_dict['df_mvmt'] = df_mvmt
    model_dict['df_interventions'] = df_interventions

    # Honor the caller-supplied county vaccination frame; previously the
    # parameter was ignored and the global was always read.
    if df_can is None or df_can.empty:
        df_can = df_cancounties
    df_can_region = df_can.query(fips_filter)
    # Zero totals are replaced with NaN.
    model_dict['df_hist']['vax_initiated'] = df_can_region[
        'actuals.vaccinationsInitiated'].groupby(level='dt').sum().replace(0,np.nan)
    model_dict['df_hist']['vax_completed'] = df_can_region[
        'actuals.vaccinationsCompleted'].groupby(level='dt').sum().replace(0,np.nan)
    model_dict['df_hist']['vax_halfcompleted'] = model_dict['df_hist']['vax_initiated'] - model_dict['df_hist']['vax_completed']

    df_vax_hes_rows = df_vax_hes.query(fips_filter)
    model_dict['df_vax_hes'] = df_vax_hes_rows
    df_vax_hes_region = df_vax_hes_rows[
        ['est_vax_hes_pop_18plus', 'est_vax_hes_strong_pop_18plus']].sum()
    model_dict['covid_params']['est_vax_hes_pop_18plus'] = df_vax_hes_region['est_vax_hes_pop_18plus']
    model_dict['covid_params']['est_vax_hes_strong_pop_18plus'] = df_vax_hes_region['est_vax_hes_strong_pop_18plus']
    model_dict['df_vax_fore'] = forecast_vaccines(model_dict)

    model_dict['footnote_str'] = footnote_str_maker()
    model_dict['chart_title'] = ''

    return model_dict

In [21]:
# Region grouping used by the forecasting cells.
this_region_type = 'gov_econ_region'

# Base parameters handed to make_model_dict_nys.
covid_params = {
    'd_incub': 3.,
    'd_infect': 4.,
    'mort_rt': 0.01,
    'd_in_hosp': 11,
    'hosp_rt': 0.04,
    'd_to_hosp': 7.0,
    'd_in_hosp_mild': 11.0,
    'icu_rt': 13./41.,
    'd_in_icu': 13.0,
    'vent_rt': 0.4,
    'd_til_death': 17.0,  ## Try 24.0 later
    'policy_trigger': True,
    'policy_trigger_once': True,
    'voc_transmissibility': 1.2,
}


In [22]:
# Keep the previously loaded forecast (if there is one) so each region's start
# date search can be seeded from it.
if 'df_fore_allregions' in globals().keys():
    df_prevfore_allregions = df_fore_allregions.copy()

allregion_model_dicts = {}
df_rts_allregs = pd.DataFrame()
df_wavg_rt_conf_allregs = pd.DataFrame()
df_fore_allregions = pd.DataFrame()

nyreg_rep_order = ['Capital Region', 'Central New York', 'Finger Lakes', 'Mid-Hudson', 'Long Island',
 'Mohawk Valley', 'New York City', 'North Country', 'Southern Tier', 'Western New York']

# Loop-invariant aggregates: compute once rather than once per region.
df_nys_by_region = df_nys.groupby(level=[this_region_type,'dt']).sum()
df_mvmt_by_region = df_ny_mvmt.groupby([this_region_type,'dt']).mean()

for region in sorted(df_ny_pop[this_region_type].unique()):
    print(region)

    model_dict = make_model_dict_nys(region,
                                     df_ny_pop,
                                     df_nys_by_region,
                                     covid_params, 200,
                                     df_mvmt=df_mvmt_by_region.loc[region],
                                     df_interventions=df_interventions[
                                         df_interventions.region.isin(['Statewide (all)', region])],
                                     df_can=df_cancounties
                                    )

    this_reg_df_rts = pd.DataFrame(model_dict['df_rts'].stack(), columns=[region])
    this_reg_df_wavg = pd.DataFrame(
        model_dict['df_rts_conf'].sort_index().unstack('metric')['weighted_average'].stack(), columns=[region])

    df_rts_allregs = pd.concat([df_rts_allregs, this_reg_df_rts], axis=1)
    df_wavg_rt_conf_allregs = pd.concat([df_wavg_rt_conf_allregs, this_reg_df_wavg], axis=1)

    # Seed the start-date search from the previous forecast when available.
    # Any failure (no previous forecast, unknown region, no valid rows) falls
    # back to a fixed date; KeyboardInterrupt/SystemExit still propagate.
    try:
        first_guess = df_prevfore_allregions[region].first_valid_index()[0]
    except Exception:
        first_guess = pd.Timestamp('2020-02-17')

    model_dict = model_find_start(first_guess, model_dict)
    df_agg = model_dict['df_agg']
    df_all_cohorts = model_dict['df_all_cohorts']

    print('Peak Hospitalization Date: ', df_agg.hospitalized.idxmax().strftime("%d %b, %Y"))
    print('Peak Hospitalization #: {:.0f}'.format(df_agg.hospitalized.max()))
    print('Peak ICU #: {:.0f}'.format(df_agg.icu.max()))
    print('Peak Ventilator #: {:.0f}'.format(df_agg.vent.max()))

    allregion_model_dicts[region] = model_dict
    df_fore_allregions = pd.concat([df_fore_allregions,pd.DataFrame(df_agg.stack(), columns=[region])], axis=1)

    # Same dict object as stored above, so the stored entry gets the title too.
    model_dict['chart_title'] = r'No Change in Future $R_{t}$ Until Reaching Hospital Capacity Triggers Lockdown'

df_fore_allregions = df_fore_allregions.sort_index()

df_rts_allregs.index.names = ['dt','metric']

# Latest complete row of weighted-average Rt and its 95% bounds, one row per region.
# .copy() makes this an independent frame before columns are (re)assigned.
df_rt_report = df_wavg_rt_conf_allregs.dropna().unstack('metric').iloc[-1].unstack('metric')[
    ['rt','rt_l95','rt_u95']].copy()

df_rt_report['rt'] = df_rt_report['rt'].apply(lambda x: '{:.2f}'.format(x))
# Access by label: integer keys on a string-labelled Series are a deprecated
# positional fallback.
df_rt_report['ci95'] = df_rt_report[['rt_l95','rt_u95']].apply(
    lambda x: '{:.2f} - {:.2f}'.format(x['rt_l95'], x['rt_u95']), axis=1)

# One stamp for every file written below, so a run that crosses midnight still
# produces a consistent set of file names.
run_datestamp = pd.Timestamp.today().strftime("%Y%m%d")

df_rt_report.loc[nyreg_rep_order, ['rt','ci95']].to_csv(
    './output/nys_fore/df_rts4nys_{}.csv'.format(run_datestamp))

### Save Output ###
df_wavg_rt_conf_allregs.unstack('metric').to_csv(
    './output/nys_fore/df_wavg_rt_conf_allregs_{}.csv'.format(run_datestamp),
    encoding='utf-8')
df_wavg_rt_conf_allregs.to_pickle('./output/nys_fore/df_wavg_rt_conf_allregs_{}.pkl'.format(
    run_datestamp))

df_fore_allregions.unstack('metric').to_csv(
    './output/nys_fore/df_fore_allregions_{}.csv'.format(run_datestamp),
    encoding='utf-8')
df_fore_allregions.to_pickle('./output/nys_fore/df_fore_allregions_{}.pkl'.format(run_datestamp))

asmd_filename = './output/nys_fore/allregion_model_dicts_{}.pkl'.format(run_datestamp)

with open(asmd_filename, 'wb') as handle:
    pickle.dump(allregion_model_dicts, handle, protocol=pickle.HIGHEST_PROTOCOL)

display(df_rt_report.loc[nyreg_rep_order, ['rt','ci95']])


Capital Region
deaths shifted by:  -20
This guess:  2020-02-17 00:00:00
This rmse:  4342.063350679423
Change in rmse:  -1
Average Error:  -995.1399219979925
This guess:  2020-02-24 00:00:00
This rmse:  1512.8757208159057
Change in rmse:  -2829.187629863517
Average Error:  -321.0077488060597
This guess:  2020-03-02 00:00:00
This rmse:  278.93806731170963
Change in rmse:  -1233.937653504196
Average Error:  16.349678604096397
This guess:  2020-03-01 00:00:00
This rmse:  316.65861977774085
Change in rmse:  37.72055246603122
Average Error:  -18.783399902091666
Best starting date:  2020-03-02 00:00:00
Peak Hospitalization Date:  16 Jan, 2021
Peak Hospitalization #: 323
Peak ICU #: 103
Peak Ventilator #: 41
Central New York
deaths shifted by:  -20
This guess:  2020-02-17 00:00:00
This rmse:  7189.214404133761
Change in rmse:  -1
Average Error:  -975.9224788071216
This guess:  2020-02-24 00:00:00
This rmse:  3894.7132739924987
Change in rmse:  -3294.501130141262
Average Error:  -509.0375753365

This rmse:  4164.236279316855
Change in rmse:  -2240.9898169056887
Average Error:  -169.8375393678396
This guess:  2020-03-02 00:00:00
This rmse:  2576.2646057475054
Change in rmse:  -1587.9716735693496
Average Error:  -101.56100960360368
This guess:  2020-03-09 00:00:00
This rmse:  1466.3898937553113
Change in rmse:  -1109.874711992194
Average Error:  -52.70542953032183
This guess:  2020-03-16 00:00:00
This rmse:  868.8619625486257
Change in rmse:  -597.5279312066856
Average Error:  -23.18218279184561
This guess:  2020-03-17 00:00:00
This rmse:  825.2107494343477
Change in rmse:  -43.651213114277994
Average Error:  -20.54538878220494
This guess:  2020-03-18 00:00:00
This rmse:  783.1778943078249
Change in rmse:  -42.032855126522804
Average Error:  -17.843239309723007
This guess:  2020-03-19 00:00:00
This rmse:  741.6335638967367
Change in rmse:  -41.54433041108814
Average Error:  -14.959856619870559
This guess:  2020-03-20 00:00:00
This rmse:  701.1076270644716
Change in rmse:  -40.52

metric,rt,ci95
Capital Region,0.81,0.55 - 1.07
Central New York,0.9,0.54 - 1.25
Finger Lakes,0.9,0.66 - 1.13
Mid-Hudson,0.76,0.61 - 0.91
Long Island,0.7,0.49 - 0.91
Mohawk Valley,0.81,0.52 - 1.11
New York City,0.77,0.59 - 0.95
North Country,0.93,0.58 - 1.28
Southern Tier,1.02,0.85 - 1.18
Western New York,0.79,0.61 - 0.97


In [23]:
df_tab = df_ny_pop.groupby(this_region_type).sum()[['pop2019']].copy()
# df_tab = df_tab.set_index('state')
df_ger = df_nys.groupby(level=[this_region_type,'dt']).sum().unstack(0)

# Days to Hosp Capacity
# df_tab['hosp_cap'] = df_tab['pop2019'] / 1000 * 2.7 * 0.2
df_tab['hosp_cap'] = df_ger['hosp_beds_avail'].fillna(method='ffill').add(
    df_ger['Total hospitalizations_county'].fillna(method='ffill')).iloc[-1]
df_hosp_conur = df_fore_allregions[[col for col in df_fore_allregions.columns if col != 'US']] \
                    .unstack('dt').loc['hospitalized'].T.unstack(0).loc[
                pd.Timestamp.today() - pd.Timedelta(days=30):]
df_hosp_cap_rat = df_hosp_conur.div(df_tab['hosp_cap'])
df_tab['Forecasted Hospital Capacity Date'] = pd.to_datetime(
    df_hosp_cap_rat.mask(df_hosp_cap_rat < 1).apply(lambda x: x.first_valid_index()) )
df_tab['Days to Hospital Capacity'] = (df_tab['Forecasted Hospital Capacity Date']-pd.Timestamp.today()).dt.days
df_tab['Forecasted Hospital Capacity Date'] = df_tab['Forecasted Hospital Capacity Date'].dt.strftime("%m/%d/%y")
df_tab['Forecasted Hospital Capacity Date'] = df_tab['Forecasted Hospital Capacity Date'].fillna(
    'Beds Available Through Forecast')
    
# Every metric below is scaled by regional population to a per-100k rate. The forward-filled
# source frames are built once and reused (.ffill() replaces the deprecated
# fillna(method='ffill') spelling). calc_trend is a helper defined outside this cell; its
# second argument is presumably a sensitivity threshold -- TODO confirm against the helper.
_pop_by_region = df_tab.pop2019

# Deaths
_deaths_cum = df_ger['Cumulative hosp deaths_county'].ffill()
df_tab['Total Deaths per 100k'] = _deaths_cum.iloc[-1].div(_pop_by_region).mul(1e5)
df_tab['14-Day Avg Daily Deaths per 100k'] = _deaths_cum.diff().rolling(14).mean().iloc[-1] \
    .div(_pop_by_region).mul(1e5)
df_tab['deaths_trend'] = calc_trend(
    _deaths_cum.div(_pop_by_region).mul(1e5).diff(),
    0.02)

# Cases
# 'Positive tests_county' is accumulated here, so the cumulative total is rebuilt first.
_cases_cum = df_ger['Positive tests_county'].cumsum().ffill()
df_tab['Total Cases per 100k'] = _cases_cum.iloc[-1].div(_pop_by_region).mul(1e5)
df_tab['14-Day Avg Daily Cases per 100k'] = _cases_cum.diff().rolling(14).mean().iloc[-1] \
    .div(_pop_by_region).mul(1e5)
df_tab['cases_trend'] = calc_trend(
    _cases_cum.div(_pop_by_region).mul(1e5).diff(),
    0.5)

# Positivity Rate
# Ratio of 14-day sums (positives / tests), not an average of daily ratios.
_pos_14d = df_ger['Positive tests_county'].rolling(14).sum()
_tests_14d = df_ger['Daily tests_county'].rolling(14).sum()
df_tab['Positivity Rate'] = _pos_14d.iloc[-1].div(_tests_14d.iloc[-1])
df_tab['positivity_trend'] = calc_trend(
    _pos_14d.div(_tests_14d),
    0.0005)

# Hospitalizations
_hosp_concur = df_ger['Total hospitalizations_county'].ffill()
df_tab['Hospitalized per 100k'] = _hosp_concur.iloc[-1].div(_pop_by_region).mul(1e5)
df_tab['hospconcur_trend'] = calc_trend(
    _hosp_concur.div(_pop_by_region).mul(1e5),
    0.01)
_hosp_admits = df_ger['Gross change total hosp_county'].ffill()
df_tab['14-Day Avg Daily Hosp Admits per 100k'] = _hosp_admits.rolling(14).mean().iloc[-1] \
    .div(_pop_by_region).mul(1e5)
df_tab['hospadmits_trend'] = calc_trend(
    _hosp_admits.div(_pop_by_region).mul(1e5),
    0.01)

# Modeled
# Today's modelled 'exposed' + 'infectious' counts, summed and scaled to per-100k.
df_tab['Model Est\'d Active Infections per 100k'] = \
    df_fore_allregions.loc[pd.Timestamp.today().normalize()].query("metric in ['exposed', 'infectious']").sum()\
    .div(df_tab.pop2019).mul(1e5)
# Sorting here is what later makes the positional index usable as the "riskiest" rank.
df_tab = df_tab.sort_values(by='Model Est\'d Active Infections per 100k', ascending=False)

# Pivot the Rt estimates once (metric becomes the outer column level), forward-fill gaps,
# and take the last row of each metric. .ffill() replaces the deprecated
# fillna(method='ffill'); filling before or after selecting a metric gives the same values.
_rt_wide = df_wavg_rt_conf_allregs.unstack('metric').swaplevel(axis=1).ffill()
df_tab['Current Reproduction Rate (Rt)'] = _rt_wide['rt'].iloc[-1]
df_tab['rt_l68'] = _rt_wide['rt_l68'].iloc[-1]
df_tab['rt_u68'] = _rt_wide['rt_u68'].iloc[-1]

# Per-region vaccination and immunity figures, written one cell at a time into the row of
# df_tab labelled by the model's region name.
for model_dict in allregion_model_dicts.values():
    _row = model_dict['region_name']
    _vax_initiated = model_dict['df_hist']['vax_initiated']
    _model_agg = model_dict['df_agg']

    # Estimated vaccine-hesitant adults as a share of the adult population.
    _hesitant_share = model_dict['covid_params']['est_vax_hes_pop_18plus'] / model_dict['tot_pop_18plus']
    df_tab.loc[_row, 'Vaccine Hesistant, % of Adults'] = _hesitant_share

    # Latest available 7-day mean of the day-over-day change in initiated vaccinations.
    _daily_initiated = _vax_initiated.diff().rolling(7).mean().dropna()
    df_tab.loc[_row, 'Daily Vaccines Initiated'] = _daily_initiated.iloc[-1]

    # Latest reported cumulative initiations as a share of total population.
    df_tab.loc[_row, 'Vaccines Initiated, % of Pop.'] = \
        _vax_initiated.dropna().iloc[-1] / model_dict['tot_pop']

    # Share of the population that is recovered or vaccinated-never-infected as of today.
    _immune_share = _model_agg[['recovered', 'vaccinated_never_infected']].sum(axis=1) \
        .div(model_dict['tot_pop'])
    df_tab.loc[_row, 'Vaccine & Acquired Immunity, % of Pop.'] = \
        _immune_share.loc[pd.Timestamp.today().normalize()]

    # Basic R0 scaled by the susceptible share; the first date where it is <= 0.9 is
    # reported as the herd-immunity date (None when that never happens).
    eff_r0_postimmune = model_dict['covid_params']['basic_r0'] * _model_agg['susceptible'].div(
        model_dict['tot_pop'])
    _below_threshold = eff_r0_postimmune.mask(eff_r0_postimmune>0.9)
    df_tab.loc[_row, 'Forecasted Date Herd Immunity Achieved'] = _below_threshold.first_valid_index()

# Remove Index
# The frame was sorted by modelled active infections (descending) above, so the fresh
# positional index doubles as the risk ranking.
df_tab = df_tab.reset_index()
df_tab['Riskiest Region Rank'] = df_tab.index + 1


# Formatting
# The table was grouped by this_region_type, so that is the column reset_index() restores;
# reading it by that name (instead of a hard-coded 'gov_econ_region') keeps this cell
# working if the region type is changed.
df_tab['Region'] = df_tab[this_region_type]  # + ' (' + df_tab.state + ')'
dict_col_names = {'pop2019': 'Population'}
df_tab = df_tab.rename(columns=dict_col_names)

# Shared number formats for the summary table (str.format templates).
_FMT_COUNT = '{0:,.0f}'      # thousands separator, no decimals
_FMT_ONE_DP = '{0:,.1f}'     # thousands separator, one decimal
_FMT_TWO_DP = '{0:,.2f}'     # thousands separator, two decimals
_FMT_PCT_WHOLE = '{0:,.0%}'  # whole-number percentage

# Display format per column. Columns absent from this mapping are text/date columns.
format_dict = {
    'Riskiest Region Rank': '{0:.0f}',
    'Population': _FMT_COUNT,
    "Model Est'd Active Infections per 100k": _FMT_COUNT,
    'Current Reproduction Rate (Rt)': '{0:.2f}',
    'Total Cases per 100k': _FMT_COUNT,
    '14-Day Avg Daily Cases per 100k': _FMT_ONE_DP,
    'Positivity Rate': '{:.1%}',
    'Total Deaths per 100k': _FMT_COUNT,
    '14-Day Avg Daily Deaths per 100k': _FMT_ONE_DP,
    'Hospitalized per 100k': _FMT_TWO_DP,
    '14-Day Avg Daily Hosp Admits per 100k': _FMT_TWO_DP,
    "Model Est'd Active Infections": _FMT_COUNT,
    'Total Cases': _FMT_COUNT,
    '14-Day Avg Daily Cases': _FMT_ONE_DP,
    'Total Deaths': _FMT_COUNT,
    '14-Day Avg Daily Deaths': _FMT_ONE_DP,
    'Hospitalized': _FMT_COUNT,
    '14-Day Avg Daily Hosp Admits': _FMT_TWO_DP,
    'Days to Hospital Capacity': _FMT_COUNT,
    'Vaccine Hesistant, % of Adults': _FMT_PCT_WHOLE,
    'Daily Vaccines Initiated': _FMT_COUNT,
    'Vaccines Initiated, % of Pop.': _FMT_PCT_WHOLE,
    'Vaccine & Acquired Immunity, % of Pop.': _FMT_PCT_WHOLE,
}

# Columns of the published table, in display order, assembled by theme.
output_cols = (
    ['Riskiest Region Rank', 'Region', 'Population']
    + ["Model Est'd Active Infections per 100k", 'Current Reproduction Rate (Rt)']
    + ['Vaccine Hesistant, % of Adults', 'Daily Vaccines Initiated',
       'Vaccines Initiated, % of Pop.', 'Vaccine & Acquired Immunity, % of Pop.',
       'Forecasted Date Herd Immunity Achieved']
    + ['Days to Hospital Capacity', 'Forecasted Hospital Capacity Date']
    + ['Total Cases per 100k', '14-Day Avg Daily Cases per 100k', 'Positivity Rate']
    + ['Total Deaths per 100k', '14-Day Avg Daily Deaths per 100k']
    + ['Hospitalized per 100k', '14-Day Avg Daily Hosp Admits per 100k']
)

# Turn each numeric output column into display text using its template from format_dict.
for key in output_cols:
    if key not in format_dict:
        # Text/date columns have no numeric template; they are handled separately,
        # so skipping them here is expected and not an error.
        continue
    try:
        df_tab[key] = df_tab[key].map(format_dict[key].format)
    except (KeyError, TypeError, ValueError) as err:
        # Stay best-effort, but report what actually went wrong instead of hiding every
        # failure (including interrupts) behind a bare except.
        print('couldnt format ', key, repr(err))

# The herd-immunity dates were written cell-by-cell, so coerce to datetime before using
# the .dt accessor (a no-op when the column is already datetime64; missing dates stay NaN).
df_tab['Forecasted Date Herd Immunity Achieved'] = pd.to_datetime(
    df_tab['Forecasted Date Herd Immunity Achieved']).dt.strftime('%B %d, %Y')

# Append the 68% interval bounds to the already-formatted Rt point estimate.
df_tab['Current Reproduction Rate (Rt)'] = df_tab['Current Reproduction Rate (Rt)'] \
    + df_tab['rt_l68'].map(" ({0:.2f} - ".format) + df_tab['rt_u68'].map(" {0:.2f})".format)

# Snapshot taken before the trend markers are appended; this is the frame saved as CSV.
df_tab_nofmt = df_tab[output_cols].copy()

# Displayed metric -> column holding its trend marker (as produced by calc_trend).
_trend_col_for = {
    '14-Day Avg Daily Deaths per 100k': 'deaths_trend',
    '14-Day Avg Daily Cases per 100k': 'cases_trend',
    'Positivity Rate': 'positivity_trend',
    'Hospitalized per 100k': 'hospconcur_trend',
    '14-Day Avg Daily Hosp Admits per 100k': 'hospadmits_trend',
}
for _metric_col, _trend_col in _trend_col_for.items():
    # Plain string concatenation: formatted value followed by its marker.
    df_tab[_metric_col] = df_tab[_metric_col] + df_tab[_trend_col].astype(str)

# Drop helper columns; keep only the published ones, in display order.
df_tab = df_tab[output_cols]

# df_tab was just produced by column selection; take an explicit copy so the masked .loc
# assignments below write to this frame without a SettingWithCopyWarning.
df_tab = df_tab.copy()

rt_temp = df_tab['Current Reproduction Rate (Rt)'].copy()
# rt_temp holds display strings such as "0.79 (0.70 - 0.88)". Classify on the parsed
# leading number instead of comparing text: lexicographically "1.00" > "1.0", "1.10" > "1.1"
# and "nan" > "1.1", which put boundary and missing values in the wrong bucket.
_rt_point = pd.to_numeric(rt_temp.astype(str).str.split().str[0], errors='coerce')
df_tab.loc[_rt_point > 1.1, 'Current Reproduction Rate (Rt)'] = rt_temp + '<span style="color: red">🟥</span>'
df_tab.loc[(_rt_point <= 1.1) & (_rt_point > 1.0), 'Current Reproduction Rate (Rt)'] = rt_temp + '<span style="color: #ffcc00">🟡</span>'
df_tab.loc[_rt_point <= 1.0, 'Current Reproduction Rate (Rt)'] = rt_temp + '<span style="color: green">🟢</span>'
# Rows whose Rt could not be parsed (e.g. "nan") keep their text and get no marker.

# The day counts were formatted with thousands separators; strip them so values such as
# "1,234" parse as numbers instead of being coerced to NaN.
hosp_temp = pd.to_numeric(
    df_tab['Days to Hospital Capacity'].astype(str).str.replace(',', '', regex=False),
    errors='coerce')
# Two weeks or less to capacity: prefix an alert image.
df_tab.loc[hosp_temp <= 14, 'Days to Hospital Capacity'] = '<img src="https://media.giphy.com/media/daDPy7kxfE1TfxLzNg/giphy.gif" width=15 height=15><span>' + df_tab['Days to Hospital Capacity'] + '</span>'
# Far-off or never-reached capacity is shown as "120+".
df_tab.loc[hosp_temp >= 120, 'Days to Hospital Capacity'] = '120+'
df_tab['Days to Hospital Capacity'] = df_tab['Days to Hospital Capacity'].replace('nan', '120+')

# Render the table, then prepend the charset declaration, the sort script and the
# zebra-stripe style, and tag the table as sortable.
tab_html = df_tab.to_html(index=False, border=0, justify='center')
tab_html = '<meta charset="utf-8"/><script src="https://www.covidoutlook.info/assets/js/sorttable.js" type="text/javascript"></script>' \
           + '<style> table tr:nth-child(even) { background-color:#dddddd; }</style>' \
           + tab_html.replace('<table', '<table class="sortable"')
# Colour the trend arrows: down is green, up is red.
tab_html = tab_html.replace('▼','<span style="color: green">▼</span>').replace('▲','<span style="color: red">▲</span>')
# to_html escaped the markup embedded in the cells; undo that so the spans/images render.
tab_html = tab_html.replace('&lt;','<').replace('&gt;','>')

# One date stamp for both output files so they always share the same suffix.
_table_stamp = pd.Timestamp.today().strftime("%Y%m%d")

# The page declares charset utf-8 and contains emoji, so write it as UTF-8 explicitly
# rather than relying on the platform default; the context manager closes the handle
# even if the write fails.
with open('./output/nys_fore/nys_summ_table_{}.html'.format(_table_stamp), "w", encoding='utf-8') as text_file:
    text_file.write(tab_html)
df_tab_nofmt.to_csv('./output/nys_fore/nys_summ_table_{}.csv'.format(_table_stamp),
              index=False)

display(HTML(tab_html))


couldnt find  Region
couldnt find  Forecasted Date Herd Immunity Achieved
couldnt find  Forecasted Hospital Capacity Date


Riskiest Region Rank,Region,Population,Model Est'd Active Infections per 100k,Current Reproduction Rate (Rt),"Vaccine Hesistant, % of Adults",Daily Vaccines Initiated,"Vaccines Initiated, % of Pop.","Vaccine & Acquired Immunity, % of Pop.",Forecasted Date Herd Immunity Achieved,Days to Hospital Capacity,Forecasted Hospital Capacity Date,Total Cases per 100k,14-Day Avg Daily Cases per 100k,Positivity Rate,Total Deaths per 100k,14-Day Avg Daily Deaths per 100k,Hospitalized per 100k,14-Day Avg Daily Hosp Admits per 100k
1,Western New York,1377094,292,0.79 (0.70 - 0.88)🟢,14%,5592,42%,38%,"May 13, 2021",120+,Beds Available Through Forecast,8784,38.0▼,4.2%▲,108,0.3▲,22.73▲,3.17▲
2,Southern Tier,628855,249,1.02 (0.93 - 1.10)🟡,15%,2042,41%,36%,"May 02, 2021",120+,Beds Available Through Forecast,7258,22.2▶,0.8%▶,67,0.2▲,13.68▲,2.12▲
3,New York City,8336817,213,0.77 (0.68 - 0.86)🟢,13%,42858,40%,45%,"June 08, 2021",120+,Beds Available Through Forecast,10887,28.6▼,2.7%▼,243,0.3▼,18.53▼,2.24▼
4,Mid-Hudson,2323778,186,0.76 (0.69 - 0.84)🟢,13%,9894,43%,43%,"June 30, 2021",120+,Beds Available Through Forecast,11993,24.4▼,2.9%▼,166,0.2▼,13.56▼,1.90▼
5,Mohawk Valley,483086,181,0.81 (0.66 - 0.96)🟢,15%,1698,38%,38%,"April 18, 2021",120+,Beds Available Through Forecast,8222,19.0▼,1.7%▶,109,0.1▶,9.94▼,1.54▼
6,Finger Lakes,1200799,177,0.90 (0.78 - 1.01)🟢,14%,4866,43%,38%,"May 06, 2021",120+,Beds Available Through Forecast,7815,28.0▶,2.9%▲,79,0.1▶,17.49▲,2.38▲
7,Central New York,772750,166,0.90 (0.72 - 1.08)🟢,15%,3425,44%,39%,"May 06, 2021",120+,Beds Available Through Forecast,7471,18.0▶,1.4%▲,79,0.1▲,8.02▲,1.64▲
8,Capital Region,1081179,130,0.81 (0.68 - 0.94)🟢,13%,5038,47%,40%,"May 23, 2021",120+,Beds Available Through Forecast,6989,16.2▼,1.9%▼,82,0.2▲,8.69▼,1.26▼
9,Long Island,2833525,130,0.70 (0.60 - 0.81)🟢,11%,10809,44%,47%,"June 06, 2021",120+,Beds Available Through Forecast,13258,26.6▼,2.7%▼,212,0.2▼,16.80▼,2.17▼
10,North Country,415678,108,0.93 (0.75 - 1.11)🟢,18%,763,41%,37%,"April 17, 2021",120+,Beds Available Through Forecast,5599,13.5▼,1.5%▶,34,0.1▶,6.01▲,1.00▲


In [29]:
# Scratch PDFs that are overwritten for every region, plus the list that collects the
# finished per-region report paths.
cover_file = './output/nys_fore/coverpage.pdf'
chart_file = './output/nys_fore/charts.pdf'
l_pdfs_out = []

# Chart names; this list is not referenced again in the visible part of the notebook.
l_charts = [
    'ch_rt_confid',
    'ch_positivetests',
    'ch_totaltests',
    'ch_postestshare',
    'ch_vax_status',
    'ch_vax_daily',
    'ch_rt_scen_explanation',
    'ch_detection_rt',
    'ch_statemap',
    'ch_googmvmt',
    'ch_rts',
    'ch_exposed_infectious',
    'ch_hosp_concur',
    'ch_deaths_tot',
    'ch_population_share',
    'ch_cumul_infections',
    'ch_daily_exposures',
    'ch_hosp_admits',
    'ch_daily_deaths',
]

# Chart name -> plotting function. The report pages follow this dict's insertion order.
# (ch_hosp and ch_doubling_rt are currently disabled.)
d_chart_fns = {
    'ch_rt_confid': ch_rt_confid,
    'ch_positivetests': ch_positivetests,
    'ch_totaltests': ch_totaltests,
    'ch_postestshare': ch_postestshare,
    'ch_detection_rt': ch_detection_rt,
    'ch_googmvmt': ch_googmvmt,
    'ch_rts': ch_rts,
    'ch_exposed_infectious': ch_exposed_infectious,
    'ch_hosp_concur': ch_hosp_concur,
    'ch_deaths_tot': ch_deaths_tot,
    'ch_population_share': ch_population_share,
    'ch_cumul_infections': ch_cumul_infections,
    'ch_daily_exposures': ch_daily_exposures,
    'ch_hosp_admits': ch_hosp_admits,
    'ch_daily_deaths': ch_daily_deaths,
    'ch_vax_status': ch_vax_status,
    'ch_vax_daily': ch_vax_daily,
    'ch_rt_scen_explanation': ch_rt_scen_explanation,
}

# Names of the forecast-based charts; not referenced again in the visible part of the notebook.
forecast_charts = [
    'ch_exposed_infectious',
    'ch_hosp',
    'ch_hosp_concur',
    'ch_deaths_tot',
    'ch_population_share',
    'ch_cumul_infections',
    'ch_daily_exposures',
    'ch_hosp_admits',
    'ch_daily_deaths',
]

# if pd.Timestamp.today() < pd.Timestamp('2020-11-25'):
#     d_chart_fns = {'ch_deaths_tot': ch_deaths_tot}
#     forecast_charts = ['ch_deaths_tot']

# Bookmark bookkeeping for the merged report: one parent entry per region and exactly one
# child entry per PDF page, in the order the pages are generated.
l_parent_bookmarks = []
l_child_bookmarks = []

for region in sorted(df_ny_pop[this_region_type].unique()):
    print(region)
    l_parent_bookmarks.append(region)
    model_dict = allregion_model_dicts[region]

    model_dict['footnote_str'] = footnote_str_maker()
    model_dict['df_counties'] = df_counties_4map[df_counties_4map.gov_econ_region == model_dict['region_name']]

    # Cover page: county map of case changes, exported to the scratch cover PDF.
    fig = ch_statemap_casechange(model_dict, counties_geo)
    fig = add_plotly_footnote(fig)
    # The orca server is shut down before each export (the original did this twice in a
    # row); presumably a workaround for a stale export server -- TODO confirm still needed.
    pio.orca.shutdown_server()
    fig.write_image(cover_file, scale=2)
    l_child_bookmarks.append('New Cases Map')

    # Chart pages: one page per chart function, in d_chart_fns order.
    pdf_obj = PdfPages(chart_file)
    try:
        for ch_name, ch_fn in d_chart_fns.items():
            try:
                if ch_name == 'ch_rt_scen_explanation':
                    ax = ch_fn(model_dict, verbose=True)
                else:
                    ax = ch_fn(model_dict)
                pdf_obj.savefig(bbox_inches='tight', pad_inches=1, optimize=True, facecolor='white')
            except Exception as err:
                # Best-effort: a chart that cannot be drawn is skipped (no page, no
                # bookmark), but the reason is reported instead of being swallowed.
                print('Couldn\'t create {} {} chart. ({!r})'.format(
                    model_dict['region_code'], ch_name, err))
                continue
            finally:
                # Always release the current figure, including after a failed chart.
                plt.close()

            # A page was written, so exactly one bookmark entry must be recorded to keep
            # page numbers and bookmarks aligned. Titles are expected to look like
            # "<prefix>: <chart title>\n..."; anything else gets the sentinel that the
            # bookmarking cell skips.
            try:
                page_title = ax.get_title().split(': ')[1].split('\n')[0]
            except (AttributeError, IndexError):
                page_title = 'NO BOOKMARK'
            l_child_bookmarks.append(page_title)
    finally:
        # Finalize the chart PDF even if something unexpected escapes the loop.
        pdf_obj.close()

    # Concatenate cover + charts into the region's report with Ghostscript.
    pdf_out = './output/nys_fore/coronita_forecast_{}_{}.pdf'.format(
        region.replace(' ','_'), pd.Timestamp.today().strftime("%Y%m%d"))
    gs_cmd = 'gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite -dPDFSETTINGS=/prepress -sOutputFile='
    cmd_str = '{0}{1} {2} {3}'.format(
        gs_cmd, pdf_out, cover_file, chart_file)
    gs_status = os.system(cmd_str)
    if gs_status != 0:
        # Surface a failed merge here rather than as a confusing error in a later cell.
        print('Ghostscript exited with status {} while writing {}'.format(gs_status, pdf_out))
    l_pdfs_out.append(pdf_out)

Capital Region
Central New York
Finger Lakes
Long Island
Mid-Hudson
Mohawk Valley
New York City
North Country
Southern Tier
Western New York


In [30]:
# Merge the per-region reports into one statewide PDF with Ghostscript.
pdf_out = './output/nys_fore/coronita_forecast_{}_{}.pdf'.format(
    'nys', pd.Timestamp.today().strftime("%Y%m%d"))
gs_cmd = 'gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite -dPDFSETTINGS=/prepress -sOutputFile='
# Files are merged in the order they were generated, which is the order the page-level
# bookmarks in l_child_bookmarks were recorded. Re-sorting the paths could diverge from
# that order because spaces in region names were replaced with underscores.
cmd_str = '{0}{1} {2}'.format(
    gs_cmd, pdf_out, ' '.join(l_pdfs_out))
gs_status = os.system(cmd_str)
if gs_status != 0:
    print('Ghostscript exited with status {} while writing {}'.format(gs_status, pdf_out))

# PdfFileWriter / PdfFileReader are imported in the notebook's first cell.
output = PdfFileWriter()

pdf_out_bkmrk = './output/nys_fore/covidoutlook_forecast_{}_{}.pdf'.format(
    'nys', pd.Timestamp.today().strftime("%Y%m%d"))

# The source file stays open until the bookmarked copy has been written, because the
# writer still references the reader's pages at write time. Both handles are closed by
# their context managers, including on error.
with open(pdf_out, 'rb') as merged_pdf_fh:
    input1 = PdfFileReader(merged_pdf_fh)

    region_idx = 0
    parent = None  # top-level until the first region cover page is seen
    for page_num, bookmark in enumerate(l_child_bookmarks):
        output.addPage(input1.getPage(page_num))
        if bookmark == 'New Cases Map':
            # Each cover page starts a new region: add the parent bookmark.
            parent = output.addBookmark(l_parent_bookmarks[region_idx], page_num)
            region_idx += 1
        elif bookmark != 'NO BOOKMARK':
            # Chart page: nest it under the current region.
            output.addBookmark(bookmark, page_num, parent)

    with open(pdf_out_bkmrk, 'wb') as resultPdf:
        output.write(resultPdf)

In [31]:
# Forecast series -> matching historical series in df_nys ('' means there is no history).
series_dict = {'deaths':'Cumulative hosp deaths_county',
               'hospitalized':'Total hospitalizations_county',
               'exposed_daily':''
              }

# Loop-invariant inputs, computed once instead of on every iteration.
_fore_by_metric = df_fore_allregions.unstack(1).swaplevel(axis=1)
_hist_by_region = df_nys.groupby(level=[this_region_type,'dt']).sum()
_csv_stamp = pd.Timestamp.today().strftime("%Y%m%d")

for fore_series, hist_series in series_dict.items():
    forecast = _fore_by_metric[fore_series]
    # Start from an empty frame shaped like the forecast, fill it with history where a
    # historical series exists, then use the forecast for whatever is still missing.
    # (The name df_deaths is kept from the original; it holds whichever series is current.)
    df_deaths = pd.DataFrame(index=forecast.index, columns=forecast.columns)
    if hist_series:
        df_deaths = df_deaths.fillna(_hist_by_region[hist_series].unstack(0))
    df_deaths = df_deaths.fillna(forecast)
    df_deaths['Statewide Total'] = df_deaths.sum(axis=1)
    df_deaths.to_csv('./output/nys_fore/nys_{}_newvaccineforecast_{}.csv'.format(fore_series,
        _csv_stamp))
