In [34]:
%load_ext autoreload
%autoreload 2
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from os.path import join as oj
import math
import pygsheets
import pickle as pkl
import pandas as pd
import load_data
import seaborn as sns
sys.path.append('modeling')
from viz import viz, viz_interactive
from exponential_modeling import estimate_deaths
from fit_and_predict import fit_and_predict

outcomes = ['tot_cases', 'tot_deaths']
df_hospital = load_data.load_hospital_level()
df_county = load_data.load_county_level()
df_county = df_county.sort_values('tot_deaths', ascending=False)
NUM_DAYS_LIST = [1, 2, 3]
# df_county = exponential_modeling.estimate_deaths(df_county) # adds key 
for num_days_in_future in NUM_DAYS_LIST: # 1 is tomorrow
    output_key = f'Predicted Deaths {num_days_in_future}-day'    
    df_county = fit_and_predict(df_county, 
                                method='ensemble', 
                                outcome='deaths',
                                mode='predict_future',
                                target_day=np.array([num_days_in_future]),
                                output_key=output_key
                               )
    
    # extract out vals from list
    vals = df_county[output_key].values
    out = []
    for i in range(vals.shape[0]):
        if np.isnan(vals[i]):
            out.append(0)
        else:
            out.append(vals[i][0])
    df_county[output_key] = out
df = df_hospital.merge(df_county, how='left', on='countyFIPS')
df[outcomes] = df[outcomes].fillna(0)

# aggregate employees by county
total_emp_county = df.groupby('countyFIPS').agg({'Hospital Employees': 'sum'})
total_emp_county = total_emp_county.rename(columns={'Hospital Employees': 'Hospital Employees in County'})
df_county = pd.merge(df_county, total_emp_county, how='left', on='countyFIPS')
df = pd.merge(df, total_emp_county, how='left', on='countyFIPS')

# filter hospitals
df = df[~df['countyFIPS'].isna() & df['IsAcademicHospital'] & df['Hospital Employees'] > 0]
df = df.sort_values(by=['tot_deaths', 'Hospital Employees'], ascending=False)


# fraction of employees out of all county hospitals
df['Frac Hospital Employees of County'] = df['Hospital Employees'] / df['Hospital Employees in County']

In [43]:
for num_days in NUM_DAYS_LIST:
    df[f'Predicted Deaths Hospital {num_days}-day'] = df['Predicted Deaths 1-day'] * df['Frac Hospital Employees of County']
    df[f'Severity {num_days}-day'] = pd.qcut(df[f'Predicted Deaths Hospital {num_days}-day'], 5, labels=False) + 1
#     df[f'Quantile {num_days}-day'] = 

In [42]:
print('max, min', df[f'Quantile 1-day'].max(), df[f'Quantile 1-day'].min())

max, min 4 0


**look at most affected hospitals**

In [None]:
# d.groupby('countyFIPS').head(1).head(5) # look at top counties

In [None]:
# df.head(100)

In [None]:
(df[f'Predicted Deaths {num_days}-day'][nonzero]).head(5)

In [None]:
R, C  = 1, 2
num_days = 1
nonzero = df[f'Predicted Deaths {num_days}-day'] > 0
plt.figure(dpi=300)
plt.subplot(R, C, 1)
plt.hist(df[f'Predicted Deaths {num_days}-day'][nonzero])
plt.xlabel('Predicted Number of Deaths')
plt.ylabel('Num Hospitals')

plt.subplot(R, C, 2)
plt.hist(df[f'Predicted Deaths Hospital {num_days}-day'][nonzero])
plt.xlabel('Predicted Number of Deaths * Num Employees')
plt.ylabel('Num Hospitals')
plt.tight_layout()
plt.show()

In [87]:
df.keys()

Index(['Unnamed: 0', 'CMS Certification Number', 'Hospital Name',
       'Street Address_x', 'City_x', 'State_x', 'ZIP', 'System Affiliation',
       'Type of Facility', 'Hospital Employees',
       ...
       'deaths', 'cases', 'tot_deaths', 'tot_cases', 'Predicted Deaths 1-day',
       'Predicted Deaths 2-day', 'Predicted Deaths 3-day',
       'Hospital Employees in County', 'Frac Hospital Employees of County',
       'Predicted Deaths Hospital-level'],
      dtype='object', length=7518)

In [None]:
df.sort_values('Predicted Deaths Hospital-level', ascending=False)[['Predicted Deaths Hospital-level', 
                                                                    'Hospital Name', 'State_x', 
                                                                    'Hospital Employees', 'tot_deaths']].head(30)

# correlations

In [None]:
plt.plot(df['Predicted Deaths Hospital-level'], df['Occupancy Rate'])

In [None]:
ks = ['ICU Beds', 'Total Beds', 
      'Hospital Employees', 'Registered Nurses',
      'ICU Occupancy Rate', 'Total Occupancy Rate',
      'Mortality national comparison',
      
#       'IsAcademicHospital', 
      'IsUrbanHospital',
      'IsAcuteCareHospital', 
      
      # preds
      'Predicted Deaths 1-day', 'Predicted Deaths 2-day', 'Predicted Deaths Hospital-level',
      
      # county-level stuff
      'tot_deaths', 'tot_cases', 'Hospital Employees in County']



viz.corrplot(df[ks], SIZE=6)

**predicted num deaths**

# useful county-level plots

In [None]:
d = df

R, C = 1, 2
NUM_COUNTIES = 7
plt.figure(dpi=300, figsize=(7, 3.5))


plt.subplot(R, C, 1)
c = 'County Name'
county_names = d[c].unique()[:NUM_COUNTIES]
num_academic_hospitals = []
# d = df[outcome_keys + hospital_keys]
# d = d.sort_values('New Deaths', ascending=False)
for county in county_names:
    num_academic_hospitals.append(d[d[c] == county].shape[0])
plt.barh(county_names[::-1], num_academic_hospitals[::-1]) # reverse to plot top down
plt.xlabel('Number academic hospitals\n(for hospitals where we have data)')

plt.subplot(R, C, 2)
plt.barh(df_county.CountyName[:NUM_COUNTIES].values[::-1], df_county['Hospital Employees in County'][:NUM_COUNTIES][::-1]) # reverse to plot top down
plt.xlabel('# Hospital Employees')

plt.tight_layout()
plt.show()

In [None]:
d = df

R, C = 1, 2
NUM_COUNTIES = 7
plt.figure(dpi=500, figsize=(8, 4))


r = df_county
# cs = sns.diverging_palette(20, 220, n=NUM_COUNTIES)
cs = sns.color_palette("husl", 7)
for i in range(NUM_COUNTIES):
    row = df_county.iloc[i]
    deaths = np.array([x for x in row['deaths'] if x > 0])
    cases = np.array([x for x in row['cases'] if x > 0])
    
    CASES_ALIGNMENT = 100
    idx_align = np.where(cases > CASES_ALIGNMENT)[0][0]
    n = cases.size
    
    DEATHS_ALIGNMENT = 10
    idx_align_deaths = np.where(deaths > DEATHS_ALIGNMENT)[0][0]
    n2 = deaths.size

    
    plt.subplot(R, C, 1)
    plt.plot(np.arange(n) - idx_align, cases, alpha=0.5, label=row['CountyName'] + ' County', color=cs[i])
#     plt.yscale('log')
    plt.ylabel('Cumulative confirmed cases')
    plt.xlabel(f'Days since {CASES_ALIGNMENT} cases')
    plt.legend()
    
    plt.subplot(R, C, 2)
    plt.plot(np.arange(n2) - idx_align_deaths, deaths, alpha=0.5, color=cs[i])
#     plt.yscale('log')
    plt.ylabel('Cumulative deaths')
    plt.xlabel(f'Days since {DEATHS_ALIGNMENT} deaths')
plt.tight_layout()
plt.show()

# intra-county plots

In [None]:
county_names = d[c].unique()[:NUM_COUNTIES]
R, C = 4, 1
plt.figure(figsize=(C * 3, R * 3), dpi=200)
for i in range(R * C):
    plt.subplot(R, C, i + 1)
    cn = county_names[i]
    dc = d[d[c] == cn]
    plt.barh(dc['Facility Name'][::-1], dc['Hospital Employees'][::-1])
    plt.title(cn)
    plt.xlabel('# Hospital Employees')
plt.tight_layout()
# plt.subplots_adjust(bottom=1)
plt.show()

# write to excel sheets

In [17]:
ks_output = ['Severity 1-day', 'Severity 2-day', 'Severity 3-day'] + \
            ['Hospital Name'] + ['CMS Certification Number', 'countyFIPS']

In [18]:
gc = pygsheets.authorize(service_file='creds.json')

#open the google spreadsheet (where 'PY to Gsheet Test' is the name of my sheet)
sheet_name = 'COVID Pandemic Severity Index'
sh = gc.open(sheet_name) # name of the hospital

# ventilator
wks = sh[0] #select a sheet
wks.update_value('A1', "Note: this sheet is read-only (automatically generated by the data and model)")
wks.set_dataframe(df[ks_output], (3, 1)) #update the first sheet with df, starting at cell B2. 