# Compare hospitalizations across CA counties.

This can be a useful benchmark for calibrating the model rates. We show the raw model output as well as the output after renormalizing it to the observed hospitalizations.

In [None]:
import pandas as pd
from pyseir import load_data
from datetime import datetime, timedelta
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np


# Date to benchmark on: one day before the moment this cell is executed.
one_day = timedelta(days=1)
compare_date = datetime.today() - one_day

# Inclusive lower bound on a county's current ICU count; counties below it are
# left out of the comparison.
min_current_ICU = 5



# Fetch the CA hospital dashboard CSV (hosted on data.chhs.ca.gov).
hospital_csv_url = 'https://data.chhs.ca.gov/dataset/6882c390-b2d7-4b9a-aefa-2068cee63e47/resource/6cd8d424-dfaa-4bdd-9410-a3d656e1176e/download/covid19data.csv'
df = pd.read_csv(hospital_csv_url)

# Keep only the rows reported for the comparison date, ordered by county name.
# NOTE(review): this is an exact string match, so it presumably relies on the
# feed writing 'Most Recent Date' zero-padded as MM/DD/YYYY -- confirm.
compare_date_label = compare_date.strftime('%m/%d/%Y')
reported_on_compare_date = df['Most Recent Date'] == compare_date_label
df = df[reported_on_compare_date].sort_values('County Name')


# County metadata from pyseir; it supplies the FIPS code for each county.
df_metadata = load_data.load_county_metadata()

# Build a join key for California rows by cutting the county name at ' County'.
# The assignment aligns on the index, so rows of other states get no value.
# NOTE(review): presumably the dashboard's 'County Name' omits that suffix -- confirm.
in_california = df_metadata.state == 'California'
df_metadata['raw_county'] = df_metadata[in_california].county.apply(
    lambda county_name: county_name.split(' County')[0]
)

# Attach fips/state to every dashboard row (default inner join), then drop the
# columns that the comparison does not use, including the temporary join key.
county_keys = df_metadata[['fips', 'state', 'raw_county']]
df_merged = county_keys.merge(df, left_on='raw_county', right_on='County Name')
df_merged = df_merged.drop(columns=['Total Count Confirmed', 'Total Count Deaths', 'raw_county'])



def _load_compartment_value(fips, compartment):
    """Return the model value of `compartment` for county `fips` on `compare_date`.

    Thin best-effort wrapper around `load_data.get_compartment_value_on_date`.

    Args:
        fips: County FIPS code, passed through to pyseir unchanged.
        compartment: Name of the model compartment to read (e.g. 'HGen').

    Returns:
        Whatever pyseir returns for that compartment and date, or None when
        the lookup raises an ordinary exception.
    """
    try:
        return load_data.get_compartment_value_on_date(fips, compartment, date=compare_date)
    except Exception:
        # Best effort: a county whose lookup fails is reported as missing.
        # Unlike a bare `except:`, this still lets KeyboardInterrupt and
        # SystemExit through, so a long per-county run can be interrupted.
        return None


def load_hgen(fips):
    """Return the model's 'HGen' compartment value for `fips`, or None on failure."""
    return _load_compartment_value(fips, 'HGen')


def load_hicu(fips):
    """Return the model's 'HICU' compartment value for `fips`, or None on failure."""
    return _load_compartment_value(fips, 'HICU')
    


# Model output per county on the comparison date; a failed lookup yields no value.
for predicted_column, loader in (('predicted_HGen', load_hgen), ('predicted_HICU', load_hicu)):
    df_merged[predicted_column] = df_merged['fips'].apply(loader)
df_merged['predicted_total_hosp'] = df_merged['predicted_HGen'] + df_merged['predicted_HICU']

# Dashboard observations: confirmed plus suspected patients, overall and in the ICU.
confirmed_hosp = df_merged['COVID-19 Positive Patients']
suspected_hosp = df_merged['Suspected COVID-19 Positive Patients']
df_merged['observed_total_hosp'] = confirmed_hosp + suspected_hosp

confirmed_icu = df_merged['ICU COVID-19 Positive Patients']
suspected_icu = df_merged['ICU COVID-19 Suspected Patients']
df_merged['observed_HICU'] = confirmed_icu + suspected_icu


In [None]:


# Signed fractional error of the model against the dashboard:
# (predicted - observed) / observed, for total hospitalizations and for the ICU.
hosp_residual = df_merged['predicted_total_hosp'] - df_merged['observed_total_hosp']
df_merged['fractional_error_total_hosp'] = hosp_residual / df_merged['observed_total_hosp']
icu_residual = df_merged['predicted_HICU'] - df_merged['observed_HICU']
df_merged['fractional_error_HICU'] = icu_residual / df_merged['observed_HICU']

# Restrict the comparison to counties with at least min_current_ICU observed ICU patients.
has_enough_icu = df_merged['observed_HICU'] >= min_current_ICU
df_all = df_merged[has_enough_icu]

# Overlay both error distributions on one set of bin edges.
error_bins = np.linspace(-4, 4, 51)
sns.distplot(df_all['fractional_error_HICU'], bins=error_bins, label='Total ICU (No Normalization)')
sns.distplot(df_all['fractional_error_total_hosp'], bins=error_bins, label='Total Hospitalizations (No Normalization)')
plt.grid()
plt.legend()
plt.xlabel('Fractional Error (predicted - observed) / observed')
plt.xlim(-1, 2)

# Mean signed error of each quantity over the selected counties.
icu_error_mean = np.mean(df_all['fractional_error_HICU'])
hosp_error_mean = np.mean(df_all['fractional_error_total_hosp'])
print('ICU Fractional Error Mean', icu_error_mean)
print('Hosp Fractional Error Mean', hosp_error_mean)

In [None]:
# df_all was produced by boolean-filtering df_merged; adding columns to such a
# filtered result triggers pandas' SettingWithCopyWarning. Take an explicit copy
# so the new columns are unambiguously written to df_all itself.
df_all = df_all.copy()

# Per-county factor that rescales the model so that its total hospitalizations
# match the observed total.
hosp_scale = df_all['observed_total_hosp'] / df_all['predicted_total_hosp']

# With this factor the renormalized total reproduces observed_total_hosp
# wherever the prediction is finite and non-zero; the ICU prediction is scaled
# by the same factor.
df_all['renormalized_total_hosp'] = df_all['predicted_total_hosp'] * hosp_scale
df_all['renormalized_ICU'] = df_all['predicted_HICU'] * hosp_scale

# Fractional ICU error that remains after renormalizing to total hospitalizations.
renormalized_icu_error = (df_all['renormalized_ICU'] - df_all['observed_HICU']) / df_all['observed_HICU']
sns.distplot(renormalized_icu_error, bins=np.linspace(-4, 4, 51), label='ICU (Normalized to Total Hosp)')
plt.grid()
plt.legend()
plt.xlabel('Fractional Error (predicted - observed) / observed')
plt.xlim(-1, 3)

In [None]:
# Show the comparison table for the counties that passed the ICU-count filter.
df_all