In [None]:
import os
import requests
import pprint
import pandas as pd
import geopandas as gpd
import numpy as np

from covidcaremap.constants import *
from covidcaremap.data import (read_us_states_gdf,
                               read_us_counties_gdf,
                               external_data_path,
                               published_data_path)

In [None]:
# The Airtable credential is read from the environment so it is never stored
# in the notebook itself. An unset *or empty* value is rejected up front,
# before any request is attempted.
AIRTABLE_KEY = os.environ.get('AIRTABLE_KEY')
if not AIRTABLE_KEY:
    raise Exception('AIRTABLE_KEY environment variable must be set to run this notebook.')

In [None]:
# Download every record of the "Validated" view of the Airtable
# "County Overrides" table. Airtable returns at most `pageSize` records per
# request and includes an `offset` token in the response while more pages
# remain; that token is sent back to fetch the next page.
# thanks https://community.airtable.com/t/how-to-get-more-than-100-rows-using-airtable-api-in-python-using-offest/30536/3
url = 'https://api.airtable.com/v0/appiZGFluszFy8q2r/County%20Overrides'

# Send the key as a bearer token instead of an `api_key` query parameter so the
# secret does not end up in request URLs, logs or tracebacks.
headers = {'Authorization': f'Bearer {AIRTABLE_KEY}'}

offset = None  # no offset on the first request; set from each response afterwards
results = []

while True:
    querystring = {
        'view': 'Validated',
        'maxRecords': 10000,
        'pageSize': 100,
    }
    if offset is not None:
        querystring['offset'] = offset

    resp = requests.get(url, params=querystring, headers=headers, timeout=30)
    # Fail loudly on HTTP errors: silently stopping here would leave a
    # truncated record list that the rest of the notebook would write to CSV.
    resp.raise_for_status()
    payload = resp.json()

    r = list(payload.get('records', []))
    results.append(r)
    # Progress line: id of the first record on the page (None for an empty page) and page size.
    print(r[0]['id'] if r else None, len(r))

    # No `offset` in the response means this was the last page.
    offset = payload.get('offset')
    if not offset:
        break

# Flatten the list of pages into a single list of records.
records = [item for sublist in results for item in sublist]
print(len(records))

In [None]:
def calc_icu_missing(fields_dict):
    """Derive the missing ICU bed figure for one record and its occupancy rate.

    The three related fields are 'Staffed ICU Beds', 'ICU Beds Currently
    Available' and 'ICU Beds Currently Occupied', with
    staffed = available + occupied.

    * If at least two of them are present, the remaining one is derived and
      'ICU Bed Occupancy Rate' (occupied / staffed) is stored. When both
      available and occupied are present, staffed is always set to their sum,
      even if a staffed value was supplied. The rate is NaN when staffed is 0.
    * Otherwise each absent field is set to NaN (with a printed notice) and no
      occupancy rate is computed.

    Args:
        fields_dict: dict mapping field name to value for a single record.
            It is modified in place.

    Returns:
        The same ``fields_dict`` object, updated as described above.
    """
    cols = ['Staffed ICU Beds',
            'ICU Beds Currently Available',
            'ICU Beds Currently Occupied',
            ]

    # Snapshot which of the three fields were supplied before anything is added.
    present = [col for col in cols if col in fields_dict]

    # if 2 of the 3 above cols present, calc the missing col, otherwise leave empty cols as nans
    if len(present) >= 2:

        if 'ICU Beds Currently Available' in present and 'ICU Beds Currently Occupied' in present:
            fields_dict['Staffed ICU Beds'] = fields_dict['ICU Beds Currently Available'] + fields_dict['ICU Beds Currently Occupied']
        elif 'ICU Beds Currently Occupied' in present:
            fields_dict['ICU Beds Currently Available'] = fields_dict['Staffed ICU Beds'] - fields_dict['ICU Beds Currently Occupied']
        else:
            fields_dict['ICU Beds Currently Occupied'] = fields_dict['Staffed ICU Beds'] - fields_dict['ICU Beds Currently Available']

        staffed = fields_dict['Staffed ICU Beds']
        occupied = fields_dict['ICU Beds Currently Occupied']
        # Zero staffed beds has no meaningful occupancy rate; store NaN rather
        # than letting a ZeroDivisionError abort the whole run.
        fields_dict['ICU Bed Occupancy Rate'] = occupied / staffed if staffed != 0 else np.nan

    else:
        for col in cols:
            if col not in present:
                print(f'not enough fields entered to calculate, leaving {col} as nan')
                fields_dict[col] = np.nan

    return fields_dict

In [None]:
# Split the downloaded records by granularity: a record whose fields include
# 'County Name' is a county override, every other record is a state override.
county_records = []
state_records = []

for rec in records:
    (county_records if 'County Name' in rec['fields'] else state_records).append(rec)

In [None]:
# Display the county-level records (those with a 'County Name' field) for inspection.
county_records

In [None]:
# Display the state-level records (those without a 'County Name' field) for inspection.
state_records

In [None]:
# Fields kept from each state-level record once the ICU figures are completed.
state_cols = ['State Name', 'ICU Bed Occupancy Rate', 'Staffed ICU Beds', 'Source Date', 'Manual Override New Data Source']

# calc_icu_missing never raises AssertionError (it prints a notice and fills
# NaN when too few ICU fields are present), so no exception handler is needed here.
state_override_list = [
    {k: v for k, v in calc_icu_missing(rec['fields']).items() if k in state_cols}
    for rec in state_records
]

# Passing `columns` fixes the schema: every expected column exists (as NaN when
# no record supplied it), so the 'Source Date' conversion below cannot raise
# KeyError, even when there are no state records at all.
state_df = pd.DataFrame(state_override_list, columns=state_cols)
state_df['Source Date'] = pd.to_datetime(state_df['Source Date'])

In [None]:
# Fields kept from each county-level record once the ICU figures are completed.
county_cols = ['State Name', 'County Name', 'ICU Bed Occupancy Rate', 'Staffed ICU Beds', 'Source Date', 'Manual Override New Data Source']

# calc_icu_missing never raises AssertionError (it prints a notice and fills
# NaN when too few ICU fields are present), so no exception handler is needed here.
county_override_list = [
    {k: v for k, v in calc_icu_missing(rec['fields']).items() if k in county_cols}
    for rec in county_records
]

# Passing `columns` fixes the schema: every expected column exists (as NaN when
# no record supplied it), so the 'Source Date' conversion below cannot raise
# KeyError, even when there are no county records at all.
county_df = pd.DataFrame(county_override_list, columns=county_cols)
county_df['Source Date'] = pd.to_datetime(county_df['Source Date'])

In [None]:
def _abbreviate(state_name):
    """Return the entry of `state_name_to_abbreviation` for `state_name` (KeyError if absent)."""
    return state_name_to_abbreviation[state_name]


# Add a 'State' column holding the abbreviation that corresponds to 'State Name'.
state_df['State'] = state_df['State Name'].apply(_abbreviate)
county_df['State'] = county_df['State Name'].apply(_abbreviate)

In [None]:
# Keep one override per state and per (state, county): rows are ordered
# newest 'Source Date' first, and drop_duplicates retains the first row it
# sees for each key.
state_df_dedup = (
    state_df
    .sort_values(by='Source Date', ascending=False)
    .drop_duplicates(subset='State', keep='first')
)
county_df_dedup = (
    county_df
    .sort_values(by='Source Date', ascending=False)
    .drop_duplicates(subset=['State', 'County Name'], keep='first')
)

In [None]:
# Display all state overrides, before de-duplication.
state_df

In [None]:
# Display the state overrides after de-duplication by 'State'.
state_df_dedup

In [None]:
# Display all county overrides, before de-duplication.
county_df

In [None]:
# Display the county overrides after de-duplication by ('State', 'County Name').
county_df_dedup

In [None]:
# Load the published state- and county-level capacity tables. The key columns
# are read as strings ('fips_code' presumably to keep leading zeros - TODO confirm).
state_csv = published_data_path('us_healthcare_capacity-state-CovidCareMap.csv')
state_data = pd.read_csv(state_csv, dtype={'State': str})

county_csv = published_data_path('us_healthcare_capacity-county-CovidCareMap.csv')
county_data = pd.read_csv(county_csv, dtype={'State': str, 'fips_code': str})

In [None]:
# Inspect the column dtypes of the loaded county capacity table.
county_data.dtypes

In [None]:
# Left-join the published state data onto the overrides by 'State'. Columns
# present on both sides keep the override value under the original name; the
# published copy gets a '_right' suffix.
state_df_dedup = state_df_dedup.merge(
    state_data,
    how='left',
    on='State',
    suffixes=('', '_right'),
)


In [None]:
# Discard every column whose name contains '_right' together with the
# 'ICU Bed Source' / 'ICU Bed Source Last Updated' columns.
# Reassigning with errors='ignore' (instead of an in-place drop) keeps this
# cell safe to re-run: a second run no longer raises KeyError because the
# columns are already gone.
state_df_dedup = state_df_dedup.drop(
    columns=[col for col in state_df_dedup.columns if '_right' in col]
            + ['ICU Bed Source', 'ICU Bed Source Last Updated'],
    errors='ignore',
)

In [None]:
# Display the de-duplicated state overrides for inspection.
state_df_dedup

In [None]:
# Display the de-duplicated county overrides for inspection.
county_df_dedup

In [None]:
# Left-join the published county data onto the overrides by ('State',
# 'County Name'). Columns present on both sides keep the override value under
# the original name; the published copy gets a '_right' suffix.
county_df_dedup = county_df_dedup.merge(
    county_data,
    how='left',
    on=['State', 'County Name'],
    suffixes=('', '_right'),
)

In [None]:
# Discard every column whose name contains '_right' together with the
# 'ICU Bed Source' / 'ICU Bed Source Last Updated' columns.
# Reassigning with errors='ignore' (instead of an in-place drop) keeps this
# cell safe to re-run: a second run no longer raises KeyError because the
# columns are already gone.
county_df_dedup = county_df_dedup.drop(
    columns=[col for col in county_df_dedup.columns if '_right' in col]
            + ['ICU Bed Source', 'ICU Bed Source Last Updated'],
    errors='ignore',
)

In [None]:
# Display the de-duplicated county overrides for inspection.
county_df_dedup

In [None]:
# Tag every override row, county and state alike, with a constant reason string.
for override_frame in (county_df_dedup, state_df_dedup):
    override_frame['Manual Override Reason'] = 'Airtable update flow'

In [None]:
# Staffed ICU beds per PER_CAPITA_BASE residents for each population group,
# rounded to 3 decimals. Each pair is (output column, population column).
state_staffed_icu = state_df_dedup['Staffed ICU Beds']
for rate_col, population_col in (
    ('Staffed ICU Beds [Per 1000 People]', 'Population'),
    ('Staffed ICU Beds [Per 1000 Adults (20+)]', 'Population (20+)'),
    ('Staffed ICU Beds [Per 1000 Elderly (65+)]', 'Population (65+)'),
):
    population_units = state_df_dedup[population_col] / PER_CAPITA_BASE
    state_df_dedup[rate_col] = (state_staffed_icu / population_units).round(3)

In [None]:
# Staffed ICU beds per PER_CAPITA_BASE residents for each population group,
# rounded to 3 decimals. Each pair is (output column, population column).
county_staffed_icu = county_df_dedup['Staffed ICU Beds']
for rate_col, population_col in (
    ('Staffed ICU Beds [Per 1000 People]', 'Population'),
    ('Staffed ICU Beds [Per 1000 Adults (20+)]', 'Population (20+)'),
    ('Staffed ICU Beds [Per 1000 Elderly (65+)]', 'Population (65+)'),
):
    population_units = county_df_dedup[population_col] / PER_CAPITA_BASE
    county_df_dedup[rate_col] = (county_staffed_icu / population_units).round(3)

In [None]:
# Transposed view of the final state overrides: fields become rows, so every column is visible.
state_df_dedup.T

In [None]:
# Transposed view of the final county overrides: fields become rows, so every column is visible.
county_df_dedup.T

In [None]:
# Write the final state and county override tables as CSV files (without the
# index) at the locations given by external_data_path.
for override_frame, csv_name in (
    (state_df_dedup, 'covidcaremap-ushcsc-state-manual-override.csv'),
    (county_df_dedup, 'covidcaremap-ushcsc-county-manual-override.csv'),
):
    override_frame.to_csv(external_data_path(csv_name), index=False)